2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
93 #include <sys/param.h>
97 #endif /* ITIMER_REAL */
103 #include <sys/stat.h>
108 #include <WINNT/afsevent.h>
110 #if defined(AFS_AIX_ENV) || defined(AFS_SUN4_ENV)
111 #define WCOREDUMP(x) (x & 0200)
114 #include <afs/afsint.h>
115 #include <afs/assert.h>
116 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
117 #if defined(AFS_VFSINCL_ENV)
118 #include <sys/vnode.h>
120 #include <sys/fs/ufs_inode.h>
122 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
123 #include <ufs/ufs/dinode.h>
124 #include <ufs/ffs/fs.h>
126 #include <ufs/inode.h>
129 #else /* AFS_VFSINCL_ENV */
131 #include <ufs/inode.h>
132 #else /* AFS_OSF_ENV */
133 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
134 #include <sys/inode.h>
137 #endif /* AFS_VFSINCL_ENV */
138 #endif /* AFS_SGI_ENV */
141 #include <sys/lockf.h>
145 #include <checklist.h>
147 #if defined(AFS_SGI_ENV)
152 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
155 #include <sys/mnttab.h>
156 #include <sys/mntent.h>
161 #endif /* AFS_SGI_ENV */
162 #endif /* AFS_HPUX_ENV */
167 #include <afs/osi_inode.h>
170 #include <afs/afsutil.h>
171 #include <afs/fileutil.h>
172 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
180 #include <afs/afssyscalls.h>
184 #include "partition.h"
185 #include "daemon_com.h"
187 #include "salvsync.h"
188 #include "viceinode.h"
190 #include "volinodes.h" /* header magic number, etc. stuff */
191 #include "vol-salvage.h"
196 /*@+fcnmacros +macrofcndecl@*/
199 extern off64_t afs_lseek(int FD, off64_t O, int F);
200 #endif /*S_SPLINT_S */
201 #define afs_lseek(FD, O, F) lseek64(FD, (off64_t) (O), F)
202 #define afs_stat stat64
203 #define afs_fstat fstat64
204 #define afs_open open64
205 #define afs_fopen fopen64
206 #else /* !O_LARGEFILE */
208 extern off_t afs_lseek(int FD, off_t O, int F);
209 #endif /*S_SPLINT_S */
210 #define afs_lseek(FD, O, F) lseek(FD, (off_t) (O), F)
211 #define afs_stat stat
212 #define afs_fstat fstat
213 #define afs_open open
214 #define afs_fopen fopen
215 #endif /* !O_LARGEFILE */
216 /*@=fcnmacros =macrofcndecl@*/
219 extern void *calloc();
221 static char *TimeStamp(time_t clock, int precision);
224 int debug; /* -d flag */
225 extern int Testing; /* -n flag */
226 int ListInodeOption; /* -i flag */
227 int ShowRootFiles; /* -r flag */
228 int RebuildDirs; /* -sal flag */
229 int Parallel = 4; /* -para X flag */
230 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
231 int forceR = 0; /* -b flag */
232 int ShowLog = 0; /* -showlog flag */
233 int ShowSuid = 0; /* -showsuid flag */
234 int ShowMounts = 0; /* -showmounts flag */
235 int orphans = ORPH_IGNORE; /* -orphans option */
240 int useSyslog = 0; /* -syslog flag */
241 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
250 #define MAXPARALLEL 32
252 int OKToZap; /* -o flag */
253 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
254 * in the volume header */
256 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
258 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
260 Device fileSysDevice; /* The device number of the current
261 * partition being salvaged */
265 char *fileSysPath; /* The path of the mounted partition currently
266 * being salvaged, i.e. the directory
267 * containing the volume headers */
269 char *fileSysPathName; /* NT needs this to make name pretty in log. */
270 IHandle_t *VGLinkH; /* Link handle for current volume group. */
271 int VGLinkH_cnt; /* # of references to lnk handle. */
272 struct DiskPartition64 *fileSysPartition; /* Partition being salvaged */
274 char *fileSysDeviceName; /* The block device where the file system
275 * being salvaged was mounted */
276 char *filesysfulldev;
278 int VolumeChanged; /* Set by any routine which would change the volume in
279 * a way which would require callbacks to be broken if the
280 * volume was put back on line by an active file server */
282 VolumeDiskData VolInfo; /* A copy of the last good or salvaged volume header dealt with */
284 int nVolumesInInodeFile; /* Number of read-write volumes summarized */
285 int inodeFd; /* File descriptor for inode file */
288 struct VnodeInfo vnodeInfo[nVNODECLASSES];
291 struct VolumeSummary *volumeSummaryp; /* Holds all the volumes in a part */
292 int nVolumes; /* Number of volumes (read-write and read-only)
293 * in volume summary */
299 /* Forward declarations */
300 /*@printflike@*/ void Log(const char *format, ...);
301 /*@printflike@*/ void Abort(const char *format, ...);
302 static int IsVnodeOrphaned(VnodeId vnode);
304 /* Uniquifier stored in the Inode */
309 return (u & 0x3fffff);
311 #if defined(AFS_SGI_EXMAG)
312 return (u & SGI_UNIQMASK);
315 #endif /* AFS_SGI_EXMAG */
320 BadError(register int aerror)
322 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
324 return 0; /* otherwise may be transient, e.g. EMFILE */
329 char *save_args[MAX_ARGS];
331 extern pthread_t main_thread;
332 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
335 /* Get the salvage lock if not already held. Hold until process exits. */
337 ObtainSalvageLock(void)
343 (int)CreateFile(AFSDIR_SERVER_SLVGLOCK_FILEPATH, 0, 0, NULL,
344 OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
345 if (salvageLock == (int)INVALID_HANDLE_VALUE) {
347 "salvager: There appears to be another salvager running! Aborted.\n");
352 afs_open(AFSDIR_SERVER_SLVGLOCK_FILEPATH, O_CREAT | O_RDWR, 0666);
353 if (salvageLock < 0) {
355 "salvager: can't open salvage lock file %s, aborting\n",
356 AFSDIR_SERVER_SLVGLOCK_FILEPATH);
359 #ifdef AFS_DARWIN_ENV
360 if (flock(salvageLock, LOCK_EX) == -1) {
362 if (lockf(salvageLock, F_LOCK, 0) == -1) {
365 "salvager: There appears to be another salvager running! Aborted.\n");
372 #ifdef AFS_SGI_XFS_IOPS_ENV
373 /* Check if the given partition is mounted. For XFS, the root inode is not a
374 * constant. So we check the hard way.
377 IsPartitionMounted(char *part)
380 struct mntent *mntent;
382 assert(mntfp = setmntent(MOUNTED, "r"));
383 while (mntent = getmntent(mntfp)) {
384 if (!strcmp(part, mntent->mnt_dir))
389 return mntent ? 1 : 1;
392 /* Check if the given inode is the root of the filesystem. */
393 #ifndef AFS_SGI_XFS_IOPS_ENV
395 IsRootInode(struct afs_stat *status)
398 * The root inode is not a fixed value in XFS partitions. So we need to
399 * see if the partition is in the list of mounted partitions. This only
400 * affects the SalvageFileSys path, so we check there.
402 return (status->st_ino == ROOTINODE);
407 #ifndef AFS_NAMEI_ENV
408 /* We don't want to salvage big files filesystems, since we can't put volumes on
412 CheckIfBigFilesFS(char *mountPoint, char *devName)
414 struct superblock fs;
417 if (strncmp(devName, "/dev/", 5)) {
418 (void)sprintf(name, "/dev/%s", devName);
420 (void)strcpy(name, devName);
423 if (ReadSuper(&fs, name) < 0) {
424 Log("Unable to read superblock. Not salvaging partition %s.\n",
428 if (IsBigFilesFileSystem(&fs)) {
429 Log("Partition %s is a big files filesystem, not salvaging.\n",
439 #define HDSTR "\\Device\\Harddisk"
440 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
442 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
447 static int dowarn = 1;
449 if (!QueryDosDevice(p1->devName, res, RES_LEN - 1))
451 if (strncmp(res, HDSTR, HDLEN)) {
454 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
455 res, HDSTR, p1->devName);
459 d1 = atoi(&res[HDLEN]);
461 if (!QueryDosDevice(p2->devName, res, RES_LEN - 1))
463 if (strncmp(res, HDSTR, HDLEN)) {
466 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
467 res, HDSTR, p2->devName);
471 d2 = atoi(&res[HDLEN]);
476 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
479 /* This assumes that two partitions with the same device number divided by
480 * PartsPerDisk are on the same disk.
483 SalvageFileSysParallel(struct DiskPartition64 *partP)
486 struct DiskPartition64 *partP;
487 int pid; /* Pid for this job */
488 int jobnumb; /* Log file job number */
489 struct job *nextjob; /* Next partition on disk to salvage */
491 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
492 struct job *thisjob = 0;
493 static int numjobs = 0;
494 static int jobcount = 0;
500 char logFileName[256];
504 /* We have a partition to salvage. Copy it into thisjob */
505 thisjob = (struct job *)malloc(sizeof(struct job));
507 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
510 memset(thisjob, 0, sizeof(struct job));
511 thisjob->partP = partP;
512 thisjob->jobnumb = jobcount;
514 } else if (jobcount == 0) {
515 /* We are asking to wait for all jobs (partP == 0), yet we never
518 Log("No file system partitions named %s* found; not salvaged\n",
519 VICE_PARTITION_PREFIX);
523 if (debug || Parallel == 1) {
525 SalvageFileSys(thisjob->partP, 0);
532 /* Check to see if thisjob is for a disk that we are already
533 * salvaging. If it is, link it in as the next job to do. The
534 * jobs array has 1 entry per disk being salvaged. numjobs is
535 * the total number of disks currently being salvaged. In
536 * order to keep the jobs array compact, when a disk is
537 * completed, the highest element in the jobs array is moved
538 * down to the now open slot.
540 for (j = 0; j < numjobs; j++) {
541 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
542 /* On same disk, add it to this list and return */
543 thisjob->nextjob = jobs[j]->nextjob;
544 jobs[j]->nextjob = thisjob;
551 /* Loop until we start thisjob or until all existing jobs are finished */
552 while (thisjob || (!partP && (numjobs > 0))) {
553 startjob = -1; /* No new job to start */
555 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
556 /* Either the max jobs are running or we have to wait for all
557 * the jobs to finish. In either case, we wait for at least one
558 * job to finish. When it's done, clean up after it.
560 pid = wait(&wstatus);
562 for (j = 0; j < numjobs; j++) { /* Find which job it is */
563 if (pid == jobs[j]->pid)
567 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
568 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
571 numjobs--; /* job no longer running */
572 oldjob = jobs[j]; /* remember */
573 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
574 free(oldjob); /* free the old job */
576 /* If there is another partition on the disk to salvage, then
577 * say we will start it (startjob). If not, then put thisjob there
578 * and say we will start it.
580 if (jobs[j]) { /* Another partitions to salvage */
581 startjob = j; /* Will start it */
582 } else { /* There is not another partition to salvage */
584 jobs[j] = thisjob; /* Add thisjob */
586 startjob = j; /* Will start it */
588 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
589 startjob = -1; /* Don't start it - already running */
593 /* We don't have to wait for a job to complete */
595 jobs[numjobs] = thisjob; /* Add this job */
597 startjob = numjobs; /* Will start it */
601 /* Start up a new salvage job on a partition in job slot "startjob" */
602 if (startjob != -1) {
604 Log("Starting salvage of file system partition %s\n",
605 jobs[startjob]->partP->name);
607 /* For NT, we not only fork, but re-exec the salvager. Pass in the
608 * commands and pass the child job number via the data path.
611 nt_SalvagePartition(jobs[startjob]->partP->name,
612 jobs[startjob]->jobnumb);
613 jobs[startjob]->pid = pid;
618 jobs[startjob]->pid = pid;
624 for (fd = 0; fd < 16; fd++)
631 openlog("salvager", LOG_PID, useSyslogFacility);
635 (void)afs_snprintf(logFileName, sizeof logFileName,
637 AFSDIR_SERVER_SLVGLOG_FILEPATH,
638 jobs[startjob]->jobnumb);
639 logFile = afs_fopen(logFileName, "w");
644 SalvageFileSys1(jobs[startjob]->partP, 0);
649 } /* while ( thisjob || (!partP && numjobs > 0) ) */
651 /* If waited for all jobs to complete, now collect log files and return */
653 if (!useSyslog) /* if syslogging - no need to collect */
656 for (i = 0; i < jobcount; i++) {
657 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
658 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
659 if ((passLog = afs_fopen(logFileName, "r"))) {
660 while (fgets(buf, sizeof(buf), passLog)) {
665 (void)unlink(logFileName);
674 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
676 if (!canfork || debug || Fork() == 0) {
677 SalvageFileSys1(partP, singleVolumeNumber);
678 if (canfork && !debug) {
683 Wait("SalvageFileSys");
687 get_DevName(char *pbuffer, char *wpath)
689 char pbuf[128], *ptr;
690 strcpy(pbuf, pbuffer);
691 ptr = (char *)strrchr(pbuf, '/');
697 ptr = (char *)strrchr(pbuffer, '/');
699 strcpy(pbuffer, ptr + 1);
706 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
709 char inodeListPath[256];
710 static char tmpDevName[100];
711 static char wpath[100];
712 struct VolumeSummary *vsp, *esp;
715 fileSysPartition = partP;
716 fileSysDevice = fileSysPartition->device;
717 fileSysPathName = VPartitionPath(fileSysPartition);
720 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
721 (void)sprintf(fileSysPath, "%s\\", fileSysPathName);
722 name = partP->devName;
724 fileSysPath = fileSysPathName;
725 strcpy(tmpDevName, partP->devName);
726 name = get_DevName(tmpDevName, wpath);
727 fileSysDeviceName = name;
728 filesysfulldev = wpath;
731 VLockPartition(partP->name);
732 if (singleVolumeNumber || ForceSalvage)
735 ForceSalvage = UseTheForceLuke(fileSysPath);
737 if (singleVolumeNumber) {
738 /* salvageserver already setup fssync conn for us */
739 if ((programType != salvageServer) && !VConnectFS()) {
740 Abort("Couldn't connect to file server\n");
742 AskOffline(singleVolumeNumber, partP->name);
745 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
746 partP->name, name, (Testing ? "(READONLY mode)" : ""));
748 Log("***Forced salvage of all volumes on this partition***\n");
753 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
760 assert((dirp = opendir(fileSysPath)) != NULL);
761 while ((dp = readdir(dirp))) {
762 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
763 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
765 Log("Removing old salvager temp files %s\n", dp->d_name);
766 strcpy(npath, fileSysPath);
768 strcat(npath, dp->d_name);
774 tdir = (tmpdir ? tmpdir : fileSysPath);
776 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
777 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
779 snprintf(inodeListPath, 255, "%s/salvage.inodes.%s.%d", tdir, name,
782 if (GetInodeSummary(inodeListPath, singleVolumeNumber) < 0) {
783 unlink(inodeListPath);
787 /* Using nt_unlink here since we're really using the delete on close
788 * semantics of unlink. In most places in the salvager, we really do
789 * mean to unlink the file at that point. Those places have been
790 * modified to actually do that so that the NT crt can be used there.
793 _open_osfhandle((long)nt_open(inodeListPath, O_RDWR, 0), O_RDWR);
794 nt_unlink(inodeListPath); /* NT's crt unlink won't if file is open. */
796 inodeFd = afs_open(inodeListPath, O_RDONLY);
797 unlink(inodeListPath);
800 Abort("Temporary file %s is missing...\n", inodeListPath);
801 if (ListInodeOption) {
805 /* enumerate volumes in the partition.
806 * figure out sets of read-only + rw volumes.
807 * salvage each set, read-only volumes first, then read-write.
808 * Fix up inodes on last volume in set (whether it is read-write
811 GetVolumeSummary(singleVolumeNumber);
813 for (i = j = 0, vsp = volumeSummaryp, esp = vsp + nVolumes;
814 i < nVolumesInInodeFile; i = j) {
815 VolumeId rwvid = inodeSummary[i].RWvolumeId;
817 j < nVolumesInInodeFile && inodeSummary[j].RWvolumeId == rwvid;
819 VolumeId vid = inodeSummary[j].volumeId;
820 struct VolumeSummary *tsp;
821 /* Scan volume list (from partition root directory) looking for the
822 * current rw volume number in the volume list from the inode scan.
823 * If there is one here that is not in the inode volume list,
825 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
827 DeleteExtraVolumeHeaderFile(vsp);
829 /* Now match up the volume summary info from the root directory with the
830 * entry in the volume list obtained from scanning inodes */
831 inodeSummary[j].volSummary = NULL;
832 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
833 if (tsp->header.id == vid) {
834 inodeSummary[j].volSummary = tsp;
840 /* Salvage the group of volumes (several read-only + 1 read/write)
841 * starting with the current read-only volume we're looking at.
843 SalvageVolumeGroup(&inodeSummary[i], j - i);
846 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
847 for (; vsp < esp; vsp++) {
849 DeleteExtraVolumeHeaderFile(vsp);
852 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
853 RemoveTheForce(fileSysPath);
855 if (!Testing && singleVolumeNumber) {
856 AskOnline(singleVolumeNumber, fileSysPartition->name);
858 /* Step through the volumeSummary list and set all volumes on-line.
859 * The volumes were taken off-line in GetVolumeSummary.
861 for (j = 0; j < nVolumes; j++) {
862 AskOnline(volumeSummaryp[j].header.id, fileSysPartition->name);
866 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
867 fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
870 close(inodeFd); /* SalvageVolumeGroup was the last which needed it. */
874 DeleteExtraVolumeHeaderFile(register struct VolumeSummary *vsp)
877 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", vsp->fileName, (Testing ? "would have been " : ""));
879 unlink(vsp->fileName);
883 CompareInodes(const void *_p1, const void *_p2)
885 register const struct ViceInodeInfo *p1 = _p1;
886 register const struct ViceInodeInfo *p2 = _p2;
887 if (p1->u.vnode.vnodeNumber == INODESPECIAL
888 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
889 VolumeId p1rwid, p2rwid;
891 (p1->u.vnode.vnodeNumber ==
892 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
894 (p2->u.vnode.vnodeNumber ==
895 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
900 if (p1->u.vnode.vnodeNumber == INODESPECIAL
901 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
902 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
903 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
904 if (p1->u.vnode.volumeId == p1rwid)
906 if (p2->u.vnode.volumeId == p2rwid)
908 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
910 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
911 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
912 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
914 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
916 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
918 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
920 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
922 /* The following tests are reversed, so that the most desirable
923 * of several similar inodes comes first */
924 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
926 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
927 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
931 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
932 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
937 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
939 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
940 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
944 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
945 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
950 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
952 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
953 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
957 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
958 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
963 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
965 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
966 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
970 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
971 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
980 CountVolumeInodes(register struct ViceInodeInfo *ip, int maxInodes,
981 register struct InodeSummary *summary)
983 int volume = ip->u.vnode.volumeId;
984 int rwvolume = volume;
985 register n, nSpecial;
986 register Unique maxunique;
989 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
991 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
993 rwvolume = ip->u.special.parentId;
994 /* This isn't quite right, as there could (in error) be different
995 * parent inodes in different special vnodes */
997 if (maxunique < ip->u.vnode.vnodeUniquifier)
998 maxunique = ip->u.vnode.vnodeUniquifier;
1002 summary->volumeId = volume;
1003 summary->RWvolumeId = rwvolume;
1004 summary->nInodes = n;
1005 summary->nSpecialInodes = nSpecial;
1006 summary->maxUniquifier = maxunique;
1010 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, int singleVolumeNumber, void *rock)
1012 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1013 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1014 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1019 * Collect list of inodes in file named by path. If a truly fatal error,
1020 * unlink the file and abort. For lesser errors, return -1. The file will
1021 * be unlinked by the caller.
1024 GetInodeSummary(char *path, VolumeId singleVolumeNumber)
1026 struct afs_stat status;
1028 struct ViceInodeInfo *ip;
1029 struct InodeSummary summary;
1030 char summaryFileName[50];
1033 char *dev = fileSysPath;
1034 char *wpath = fileSysPath;
1036 char *dev = fileSysDeviceName;
1037 char *wpath = filesysfulldev;
1039 char *part = fileSysPath;
1042 /* This file used to come from vfsck; cobble it up ourselves now... */
1044 ListViceInodes(dev, fileSysPath, path,
1045 singleVolumeNumber ? OnlyOneVolume : 0,
1046 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1048 Log("*** I/O error %d when writing a tmp inode file %s; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, path, dev);
1052 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1054 if (forceSal && !ForceSalvage) {
1055 Log("***Forced salvage of all volumes on this partition***\n");
1058 inodeFd = afs_open(path, O_RDWR);
1059 if (inodeFd == -1 || afs_fstat(inodeFd, &status) == -1) {
1061 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1063 tdir = (tmpdir ? tmpdir : part);
1065 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1066 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp"));
1068 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1069 "%s/salvage.temp.%d", tdir, getpid());
1071 summaryFile = afs_fopen(summaryFileName, "a+");
1072 if (summaryFile == NULL) {
1075 Abort("Unable to create inode summary file\n");
1077 if (!canfork || debug || Fork() == 0) {
1079 unsigned long st_size=(unsigned long) status.st_size;
1080 nInodes = st_size / sizeof(struct ViceInodeInfo);
1082 fclose(summaryFile);
1084 unlink(summaryFileName);
1085 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1086 RemoveTheForce(fileSysPath);
1088 struct VolumeSummary *vsp;
1091 GetVolumeSummary(singleVolumeNumber);
1093 for (i = 0, vsp = volumeSummaryp; i < nVolumes; i++) {
1095 DeleteExtraVolumeHeaderFile(vsp);
1098 Log("%s vice inodes on %s; not salvaged\n",
1099 singleVolumeNumber ? "No applicable" : "No", dev);
1102 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1104 fclose(summaryFile);
1107 unlink(summaryFileName);
1109 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1112 if (read(inodeFd, ip, st_size) != st_size) {
1113 fclose(summaryFile);
1116 unlink(summaryFileName);
1117 Abort("Unable to read inode table; %s not salvaged\n", dev);
1119 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1120 if (afs_lseek(inodeFd, 0, SEEK_SET) == -1
1121 || write(inodeFd, ip, st_size) != st_size) {
1122 fclose(summaryFile);
1125 unlink(summaryFileName);
1126 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1130 CountVolumeInodes(ip, nInodes, &summary);
1131 if (fwrite(&summary, sizeof(summary), 1, summaryFile) != 1) {
1132 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1133 fclose(summaryFile);
1137 summary.index += (summary.nInodes);
1138 nInodes -= summary.nInodes;
1139 ip += summary.nInodes;
1141 /* The following is an fflush rather than an fclose, because if it were an fclose, debug mode would not work */
1142 if (fflush(summaryFile) == EOF || fsync(fileno(summaryFile)) == -1) {
1143 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1144 fclose(summaryFile);
1148 if (canfork && !debug) {
1153 if (Wait("Inode summary") == -1) {
1154 fclose(summaryFile);
1157 unlink(summaryFileName);
1158 Exit(1); /* salvage of this partition aborted */
1161 assert(afs_fstat(fileno(summaryFile), &status) != -1);
1162 if (status.st_size != 0) {
1164 unsigned long st_status=(unsigned long)status.st_size;
1165 inodeSummary = (struct InodeSummary *)malloc(st_status);
1166 assert(inodeSummary != NULL);
1167 /* For GNU we need to do lseek to get the file pointer moved. */
1168 assert(afs_lseek(fileno(summaryFile), 0, SEEK_SET) == 0);
1169 ret = read(fileno(summaryFile), inodeSummary, st_status);
1170 assert(ret == st_status);
1172 nVolumesInInodeFile =(unsigned long)(status.st_size) / sizeof(struct InodeSummary);
1173 Log("%d nVolumesInInodeFile %d \n",nVolumesInInodeFile,(unsigned long)(status.st_size));
1174 fclose(summaryFile);
1176 unlink(summaryFileName);
1180 /* Comparison routine for volume sort.
1181 This is set up so that a read-write volume comes immediately before
1182 any read-only clones of that volume */
1184 CompareVolumes(const void *_p1, const void *_p2)
1186 register const struct VolumeSummary *p1 = _p1;
1187 register const struct VolumeSummary *p2 = _p2;
1188 if (p1->header.parent != p2->header.parent)
1189 return p1->header.parent < p2->header.parent ? -1 : 1;
1190 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1192 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1194 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1198 GetVolumeSummary(VolumeId singleVolumeNumber)
1201 afs_int32 nvols = 0;
1202 struct VolumeSummary *vsp, vs;
1203 struct VolumeDiskHeader diskHeader;
1206 /* Get headers from volume directory */
1207 if (chdir(fileSysPath) == -1 || (dirp = opendir(".")) == NULL)
1208 Abort("Can't read directory %s; not salvaged\n", fileSysPath);
1209 if (!singleVolumeNumber) {
1210 while ((dp = readdir(dirp))) {
1211 char *p = dp->d_name;
1212 p = strrchr(dp->d_name, '.');
1213 if (p != NULL && strcmp(p, VHDREXT) == 0) {
1215 if ((fd = afs_open(dp->d_name, O_RDONLY)) != -1
1216 && read(fd, (char *)&diskHeader, sizeof(diskHeader))
1217 == sizeof(diskHeader)
1218 && diskHeader.stamp.magic == VOLUMEHEADERMAGIC) {
1219 DiskToVolumeHeader(&vs.header, &diskHeader);
1227 dirp = opendir("."); /* No rewinddir for NT */
1234 (struct VolumeSummary *)malloc(nvols *
1235 sizeof(struct VolumeSummary));
1238 (struct VolumeSummary *)malloc(20 * sizeof(struct VolumeSummary));
1239 assert(volumeSummaryp != NULL);
1242 vsp = volumeSummaryp;
1243 while ((dp = readdir(dirp))) {
1244 char *p = dp->d_name;
1245 p = strrchr(dp->d_name, '.');
1246 if (p != NULL && strcmp(p, VHDREXT) == 0) {
1249 if ((fd = afs_open(dp->d_name, O_RDONLY)) == -1
1250 || read(fd, &diskHeader, sizeof(diskHeader))
1251 != sizeof(diskHeader)
1252 || diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
1257 if (!singleVolumeNumber) {
1259 Log("%s/%s is not a legitimate volume header file; %sdeleted\n", fileSysPathName, dp->d_name, (Testing ? "it would have been " : ""));
1264 char nameShouldBe[64];
1265 DiskToVolumeHeader(&vsp->header, &diskHeader);
1266 if (singleVolumeNumber && vsp->header.id == singleVolumeNumber
1267 && vsp->header.parent != singleVolumeNumber) {
1268 if (programType == salvageServer) {
1269 #ifdef SALVSYNC_BUILD_CLIENT
1270 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1271 vsp->header.id, vsp->header.parent);
1272 if (SALVSYNC_LinkVolume(vsp->header.parent,
1274 fileSysPartition->name,
1276 Log("schedule request failed\n");
1279 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1281 Log("%u is a read-only volume; not salvaged\n",
1282 singleVolumeNumber);
1286 if (!singleVolumeNumber
1287 || (vsp->header.id == singleVolumeNumber
1288 || vsp->header.parent == singleVolumeNumber)) {
1289 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1290 VFORMAT, vsp->header.id);
1291 if (singleVolumeNumber)
1292 AskOffline(vsp->header.id, fileSysPartition->name);
1293 if (strcmp(nameShouldBe, dp->d_name)) {
1295 Log("Volume header file %s is incorrectly named; %sdeleted (it will be recreated later, if necessary)\n", dp->d_name, (Testing ? "it would have been " : ""));
1299 vsp->fileName = ToString(dp->d_name);
1309 qsort(volumeSummaryp, nVolumes, sizeof(struct VolumeSummary),
1313 /* Find the link table. This should be associated with the RW volume or, if
1314 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
/*
 * FindLinkHandle: scan the special inodes of each of the nVols volumes
 * summarized in isp[] and return the inode number of the first one whose
 * special type is VI_LINKTABLE.
 *
 * isp       - array of inode summaries, one per volume in the group
 * nVols     - number of entries in isp[]
 * allInodes - base pointer to which each summary's `index` field is added
 *
 * NOTE(review): the return path taken when no link table inode is found
 * is not visible in this listing.
 */
1317 FindLinkHandle(register struct InodeSummary *isp, int nVols,
1318 struct ViceInodeInfo *allInodes)
1321 struct ViceInodeInfo *ip;
1323 for (i = 0; i < nVols; i++) {
/* ip addresses the first inode record belonging to volume i. */
1324 ip = allInodes + isp[i].index;
/* Only the first nSpecialInodes records of each volume are examined. */
1325 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1326 if (ip[j].u.special.type == VI_LINKTABLE)
1327 return ip[j].inodeNumber;
/*
 * CreateLinkTable: prepare the link table file for a volume group.
 *
 * isp - inode summary of the volume the link table is created for
 * ino - link table inode to use; when it is not VALID_INO, IH_CREATE is
 *       invoked for an INODESPECIAL / VI_LINKTABLE inode (the assignment
 *       target of that call is not visible in this listing).
 *
 * The global handle VGLinkH is initialized for the inode, the file is opened,
 * sized to sizeof(struct versionStamp) + sizeof(short), and the
 * LINKTABLEMAGIC / LINKTABLEVERSION stamp is written to it.  The open,
 * truncate and write checks call Abort() with the volume id and errno.
 * If isp->volSummary is set, its header.linkTable is updated to ino.
 */
1334 CreateLinkTable(register struct InodeSummary *isp, Inode ino)
1336 struct versionStamp version;
1339 if (!VALID_INO(ino))
1341 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
1342 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1343 if (!VALID_INO(ino))
1345 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1346 isp->RWvolumeId, errno);
1347 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1348 fdP = IH_OPEN(VGLinkH);
1350 Abort("Can't open link table for volume %u (error = %d)\n",
1351 isp->RWvolumeId, errno);
/* Size the file to hold just the version stamp plus one short. */
1353 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1354 Abort("Can't truncate link table for volume %u (error = %d)\n",
1355 isp->RWvolumeId, errno);
1357 version.magic = LINKTABLEMAGIC;
1358 version.version = LINKTABLEVERSION;
/* NOTE(review): the Abort that follows this FDH_WRITE check reuses the
 * "Can't truncate" wording; it looks like it should report a write
 * failure instead -- confirm before changing the message. */
1360 if (FDH_WRITE(fdP, (char *)&version, sizeof(version))
1362 Abort("Can't truncate link table for volume %u (error = %d)\n",
1363 isp->RWvolumeId, errno);
1365 FDH_REALLYCLOSE(fdP);
1367 /* If the volume summary exits (i.e., the V*.vol header file exists),
1368 * then set this inode there as well.
1370 if (isp->volSummary)
1371 isp->volSummary->header.linkTable = ino;
/*
 * Body of what is presumably nt_SVG, the start routine that
 * SalvageVolumeGroup passes to pthread_create (the signature line is not
 * visible in this listing).  It treats its argument as an SVGParms_t and
 * runs DoSalvageVolumeGroup on the inode summary array and count stored
 * in it.
 */
1380 SVGParms_t *parms = (SVGParms_t *) arg;
1381 DoSalvageVolumeGroup(parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * SalvageVolumeGroup: salvage the volume group described by
 * isp[0..nVols-1].
 *
 * The visible code zeroes the global VolInfo, packs isp and nVols into
 * `parms`, creates a joinable pthread running nt_SVG on it and then joins
 * that thread.  A failure of pthread_attr_init, pthread_attr_setdetachstate
 * or pthread_create is logged with the group's RW volume id.
 *
 * NOTE(review): apart from the closing #endif for AFS_NT40_ENV, the
 * preprocessor conditionals selecting this threaded path -- and any
 * non-threaded alternative -- are not visible in this listing.
 */
1386 SalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1389 pthread_attr_t tattr;
1393 /* Initialize per volume global variables, even if later code does so */
1397 memset(&VolInfo, 0, sizeof(VolInfo));
/* Arguments for the worker thread; see nt_SVG. */
1399 parms.svgp_inodeSummaryp = isp;
1400 parms.svgp_count = nVols;
1401 code = pthread_attr_init(&tattr);
1403 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1407 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1409 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1412 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1414 Log("Failed to create thread to salvage volume group %u\n",
/* The thread was created joinable, so wait here for it to finish. */
1418 (void)pthread_join(tid, NULL);
1420 #endif /* AFS_NT40_ENV */
/*
 * DoSalvageVolumeGroup: do the actual work of salvaging one volume group.
 *
 * isp   - inode summaries for the group; isp[0] is treated as the
 *         read/write volume when its volumeId equals its RWvolumeId and it
 *         has special inodes
 * nVols - number of summaries in isp[]
 *
 * Steps visible in this listing:
 *  - optionally short-circuit through QuickCheck() unless ForceSalvage;
 *  - optionally Fork() and Wait() for the child when canfork && !debug;
 *  - read the group's ViceInodeInfo records from inodeFd into memory;
 *  - (AFS_NAMEI_ENV) locate and open the link table, recreating it through
 *    CreateLinkTable() when it is missing or cannot be opened;
 *  - for each volume, last to first, run SalvageVolumeHeaderFile() and
 *    SalvageVnodes() twice: first with check == 1, then with check == 0;
 *  - use IH_DEC / IH_INC on every inode whose remaining linkCount is
 *    positive / negative, handling the link table inode last;
 *  - run SalvageVolume() for the directory checks and release VGLinkH.
 *
 * NOTE(review): many control-flow lines (returns, braces, else branches)
 * are not visible here, so the exact conditions joining these steps should
 * be confirmed against the complete file.
 */
1423 DoSalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1425 struct ViceInodeInfo *inodes, *allInodes, *ip;
1426 int i, totalInodes, size, salvageTo;
1430 int dec_VGLinkH = 0;
1432 FdHandle_t *fdP = NULL;
1435 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1436 && isp->nSpecialInodes > 0);
1437 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1438 if (!ForceSalvage && QuickCheck(isp, nVols))
1441 if (ShowMounts && !haveRWvolume)
1443 if (canfork && !debug && Fork() != 0) {
1444 (void)Wait("Salvage volume group");
/* Total the inode counts of all volumes in the group to size the buffer. */
1447 for (i = 0, totalInodes = 0; i < nVols; i++)
1448 totalInodes += isp[i].nInodes;
1449 size = totalInodes * sizeof(struct ViceInodeInfo);
/* NOTE(review): the malloc result is not checked in the visible lines, and
 * the read() further down happens inside assert(); if that is the standard
 * macro, the read would vanish when NDEBUG is defined -- confirm. */
1450 inodes = (struct ViceInodeInfo *)malloc(size);
1451 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1452 * for the partition, if all the inodes
1453 * had been read into memory */
1455 (inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1457 assert(read(inodeFd, inodes, size) == size);
1459 /* Don't try to salvage a read write volume if there isn't one on this
1461 salvageTo = haveRWvolume ? 0 : 1;
1463 #ifdef AFS_NAMEI_ENV
1464 ino = FindLinkHandle(isp, nVols, allInodes);
1465 if (VALID_INO(ino)) {
1466 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1467 fdP = IH_OPEN(VGLinkH);
1469 if (!VALID_INO(ino) || fdP == NULL) {
1470 Log("%s link table for volume %u.\n",
1471 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1473 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1476 struct ViceInodeInfo *ip;
1477 CreateLinkTable(isp, ino);
1478 fdP = IH_OPEN(VGLinkH);
1479 /* Sync fake 1 link counts to the link table, now that it exists */
1481 for (i = 0; i < nVols; i++) {
1482 ip = allInodes + isp[i].index;
1483 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1485 nt_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1487 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1495 FDH_REALLYCLOSE(fdP);
1497 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1500 /* Salvage in reverse order--read/write volume last; this way any
1501 * Inodes not referenced by the time we salvage the read/write volume
1502 * can be picked up by the read/write volume */
1503 /* ACTUALLY, that's not done right now--the inodes just vanish */
1504 for (i = nVols - 1; i >= salvageTo; i--) {
1506 struct InodeSummary *lisp = &isp[i];
1507 #ifdef AFS_NAMEI_ENV
1508 /* If only the RO is present on this partition, the link table
1509 * shows up as a RW volume special file. Need to make sure the
1510 * salvager doesn't try to salvage the non-existent RW.
1512 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1513 /* If this only special inode is the link table, continue */
1514 if (inodes->u.special.type == VI_LINKTABLE) {
1521 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1522 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1523 /* Check inodes twice. The second time do things seriously. This
1524 * way the whole RO volume can be deleted, below, if anything goes wrong */
1525 for (check = 1; check >= 0; check--) {
1527 if (SalvageVolumeHeaderFile(lisp, allInodes, rw, check, &deleteMe)
1529 MaybeZapVolume(lisp, "Volume header", deleteMe, check);
1530 if (rw && deleteMe) {
1531 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1532 * volume won't be called */
1538 if (rw && check == 1)
1540 if (SalvageVnodes(isp, lisp, allInodes, check) == -1) {
1541 MaybeZapVolume(lisp, "Vnode index", 0, check);
1547 /* Fix actual inode counts */
1549 Log("totalInodes %d\n",totalInodes);
1550 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1551 static int TraceBadLinkCounts = 0;
1552 #ifdef AFS_NAMEI_ENV
1553 if (VGLinkH->ih_ino == ip->inodeNumber) {
1554 dec_VGLinkH = ip->linkCount - VGLinkH_cnt;
1555 VGLinkH_p1 = ip->u.param[0];
1556 continue; /* Deal with this last. */
1559 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1560 TraceBadLinkCounts--; /* Limit reports, per volume */
1561 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(NULL, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
1563 while (ip->linkCount > 0) {
1564 /* below used to assert, not break */
1566 if (IH_DEC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1567 Log("idec failed. inode %s errno %d\n",
1568 PrintInode(NULL, ip->inodeNumber), errno);
1574 while (ip->linkCount < 0) {
1575 /* these used to be asserts */
1577 if (IH_INC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1578 Log("iinc failed. inode %s errno %d\n",
1579 PrintInode(NULL, ip->inodeNumber), errno);
1586 #ifdef AFS_NAMEI_ENV
1587 while (dec_VGLinkH > 0) {
1588 if (IH_DEC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1589 Log("idec failed on link table, errno = %d\n", errno);
1593 while (dec_VGLinkH < 0) {
1594 if (IH_INC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1595 Log("iinc failed on link table, errno = %d\n", errno);
1602 /* Directory consistency checks on the rw volume */
1604 SalvageVolume(isp, VGLinkH);
1605 IH_RELEASE(VGLinkH);
1607 if (canfork && !debug) {
/*
 * QuickCheck: examine the on-disk volume info of every volume in the group
 * to decide whether a full salvage can be skipped.
 *
 * isp   - inode summaries for the group
 * nVols - number of summaries in isp[]
 *
 * For each summary a VolumeDiskData record is read through an inode handle
 * built from the summary's header.parent and header.volumeInfo.  The
 * record is accepted when its magic is VOLUMEINFOMAGIC, dontSalvage is
 * DONT_SALVAGE, and both needsSalvaged and destroyMe are zero.  If such a
 * record has inUse set, inUse is cleared, inService is set and the record
 * is written back with IH_IWRITE.
 *
 * NOTE(review): the return statements are not visible in this listing; the
 * caller in DoSalvageVolumeGroup tests the result for non-zero.
 */
1614 QuickCheck(register struct InodeSummary *isp, int nVols)
1616 /* Check headers BEFORE forking */
1620 for (i = 0; i < nVols; i++) {
1621 struct VolumeSummary *vs = isp[i].volSummary;
1622 VolumeDiskData volHeader;
1624 /* Don't salvage just because phantom rw volume is there... */
1625 /* (If a read-only volume exists, read/write inodes must also exist) */
1626 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
1630 IH_INIT(h, fileSysDevice, vs->header.parent, vs->header.volumeInfo);
1631 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
1632 == sizeof(volHeader)
1633 && volHeader.stamp.magic == VOLUMEINFOMAGIC
1634 && volHeader.dontSalvage == DONT_SALVAGE
1635 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
/* A volume left marked in use is put back to not-in-use / in-service. */
1636 if (volHeader.inUse != 0) {
1637 volHeader.inUse = 0;
1638 volHeader.inService = 1;
1640 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
1641 != sizeof(volHeader)) {
1657 /* SalvageVolumeHeaderFile
1659 * Salvage the top level V*.vol header file. Make sure the special files
1660 * exist and that there are no duplicates.
1662 * Calls SalvageHeader for each possible type of volume special file.
/*
 * Parameters:
 *   isp      - inode summary of the volume whose header file is salvaged
 *   inodes   - inode array; the volume's special inodes are at
 *              inodes[isp->index + i] for i < isp->nSpecialInodes
 *   RW       - non-zero for the read/write volume; for other volumes, when
 *              not checking and a volume summary exists, ClearROInUseBit()
 *              is called
 *   check    - non-zero on the check-only pass
 *   deleteMe - passed through to SalvageHeader()
 *
 * Steps visible in this listing: build a reference header (tempHeader)
 * carrying the magic, version, volume id and parent id; detect adjacent
 * special inodes of the same type; record every non-obsolete special inode
 * in its stuff[] slot; call SalvageHeader() for each of the MAXINODETYPE
 * slots; create the V*.vol header file when the volume has no summary, or
 * rewrite it when the stored header differs from tempHeader; finally
 * initialize the summary's volumeInfoHandle.
 *
 * NOTE(review): return statements and several branch lines are not
 * visible here; DoSalvageVolumeGroup tests this function's result.
 */
1666 SalvageVolumeHeaderFile(register struct InodeSummary *isp,
1667 register struct ViceInodeInfo *inodes, int RW,
1668 int check, int *deleteMe)
1672 register struct ViceInodeInfo *ip;
1673 int allinodesobsolete = 1;
1674 struct VolumeDiskHeader diskHeader;
/* Start from a zeroed header holding only magic, version, id and parent. */
1678 memset(&tempHeader, 0, sizeof(tempHeader));
1679 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
1680 tempHeader.stamp.version = VOLUMEHEADERVERSION;
1681 tempHeader.id = isp->volumeId;
1682 tempHeader.parent = isp->RWvolumeId;
1683 /* Check for duplicates (inodes are sorted by type field) */
1684 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
1685 ip = &inodes[isp->index + i];
1686 if (ip->u.special.type == (ip + 1)->u.special.type) {
1688 Log("Duplicate special inodes in volume header; salvage of volume %u aborted\n", isp->volumeId);
/* Record each special inode with a valid, non-obsolete type in stuff[]. */
1692 for (i = 0; i < isp->nSpecialInodes; i++) {
1693 ip = &inodes[isp->index + i];
1694 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
1696 Log("Rubbish header inode\n");
1699 Log("Rubbish header inode; deleted\n");
1700 } else if (!stuff[ip->u.special.type - 1].obsolete) {
1701 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
1702 if (!check && ip->u.special.type != VI_LINKTABLE)
1703 ip->linkCount--; /* Keep the inode around */
1704 allinodesobsolete = 0;
1708 if (allinodesobsolete) {
1715 VGLinkH_cnt++; /* one for every header. */
1717 if (!RW && !check && isp->volSummary) {
1718 ClearROInUseBit(isp->volSummary);
1722 for (i = 0; i < MAXINODETYPE; i++) {
1723 if (stuff[i].inodeType == VI_LINKTABLE) {
1724 /* Gross hack: SalvageHeader does a bcmp on the volume header.
1725 * And we may have recreated the link table earlier, so set the
1726 * RW header as well.
1728 if (VALID_INO(VGLinkH->ih_ino)) {
1729 *stuff[i].inode = VGLinkH->ih_ino;
1733 if (SalvageHeader(&stuff[i], isp, check, deleteMe) == -1 && check)
1737 if (isp->volSummary == NULL) {
1739 (void)afs_snprintf(name, sizeof name, VFORMAT, isp->volumeId);
1741 Log("No header file for volume %u\n", isp->volumeId);
1745 Log("No header file for volume %u; %screating %s/%s\n",
1746 isp->volumeId, (Testing ? "it would have been " : ""),
1747 fileSysPathName, name);
1748 headerFd = afs_open(name, O_RDWR | O_CREAT | O_TRUNC, 0644);
1749 assert(headerFd != -1);
1750 isp->volSummary = (struct VolumeSummary *)
1751 malloc(sizeof(struct VolumeSummary));
1752 isp->volSummary->fileName = ToString(name);
1755 /* hack: these two fields are obsolete... */
1756 isp->volSummary->header.volumeAcl = 0;
1757 isp->volSummary->header.volumeMountTable = 0;
1760 (&isp->volSummary->header, &tempHeader,
1761 sizeof(struct VolumeHeader))) {
1762 /* We often remove the name before calling us, so we make a fake one up */
1763 if (isp->volSummary->fileName) {
1764 strcpy(name, isp->volSummary->fileName);
1766 (void)afs_snprintf(name, sizeof name, VFORMAT, isp->volumeId);
1767 isp->volSummary->fileName = ToString(name);
1770 Log("Header file %s is damaged or no longer valid%s\n", name,
1771 (check ? "" : "; repairing"));
1775 headerFd = afs_open(name, O_RDWR | O_TRUNC, 0644);
1776 assert(headerFd != -1);
1780 memcpy(&isp->volSummary->header, &tempHeader,
1781 sizeof(struct VolumeHeader));
1784 Log("It would have written a new header file for volume %u\n",
1787 VolumeHeaderToDisk(&diskHeader, &tempHeader);
1788 if (write(headerFd, &diskHeader, sizeof(struct VolumeDiskHeader))
1789 != sizeof(struct VolumeDiskHeader)) {
1790 Log("Couldn't rewrite volume header file!\n");
1797 IH_INIT(isp->volSummary->volumeInfoHandle, fileSysDevice, isp->RWvolumeId,
1798 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader: check one special file of a volume header and recreate it
 * when it is missing or corrupt.
 *
 *   sp    - descriptor of the special file; the fields used here are
 *           inodeType, inode (pointer to the inode number), size, stamp
 *           and description
 *   isp   - inode summary of the volume
 *   check - non-zero on the check-only pass
 *   (the remaining parameter line is not visible in this listing)
 *
 * Steps visible in this listing: a zero inode number is reported as missing
 * and an inode is created with IH_CREATE; the file is opened and sp->size
 * bytes are read and compared against sp->stamp.magic; a VI_VOLINFO file
 * whose destroyMe equals DESTROY_ME gets special handling; when `recreate`
 * is set and Testing is off the file is truncated and rewritten -- a fresh
 * VolumeDiskData (named "bogus.<id>") for VI_VOLINFO, otherwise just the
 * version stamp; finally, for VI_VOLINFO, the header is copied into the
 * global VolInfo and its update or creation time is logged.
 *
 * NOTE(review): return statements are not visible here; the caller in
 * SalvageVolumeHeaderFile compares the result with -1.
 */
1803 SalvageHeader(register struct stuff *sp, struct InodeSummary *isp, int check,
1807 VolumeDiskData volumeInfo;
1808 struct versionStamp fileHeader;
1817 #ifndef AFS_NAMEI_ENV
1818 if (sp->inodeType == VI_LINKTABLE)
/* A zero inode number means this special file is absent from the header. */
1821 if (*(sp->inode) == 0) {
1823 Log("Missing inode in volume header (%s)\n", sp->description);
1827 Log("Missing inode in volume header (%s); %s\n", sp->description,
1828 (Testing ? "it would have recreated it" : "recreating"));
1831 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
1832 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
1833 if (!VALID_INO(*(sp->inode)))
1835 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
1836 sp->description, errno);
1841 IH_INIT(specH, fileSysDevice, isp->RWvolumeId, *(sp->inode));
1842 fdP = IH_OPEN(specH);
1843 if (OKToZap && (fdP == NULL) && BadError(errno)) {
1844 /* bail out early and destroy the volume */
1846 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
1853 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
1854 sp->description, errno);
1857 && (FDH_READ(fdP, (char *)&header, sp->size) != sp->size
1858 || header.fileHeader.magic != sp->stamp.magic)) {
1860 Log("Part of the header (%s) is corrupted\n", sp->description);
1861 FDH_REALLYCLOSE(fdP);
1865 Log("Part of the header (%s) is corrupted; recreating\n",
1869 if (sp->inodeType == VI_VOLINFO
1870 && header.volumeInfo.destroyMe == DESTROY_ME) {
1873 FDH_REALLYCLOSE(fdP);
1877 if (recreate && !Testing) {
1880 ("Internal error: recreating volume header (%s) in check mode\n",
1882 code = FDH_TRUNC(fdP, 0);
1884 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
1885 sp->description, errno);
1887 /* The following code should be moved into vutil.c */
1888 if (sp->inodeType == VI_VOLINFO) {
1890 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
1891 header.volumeInfo.stamp = sp->stamp;
1892 header.volumeInfo.id = isp->volumeId;
1893 header.volumeInfo.parentId = isp->RWvolumeId;
1894 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
1895 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
1896 isp->volumeId, isp->volumeId);
1897 header.volumeInfo.inService = 0;
1898 header.volumeInfo.blessed = 0;
1899 /* The + 1000 is a hack in case there are any files out in venus caches */
1900 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
1901 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
1902 header.volumeInfo.needsCallback = 0;
1903 gettimeofday(&tp, 0);
1904 header.volumeInfo.creationDate = tp.tv_sec;
1905 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
1907 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
1908 sp->description, errno);
1911 FDH_WRITE(fdP, (char *)&header.volumeInfo,
1912 sizeof(header.volumeInfo));
1913 if (code != sizeof(header.volumeInfo)) {
1916 ("Unable to write volume header file (%s) (errno = %d)\n",
1917 sp->description, errno);
1918 Abort("Unable to write entire volume header file (%s)\n",
/* Rewriting a non-VI_VOLINFO special file: only the stamp is written. */
1922 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
1924 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
1925 sp->description, errno);
1927 code = FDH_WRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp));
1928 if (code != sizeof(sp->stamp)) {
1931 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
1932 sp->description, errno);
1934 ("Unable to write entire version stamp in volume header file (%s)\n",
1939 FDH_REALLYCLOSE(fdP);
/* Publish the volume info in the global VolInfo and log its timestamp. */
1941 if (sp->inodeType == VI_VOLINFO) {
1942 VolInfo = header.volumeInfo;
1945 if (VolInfo.updateDate) {
1946 strcpy(update, TimeStamp(VolInfo.updateDate, 0));
1948 Log("%s (%u) %supdated %s\n", VolInfo.name, VolInfo.id,
1949 (Testing ? "it would have been " : ""), update);
1951 strcpy(update, TimeStamp(VolInfo.creationDate, 0));
1953 Log("%s (%u) not updated (created %s)\n", VolInfo.name,
1954 VolInfo.id, update);
/*
 * SalvageVnodes: salvage the small and the large vnode index of one volume.
 *
 *   rwIsp   - summary whose non-special inodes form the candidate list:
 *             the list starts at rwIsp->index + rwIsp->nSpecialInodes and
 *             holds rwIsp->nInodes - rwIsp->nSpecialInodes entries
 *   thisIsp - summary of the volume being salvaged; the salvage is treated
 *             as read/write exactly when thisIsp == rwIsp
 *   inodes  - inode array indexed by the offsets above
 *   check   - non-zero on the check-only pass
 *
 * Returns 0 when both SalvageIndex() calls return 0, otherwise -1.
 * NOTE(review): the statement taken when `check && ismall == -1` is not
 * visible in this listing.
 */
1964 SalvageVnodes(register struct InodeSummary *rwIsp,
1965 register struct InodeSummary *thisIsp,
1966 register struct ViceInodeInfo *inodes, int check)
1968 int ilarge, ismall, ioffset, RW, nInodes;
1969 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
1972 RW = (rwIsp == thisIsp);
1973 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
/* Small vnode index first ... */
1975 SalvageIndex(thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
1976 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
1977 if (check && ismall == -1)
/* ... then the large vnode index. */
1980 SalvageIndex(thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
1981 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
1982 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex: reconcile one vnode index file with the volume's inodes.
 *
 *   ino        - inode of the vnode index file
 *   class      - vnode class of the index (the caller passes vSmall or
 *                vLarge)
 *   RW         - non-zero when salvaging the read/write volume
 *   ip,nInodes - the inode records to match against, walked in increasing
 *                vnodeNumber order by the loops below
 *   volSummary - summary of the volume; supplies header.id / header.parent
 *   check      - non-zero on the check-only pass; the function asserts that
 *                no vnode is changed while check is set
 *
 * Steps visible in this listing: the index is opened as a stream, its size
 * is asserted to be a whole number of vcp->diskSize slots and the first
 * slot is skipped.  Every non-vNull vnode is then examined: vnodes with
 * inode number 0 or with a wrong magic are zeroed; otherwise a matching
 * inode is looked up by vnode number and the vnode's uniquifier, data
 * version, inode number and length are brought in line with it, and the
 * matched inode's linkCount is decremented.  A vnode that names an inode
 * (or is a directory) but has no matching inode is zeroed.  Changed vnodes
 * are written back unless Testing, and VolumeChanged is set.
 *
 * NOTE(review): return statements, the vnodeDone label and several
 * branches are not visible here; confirm details against the full file.
 */
1986 SalvageIndex(Inode ino, VnodeClass class, int RW,
1987 register struct ViceInodeInfo *ip, int nInodes,
1988 struct VolumeSummary *volSummary, int check)
1990 VolumeId volumeNumber;
1991 char buf[SIZEOF_LARGEDISKVNODE];
1992 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
1994 StreamHandle_t *file;
1995 struct VnodeClassInfo *vcp;
1997 afs_fsize_t vnodeLength;
1998 int vnodeIndex, nVnodes;
1999 afs_ino_str_t stmp1, stmp2;
2003 volumeNumber = volSummary->header.id;
2004 IH_INIT(handle, fileSysDevice, volSummary->header.parent, ino);
2005 fdP = IH_OPEN(handle);
2006 assert(fdP != NULL);
2007 file = FDH_FDOPEN(fdP, "r+");
2008 assert(file != NULL);
2009 vcp = &VnodeClassInfo[class];
2010 size = OS_SIZE(fdP->fd_fd);
/* One slot at the start of the file is not counted as a vnode: nVnodes
 * excludes it and the stream is positioned at offset diskSize. */
2012 nVnodes = (size / vcp->diskSize) - 1;
2014 assert((nVnodes + 1) * vcp->diskSize == size);
2015 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
2019 for (vnodeIndex = 0;
2020 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2021 nVnodes--, vnodeIndex++) {
2022 if (vnode->type != vNull) {
2023 int vnodeChanged = 0;
2024 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2025 /* Log programs that belong to root (potentially suid root);
2026 * don't bother for read-only or backup volumes */
2027 #ifdef notdef /* This is done elsewhere */
2028 if (ShowRootFiles && RW && vnode->owner == 0 && vnodeNumber != 1)
2029 Log("OWNER IS ROOT %s %u dir %u vnode %u author %u owner %u mode %o\n", VolInfo.name, volumeNumber, vnode->parent, vnodeNumber, vnode->author, vnode->owner, vnode->modeBits);
2031 if (VNDISK_GET_INO(vnode) == 0) {
2033 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2034 memset(vnode, 0, vcp->diskSize);
2038 if (vcp->magic != vnode->vnodeMagic) {
2039 /* bad magic #, probably partially created vnode */
2040 Log("Partially allocated vnode %d deleted.\n",
2042 memset(vnode, 0, vcp->diskSize);
2046 /* ****** Should do a bit more salvage here: e.g. make sure
2047 * vnode type matches what it should be given the index */
2048 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2049 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2050 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2051 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2058 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2059 /* The following doesn't work, because the version number
2060 * is not maintained correctly by the file server */
2061 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2062 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2064 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2070 /* For RW volume, look for vnode with matching inode number;
2071 * if no such match, take the first determined by our sort
2073 register struct ViceInodeInfo *lip = ip;
2074 register int lnInodes = nInodes;
2076 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2077 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2086 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2087 /* "Matching" inode */
2091 vu = vnode->uniquifier;
2092 iu = ip->u.vnode.vnodeUniquifier;
2093 vd = vnode->dataVersion;
2094 id = ip->u.vnode.inodeDataVersion;
2096 * Because of the possibility of the uniquifier overflows (> 4M)
2097 * we compare them modulo the low 22-bits; we shouldn't worry
2098 * about mismatching since they shouldn't to many old
2099 * uniquifiers of the same vnode...
2101 if (IUnique(vu) != IUnique(iu)) {
2103 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2106 vnode->uniquifier = iu;
2107 #ifdef AFS_3DISPARES
2108 vnode->dataVersion = (id >= vd ?
2111 1887437 ? vd : id) :
2114 1887437 ? id : vd));
2116 #if defined(AFS_SGI_EXMAG)
2117 vnode->dataVersion = (id >= vd ?
2120 15099494 ? vd : id) :
2123 15099494 ? id : vd));
2125 vnode->dataVersion = (id > vd ? id : vd);
2126 #endif /* AFS_SGI_EXMAG */
2127 #endif /* AFS_3DISPARES */
2130 /* don't bother checking for vd > id any more, since
2131 * partial file transfers always result in this state,
2132 * and you can't do much else anyway (you've already
2133 * found the best data you can) */
2134 #ifdef AFS_3DISPARES
2135 if (!vnodeIsDirectory(vnodeNumber)
2136 && ((vd < id && (id - vd) < 1887437)
2137 || ((vd > id && (vd - id) > 1887437)))) {
2139 #if defined(AFS_SGI_EXMAG)
2140 if (!vnodeIsDirectory(vnodeNumber)
2141 && ((vd < id && (id - vd) < 15099494)
2142 || ((vd > id && (vd - id) > 15099494)))) {
2144 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2145 #endif /* AFS_SGI_EXMAG */
2148 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2149 vnode->dataVersion = id;
2154 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2157 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2159 VNDISK_SET_INO(vnode, ip->inodeNumber);
2164 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2166 VNDISK_SET_INO(vnode, ip->inodeNumber);
2169 VNDISK_GET_LEN(vnodeLength, vnode);
2170 if (ip->byteCount != vnodeLength) {
2173 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2178 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2179 VNDISK_SET_LEN(vnode, ip->byteCount);
2183 ip->linkCount--; /* Keep the inode around */
2186 } else { /* no matching inode */
2187 if (VNDISK_GET_INO(vnode) != 0
2188 || vnode->type == vDirectory) {
2189 /* No matching inode--get rid of the vnode */
2191 if (VNDISK_GET_INO(vnode)) {
2193 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)));
2197 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2202 if (VNDISK_GET_INO(vnode)) {
2204 time_t serverModifyTime = vnode->serverModifyTime;
2205 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2209 time_t serverModifyTime = vnode->serverModifyTime;
2210 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2213 memset(vnode, 0, vcp->diskSize);
2216 /* Should not reach here because we checked for
2217 * (inodeNumber == 0) above. And where we zero the vnode,
2218 * we also goto vnodeDone.
2222 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2226 } /* VNDISK_GET_INO(vnode) != 0 */
2228 assert(!(vnodeChanged && check));
2229 if (vnodeChanged && !Testing) {
2231 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2232 (char *)vnode, vcp->diskSize)
2234 VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber: map a vnode number to its in-memory VnodeEssence.
 *
 * The vnode number is split into its class (vnodeIdToClass) and its bit
 * number (vnodeIdToBitNumber).  Returns a pointer to the matching entry of
 * that class's vnodes[] array, or NULL when the bit number is not below
 * the class's nVnodes.
 */
2245 struct VnodeEssence *
2246 CheckVnodeNumber(VnodeId vnodeNumber)
2249 struct VnodeInfo *vip;
2252 class = vnodeIdToClass(vnodeNumber);
2253 vip = &vnodeInfo[class];
2254 offset = vnodeIdToBitNumber(vnodeNumber);
/* Bounds check: offsets at or past nVnodes have no essence entry. */
2255 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite: give directory `dir` its own copy of the directory inode
 * before the salvager modifies it.
 *
 * The copy is skipped when dir->copied is already set or when Testing.
 * Otherwise the directory's vnode is read from the large vnode index, a
 * new inode is created with IH_CREATE (the vnode's dataVersion is bumped
 * in the same call; the increment is not visible in this listing), the old
 * inode's contents are copied into it with CopyInode, the vnode is updated
 * to name the new inode and written back, and dir->dirHandle is pointed at
 * the new inode with SetSalvageDirHandle.
 *
 * Every step is guarded by assert().  NOTE(review): CopyInode is called
 * inside assert(); if that is the standard macro, the copy would not happen
 * when NDEBUG is defined -- confirm.
 */
2259 CopyOnWrite(register struct DirSummary *dir)
2261 /* Copy the directory unconditionally if we are going to change it:
2262 * not just if was cloned.
2264 struct VnodeDiskObject vnode;
2265 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2266 Inode oldinode, newinode;
2269 if (dir->copied || Testing)
2271 DFlush(); /* Well justified paranoia... */
2274 IH_IREAD(vnodeInfo[vLarge].handle,
2275 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2277 assert(code == sizeof(vnode));
2278 oldinode = VNDISK_GET_INO(&vnode);
2279 /* Increment the version number by a whole lot to avoid problems with
2280 * clients that were promised new version numbers--but the file server
2281 * crashed before the versions were written to disk.
2284 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2285 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2287 assert(VALID_INO(newinode));
2288 assert(CopyInode(fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2290 VNDISK_SET_INO(&vnode, newinode);
2292 IH_IWRITE(vnodeInfo[vLarge].handle,
2293 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2295 assert(code == sizeof(vnode));
2297 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2298 fileSysDevice, newinode);
2299 /* Don't delete the original inode right away, because the directory is
2300 * still being scanned.
2306 * This function should either successfully create a new dir, or give up
2307 * and leave things the way they were. In particular, if it fails to write
2308 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage: rebuild directory `dir` into a freshly created inode.
 *
 * Steps visible in this listing: read the directory's vnode from the large
 * vnode index; create a new inode and wrap it in the `newdir` handle; pick
 * the uniquifier for ".." from the parent's VnodeEssence (defaulting to 1);
 * run DirSalvage from the old directory into the new one.  If DirSalvage
 * fails, or DirOK rejects the result, the new inode is released with IH_DEC
 * and the old directory is kept.  On success the vnode is updated with the
 * new inode number and length and written back, the disk is synced
 * (nt_sync or sync), the index handle is marked synced, the old inode is
 * released with IH_DEC and dir->dirHandle is switched to `newdir`.
 */
2312 CopyAndSalvage(register struct DirSummary *dir)
2314 struct VnodeDiskObject vnode;
2315 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2316 Inode oldinode, newinode;
/* Default uniquifier for ".." when the parent's essence gives none. */
2320 afs_int32 parentUnique = 1;
2321 struct VnodeEssence *vnodeEssence;
2325 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2327 IH_IREAD(vnodeInfo[vLarge].handle,
2328 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2330 assert(lcode == sizeof(vnode));
2331 oldinode = VNDISK_GET_INO(&vnode);
2332 /* Increment the version number by a whole lot to avoid problems with
2333 * clients that were promised new version numbers--but the file server
2334 * crashed before the versions were written to disk.
2337 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2338 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2340 assert(VALID_INO(newinode));
2341 SetSalvageDirHandle(&newdir, dir->rwVid, fileSysDevice, newinode);
2343 /* Assign . and .. vnode numbers from dir and vnode.parent.
2344 * The uniquifier for . is in the vnode.
2345 * The uniquifier for .. might be set to a bogus value of 1 and
2346 * the salvager will later clean it up.
2348 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(vnode.parent))) {
2349 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2352 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2354 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2359 /* didn't really build the new directory properly, let's just give up. */
2360 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2361 Log("Directory salvage returned code %d, continuing.\n", code);
2363 Log("also failed to decrement link count on new inode");
2367 Log("Checking the results of the directory salvage...\n");
2368 if (!DirOK(&newdir)) {
2369 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2370 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2375 VNDISK_SET_INO(&vnode, newinode);
2376 VNDISK_SET_LEN(&vnode, Length(&newdir));
2378 IH_IWRITE(vnodeInfo[vLarge].handle,
2379 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2381 assert(lcode == sizeof(vnode));
2384 nt_sync(fileSysDevice);
2386 sync(); /* this is slow, but hopefully rarely called. We don't have
2387 * an open FD on the file itself to fsync.
2391 vnodeInfo[vLarge].handle->ih_synced = 1;
2393 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2395 dir->dirHandle = newdir;
2399 JudgeEntry(struct DirSummary *dir, char *name, VnodeId vnodeNumber,
2402 struct VnodeEssence *vnodeEssence;
2403 afs_int32 dirOrphaned, todelete;
2405 dirOrphaned = IsVnodeOrphaned(dir->vnodeNumber);
2407 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2408 if (vnodeEssence == NULL) {
2410 Log("dir vnode %u: invalid entry deleted: %s/%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2414 assert(Delete(&dir->dirHandle, name) == 0);
2419 #ifndef AFS_NAMEI_ENV
2420 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2421 * mount inode for the partition. If this inode were deleted, it would crash
2424 if (vnodeEssence->InodeNumber == 0) {
2425 Log("dir vnode %d: invalid entry: %s/%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2428 assert(Delete(&dir->dirHandle, name) == 0);
2435 if (!(vnodeNumber & 1) && !Showmode
2436 && !(vnodeEssence->count || vnodeEssence->unique
2437 || vnodeEssence->modeBits)) {
2438 Log("dir vnode %u: invalid entry: %s/%s (vnode %u, unique %u)%s\n",
2439 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
2440 vnodeNumber, unique,
2441 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
2446 assert(Delete(&dir->dirHandle, name) == 0);
2452 /* Check if the Uniquifiers match. If not, change the directory entry
2453 * so its unique matches the vnode unique. Delete if the unique is zero
2454 * or if the directory is orphaned.
2456 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
2457 if (!vnodeEssence->unique
2458 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
2459 /* This is an orphaned directory. Don't delete the . or ..
2460 * entry. Otherwise, it will get created in the next
2461 * salvage and deleted again here. So Just skip it.
2466 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
2469 Log("dir vnode %u: %s/%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
2473 fid.Vnode = vnodeNumber;
2474 fid.Unique = vnodeEssence->unique;
2476 assert(Delete(&dir->dirHandle, name) == 0);
2478 assert(Create(&dir->dirHandle, name, &fid) == 0);
2481 return; /* no need to continue */
2484 if (strcmp(name, ".") == 0) {
2485 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
2488 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
2491 assert(Delete(&dir->dirHandle, ".") == 0);
2492 fid.Vnode = dir->vnodeNumber;
2493 fid.Unique = dir->unique;
2494 assert(Create(&dir->dirHandle, ".", &fid) == 0);
2497 vnodeNumber = fid.Vnode; /* Get the new Essence */
2498 unique = fid.Unique;
2499 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2502 } else if (strcmp(name, "..") == 0) {
2505 struct VnodeEssence *dotdot;
2506 pa.Vnode = dir->parent;
2507 dotdot = CheckVnodeNumber(pa.Vnode);
2508 assert(dotdot != NULL); /* XXX Should not be assert */
2509 pa.Unique = dotdot->unique;
2511 pa.Vnode = dir->vnodeNumber;
2512 pa.Unique = dir->unique;
2514 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
2516 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
2519 assert(Delete(&dir->dirHandle, "..") == 0);
2520 assert(Create(&dir->dirHandle, "..", &pa) == 0);
2523 vnodeNumber = pa.Vnode; /* Get the new Essence */
2525 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2527 dir->haveDotDot = 1;
2528 } else if (strncmp(name, ".__afs", 6) == 0) {
2530 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
2534 assert(Delete(&dir->dirHandle, name) == 0);
2536 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
2537 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
2540 if (ShowSuid && (vnodeEssence->modeBits & 06000))
2541 Log("FOUND suid/sgid file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
2542 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
2543 && !(vnodeEssence->modeBits & 0111)) {
2549 IH_INIT(ihP, fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
2550 vnodeEssence->InodeNumber);
2553 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
2557 size = FDH_SIZE(fdP);
2559 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, size, vnodeNumber);
2560 FDH_REALLYCLOSE(fdP);
2567 code = FDH_READ(fdP, buf, size);
2570 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
2571 Log("Volume %u (%s) mount point %s/%s to '%s' invalid, %s to symbolic link\n",
2572 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
2573 Testing ? "would convert" : "converted");
2574 vnodeEssence->modeBits |= 0111;
2575 vnodeEssence->changed = 1;
2576 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s/%s to '%s'\n",
2577 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
2578 dir->name ? dir->name : "??", name, buf);
2580 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
2581 dir->vname, vnodeNumber, size, code);
2583 FDH_REALLYCLOSE(fdP);
2586 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
2587 Log("FOUND root file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
2588 if (vnodeIdToClass(vnodeNumber) == vLarge
2589 && vnodeEssence->name == NULL) {
2591 if ((n = (char *)malloc(strlen(name) + 1)))
2593 vnodeEssence->name = n;
2596 /* The directory entry points to the vnode. Check to see if the
2597 * vnode points back to the directory. If not, then let the
2598 * directory claim it (else it might end up orphaned). Vnodes
2599 * already claimed by another directory are deleted from this
2600 * directory: hardlinks to the same vnode are not allowed
2601 * from different directories.
2603 if (vnodeEssence->parent != dir->vnodeNumber) {
2604 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
2605 /* Vnode does not point back to this directory.
2606 * Orphaned dirs cannot claim a file (it may belong to
2607 * another non-orphaned dir).
2610 Log("dir vnode %u: %s/%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
2612 vnodeEssence->parent = dir->vnodeNumber;
2613 vnodeEssence->changed = 1;
2615 /* Vnode was claimed by another directory */
2618 Log("dir vnode %u: %s/%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
2619 } else if (vnodeNumber == 1) {
2620 Log("dir vnode %d: %s/%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
2622 Log("dir vnode %u: %s/%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
2627 assert(Delete(&dir->dirHandle, name) == 0);
2632 /* This directory claims the vnode */
2633 vnodeEssence->claimed = 1;
2635 vnodeEssence->count--;
/*
 * DistilVnodeEssence: read the vnode index file `ino` of volume `rwVId`
 * and condense each allocated disk vnode into an in-memory
 * struct VnodeEssence kept in vnodeInfo[class].
 *
 *   rwVId - volume id used when initialising the index-file handle
 *   class - vnode class; selects vnodeInfo[class] and VnodeClassInfo[class]
 *   ino   - inode of the vnode index file to read
 *   maxu  - in/out: raised to the largest uniquifier found in the index
 *
 * Side effects visible here: sets vip->handle, vip->nVnodes, vip->vnodes,
 * vip->inodes (large class only), vip->volumeBlockCount and
 * vip->nAllocatedVnodes.  Failures to open, seek or allocate are asserts.
 *
 * NOTE(review): this listing omits some lines of the function (local
 * declarations, braces, trailing code); the comments below describe only
 * the statements that are shown.
 */
2639 DistilVnodeEssence(VolumeId rwVId, VnodeClass class, Inode ino, Unique * maxu)
2641 register struct VnodeInfo *vip = &vnodeInfo[class];
2642 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
2643 char buf[SIZEOF_LARGEDISKVNODE];
2644 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2646 StreamHandle_t *file;
2651 IH_INIT(vip->handle, fileSysDevice, rwVId, ino);
2652 fdP = IH_OPEN(vip->handle);
2653 assert(fdP != NULL);
2654 file = FDH_FDOPEN(fdP, "r+");
2655 assert(file != NULL);
2656 size = OS_SIZE(fdP->fd_fd);
/* One record-sized slot at the start of the file is not counted as a
 * vnode; the seek below skips the same diskSize bytes before reading. */
2658 vip->nVnodes = (size / vcp->diskSize) - 1;
2659 if (vip->nVnodes > 0) {
/* The file size must be an exact multiple of the record size. */
2660 assert((vip->nVnodes + 1) * vcp->diskSize == size);
2661 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
2662 assert((vip->vnodes = (struct VnodeEssence *)
2663 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
/* Only the large vnode class gets the parallel array of inode numbers;
 * it is filled in below for directory vnodes. */
2664 if (class == vLarge) {
2665 assert((vip->inodes = (Inode *)
2666 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
2675 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
/* Read one disk vnode per iteration; the loop ends after nVnodes records
 * or at the first read that does not return a full record. */
2676 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
2677 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2678 nVnodes--, vnodeIndex++) {
/* Only allocated (non-vNull) records are distilled. */
2679 if (vnode->type != vNull) {
2680 register struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
2681 afs_fsize_t vnodeLength;
2682 vip->nAllocatedVnodes++;
2683 vep->count = vnode->linkCount;
2684 VNDISK_GET_LEN(vnodeLength, vnode);
2685 vep->blockCount = nBlocks(vnodeLength);
2686 vip->volumeBlockCount += vep->blockCount;
2687 vep->parent = vnode->parent;
2688 vep->unique = vnode->uniquifier;
/* Track the largest uniquifier seen for the caller. */
2689 if (*maxu < vnode->uniquifier)
2690 *maxu = vnode->uniquifier;
2691 vep->modeBits = vnode->modeBits;
2692 vep->InodeNumber = VNDISK_GET_INO(vnode);
2693 vep->type = vnode->type;
2694 vep->author = vnode->author;
2695 vep->owner = vnode->owner;
2696 vep->group = vnode->group;
/* Directories may only appear in the large class; remember their inode. */
2697 if (vnode->type == vDirectory) {
2698 assert(class == vLarge);
2699 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName: build the path name of directory vnode `vnode` (essence
 * `vp`) into the caller-supplied buffer `path`.
 *
 * The visible branch handles a vnode that has a parent and a recorded
 * name: it looks up the parent's essence, recurses to build the parent's
 * path, and on success appends this vnode's name to `path`.
 *
 * NOTE(review): the base case, any separator handling and the return
 * statements are on lines not shown in this listing; the recursion result
 * is used as a truth value, so a non-zero return presumably means
 * success -- confirm against the full source.  No bound on `path` is
 * checked in the visible code.
 */
2708 GetDirName(VnodeId vnode, struct VnodeEssence *vp, char *path)
2710 struct VnodeEssence *parentvp;
2716 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(vp->parent))
2717 && GetDirName(vp->parent, parentvp, path)) {
2719 strcat(path, vp->name);
2725 /* To determine if a vnode is orphaned or not, the vnode and all its parent
2726 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
/*
 * IsVnodeOrphaned: return 1 if `vnode` is orphaned, 0 if it is not.
 *
 * A vnode is reported as orphaned when it has no essence
 * (CheckVnodeNumber returns NULL) or its `claimed` flag is clear;
 * otherwise the answer is that of its parent, found by recursing on
 * vep->parent.  Per the existing comments, vnode zero is treated as
 * orphaned and the root directory vnode as always claimed; those two
 * returns end the recursion (their conditions are on lines not shown).
 */
2729 IsVnodeOrphaned(VnodeId vnode)
2731 struct VnodeEssence *vep;
2734 return (1); /* Vnode zero does not exist */
2736 return (0); /* The root dir vnode is always claimed */
2737 vep = CheckVnodeNumber(vnode); /* Get the vnode essence */
2738 if (!vep || !vep->claimed)
2739 return (1); /* Vnode is not claimed - it is orphaned */
2741 return (IsVnodeOrphaned(vep->parent));
/*
 * SalvageDir: salvage the directory stored at index `i` of the large
 * vnode table `dirVnodeInfo`.
 *
 *   name         - volume name (passed through on recursion)
 *   rwVid        - read-write volume id, used when releasing the old inode
 *   dirVnodeInfo - per-class vnode info for the large (directory) class
 *   alinkH       - handle stored in dir.ds_linkH and used for IH_DEC
 *   i            - bit number (index) of the directory vnode
 *   rootdir      - out: receives a copy of the DirSummary of vnode 1.1
 *
 * Visible steps: skip entries already salvaged or with no directory
 * inode; clear a bogus parent on vnode 1; salvage the parent directory
 * first; fill in the DirSummary; verify the directory with DirOK and
 * rebuild it with CopyAndSalvage when needed; run JudgeEntry over every
 * entry via EnumerateDir; release the old inode if the directory was
 * copied; finally remember the root directory summary.
 *
 * NOTE(review): `dir`, `dirHandle` and `path` are static, so their
 * contents are overwritten by each call.  In the visible code the
 * recursive call for the parent happens before `dir` is filled in for
 * this directory.  Part of the parameter list and several statements are
 * on lines not shown in this listing.
 */
2745 SalvageDir(char *name, VolumeId rwVid, struct VnodeInfo *dirVnodeInfo,
2746 IHandle_t * alinkH, int i, struct DirSummary *rootdir,
2749 static struct DirSummary dir;
2750 static struct DirHandle dirHandle;
2751 struct VnodeEssence *parent;
2752 static char path[MAXPATHLEN];
2755 if (dirVnodeInfo->vnodes[i].salvaged)
2756 return; /* already salvaged */
/* Mark before recursing so that this entry is never processed twice. */
2759 dirVnodeInfo->vnodes[i].salvaged = 1;
2761 if (dirVnodeInfo->inodes[i] == 0)
2762 return; /* Not allocated to a directory */
/* Vnode 1 must not record a parent; reset it and mark the vnode changed. */
2764 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
2765 if (dirVnodeInfo->vnodes[i].parent) {
2766 Log("Bad parent, vnode 1; %s...\n",
2767 (Testing ? "skipping" : "salvaging"));
2768 dirVnodeInfo->vnodes[i].parent = 0;
2769 dirVnodeInfo->vnodes[i].changed = 1;
/* Salvage the parent directory first if it has not been done yet. */
2772 parent = CheckVnodeNumber(dirVnodeInfo->vnodes[i].parent);
2773 if (parent && parent->salvaged == 0)
2774 SalvageDir(name, rwVid, dirVnodeInfo, alinkH,
2775 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
2776 rootdir, rootdirfound);
2779 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
2780 dir.unique = dirVnodeInfo->vnodes[i].unique;
2783 dir.parent = dirVnodeInfo->vnodes[i].parent;
2784 dir.haveDot = dir.haveDotDot = 0;
2785 dir.ds_linkH = alinkH;
2786 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, fileSysDevice,
2787 dirVnodeInfo->inodes[i]);
/* With RebuildDirs set (and not Testing) the directory is treated as bad
 * without calling DirOK. */
2789 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
2792 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
2793 (Testing ? "skipping" : "salvaging"));
2796 CopyAndSalvage(&dir);
/* Keep a copy of the handle; it is the one enumerated below and the one
 * whose inode is released if the directory was copied. */
2800 dirHandle = dir.dirHandle;
2803 GetDirName(bitNumberToVnodeNumber(i, vLarge),
2804 &dirVnodeInfo->vnodes[i], path);
2807 /* If enumeration failed for random reasons, we will probably delete
2808 * too much stuff, so we guard against this instead.
2810 assert(EnumerateDir(&dirHandle, JudgeEntry, &dir) == 0);
2813 /* Delete the old directory if it was copied in order to salvage.
2814 * CopyOnWrite has written the new inode # to the disk, but we still
2815 * have the old one in our local structure here. Thus, we idec the
2819 if (dir.copied && !Testing) {
2820 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
2822 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
2825 /* Remember rootdir DirSummary _after_ it has been judged */
2826 if (dir.vnodeNumber == 1 && dir.unique == 1) {
2827 memcpy(rootdir, &dir, sizeof(struct DirSummary));
/*
 * SalvageVolume: salvage the directory tree and vnode indexes of one
 * volume and rewrite its volume header.
 *
 *   rwIsp  - inode summary of the volume; its volSummary header supplies
 *            the volume id and the volumeInfo / vnode index inodes
 *   alinkH - handle passed to SalvageDir and used for IH_DEC on vnodes
 *            that are removed
 *
 * Visible phases:
 *   1. Read the volume header and assert that its magic is valid and
 *      that it is not marked DESTROY_ME.
 *   2. DistilVnodeEssence for the large and small vnode indexes.
 *   3. SalvageDir for every large vnode.
 *   4. Walk all vnodes looking for unclaimed (orphaned) ones; when
 *      orphans == ORPH_ATTACH (and not Testing, not todelete) re-attach
 *      them under the root directory as __ORPHANDIR__.<vnode>.<unique>
 *      or __ORPHANFILE__.<vnode>.<unique>, trying up to 10 names.
 *   5. Release the old root directory inode if the root was copied
 *      during attachment, then DFlush.
 *   6. Rewrite every vnode that is changed or has a non-zero count,
 *      removing orphans / todelete vnodes as requested.
 *   7. Free recorded names, update filecount, diskused, uniquifier and
 *      the state flags in the volume header, write it back, log the
 *      result and release the index handles.
 *
 * Sign convention: VnodeEssence.count is subtracted from the on-disk
 * linkCount when a vnode is rewritten (vnode.linkCount - vnp->count), so
 * count-- raises the stored link count and count++ lowers it.
 *
 * NOTE(review): this listing omits many lines of the function
 * (declarations, braces, conditions, comment terminators); only the
 * statements shown are described.
 */
2835 SalvageVolume(register struct InodeSummary *rwIsp, IHandle_t * alinkH)
2837 /* This routine, for now, will only be called for read-write volumes */
2839 int BlocksInVolume = 0, FilesInVolume = 0;
2840 register VnodeClass class;
2841 struct DirSummary rootdir, oldrootdir;
2842 struct VnodeInfo *dirVnodeInfo;
2843 struct VnodeDiskObject vnode;
2844 VolumeDiskData volHeader;
2846 int orphaned, rootdirfound = 0;
2847 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
2848 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
2849 struct VnodeEssence *vep;
2852 afs_sfsize_t nBytes;
2854 VnodeId LFVnode, ThisVnode;
2855 Unique LFUnique, ThisUnique;
2858 vid = rwIsp->volSummary->header.id;
2859 IH_INIT(h, fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
2860 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
2861 assert(nBytes == sizeof(volHeader));
2862 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
2863 assert(volHeader.destroyMe != DESTROY_ME);
2864 /* (should not have gotten this far with DESTROY_ME flag still set!) */
2866 DistilVnodeEssence(vid, vLarge, rwIsp->volSummary->header.largeVnodeIndex,
2868 DistilVnodeEssence(vid, vSmall, rwIsp->volSummary->header.smallVnodeIndex,
2871 dirVnodeInfo = &vnodeInfo[vLarge];
2872 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
2873 SalvageDir(volHeader.name, vid, dirVnodeInfo, alinkH, i, &rootdir,
2877 nt_sync(fileSysDevice);
2879 sync(); /* This used to be done lower level, for every dir */
2886 /* Parse each vnode looking for orphaned vnodes and
2887 * connect them to the tree as orphaned (if requested).
2889 oldrootdir = rootdir;
2890 for (class = 0; class < nVNODECLASSES; class++) {
2891 for (v = 0; v < vnodeInfo[class].nVnodes; v++) {
2892 vep = &(vnodeInfo[class].vnodes[v]);
2893 ThisVnode = bitNumberToVnodeNumber(v, class);
2894 ThisUnique = vep->unique;
2896 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
2897 continue; /* Ignore unused, claimed, and root vnodes */
2899 /* This vnode is orphaned. If it is a directory vnode, then the '..'
2900 * entry in this vnode had incremented the parent link count (In
2901 * JudgeEntry()). We need to go to the parent and decrement that
2902 * link count. But if the parent's unique is zero, then the parent
2903 * link count was not incremented in JudgeEntry().
2905 if (class == vLarge) { /* directory vnode */
2906 pv = vnodeIdToBitNumber(vep->parent);
2907 if (vnodeInfo[vLarge].vnodes[pv].unique != 0)
2908 vnodeInfo[vLarge].vnodes[pv].count++;
2912 continue; /* If no rootdir, can't attach orphaned files */
2914 /* Here we attach orphaned files and directories into the
2915 * root directory, LFVnode, making sure link counts stay correct.
2917 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
2918 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
2919 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
2921 /* Update this orphaned vnode's info. Its parent info and
2922 * link count (do for orphaned directories and files).
2924 vep->parent = LFVnode; /* Parent is the root dir */
2925 vep->unique = LFUnique;
2928 vep->count--; /* Inc link count (root dir will pt to it): count is later subtracted from the on-disk linkCount */
2930 /* If this orphaned vnode is a directory, change '..'.
2931 * The name of the orphaned dir/file is unknown, so we
2932 * build a unique name. No need to CopyOnWrite the directory
2933 * since it is not connected to tree in BK or RO volume and
2934 * won't be visible there.
2936 if (class == vLarge) {
2940 /* Remove and recreate the ".." entry in this orphaned directory */
2941 SetSalvageDirHandle(&dh, vid, fileSysDevice,
2942 vnodeInfo[class].inodes[v]);
2944 pa.Unique = LFUnique;
2945 assert(Delete(&dh, "..") == 0);
2946 assert(Create(&dh, "..", &pa) == 0);
2948 /* The original parent's link count was decremented above.
2949 * Here we increment the new parent's link count.
2951 pv = vnodeIdToBitNumber(LFVnode);
2952 vnodeInfo[vLarge].vnodes[pv].count--;
2956 /* Go to the root dir and add this entry. The link count of the
2957 * root dir was incremented when ".." was created. Try 10 times.
2959 for (j = 0; j < 10; j++) {
2960 pa.Vnode = ThisVnode;
2961 pa.Unique = ThisUnique;
2963 (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
2965 vLarge) ? "__ORPHANDIR__" :
2966 "__ORPHANFILE__"), ThisVnode,
2969 CopyOnWrite(&rootdir);
2970 code = Create(&rootdir.dirHandle, npath, &pa);
2974 ThisUnique += 50; /* Try creating a different file */
2977 Log("Attaching orphaned %s to volume's root dir as %s\n",
2978 ((class == vLarge) ? "directory" : "file"), npath);
2980 } /* for each vnode in the class */
2981 } /* for each class of vnode */
2983 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
2985 if (!oldrootdir.copied && rootdir.copied) {
2987 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
2990 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
2993 DFlush(); /* Flush the changes */
2994 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
2995 Log("Cannot attach orphaned files and directories: Root directory not found\n");
2996 orphans = ORPH_IGNORE;
2999 /* Write out all changed vnodes. Orphaned files and directories
3000 * will get removed here also (if requested).
3002 for (class = 0; class < nVNODECLASSES; class++) {
3003 int nVnodes = vnodeInfo[class].nVnodes;
3004 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3005 struct VnodeEssence *vnodes = vnodeInfo[class].vnodes;
3006 FilesInVolume += vnodeInfo[class].nAllocatedVnodes;
3007 BlocksInVolume += vnodeInfo[class].volumeBlockCount;
3008 for (i = 0; i < nVnodes; i++) {
3009 register struct VnodeEssence *vnp = &vnodes[i];
3010 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
3012 /* If the vnode is good but is unclaimed (not listed in
3013 * any directory entries), then it is orphaned.
3016 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(vnodeNumber))) {
3017 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
3021 if (vnp->changed || vnp->count) {
3025 IH_IREAD(vnodeInfo[class].handle,
3026 vnodeIndexOffset(vcp, vnodeNumber),
3027 (char *)&vnode, sizeof(vnode));
3028 assert(nBytes == sizeof(vnode));
3030 vnode.parent = vnp->parent;
3031 oldCount = vnode.linkCount;
3032 vnode.linkCount = vnode.linkCount - vnp->count;
3035 orphaned = IsVnodeOrphaned(vnodeNumber);
3037 if (!vnp->todelete) {
3038 /* Orphans should have already been attached (if requested) */
3039 assert(orphans != ORPH_ATTACH);
3040 oblocks += vnp->blockCount;
3043 if (((orphans == ORPH_REMOVE) || vnp->todelete)
3045 BlocksInVolume -= vnp->blockCount;
3047 if (VNDISK_GET_INO(&vnode)) {
3049 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
3052 memset(&vnode, 0, sizeof(vnode));
3054 } else if (vnp->count) {
3056 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
3059 vnode.modeBits = vnp->modeBits;
3062 vnode.dataVersion++;
3065 IH_IWRITE(vnodeInfo[class].handle,
3066 vnodeIndexOffset(vcp, vnodeNumber),
3067 (char *)&vnode, sizeof(vnode));
3068 assert(nBytes == sizeof(vnode));
3074 if (!Showmode && ofiles) {
3075 Log("%s %d orphaned files and directories (approx. %u KB)\n",
3077 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
3081 for (class = 0; class < nVNODECLASSES; class++) {
3082 register struct VnodeInfo *vip = &vnodeInfo[class];
3083 for (i = 0; i < vip->nVnodes; i++)
3084 if (vip->vnodes[i].name)
3085 free(vip->vnodes[i].name);
3092 /* Set correct resource utilization statistics */
3093 volHeader.filecount = FilesInVolume;
3094 volHeader.diskused = BlocksInVolume;
3096 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
3097 if (volHeader.uniquifier < (maxunique + 1)) {
3099 Log("Volume uniquifier is too low; fixed\n");
3100 /* Plus 2,000 in case there are workstations out there with
3101 * cached vnodes that have since been deleted
3103 volHeader.uniquifier = (maxunique + 1 + 2000);
3106 /* Turn off the inUse bit; the volume's been salvaged! */
3107 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
3108 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
3109 volHeader.inService = 1; /* allow service again */
3110 volHeader.needsCallback = (VolumeChanged != 0);
3111 volHeader.dontSalvage = DONT_SALVAGE;
3114 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
3115 assert(nBytes == sizeof(volHeader));
3118 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
3119 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
3120 FilesInVolume, BlocksInVolume);
3122 IH_RELEASE(vnodeInfo[vSmall].handle);
3123 IH_RELEASE(vnodeInfo[vLarge].handle);
/*
 * ClearROInUseBit: reset the state flags in the on-disk volume header
 * reached through summary->volumeInfoHandle.
 *
 * Reads the VolumeDiskData header, asserts that the read was complete and
 * the magic number is VOLUMEINFOMAGIC, then clears inUse and
 * needsSalvaged, sets inService to 1 and dontSalvage to DONT_SALVAGE, and
 * writes the header back.  A short read or write is an assertion failure.
 *
 * NOTE(review): a few lines of the function are not shown in this
 * listing.
 */
3129 ClearROInUseBit(struct VolumeSummary *summary)
3131 IHandle_t *h = summary->volumeInfoHandle;
3132 afs_sfsize_t nBytes;
3134 VolumeDiskData volHeader;
3136 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3137 assert(nBytes == sizeof(volHeader));
3138 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3139 volHeader.inUse = 0;
3140 volHeader.needsSalvaged = 0;
3141 volHeader.inService = 1;
3142 volHeader.dontSalvage = DONT_SALVAGE;
3144 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
3145 assert(nBytes == sizeof(volHeader));
3150 * Possibly delete the volume.
3152 * deleteMe - Always do so, only a partial volume.
/*
 * MaybeZapVolume: decide what to do with a volume whose salvage failed.
 *
 *   isp      - inode summary of the volume
 *   message  - text prefixed to the "salvage was unsuccessful" log line
 *   deleteMe - non-zero to delete regardless of volume type
 *
 * If the volume is read-only (readOnly(isp)) or deleteMe is set, and a
 * volume header file name is known, the reason is logged and the header
 * file is unlinked.  Otherwise, when `check` is zero, the failure is
 * logged and the salvage is ended through Abort().
 *
 * NOTE(review): the rest of the parameter list (including `check`) and
 * the conditions choosing between the two log messages are on lines not
 * shown in this listing.
 */
3155 MaybeZapVolume(register struct InodeSummary *isp, char *message, int deleteMe,
3158 if (readOnly(isp) || deleteMe) {
3159 if (isp->volSummary && isp->volSummary->fileName) {
3162 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
3164 Log("It will be deleted on this server (you may find it elsewhere)\n");
3167 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
3169 Log("it will be deleted instead. It should be recloned.\n");
3172 unlink(isp->volSummary->fileName);
3174 } else if (!check) {
3175 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
3177 Abort("Salvage of volume %u aborted\n", isp->volumeId);
/*
 * AskOffline: ask the file server, through FSYNC_VolOp with
 * FSYNC_VOL_OFF / FSYNC_SALVAGE, to take `volumeId` on `partition`
 * offline so that it can be salvaged.
 *
 * The request is attempted up to three times.  SYNC_DENIED and
 * SYNC_BAD_COMMAND are fatal: a message is logged and Abort() is called.
 * Any other failure is logged, the FSYNC client is shut down with
 * FSYNC_clientFinis() and the request is retried.  If the last result is
 * still not SYNC_OK after the loop, the salvage is aborted.
 *
 * NOTE(review): the preprocessor guards here test DEMAND_ATTACH_ENABLE,
 * whereas other parts of this file test AFS_DEMAND_ATTACH_FS -- confirm
 * that the intended macro is used.  The success branch and the
 * reconnect step are on lines not shown in this listing.
 */
3183 AskOffline(VolumeId volumeId, char * partition)
3187 for (i = 0; i < 3; i++) {
3188 code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_OFF, FSYNC_SALVAGE, NULL);
3190 if (code == SYNC_OK) {
3192 } else if (code == SYNC_DENIED) {
3193 #ifdef DEMAND_ATTACH_ENABLE
3194 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
3196 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
3198 Abort("Salvage aborted\n");
3199 } else if (code == SYNC_BAD_COMMAND) {
3200 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
3202 #ifdef DEMAND_ATTACH_ENABLE
3203 Log("AskOffline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
3205 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
3207 Abort("Salvage aborted\n");
3210 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
3211 FSYNC_clientFinis();
3215 if (code != SYNC_OK) {
3216 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
3217 Abort("Salvage aborted\n");
/*
 * AskOnline: ask the file server, through FSYNC_VolOp with
 * FSYNC_VOL_ON / FSYNC_WHATEVER, to bring `volumeId` on `partition` back
 * online.
 *
 * The request is attempted up to three times.  Unlike AskOffline, no
 * visible branch calls Abort(): a denial or protocol mismatch is only
 * logged.  For other failures the FSYNC client is shut down with
 * FSYNC_clientFinis() before the next attempt.
 *
 * NOTE(review): the generic-failure message below says "take volume
 * offline" although this is the online request -- the text looks copied
 * from AskOffline; confirm before changing, as it is a runtime string.
 * The success branch and any reconnect step are on lines not shown in
 * this listing.
 */
3222 AskOnline(VolumeId volumeId, char *partition)
3226 for (i = 0; i < 3; i++) {
3227 code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
3229 if (code == SYNC_OK) {
3231 } else if (code == SYNC_DENIED) {
3232 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, partition);
3233 } else if (code == SYNC_BAD_COMMAND) {
3234 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
3236 #ifdef DEMAND_ATTACH_ENABLE
3237 Log("AskOnline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
3239 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
3244 Log("AskOnline: request for fileserver to take volume offline failed; trying again...\n");
3245 FSYNC_clientFinis();
/*
 * CopyInode: copy the contents of inode1 into inode2 on `device`.
 *
 * Both inodes are opened through handles initialised with the same
 * device and `rwvolume`.  Data is moved in buffer-sized chunks:
 * FDH_READ from the source until it returns 0 or less, with every
 * FDH_WRITE asserted to write the full chunk.  Both descriptors are then
 * closed with FDH_REALLYCLOSE.  Failure to open the source is an
 * assertion failure.
 *
 * NOTE(review): the buffer declaration, the check on the destination
 * open and the release of the handles are on lines not shown in this
 * listing.
 */
3252 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
3254 /* Volume parameter is passed in case iopen is upgraded in future to
3255 * require a volume Id to be passed
3258 IHandle_t *srcH, *destH;
3259 FdHandle_t *srcFdP, *destFdP;
3262 IH_INIT(srcH, device, rwvolume, inode1);
3263 srcFdP = IH_OPEN(srcH);
3264 assert(srcFdP != NULL);
3265 IH_INIT(destH, device, rwvolume, inode2);
3266 destFdP = IH_OPEN(destH);
3268 while ((n = FDH_READ(srcFdP, buf, sizeof(buf))) > 0)
3269 assert(FDH_WRITE(destFdP, buf, n) == n);
3271 FDH_REALLYCLOSE(srcFdP);
3272 FDH_REALLYCLOSE(destFdP);
/*
 * PrintInodeList: log every record of the inode file open on inodeFd.
 *
 * The file size is taken from afs_fstat, the whole file is read into one
 * heap buffer, and each struct ViceInodeInfo record is written with
 * Log(): inode number, link count, byte count and the four parameter
 * words.  A failed fstat, allocation or short read is an assertion
 * failure.
 *
 * NOTE(review): the release of `buf` is not among the lines shown in
 * this listing -- confirm it is freed.
 */
3279 PrintInodeList(void)
3281 register struct ViceInodeInfo *ip;
3282 struct ViceInodeInfo *buf;
3283 struct afs_stat status;
3286 assert(afs_fstat(inodeFd, &status) == 0);
3287 buf = (struct ViceInodeInfo *)malloc(status.st_size);
3288 assert(buf != NULL);
/* Number of whole records in the file. */
3289 nInodes = status.st_size / sizeof(struct ViceInodeInfo);
3290 assert(read(inodeFd, buf, status.st_size) == status.st_size);
3291 for (ip = buf; nInodes--; ip++) {
3292 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
3293 PrintInode(NULL, ip->inodeNumber), ip->linkCount,
3294 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
3295 ip->u.param[2], ip->u.param[3]);
/*
 * PrintInodeSummary: log one line for each of the nVolumesInInodeFile
 * entries of the global inodeSummary array, showing the volume id, RW
 * volume id, index, inode counts and maximum uniquifier.
 */
3301 PrintInodeSummary(void)
3304 struct InodeSummary *isp;
3306 for (i = 0; i < nVolumesInInodeFile; i++) {
3307 isp = &inodeSummary[i];
3308 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/*
 * PrintVolumeSummary: log the header file name of each of the nVolumes
 * entries starting at volumeSummaryp.  Only fileName is printed; the
 * words "header, wouldNeedCallback" in the format are literal text with
 * no matching arguments.
 */
3313 PrintVolumeSummary(void)
3316 struct VolumeSummary *vsp;
3318 for (i = 0, vsp = volumeSummaryp; i < nVolumes; vsp++, i++) {
3319 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
3329 assert(0); /* Fork is never executed in the NT code path */
3333 #ifdef AFS_DEMAND_ATTACH_FS
3334 if ((f == 0) && (programType == salvageServer)) {
3335 /* we are a salvageserver child */
3336 #ifdef FSSYNC_BUILD_CLIENT
3337 VChildProcReconnectFS_r();
3339 #ifdef SALVSYNC_BUILD_CLIENT
3343 #endif /* AFS_DEMAND_ATTACH_FS */
3344 #endif /* !AFS_NT40_ENV */
3355 #ifdef AFS_DEMAND_ATTACH_FS
3356 if (programType == salvageServer) {
3357 #ifdef SALVSYNC_BUILD_CLIENT
3360 #ifdef FSSYNC_BUILD_CLIENT
3364 #endif /* AFS_DEMAND_ATTACH_FS */
3367 if (main_thread != pthread_self())
3368 pthread_exit((void *)code);
3381 pid = wait(&status);
3383 if (WCOREDUMP(status))
3384 Log("\"%s\" core dumped!\n", prog);
3385 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp: format `clock` as local time into a static 20-byte buffer.
 *
 * Two formats are used: "%m/%d/%Y %H:%M:%S" (19 characters plus the
 * terminator, exactly filling the buffer) and "%m/%d/%Y %H:%M".
 *
 * Because the buffer is static, each call overwrites the previous
 * result.
 *
 * NOTE(review): the test that chooses between the two formats
 * (presumably on `precision`) and the return statement are on lines not
 * shown in this listing.
 */
3391 TimeStamp(time_t clock, int precision)
3394 static char timestamp[20];
3395 lt = localtime(&clock);
3397 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M:%S", lt);
3399 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M", lt);
/*
 * CheckLogFile: rotate and open the salvager log at `log_path`.
 *
 * The existing log is renamed to "<log_path>.old" and a new one is
 * opened in append mode into the global logFile.  When the open fails,
 * the code falls back to another stream (stdout, per the existing
 * comment).
 *
 * NOTE(review): the ".old" name is built with strcpy/strcat into a
 * buffer of AFSDIR_PATH_MAX bytes; no length check on log_path is
 * visible here -- confirm callers guarantee it fits.  The body of the
 * AFS_NT40_ENV conditional and the fallback assignment are on lines not
 * shown in this listing.
 */
3404 CheckLogFile(char * log_path)
3406 char oldSlvgLog[AFSDIR_PATH_MAX];
3408 #ifndef AFS_NT40_ENV
3415 strcpy(oldSlvgLog, log_path);
3416 strcat(oldSlvgLog, ".old");
3418 renamefile(log_path, oldSlvgLog);
3419 logFile = afs_fopen(log_path, "a");
3421 if (!logFile) { /* still nothing, use stdout */
3425 #ifndef AFS_NAMEI_ENV
3426 AFS_DEBUG_IOPS_LOG(logFile);
3431 #ifndef AFS_NT40_ENV
/*
 * TimeStampLogFile: give the log at `log_path` an additional,
 * timestamped name.
 *
 * The new name is "<log_path>.YYYY-MM-DD.HH:MM:SS", built from the local
 * time held in `now`, and is created as a hard link to the log file.
 * The result of link() is deliberately ignored.
 *
 * NOTE(review): the declaration and assignment of `now` and the final
 * snprintf argument are on lines not shown in this listing.
 */
3433 TimeStampLogFile(char * log_path)
3435 char stampSlvgLog[AFSDIR_PATH_MAX];
3440 lt = localtime(&now);
3441 (void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
3442 "%s.%04d-%02d-%02d.%02d:%02d:%02d",
3443 log_path, lt->tm_year + 1900,
3444 lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
3447 /* try to link the logfile to a timestamped filename */
3448 /* if it fails, oh well, nothing we can do */
3449 link(log_path, stampSlvgLog);
3458 #ifndef AFS_NT40_ENV
3460 printf("Can't show log since using syslog.\n");
3469 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
3472 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
3475 while (fgets(line, sizeof(line), logFile))
/*
 * Log: printf-style logging routine of the salvager.
 *
 * The message is formatted into the local buffer `tmp` with
 * afs_vsnprintf (bounded by sizeof tmp).  On non-NT builds it can be
 * sent to syslog at LOG_INFO; otherwise it is written to logFile,
 * prefixed with the current time from TimeStamp(now.tv_sec, 1).
 *
 * NOTE(review): the condition selecting syslog, the va_end call and any
 * flush of logFile are on lines not shown in this listing.
 */
3482 Log(const char *format, ...)
3488 va_start(args, format);
3489 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
3491 #ifndef AFS_NT40_ENV
/* The message is passed as an argument to a fixed "%s" format. */
3493 syslog(LOG_INFO, "%s", tmp);
3497 gettimeofday(&now, 0);
3498 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort: printf-style routine that reports a fatal salvager error.
 *
 * The message is formatted into the local buffer `tmp` with
 * afs_vsnprintf (bounded by sizeof tmp) and written to syslog at
 * LOG_INFO on non-NT builds or to logFile.  Unlike Log(), no timestamp
 * is prefixed.
 *
 * NOTE(review): the statements that actually terminate the program and
 * the condition selecting syslog are on lines not shown in this listing.
 */
3504 Abort(const char *format, ...)
3509 va_start(args, format);
3510 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
3512 #ifndef AFS_NT40_ENV
3514 syslog(LOG_INFO, "%s", tmp);
3518 fprintf(logFile, "%s", tmp);
3533 p = (char *)malloc(strlen(s) + 1);
3540 /* Remove the FORCESALVAGE file
 *
 * RemoveTheForce: when ForceSalvage is set and the salvager is not in
 * Testing mode, change directory to `path` and unlink the file named
 * FORCESALVAGE there.  Nothing is removed when chdir fails, and the
 * result of unlink() is not checked.  Note that the working directory of
 * the process is changed as a side effect.
 */
3542 RemoveTheForce(char *path)
3544 if (!Testing && ForceSalvage) {
3545 if (chdir(path) == 0)
3546 unlink("FORCESALVAGE");
3550 #ifndef AFS_AIX32_ENV
3552 * UseTheForceLuke - see if we can use the force
/*
 * UseTheForceLuke (builds without AFS_AIX32_ENV): report whether a file
 * named FORCESALVAGE exists in directory `path`.
 *
 * Changes the working directory to `path` (asserted to succeed) and
 * returns non-zero when afs_stat("FORCESALVAGE") succeeds, zero
 * otherwise.
 *
 * NOTE(review): the chdir() call sits inside assert(); if assert is the
 * standard macro and NDEBUG is defined, the directory change would not
 * happen -- confirm which assert this file uses.
 */
3555 UseTheForceLuke(char *path)
3557 struct afs_stat force;
3559 assert(chdir(path) != -1);
3561 return (afs_stat("FORCESALVAGE", &force) == 0);
3565 * UseTheForceLuke - see if we can use the force
3568 * The VRMIX fsck will not muck with the filesystem it is supposedly
3569 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
3570 * muck directly with the root inode, which is within the normal
3572 * ListViceInodes() has a side effect of setting ForceSalvage if
3573 * it detects a need, based on root inode examination.
/*
 * UseTheForceLuke (AFS_AIX32_ENV variant): the visible code returns 0
 * without examining `path`; per the comment above, ForceSalvage is set
 * as a side effect of ListViceInodes() on this platform.
 */
3576 UseTheForceLuke(char *path)
3579 return 0; /* sorry OB1 */
3584 /* NT support routines */
3586 static char execpathname[MAX_PATH];
/*
 * nt_SalvagePartition: start a child salvager process for partition
 * `partName` as job number `jobn` (NT only).
 *
 * The path of the running executable is looked up once with
 * GetModuleFileName and cached in execpathname.  A job descriptor
 * (magic, job number, partition name) is filled in and handed to the
 * child through spawnprocveb together with the saved argument vector.
 *
 * NOTE(review): the failure test compares the returned length with the
 * literal 1023 although the size passed is MAX_PATH - 1 -- confirm that
 * truncation is detected as intended.  partName is copied with strcpy
 * and no length check is visible here.  The declarations, the failure
 * action and the return statement are on lines not shown in this
 * listing.
 */
3588 nt_SalvagePartition(char *partName, int jobn)
3593 if (!*execpathname) {
3594 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
3595 if (!n || n == 1023)
3598 job.cj_magic = SALVAGER_MAGIC;
3599 job.cj_number = jobn;
3600 (void)strcpy(job.cj_part, partName);
3601 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage: child-side setup for a job started by
 * nt_SalvagePartition (NT only).
 *
 *   datap - job descriptor passed from the parent, read as childJob_t
 *   len   - size of that data; must equal sizeof(childJob_t)
 *
 * The descriptor is validated by its size and by its SALVAGER_MAGIC
 * value, then a per-job log file named
 * "<AFSDIR_SERVER_SLVGLOG_FILEPATH>.<n>" is opened for writing into the
 * global logFile.
 *
 * NOTE(review): the actions taken when validation fails, the final
 * sprintf argument and the return value are on lines not shown in this
 * listing.
 */
3606 nt_SetupPartitionSalvage(void *datap, int len)
3608 childJob_t *jobp = (childJob_t *) datap;
3609 char logname[AFSDIR_PATH_MAX];
3611 if (len != sizeof(childJob_t))
3613 if (jobp->cj_magic != SALVAGER_MAGIC)
3618 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
3620 logFile = afs_fopen(logname, "w");
3628 #endif /* AFS_NT40_ENV */