2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
89 #include <afs/procmgmt.h>
93 #include <sys/param.h>
97 #endif /* ITIMER_REAL */
103 #include <sys/stat.h>
108 #include <WINNT/afsevent.h>
111 #define WCOREDUMP(x) ((x) & 0200)
114 #include <afs/afsint.h>
115 #include <afs/afs_assert.h>
116 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
117 #if defined(AFS_VFSINCL_ENV)
118 #include <sys/vnode.h>
120 #include <sys/fs/ufs_inode.h>
122 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
123 #include <ufs/ufs/dinode.h>
124 #include <ufs/ffs/fs.h>
126 #include <ufs/inode.h>
129 #else /* AFS_VFSINCL_ENV */
131 #include <ufs/inode.h>
132 #else /* AFS_OSF_ENV */
133 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_ARM_DARWIN_ENV)
134 #include <sys/inode.h>
137 #endif /* AFS_VFSINCL_ENV */
138 #endif /* AFS_SGI_ENV */
141 #include <sys/lockf.h>
145 #include <checklist.h>
147 #if defined(AFS_SGI_ENV)
152 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
155 #include <sys/mnttab.h>
156 #include <sys/mntent.h>
161 #endif /* AFS_SGI_ENV */
162 #endif /* AFS_HPUX_ENV */
167 #include <afs/osi_inode.h>
171 #include <afs/afsutil.h>
172 #include <afs/fileutil.h>
173 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
181 #include <afs/afssyscalls.h>
185 #include "partition.h"
186 #include "daemon_com.h"
188 #include "volume_inline.h"
189 #include "salvsync.h"
190 #include "viceinode.h"
192 #include "volinodes.h" /* header magic number, etc. stuff */
193 #include "vol-salvage.h"
195 #include "vol_internal.h"
197 #include <afs/prs_fs.h>
199 #ifdef FSSYNC_BUILD_CLIENT
200 #include "vg_cache.h"
208 extern void *calloc();
210 static char *TimeStamp(time_t clock, int precision);
/*
 * Salvager option state.  The trailing comment on each variable names the
 * command-line flag associated with it; the initialisers shown are the
 * compiled-in defaults.  Testing is only declared here (extern).
 */
213 int debug; /* -d flag */
214 extern int Testing; /* -n flag */
215 int ListInodeOption; /* -i flag */
216 int ShowRootFiles; /* -r flag */
217 int RebuildDirs; /* -sal flag */
/* When Parallel == 1 SalvageFileSysParallel calls SalvageFileSys() directly;
 * otherwise it waits for a job to finish once numjobs >= Parallel. */
218 int Parallel = 4; /* -para X flag */
/* Divisor applied to a partition's device number by the SameDisk() macro. */
219 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
220 int forceR = 0; /* -b flag */
221 int ShowLog = 0; /* -showlog flag */
222 int ShowSuid = 0; /* -showsuid flag */
223 int ShowMounts = 0; /* -showmounts flag */
224 int orphans = ORPH_IGNORE; /* -orphans option */
229 int useSyslog = 0; /* -syslog flag */
230 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
/* Capacity of the static jobs[] table in SalvageFileSysParallel. */
239 #define MAXPARALLEL 32
241 int OKToZap; /* -o flag */
242 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
243 * in the volume header */
/* Log stream; SalvageFileSysParallel reopens it per job via afs_fopen(). */
245 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
247 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
250 * information that is 'global' to a particular salvage job.
253 Device fileSysDevice; /**< The device number of the current partition
255 char fileSysPath[8]; /**< The path of the mounted partition currently
256 * being salvaged, i.e. the directory containing
257 * the volume headers */
258 char *fileSysPathName; /**< NT needs this to make name pretty log. */
259 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
260 int VGLinkH_cnt; /**< # of references to lnk handle. */
261 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
264 char *fileSysDeviceName; /**< The block device where the file system being
265 * salvaged was mounted */
266 char *filesysfulldev;
268 int VolumeChanged; /**< Set by any routine which would change the
269 * volume in a way which would require callbacks
270 * to be broken if the volume was put back on
271 * on line by an active file server */
273 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
274 * header dealt with */
276 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
277 FD_t inodeFd; /**< File descriptor for inode file */
279 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
280 int nVolumes; /**< Number of volumes (read-write and read-only)
281 * in volume summary */
282 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
285 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
286 * vnodes in the volume that
287 * we are currently looking
289 int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
290 * to contact the fileserver over FSYNC */
297 /* Forward declarations */
298 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
299 static int AskVolumeSummary(struct SalvInfo *salvinfo,
300 VolumeId singleVolumeNumber);
301 static void MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId);
303 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
304 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
305 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
307 /* Uniquifier stored in the Inode */
312 return (u & 0x3fffff);
314 #if defined(AFS_SGI_EXMAG)
315 return (u & SGI_UNIQMASK);
318 #endif /* AFS_SGI_EXMAG */
325 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
327 return 0; /* otherwise may be transient, e.g. EMFILE */
332 char *save_args[MAX_ARGS];
334 extern pthread_t main_thread;
335 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
339 * Get the salvage lock if not already held. Hold until process exits.
341 * @param[in] locktype READ_LOCK or WRITE_LOCK
/*
 * Take the salvager lock file named by AFSDIR_SERVER_SLVGLOCK_FILEPATH in the
 * mode given by locktype, using VLockFileInit() followed by VLockFileLock().
 * The two string fragments below are the diagnostics for the failure cases
 * visible here: another salvager appears to be running, or the lock attempt
 * produced an error code.
 * NOTE(review): offset and nonblock are set on lines not shown here.
 */
344 _ObtainSalvageLock(int locktype)
346 struct VLockFile salvageLock;
351 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
353 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
356 "salvager: There appears to be another salvager running! "
361 "salvager: Error %d trying to acquire salvage lock! "
/* Acquire the salvage lock in WRITE_LOCK mode via _ObtainSalvageLock(). */
367 ObtainSalvageLock(void)
369 _ObtainSalvageLock(WRITE_LOCK);
/* Acquire the salvage lock in READ_LOCK mode via _ObtainSalvageLock(). */
372 ObtainSharedSalvageLock(void)
374 _ObtainSalvageLock(READ_LOCK);
378 #ifdef AFS_SGI_XFS_IOPS_ENV
379 /* Check if the given partition is mounted. For XFS, the root inode is not a
380 * constant. So we check the hard way.
/**
 * Check whether the given partition is currently mounted.
 *
 * The table of mounted file systems (MOUNTED) is scanned for an entry whose
 * mount directory equals part.
 *
 * @param[in] part  mount directory to look for
 *
 * @return 1 if part appears in the mount table, 0 otherwise
 */
int
IsPartitionMounted(char *part)
{
    FILE *mntfp;
    struct mntent *mntent;
    int found;

    /* Keep the call outside osi_Assert(): the table must still be opened
     * should the assertion macro ever expand to nothing. */
    mntfp = setmntent(MOUNTED, "r");
    osi_Assert(mntfp != NULL);

    while ((mntent = getmntent(mntfp)) != NULL) {
        if (strcmp(part, mntent->mnt_dir) == 0)
            break;
    }

    /* A non-NULL entry means the loop stopped on a match.  The previous
     * "mntent ? 1 : 1" reported every partition as mounted. */
    found = (mntent != NULL) ? 1 : 0;

    endmntent(mntfp);

    return found;
}
398 /* Check if the given inode is the root of the filesystem. */
399 #ifndef AFS_SGI_XFS_IOPS_ENV
/*
 * Return non-zero when status->st_ino equals ROOTINODE, i.e. when the stat
 * result describes the root inode of its file system.  Only built when
 * AFS_SGI_XFS_IOPS_ENV is not defined (see the #ifndef just above).
 */
401 IsRootInode(struct afs_stat_st *status)
404 * The root inode is not a fixed value in XFS partitions. So we need to
405 * see if the partition is in the list of mounted partitions. This only
406 * affects the SalvageFileSys path, so we check there.
408 return (status->st_ino == ROOTINODE);
413 #ifndef AFS_NAMEI_ENV
414 /* We don't want to salvage big files filesystems, since we can't put volumes on
/*
 * Read the superblock of devName and report through Log() when it cannot be
 * read, or when IsBigFilesFileSystem() says it is a big-files file system.
 *
 * mountPoint: NOTE(review): not referenced on the lines visible here;
 *             presumably the %s argument of the Log() calls -- confirm.
 * devName:    device name; "/dev/" is prepended unless already present.
 */
418 CheckIfBigFilesFS(char *mountPoint, char *devName)
420 struct superblock fs;
/* A non-zero strncmp() means devName lacks the "/dev/" prefix, so add it.
 * NOTE(review): both copies into name are unbounded and the size of name is
 * not visible here -- confirm devName always fits. */
423 if (strncmp(devName, "/dev/", 5)) {
424 (void)sprintf(name, "/dev/%s", devName);
/* Already a /dev/ path: use it unchanged. */
426 (void)strcpy(name, devName);
429 if (ReadSuper(&fs, name) < 0) {
430 Log("Unable to read superblock. Not salvaging partition %s.\n",
434 if (IsBigFilesFileSystem(&fs)) {
435 Log("Partition %s is a big files filesystem, not salvaging.\n",
/*
 * SameDisk: decide whether two partitions live on the same disk.
 * Two implementations are visible: a function that compares the
 * QueryDosDevice() results for the two devName fields, and a macro that
 * compares device / PartsPerDisk.
 * NOTE(review): the preprocessor conditional selecting between them is not
 * visible here; QueryDosDevice/_strnicmp suggest the function is the
 * Windows variant -- confirm.
 */
445 #define HDSTR "\\Device\\Harddisk"
446 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
448 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
/* Presumably limits the warning below to its first occurrence -- confirm. */
454 static int dowarn = 1;
456 if (!QueryDosDevice(p1->devName, res1, RES_LEN - 1))
/* Non-zero strncmp(): the answer does not start with HDSTR. */
458 if (strncmp(res1, HDSTR, HDLEN)) {
461 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
462 res1, HDSTR, p1->devName);
/* Same query and prefix check for the second partition. */
465 if (!QueryDosDevice(p2->devName, res2, RES_LEN - 1))
467 if (strncmp(res2, HDSTR, HDLEN)) {
470 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
471 res2, HDSTR, p2->devName);
/* Same disk when the two query results match, ignoring case. */
475 return (0 == _strnicmp(res1, res2, RES_LEN - 1));
/* Macro form: same disk when the device numbers fall into the same
 * PartsPerDisk-sized bucket. */
478 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
481 /* This assumes that two partitions with the same device number divided by
482 * PartsPerDisk are on the same disk.
/*
 * Schedule the salvage of one partition, or wait for all outstanding jobs.
 *
 * partP: partition to salvage.  Per the "!partP" tests below, a null partP
 *        means "wait for every running job", after which the per-job log
 *        files are collected.
 *
 * Bookkeeping is kept in function-static state: jobs[] (one chain of
 * struct job per disk, linked through nextjob), numjobs and jobcount.
 * With debug set or Parallel == 1 the partition is handed straight to
 * SalvageFileSys().  Otherwise the new job is chained behind an existing job
 * on the same disk (SameDisk), or started in a job slot; when
 * numjobs >= Parallel, or when draining, wait() reaps a child first.
 *
 * NOTE(review): the loop over fd 0..15 uses a hard-coded bound and its body
 * is not visible here -- confirm what it does to those descriptors.
 */
485 SalvageFileSysParallel(struct DiskPartition64 *partP)
488 struct DiskPartition64 *partP;
489 int pid; /* Pid for this job */
490 int jobnumb; /* Log file job number */
491 struct job *nextjob; /* Next partition on disk to salvage */
493 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
494 struct job *thisjob = 0;
495 static int numjobs = 0;
496 static int jobcount = 0;
502 char logFileName[256];
506 /* We have a partition to salvage. Copy it into thisjob */
507 thisjob = (struct job *)malloc(sizeof(struct job));
509 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
512 memset(thisjob, 0, sizeof(struct job));
513 thisjob->partP = partP;
/* jobcount doubles as the job number used in the per-job log file name. */
514 thisjob->jobnumb = jobcount;
516 } else if (jobcount == 0) {
517 /* We are asking to wait for all jobs (partp == 0), yet we never
520 Log("No file system partitions named %s* found; not salvaged\n",
521 VICE_PARTITION_PREFIX);
525 if (debug || Parallel == 1) {
527 SalvageFileSys(thisjob->partP, 0);
534 /* Check to see if thisjob is for a disk that we are already
535 * salvaging. If it is, link it in as the next job to do. The
536 * jobs array has 1 entry per disk being salvages. numjobs is
537 * the total number of disks currently being salvaged. In
538 * order to keep thejobs array compact, when a disk is
539 * completed, the hightest element in the jobs array is moved
540 * down to now open slot.
542 for (j = 0; j < numjobs; j++) {
543 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
544 /* On same disk, add it to this list and return */
545 thisjob->nextjob = jobs[j]->nextjob;
546 jobs[j]->nextjob = thisjob;
553 /* Loop until we start thisjob or until all existing jobs are finished */
554 while (thisjob || (!partP && (numjobs > 0))) {
555 startjob = -1; /* No new job to start */
557 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
558 /* Either the max jobs are running or we have to wait for all
559 * the jobs to finish. In either case, we wait for at least one
560 * job to finish. When it's done, clean up after it.
562 pid = wait(&wstatus);
563 osi_Assert(pid != -1);
564 for (j = 0; j < numjobs; j++) { /* Find which job it is */
565 if (pid == jobs[j]->pid)
/* The reaped pid must belong to one of our job slots. */
568 osi_Assert(j < numjobs);
569 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
570 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
573 numjobs--; /* job no longer running */
574 oldjob = jobs[j]; /* remember */
575 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
576 free(oldjob); /* free the old job */
578 /* If there is another partition on the disk to salvage, then
579 * say we will start it (startjob). If not, then put thisjob there
580 * and say we will start it.
582 if (jobs[j]) { /* Another partitions to salvage */
583 startjob = j; /* Will start it */
584 } else { /* There is not another partition to salvage */
586 jobs[j] = thisjob; /* Add thisjob */
588 startjob = j; /* Will start it */
590 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
591 startjob = -1; /* Don't start it - already running */
595 /* We don't have to wait for a job to complete */
597 jobs[numjobs] = thisjob; /* Add this job */
599 startjob = numjobs; /* Will start it */
603 /* Start up a new salvage job on a partition in job slot "startjob" */
604 if (startjob != -1) {
606 Log("Starting salvage of file system partition %s\n",
607 jobs[startjob]->partP->name);
609 /* For NT, we not only fork, but re-exec the salvager. Pass in the
610 * commands and pass the child job number via the data path.
613 nt_SalvagePartition(jobs[startjob]->partP->name,
614 jobs[startjob]->jobnumb);
615 jobs[startjob]->pid = pid;
620 jobs[startjob]->pid = pid;
626 for (fd = 0; fd < 16; fd++)
633 openlog("salvager", LOG_PID, useSyslogFacility);
637 (void)afs_snprintf(logFileName, sizeof logFileName,
639 AFSDIR_SERVER_SLVGLOG_FILEPATH,
640 jobs[startjob]->jobnumb);
641 logFile = afs_fopen(logFileName, "w");
646 SalvageFileSys1(jobs[startjob]->partP, 0);
651 } /* while ( thisjob || (!partP && numjobs > 0) ) */
653 /* If waited for all jobs to complete, now collect log files and return */
655 if (!useSyslog) /* if syslogging - no need to collect */
/* One log file per job number, named AFSDIR_SERVER_SLVGLOG_FILEPATH.<i>;
 * each one that opens is read with fgets() and then unlinked. */
658 for (i = 0; i < jobcount; i++) {
659 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
660 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
661 if ((passLog = afs_fopen(logFileName, "r"))) {
662 while (fgets(buf, sizeof(buf), passLog)) {
667 (void)unlink(logFileName);
/*
 * Salvage one partition, or a single volume on it, through SalvageFileSys1().
 *
 * The work runs in the current process when canfork is false or debug is
 * set; otherwise it runs in the process for which Fork() returned 0.
 * NOTE(review): Wait("SalvageFileSys") presumably belongs to the parent
 * branch; the else that would select it is not visible here.
 */
676 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
678 if (!canfork || debug || Fork() == 0) {
679 SalvageFileSys1(partP, singleVolumeNumber);
/* Only a forked child takes this branch (canfork set and not debugging). */
680 if (canfork && !debug) {
685 Wait("SalvageFileSys");
689 get_DevName(char *pbuffer, char *wpath)
691 char pbuf[128], *ptr;
692 strcpy(pbuf, pbuffer);
693 ptr = (char *)strrchr(pbuf, OS_DIRSEPC);
699 ptr = (char *)strrchr(pbuffer, OS_DIRSEPC);
701 strcpy(pbuffer, ptr + 1);
/*
 * Salvage a partition, or the volume group of one volume on it.
 *
 * partP:               partition to work on.
 * singleVolumeNumber:  non-zero to salvage only that volume; 0 for the whole
 *                      partition.
 *
 * Outline of the visible steps:
 *  - zero the per-job SalvInfo and fill in partition, device and path names;
 *  - single volume: connect to the fileserver (VConnectFS, unless running as
 *    salvageServer), AskOffline the volume and, on demand-attach builds,
 *    LockVolume it; whole partition: VLockPartition with useFSYNC = 0;
 *  - ForceSalvage = UseTheForceLuke(path);
 *  - remove leftover salvage.inodes.<suffix> / salvage.temp.<suffix> files;
 *  - create the temporary inode list file, unlink it, and fill it through
 *    GetInodeSummary();
 *  - GetVolumeSummary(), then walk inodeSummary[] in runs sharing one
 *    RWvolumeId, pairing each entry with its volume header, deleting header
 *    files that have no inodes, and calling SalvageVolumeGroup() per run;
 *  - RemoveTheForce() for whole-partition runs; for a single volume outside
 *    Testing mode, AskOnline the volumes again (AskDelete when the requested
 *    volume is not in the summary).
 */
708 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
711 char inodeListPath[256];
712 FD_t inodeFile = INVALID_FD;
713 static char tmpDevName[100];
714 static char wpath[100];
715 struct VolumeSummary *vsp, *esp;
719 struct SalvInfo l_salvinfo;
720 struct SalvInfo *salvinfo = &l_salvinfo;
/* Work on a zeroed, stack-allocated SalvInfo. */
723 memset(salvinfo, 0, sizeof(*salvinfo));
/* inodeFile is initialised to INVALID_FD, so it can only be valid here on a
 * repeated pass.  NOTE(review): the retry label/goto is not visible here. */
726 if (inodeFile != INVALID_FD) {
728 inodeFile = INVALID_FD;
730 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
731 Abort("Raced too many times with fileserver restarts while trying to "
732 "checkout/lock volumes; Aborted\n");
734 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
736 /* unlock all previous volume locks, since we're about to lock them
738 VLockFileReinit(&partP->volLockFile);
740 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
742 salvinfo->fileSysPartition = partP;
743 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
744 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
747 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
748 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
749 name = partP->devName;
751 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
/* NOTE(review): unbounded strcpy into the 100-byte static tmpDevName. */
752 strcpy(tmpDevName, partP->devName);
753 name = get_DevName(tmpDevName, wpath);
754 salvinfo->fileSysDeviceName = name;
755 salvinfo->filesysfulldev = wpath;
758 if (singleVolumeNumber) {
759 #if !(defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL))
760 /* only non-DAFS locks the partition when salvaging a single volume;
761 * DAFS will lock the individual volumes in the VG */
762 VLockPartition(partP->name);
763 #endif /* !(AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL) */
767 /* salvageserver already setup fssync conn for us */
768 if ((programType != salvageServer) && !VConnectFS()) {
769 Abort("Couldn't connect to file server\n");
772 salvinfo->useFSYNC = 1;
773 AskOffline(salvinfo, singleVolumeNumber);
774 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
775 if (LockVolume(salvinfo, singleVolumeNumber)) {
778 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
781 salvinfo->useFSYNC = 0;
782 VLockPartition(partP->name);
786 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
789 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
790 partP->name, name, (Testing ? "(READONLY mode)" : ""));
792 Log("***Forced salvage of all volumes on this partition***\n");
797 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
804 osi_Assert((dirp = opendir(salvinfo->fileSysPath)) != NULL);
805 while ((dp = readdir(dirp))) {
806 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
807 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
809 Log("Removing old salvager temp files %s\n", dp->d_name);
/* NOTE(review): npath is assembled with unbounded strcpy/strcat and its
 * size is not visible here -- confirm it cannot overflow. */
810 strcpy(npath, salvinfo->fileSysPath);
811 strcat(npath, OS_DIRSEP);
812 strcat(npath, dp->d_name);
/* Temporary files go to tmpdir when it is set, else onto the partition. */
818 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
820 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
/* NOTE(review): strncpy with a limit of 255 leaves inodeListPath without a
 * terminating NUL when the source is longer, and the _tempnam() result is
 * neither checked for NULL nor freed. */
821 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
823 snprintf(inodeListPath, 255, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
827 inodeFile = OS_OPEN(inodeListPath, O_RDWR|O_TRUNC|O_CREAT, 0666);
828 if (inodeFile == INVALID_FD) {
829 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
832 /* Using nt_unlink here since we're really using the delete on close
833 * semantics of unlink. In most places in the salvager, we really do
834 * mean to unlink the file at that point. Those places have been
835 * modified to actually do that so that the NT crt can be used there.
837 * jaltman - On NT delete on close cannot be applied to a file while the
838 * process has an open file handle that does not have DELETE file
839 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
840 * delete privileges. As a result the nt_unlink() call will always
843 code = nt_unlink(inodeListPath);
845 code = unlink(inodeListPath);
848 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
851 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
853 if (singleVolumeNumber) {
854 /* the volume group -- let alone the volume -- does not exist,
855 * but we checked it out, so give it back to the fileserver */
856 AskDelete(salvinfo, singleVolumeNumber);
860 salvinfo->inodeFd = inodeFile;
/* NOTE(review): inodeFile was already tested right after OS_OPEN above, so
 * this second check looks redundant -- confirm. */
861 if (salvinfo->inodeFd == INVALID_FD)
862 Abort("Temporary file %s is missing...\n", inodeListPath);
863 OS_SEEK(salvinfo->inodeFd, 0L, SEEK_SET);
864 if (ListInodeOption) {
865 PrintInodeList(salvinfo);
866 if (singleVolumeNumber) {
867 /* We've checked out the volume from the fileserver, and we need
868 * to give it back. We don't know if the volume exists or not,
869 * so we don't know whether to AskOnline or not. Try to determine
870 * if the volume exists by trying to read the volume header, and
871 * AskOnline if it is readable. */
872 MaybeAskOnline(salvinfo, singleVolumeNumber);
876 /* enumerate volumes in the partition.
877 * figure out sets of read-only + rw volumes.
878 * salvage each set, read-only volumes first, then read-write.
879 * Fix up inodes on last volume in set (whether it is read-write
882 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
886 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
887 i < salvinfo->nVolumesInInodeFile; i = j) {
888 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
890 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
892 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
893 struct VolumeSummary *tsp;
894 /* Scan volume list (from partition root directory) looking for the
895 * current rw volume number in the volume list from the inode scan.
896 * If there is one here that is not in the inode volume list,
898 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
900 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
902 /* Now match up the volume summary info from the root directory with the
903 * entry in the volume list obtained from scanning inodes */
/* Stays NULL when no header with this volume id is found below. */
904 salvinfo->inodeSummary[j].volSummary = NULL;
905 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
906 if (tsp->header.id == vid) {
907 salvinfo->inodeSummary[j].volSummary = tsp;
913 /* Salvage the group of volumes (several read-only + 1 read/write)
914 * starting with the current read-only volume we're looking at.
916 SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
919 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
920 for (; vsp < esp; vsp++) {
922 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
925 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
926 RemoveTheForce(salvinfo->fileSysPath);
928 if (!Testing && singleVolumeNumber) {
930 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
931 /* unlock vol headers so the fs can attach them when we AskOnline */
932 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
933 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
935 /* Step through the volumeSummary list and set all volumes on-line.
936 * Most volumes were taken off-line in GetVolumeSummary.
937 * If a volume was deleted, don't tell the fileserver anything, since
938 * we already told the fileserver the volume was deleted back when we
939 * we destroyed the volume header.
940 * Also, make sure we bring the singleVolumeNumber back online first.
943 for (j = 0; j < salvinfo->nVolumes; j++) {
944 if (salvinfo->volumeSummaryp[j].header.id == singleVolumeNumber) {
946 if (!salvinfo->volumeSummaryp[j].deleted) {
947 AskOnline(salvinfo, singleVolumeNumber);
953 /* If singleVolumeNumber is not in our volumeSummary, it means that
954 * at least one other volume in the VG is on the partition, but the
955 * RW volume is not. We've already AskOffline'd it by now, though,
956 * so make sure we don't still have the volume checked out. */
957 AskDelete(salvinfo, singleVolumeNumber);
/* Second pass: every other volume in the summary that was not deleted. */
960 for (j = 0; j < salvinfo->nVolumes; j++) {
961 if (salvinfo->volumeSummaryp[j].header.id != singleVolumeNumber) {
962 if (!salvinfo->volumeSummaryp[j].deleted) {
963 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
969 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
970 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
973 OS_CLOSE(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
/*
 * Remove a volume header file that has no inode data behind it.
 *
 * salvinfo: salvage job state; supplies the partition path, the partition
 *           and the useFSYNC flag.
 * vsp:      summary entry of the header to remove (fileName, header.id,
 *           header.parent are read).
 *
 * Visible steps: log the path (the message says "would have been deleted"
 * when Testing is set), call VDestroyVolumeDiskHeader(), unlink the file
 * while tolerating ENOENT, and AskDelete() the volume when useFSYNC is set.
 * NOTE(review): the conditions guarding these steps are not all visible
 * here.
 */
977 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
/* NOTE(review): sprintf into path is unbounded and the size of path is not
 * visible here -- confirm fileSysPath plus fileName always fits. */
980 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, vsp->fileName);
983 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
986 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
988 Log("Error %ld destroying volume disk header for volume %lu\n",
989 afs_printable_int32_ld(code),
990 afs_printable_uint32_lu(vsp->header.id));
993 /* make sure we actually delete the fileName file; ENOENT
994 * is fine, since VDestroyVolumeDiskHeader probably already
996 if (unlink(path) && errno != ENOENT) {
997 Log("Unable to unlink %s (errno = %d)\n", path, errno);
999 if (salvinfo->useFSYNC) {
1000 AskDelete(salvinfo, vsp->header.id);
/*
 * qsort() comparator for struct ViceInodeInfo records; GetInodeSummary sorts
 * the inode table with it.
 *
 * When at least one record is a special inode (vnodeNumber == INODESPECIAL)
 * the records are first ordered by RW volume id, taken from
 * u.special.parentId for a special inode and from u.vnode.volumeId
 * otherwise; the remaining tests in that branch look at volumeId and the
 * special type.
 * Two ordinary inodes are ordered by volumeId, then vnodeNumber, and then by
 * uniquifier and data version with the tests reversed so that the most
 * desirable of several similar inodes comes first.  Under AFS_3DISPARES and
 * AFS_SGI_EXMAG extra tests compare one value against a "90%" threshold and
 * the other against a "10%" threshold of the field's range.
 *
 * NOTE(review): for two special inodes with equal volumeId and equal type
 * the visible expression yields 1 whichever way round they are passed, so
 * the comparator is not symmetric for that pair -- confirm this is harmless.
 */
1008 CompareInodes(const void *_p1, const void *_p2)
1010 const struct ViceInodeInfo *p1 = _p1;
1011 const struct ViceInodeInfo *p2 = _p2;
/* Branch 1: at least one of the two is a special inode. */
1012 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1013 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
1014 VolumeId p1rwid, p2rwid;
1016 (p1->u.vnode.vnodeNumber ==
1017 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
1019 (p2->u.vnode.vnodeNumber ==
1020 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1021 if (p1rwid < p2rwid)
1023 if (p1rwid > p2rwid)
/* Same RW volume and both special. */
1025 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1026 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
1027 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1028 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1029 if (p1->u.vnode.volumeId == p1rwid)
1031 if (p2->u.vnode.volumeId == p2rwid)
1033 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
/* Same RW volume, exactly one of the two is special. */
1035 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1036 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1037 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
/* Branch 2: two ordinary inodes. */
1039 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1041 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1043 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1045 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1047 /* The following tests are reversed, so that the most desirable
1048 * of several similar inodes comes first */
1049 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1050 #ifdef AFS_3DISPARES
1051 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1052 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1055 #ifdef AFS_SGI_EXMAG
1056 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1057 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1062 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1063 #ifdef AFS_3DISPARES
1064 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1065 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1068 #ifdef AFS_SGI_EXMAG
1069 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1070 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
/* Equal uniquifiers: fall back to the inode data version. */
1075 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1076 #ifdef AFS_3DISPARES
1077 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1078 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1081 #ifdef AFS_SGI_EXMAG
1082 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1083 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1088 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1089 #ifdef AFS_3DISPARES
1090 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1091 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1094 #ifdef AFS_SGI_EXMAG
1095 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1096 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/*
 * Summarise the leading run of inodes in ip[] that share the first entry's
 * volumeId.
 *
 * ip:        first inode record of the run.
 * maxInodes: upper bound on the number of records to examine.
 * summary:   receives volumeId, RWvolumeId, nInodes, nSpecialInodes and
 *            maxUniquifier.
 *
 * The RW volume id starts out equal to the volume id and is replaced by the
 * parentId of a special inode when one is seen.
 * NOTE(review): the counters n and nSpecial are maintained on lines not
 * shown here.
 */
1105 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1106 struct InodeSummary *summary)
1108 VolumeId volume = ip->u.vnode.volumeId;
1109 VolumeId rwvolume = volume;
/* Stop at the first inode of another volume or after maxInodes records. */
1114 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1116 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1118 rwvolume = ip->u.special.parentId;
1119 /* This isn't quite right, as there could (in error) be different
1120 * parent inodes in different special vnodes */
/* Track the largest uniquifier seen. */
1122 if (maxunique < ip->u.vnode.vnodeUniquifier)
1123 maxunique = ip->u.vnode.vnodeUniquifier;
/* Publish the totals for this volume. */
1127 summary->volumeId = volume;
1128 summary->RWvolumeId = rwvolume;
1129 summary->nInodes = n;
1130 summary->nSpecialInodes = nSpecial;
1131 summary->maxUniquifier = maxunique;
/*
 * Inode filter: non-zero when the inode belongs to singleVolumeNumber.
 * A special inode (vnodeNumber == INODESPECIAL) is matched on
 * u.special.parentId, any other inode on u.vnode.volumeId.
 * GetInodeSummary passes this to ListViceInodes() when a single volume was
 * requested.  rock is not used on the lines visible here.
 */
1135 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1137 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1138 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1139 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1144 * Collect list of inodes in file named by path. If a truly fatal error,
1145 * unlink the file and abort. For lesser errors, return -1. The file will
1146 * be unlinked by the caller.
1149 GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolumeNumber)
1153 struct ViceInodeInfo *ip, *ip_save;
1154 struct InodeSummary summary;
1155 char summaryFileName[50];
1156 FD_t summaryFile = INVALID_FD;
1158 char *dev = salvinfo->fileSysPath;
1159 char *wpath = salvinfo->fileSysPath;
1161 char *dev = salvinfo->fileSysDeviceName;
1162 char *wpath = salvinfo->filesysfulldev;
1164 char *part = salvinfo->fileSysPath;
1167 afs_sfsize_t st_size;
1169 /* This file used to come from vfsck; cobble it up ourselves now... */
1171 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1172 singleVolumeNumber ? OnlyOneVolume : 0,
1173 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1175 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1178 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1180 if (forceSal && !ForceSalvage) {
1181 Log("***Forced salvage of all volumes on this partition***\n");
1184 OS_SEEK(inodeFile, 0L, SEEK_SET);
1185 salvinfo->inodeFd = inodeFile;
1186 if (salvinfo->inodeFd == INVALID_FD ||
1187 (st_size = OS_SIZE(salvinfo->inodeFd)) == -1) {
1188 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1190 tdir = (tmpdir ? tmpdir : part);
1192 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1193 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp."));
1195 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1196 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1198 summaryFile = OS_OPEN(summaryFileName, O_RDWR|O_APPEND|O_CREAT, 0666);
1199 if (summaryFile == INVALID_FD) {
1200 Abort("Unable to create inode summary file\n");
1204 /* Using nt_unlink here since we're really using the delete on close
1205 * semantics of unlink. In most places in the salvager, we really do
1206 * mean to unlink the file at that point. Those places have been
1207 * modified to actually do that so that the NT crt can be used there.
1209 * jaltman - As commented elsewhere, this cannot work because fopen()
1210 * does not open files with DELETE and FILE_SHARE_DELETE.
1212 code = nt_unlink(summaryFileName);
1214 code = unlink(summaryFileName);
1217 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1220 if (!canfork || debug || Fork() == 0) {
1221 int nInodes = st_size / sizeof(struct ViceInodeInfo);
1223 OS_CLOSE(summaryFile);
1224 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1225 RemoveTheForce(salvinfo->fileSysPath);
1227 struct VolumeSummary *vsp;
1230 GetVolumeSummary(salvinfo, singleVolumeNumber);
1232 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1234 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1237 Log("%s vice inodes on %s; not salvaged\n",
1238 singleVolumeNumber ? "No applicable" : "No", dev);
1241 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1243 OS_CLOSE(summaryFile);
1245 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1248 if (OS_READ(salvinfo->inodeFd, ip, st_size) != st_size) {
1249 OS_CLOSE(summaryFile);
1250 Abort("Unable to read inode table; %s not salvaged\n", dev);
1252 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1253 if (OS_SEEK(salvinfo->inodeFd, 0, SEEK_SET) == -1
1254 || OS_WRITE(salvinfo->inodeFd, ip, st_size) != st_size) {
1255 OS_CLOSE(summaryFile);
1256 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1261 CountVolumeInodes(ip, nInodes, &summary);
1262 if (OS_WRITE(summaryFile, &summary, sizeof(summary)) != sizeof(summary)) {
1263 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1264 OS_CLOSE(summaryFile);
1267 summary.index += (summary.nInodes);
1268 nInodes -= summary.nInodes;
1269 ip += summary.nInodes;
1272 ip = ip_save = NULL;
1273 /* Following fflush is not fclose, because if it was debug mode would not work */
1274 if (OS_SYNC(summaryFile) == -1) {
1275 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1276 OS_CLOSE(summaryFile);
1279 if (canfork && !debug) {
1284 if (Wait("Inode summary") == -1) {
1285 OS_CLOSE(summaryFile);
1286 Exit(1); /* salvage of this partition aborted */
1290 st_size = OS_SIZE(summaryFile);
1291 osi_Assert(st_size >= 0);
1294 salvinfo->inodeSummary = (struct InodeSummary *)malloc(st_size);
1295 osi_Assert(salvinfo->inodeSummary != NULL);
1296 /* For GNU we need to do lseek to get the file pointer moved. */
1297 osi_Assert(OS_SEEK(summaryFile, 0, SEEK_SET) == 0);
1298 ret = OS_READ(summaryFile, salvinfo->inodeSummary, st_size);
1299 osi_Assert(ret == st_size);
1301 salvinfo->nVolumesInInodeFile = st_size / sizeof(struct InodeSummary);
1302 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1303 salvinfo->inodeSummary[i].volSummary = NULL;
1305 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)st_size);
1306 OS_CLOSE(summaryFile);
1310 /* Comparison routine for volume sort.
1311 This is set up so that a read-write volume comes immediately before
1312 any read-only clones of that volume */
/* Comparator over two struct VolumeSummary entries, received as const void
 * pointers. Entries with different header.parent values are ordered by
 * parent id. Entries sharing a parent are then distinguished by whether
 * either one is the read-write volume (header.id == header.parent), and
 * finally by header.id when both are read-only.
 * NOTE(review): the return statements for the two rw cases are not visible
 * in this listing; going by the comment above, the rw volume presumably
 * sorts first - confirm against the full file. */
1314 CompareVolumes(const void *_p1, const void *_p2)
1316 const struct VolumeSummary *p1 = _p1;
1317 const struct VolumeSummary *p2 = _p2;
1318 if (p1->header.parent != p2->header.parent)
1319 return p1->header.parent < p2->header.parent ? -1 : 1;
1320 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1322 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1324 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1328 * Gleans volumeSummary information by asking the fileserver
1330 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1331 * salvaging a whole partition
1333 * @return whether we obtained the volume summary information or not
1334 * @retval 0 success; we obtained the volume summary information
1335 * @retval -1 we raced with a fileserver restart; volume locks and checkout must be retried
1337 * @retval 1 we did not get the volume summary information; either the
1338 * fileserver responded with an error, or we are not supposed to
1339 * ask the fileserver for the information (e.g. we are salvaging
1340 * the entire partition or we are not the salvageserver)
1342 * @note for non-DAFS, always returns 1
/* Ask the fileserver for the membership of the volume group being salvaged.
 *
 * The body is compiled only when both FSSYNC_BUILD_CLIENT and
 * AFS_DEMAND_ATTACH_FS are defined, and it only queries when running as the
 * salvageserver with a non-zero singleVolumeNumber. Visible steps:
 *  - FSYNC_VGCQuery is issued; while it fails with FSYNC_PART_SCANNING the
 *    query is retried after a sleep that grows by one second per attempt
 *    and is capped at ten seconds;
 *  - a failure with FSYNC_UNKNOWN_VOLID is treated as an empty volume group
 *    whose rw id is singleVolumeNumber;
 *  - if the reported rw id differs from singleVolumeNumber, a salvage of
 *    that volume group is requested through SALVSYNC_LinkVolume and the
 *    process exits with SALSRV_EXIT_VOLGROUP_LINK;
 *  - otherwise VOL_VG_MAX_VOLS summaries are allocated and one is filled in
 *    per non-zero entry of q_res.children, after taking group members other
 *    than singleVolumeNumber offline and locking them;
 *  - the collected summaries are sorted with qsort.
 * NOTE(review): the return statements and several branch lines are not
 * visible in this listing. */
1345 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1348 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1349 if (programType == salvageServer) {
1350 if (singleVolumeNumber) {
1351 FSSYNC_VGQry_response_t q_res;
1353 struct VolumeSummary *vsp;
1355 struct VolumeDiskHeader diskHdr;
1357 memset(&res, 0, sizeof(res));
1359 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1362 * We must wait for the partition to finish scanning before
1363 * can continue, since we will not know if we got the entire
1364 * VG membership unless the partition is fully scanned.
1365 * We could, in theory, just scan the partition ourselves if
1366 * the VG cache is not ready, but we would be doing the exact
1367 * same scan the fileserver is doing; it will almost always
1368 * be faster to wait for the fileserver. The only exceptions
1369 * are if the partition does not take very long to scan, and
1370 * in that case it's fast either way, so who cares?
1372 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1373 Log("waiting for fileserver to finish scanning partition %s...\n",
1374 salvinfo->fileSysPartition->name);
1376 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1377 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1378 * just so small partitions don't need to wait over 10
1379 * seconds every time, and large partitions are generally
1380 * polled only once every ten seconds. */
1381 sleep((i > 10) ? (i = 10) : i);
1383 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1387 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1388 /* This can happen if there's no header for the volume
1389 * we're salvaging, or no headers exist for the VG (if
1390 * we're salvaging an RW). Act as if we got a response
1391 * with no VG members. The headers may be created during
1392 * salvaging, if there are inodes in this VG. */
1394 memset(&q_res, 0, sizeof(q_res));
1395 q_res.rw = singleVolumeNumber;
1399 Log("fileserver refused VGCQuery request for volume %lu on "
1400 "partition %s, code %ld reason %ld\n",
1401 afs_printable_uint32_lu(singleVolumeNumber),
1402 salvinfo->fileSysPartition->name,
1403 afs_printable_int32_ld(code),
1404 afs_printable_int32_ld(res.hdr.reason));
1408 if (q_res.rw != singleVolumeNumber) {
1409 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1410 afs_printable_uint32_lu(singleVolumeNumber),
1411 afs_printable_uint32_lu(q_res.rw));
1412 #ifdef SALVSYNC_BUILD_CLIENT
1413 if (SALVSYNC_LinkVolume(q_res.rw,
1415 salvinfo->fileSysPartition->name,
1417 Log("schedule request failed\n");
1419 #endif /* SALVSYNC_BUILD_CLIENT */
1420 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1423 salvinfo->volumeSummaryp = calloc(VOL_VG_MAX_VOLS, sizeof(struct VolumeSummary));
1424 osi_Assert(salvinfo->volumeSummaryp != NULL);
1426 salvinfo->nVolumes = 0;
1427 vsp = salvinfo->volumeSummaryp;
1429 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1430 char name[VMAXPATHLEN];
1432 if (!q_res.children[i]) {
1436 /* AskOffline for singleVolumeNumber was called much earlier */
1437 if (q_res.children[i] != singleVolumeNumber) {
1438 AskOffline(salvinfo, q_res.children[i]);
1439 if (LockVolume(salvinfo, q_res.children[i])) {
1445 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1447 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1448 afs_printable_uint32_lu(q_res.children[i]));
1453 DiskToVolumeHeader(&vsp->header, &diskHdr);
1454 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1455 vsp->fileName = ToString(name);
1456 salvinfo->nVolumes++;
1460 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1465 Log("Cannot get volume summary from fileserver; falling back to scanning "
1466 "entire partition\n");
1469 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
1474 * count how many volume headers are found by VWalkVolumeHeaders.
1476 * @param[in] dp the disk partition (unused)
1477 * @param[in] name full path to the .vol header (unused)
1478 * @param[in] hdr the header data (unused)
1479 * @param[in] last whether this is the last try or not (unused)
1480 * @param[in] rock actually an afs_int32*; the running count of how many
1481 * volumes we have found
/* VWalkVolumeHeaders callback used for counting volume headers. The rock
 * argument is treated as a pointer to an afs_int32 running count.
 * NOTE(review): the statement that updates the count and the return value
 * are not visible in this listing. */
1486 CountHeader(struct DiskPartition64 *dp, const char *name,
1487 struct VolumeDiskHeader *hdr, int last, void *rock)
1489 afs_int32 *nvols = (afs_int32 *)rock;
1495 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
/* Shared state for the volume header walk. GetVolumeSummary fills one of
 * these in and passes its address as the rock argument; RecordHeader and
 * UnlinkHeader cast rock back to this type. */
1498 struct SalvageScanParams {
1499 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1500 * vol id of the VG we're salvaging */
1501 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1502 * we're filling in */
1503 afs_int32 nVolumes; /**< # of vols we've encountered */
1504 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1505 * # of vols we've alloc'd memory for) */
1506 int retry; /**< do we need to retry vol lock/checkout? */
1507 struct SalvInfo *salvinfo; /**< salvage job info */
1511 * records volume summary info found from VWalkVolumeHeaders.
1513 * Found volumes are also taken offline if they are in the specific volume
1514 * group we are looking for.
1516 * @param[in] dp the disk partition
1517 * @param[in] name full path to the .vol header
1518 * @param[in] hdr the header data
1519 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1520 * @param[in] rock actually a struct SalvageScanParams*, containing the
1521 * information needed to record the volume summary data
1523 * @return operation status
1525 * @retval -1 volume locking raced with fileserver restart; checking out
1526 * and locking volumes needs to be retried
1527 * @retval 1 volume header is mis-named and should be deleted
/* VWalkVolumeHeaders callback that records one volume header.
 *
 * The disk header is converted with DiskToVolumeHeader into a local
 * VolumeSummary. If a single volume is being salvaged and this header is
 * that volume but names a different parent (that is, a clone), the
 * salvageserver logs the fact, asks SALVSYNC_LinkVolume to schedule the
 * parent and exits with SALSRV_EXIT_VOLGROUP_LINK; a separate path logs
 * that the volume is read-only and not salvaged.
 *
 * For headers in scope (partition salvage, or id or parent equal to
 * singleVolumeNumber) the base name of the header file is compared with the
 * VFORMAT name built from the volume id. Volumes other than
 * singleVolumeNumber are taken offline with AskOffline, and LockVolume is
 * called in demand-attach builds. The summary is then copied into
 * params->vsp; finding more than params->totalVolumes headers is fatal
 * (Abort).
 *
 * NOTE(review): the lines that set badname, advance params->vsp and
 * params->nVolumes, and return are not visible in this listing. */
1530 RecordHeader(struct DiskPartition64 *dp, const char *name,
1531 struct VolumeDiskHeader *hdr, int last, void *rock)
1533 char nameShouldBe[64];
1534 struct SalvageScanParams *params;
1535 struct VolumeSummary summary;
1536 VolumeId singleVolumeNumber;
1537 struct SalvInfo *salvinfo;
1539 params = (struct SalvageScanParams *)rock;
1541 singleVolumeNumber = params->singleVolumeNumber;
1542 salvinfo = params->salvinfo;
1544 DiskToVolumeHeader(&summary.header, hdr);
1546 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1547 && summary.header.parent != singleVolumeNumber) {
1549 if (programType == salvageServer) {
1550 #ifdef SALVSYNC_BUILD_CLIENT
1551 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1552 summary.header.id, summary.header.parent);
1553 if (SALVSYNC_LinkVolume(summary.header.parent,
1557 Log("schedule request failed\n");
1560 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1563 Log("%u is a read-only volume; not salvaged\n",
1564 singleVolumeNumber);
1569 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1570 || summary.header.parent == singleVolumeNumber) {
1572 /* check if the header file is incorrectly named */
1574 const char *base = strrchr(name, OS_DIRSEPC);
1581 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1582 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1585 if (strcmp(nameShouldBe, base)) {
1586 /* .vol file has wrong name; retry/delete */
1590 if (!badname || last) {
1591 /* only offline the volume if the header is good, or if this is
1592 * the last try looking at it; avoid AskOffline'ing the same vol
1595 if (singleVolumeNumber
1596 && summary.header.id != singleVolumeNumber) {
1597 /* don't offline singleVolumeNumber; we already did that
1600 AskOffline(salvinfo, summary.header.id);
1602 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
1604 /* don't lock the volume if the header is bad, since we're
1605 * about to delete it anyway. */
1606 if (LockVolume(salvinfo, summary.header.id)) {
1611 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
1615 if (last && !Showmode) {
1616 Log("Volume header file %s is incorrectly named (should be %s "
1617 "not %s); %sdeleted (it will be recreated later, if "
1618 "necessary)\n", name, nameShouldBe, base,
1619 (Testing ? "it would have been " : ""));
1624 summary.fileName = ToString(base);
1627 if (params->nVolumes > params->totalVolumes) {
1628 /* We found more volumes than we found on the first partition walk;
1629 * apparently something created a volume while we were
1630 * partition-salvaging, or we found more than 20 vols when salvaging a
1631 * particular volume. Abort if we detect this, since other programs
1632 * supposed to not touch the partition while it is partition-salvaging,
1633 * and we shouldn't find more than 20 vols in a VG.
1635 Abort("Found %ld vol headers, but should have found at most %ld! "
1636 "Make sure the volserver/fileserver are not running at the "
1637 "same time as a partition salvage\n",
1638 afs_printable_int32_ld(params->nVolumes),
1639 afs_printable_int32_ld(params->totalVolumes));
1642 memcpy(params->vsp, &summary, sizeof(summary));
1650 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1652 * If the header could not be read in at all, the header is always unlinked.
1653 * If instead RecordHeader said the header was bad (that is, the header file
1654 * is mis-named), we only unlink if we are doing a partition salvage, as
1655 * opposed to salvaging a specific volume group.
1657 * @param[in] dp the disk partition
1658 * @param[in] name full path to the .vol header
1659 * @param[in] hdr header data, or NULL if the header could not be read
1660 * @param[in] rock actually a struct SalvageScanParams*, with some information
/* VWalkVolumeHeaders callback that decides whether a bad header file is
 * removed. It logs when the file is not a legitimate volume header, and it
 * calls unlink(name) when dounlink is set, logging errno if that fails.
 * The branch on !params->singleVolumeNumber covers headers that could be
 * read but were rejected by RecordHeader during a partition salvage.
 * NOTE(review): the assignments to dounlink are not visible in this
 * listing. */
1664 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1665 struct VolumeDiskHeader *hdr, void *rock)
1667 struct SalvageScanParams *params;
1670 params = (struct SalvageScanParams *)rock;
1673 /* no header; header is too bogus to read in at all */
1675 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1681 } else if (!params->singleVolumeNumber) {
1682 /* We were able to read in a header, but RecordHeader said something
1683 * was wrong with it. We only unlink those if we are doing a partition
1690 if (dounlink && unlink(name)) {
1691 Log("Error %d while trying to unlink %s\n", errno, name);
1696 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1697 * the fileserver for VG information, or by scanning the /vicepX partition.
1699 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1700 * are salvaging, or 0 if this is a partition
1703 * @return operation status
1705 * @retval -1 we raced with a fileserver restart; checking out and locking
1706 * volumes must be retried
/* Fill salvinfo->volumeSummaryp and salvinfo->nVolumes with one
 * VolumeSummary per volume header.
 *
 * AskVolumeSummary is tried first. If it does not supply the information,
 * the partition is scanned with VWalkVolumeHeaders: for a partition salvage
 * (singleVolumeNumber == 0) a first walk with CountHeader sizes the array,
 * while a single volume group uses VOL_VG_MAX_VOLS entries. A second walk
 * with RecordHeader and UnlinkHeader records the headers, and the result is
 * sorted with qsort.
 *
 * salvinfo            salvage job state; fileSysPartition and fileSysPath
 *                     name the partition to scan
 * singleVolumeNumber  volume group being salvaged, or 0 for the partition
 *
 * Aborts if the partition directory cannot be read or if the header walk
 * fails.
 *
 * The rock handed to the second walk is the address of the local params
 * structure; the listing had this token mis-encoded, and it is restored
 * here as &params.
 */
1709 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1711 afs_int32 nvols = 0;
1712 struct SalvageScanParams params;
1715 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1717 /* we successfully got the vol information from the fileserver; no
1718 * need to scan the partition */
1722 /* we need to retry volume checkout */
1726 if (!singleVolumeNumber) {
1727 /* Count how many volumes we have in /vicepX */
1728 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1731 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1736 nvols = VOL_VG_MAX_VOLS;
1739 salvinfo->volumeSummaryp = calloc(nvols, sizeof(struct VolumeSummary));
1740 osi_Assert(salvinfo->volumeSummaryp != NULL);
1742 params.singleVolumeNumber = singleVolumeNumber;
1743 params.vsp = salvinfo->volumeSummaryp;
1744 params.nVolumes = 0;
1745 params.totalVolumes = nvols;
1747 params.salvinfo = salvinfo;
1749 /* walk the partition directory of volume headers and record the info
1750 * about them; unlinking invalid headers */
1751 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1752 UnlinkHeader, &params);
1754 /* we apparently need to retry checking-out/locking volumes */
1758 Abort("Failed to get volume header summary\n");
1760 salvinfo->nVolumes = params.nVolumes;
1762 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1768 /* Find the link table. This should be associated with the RW volume or, if
1769 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
/* Search the special inodes of each of the nVols volumes in isp for one of
 * type VI_LINKTABLE and return its inode number. The inode records of
 * volume i start at allInodes + isp[i].index, and the first
 * isp[i].nSpecialInodes of them are examined.
 * NOTE(review): the value returned when no link table inode is found is not
 * visible in this listing. */
1772 FindLinkHandle(struct InodeSummary *isp, int nVols,
1773 struct ViceInodeInfo *allInodes)
1776 struct ViceInodeInfo *ip;
1778 for (i = 0; i < nVols; i++) {
1779 ip = allInodes + isp[i].index;
1780 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1781 if (ip[j].u.special.type == VI_LINKTABLE)
1782 return ip[j].inodeNumber;
/* Create and initialise the link table of a volume group.
 *
 * salvinfo  salvage job state; supplies the device, partition path and the
 *           VGLinkH handle that is initialised here
 * isp       inode summary of the volume the table belongs to
 * ino       existing link table inode; when it is not valid a new special
 *           inode of type VI_LINKTABLE is requested with IH_CREATE
 *           (presumably assigned back to ino - the assignment line is not
 *           visible in this listing)
 *
 * The table is opened through salvinfo->VGLinkH, truncated to a version
 * stamp plus one short, and stamped with LINKTABLEMAGIC and
 * LINKTABLEVERSION at offset 0. Every failure is fatal (Abort). The message
 * for a failed FDH_PWRITE used to repeat the truncation text; it now reports
 * a write failure so the two cases can be told apart in the log.
 */
1789 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1791 struct versionStamp version;
1794 if (!VALID_INO(ino))
1796 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
1797 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1798 if (!VALID_INO(ino))
1800 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1801 isp->RWvolumeId, errno);
1802 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1803 fdP = IH_OPEN(salvinfo->VGLinkH);
1805 Abort("Can't open link table for volume %u (error = %d)\n",
1806 isp->RWvolumeId, errno);
1808 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1809 Abort("Can't truncate link table for volume %u (error = %d)\n",
1810 isp->RWvolumeId, errno);
1812 version.magic = LINKTABLEMAGIC;
1813 version.version = LINKTABLEVERSION;
1815 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1817 Abort("Can't write link table for volume %u (error = %d)\n",
1818 isp->RWvolumeId, errno);
1820 FDH_REALLYCLOSE(fdP);
1822 /* If the volume summary exists (i.e., the V*.vol header file exists),
1823 * then set this inode there as well.
1825 if (isp->volSummary)
1826 isp->volSummary->header.linkTable = ino;
/* Body of a thread entry point: unpacks the SVGParms_t passed as arg and
 * runs DoSalvageVolumeGroup with the salvage info, inode summary pointer
 * and volume count it carries.
 * NOTE(review): the signature line is not visible in this listing;
 * presumably this is nt_SVG, the routine SalvageVolumeGroup hands to
 * pthread_create - TODO confirm. */
1835 SVGParms_t *parms = (SVGParms_t *) arg;
1836 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
/* Salvage one volume group. Resets the per-volume-group fields of salvinfo
 * (VolumeChanged, VGLinkH, VGLinkH_cnt, VolInfo), then packs isp, nVols and
 * salvinfo into an SVGParms_t, runs nt_SVG on a joinable pthread and waits
 * for it with pthread_join. Failures of the pthread setup calls are logged.
 * NOTE(review): the closing #endif suggests the thread path belongs to the
 * AFS_NT40_ENV build only; the matching #ifdef and any alternative path are
 * not visible in this listing - confirm against the full file. */
1841 SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1844 pthread_attr_t tattr;
1848 /* Initialize per volume global variables, even if later code does so */
1849 salvinfo->VolumeChanged = 0;
1850 salvinfo->VGLinkH = NULL;
1851 salvinfo->VGLinkH_cnt = 0;
1852 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1854 parms.svgp_inodeSummaryp = isp;
1855 parms.svgp_count = nVols;
1856 parms.svgp_salvinfo = salvinfo;
1857 code = pthread_attr_init(&tattr);
1859 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1863 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1865 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1868 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1870 Log("Failed to create thread to salvage volume group %u\n",
1874 (void)pthread_join(tid, NULL);
1876 #endif /* AFS_NT40_ENV */
/* Salvage every volume of one volume group.
 *
 * isp points at nVols consecutive InodeSummary entries; the first one is
 * treated as the read-write volume when its volumeId equals RWvolumeId and
 * it has special inodes. Outline of the visible steps:
 *  - unless ForceSalvage is set, QuickCheck is consulted first (presumably
 *    returning early when it reports the group is fine);
 *  - when forking is enabled (canfork && !debug) the work runs in a child
 *    process and the parent waits for it;
 *  - the inode records of the whole group are read from salvinfo->inodeFd
 *    into one array;
 *  - under AFS_NAMEI_ENV the link table is located with FindLinkHandle, or
 *    recreated with CreateLinkTable and seeded with link counts of 1;
 *  - volumes are processed from last to first, each one checked twice
 *    (check = 1, then check = 0) through SalvageVolumeHeaderFile and
 *    SalvageVnodes;
 *  - remaining link count differences are applied with IH_DEC and IH_INC;
 *  - SalvageVolume runs the directory checks and VGLinkH is released.
 *
 * NOTE(review): no NULL check of the malloc result is visible before the
 * array is used - confirm against the full file. */
1879 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1881 struct ViceInodeInfo *inodes, *allInodes, *ip;
1882 int i, totalInodes, size, salvageTo;
1886 int dec_VGLinkH = 0;
1888 FdHandle_t *fdP = NULL;
1890 salvinfo->VGLinkH_cnt = 0;
1891 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1892 && isp->nSpecialInodes > 0);
1893 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1894 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1897 if (ShowMounts && !haveRWvolume)
1899 if (canfork && !debug && Fork() != 0) {
1900 (void)Wait("Salvage volume group");
1903 for (i = 0, totalInodes = 0; i < nVols; i++)
1904 totalInodes += isp[i].nInodes;
1905 size = totalInodes * sizeof(struct ViceInodeInfo);
1906 inodes = (struct ViceInodeInfo *)malloc(size);
1907 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1908 * for the partition, if all the inodes
1909 * had been read into memory */
1911 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1913 osi_Assert(OS_READ(salvinfo->inodeFd, inodes, size) == size);
1915 /* Don't try to salvage a read write volume if there isn't one on this
1917 salvageTo = haveRWvolume ? 0 : 1;
1919 #ifdef AFS_NAMEI_ENV
1920 ino = FindLinkHandle(isp, nVols, allInodes);
1921 if (VALID_INO(ino)) {
1922 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1923 fdP = IH_OPEN(salvinfo->VGLinkH);
1925 if (!VALID_INO(ino) || fdP == NULL) {
1926 Log("%s link table for volume %u.\n",
1927 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1929 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1932 struct ViceInodeInfo *ip;
1933 CreateLinkTable(salvinfo, isp, ino);
1934 fdP = IH_OPEN(salvinfo->VGLinkH);
1935 /* Sync fake 1 link counts to the link table, now that it exists */
1937 for (i = 0; i < nVols; i++) {
1938 ip = allInodes + isp[i].index;
1939 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1940 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1947 FDH_REALLYCLOSE(fdP);
1949 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1952 /* Salvage in reverse order--read/write volume last; this way any
1953 * Inodes not referenced by the time we salvage the read/write volume
1954 * can be picked up by the read/write volume */
1955 /* ACTUALLY, that's not done right now--the inodes just vanish */
1956 for (i = nVols - 1; i >= salvageTo; i--) {
1958 struct InodeSummary *lisp = &isp[i];
1959 #ifdef AFS_NAMEI_ENV
1960 /* If only the RO is present on this partition, the link table
1961 * shows up as a RW volume special file. Need to make sure the
1962 * salvager doesn't try to salvage the non-existent RW.
1964 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1965 /* If this only special inode is the link table, continue */
1966 if (inodes->u.special.type == VI_LINKTABLE) {
1973 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1974 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1975 /* Check inodes twice. The second time do things seriously. This
1976 * way the whole RO volume can be deleted, below, if anything goes wrong */
1977 for (check = 1; check >= 0; check--) {
1979 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
1981 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
1982 if (rw && deleteMe) {
1983 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1984 * volume won't be called */
1990 if (rw && check == 1)
1992 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
1993 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
1999 /* Fix actual inode counts */
2002 Log("totalInodes %d\n",totalInodes);
2003 for (ip = inodes; totalInodes; ip++, totalInodes--) {
2004 static int TraceBadLinkCounts = 0;
2005 #ifdef AFS_NAMEI_ENV
2006 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
2007 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
2008 VGLinkH_p1 = ip->u.param[0];
2009 continue; /* Deal with this last. */
2012 if (ip->linkCount != 0 && TraceBadLinkCounts) {
2013 TraceBadLinkCounts--; /* Limit reports, per volume */
2014 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
2016 while (ip->linkCount > 0) {
2017 /* below used to assert, not break */
2019 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2020 Log("idec failed. inode %s errno %d\n",
2021 PrintInode(stmp, ip->inodeNumber), errno);
2027 while (ip->linkCount < 0) {
2028 /* these used to be asserts */
2030 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2031 Log("iinc failed. inode %s errno %d\n",
2032 PrintInode(stmp, ip->inodeNumber), errno);
2039 #ifdef AFS_NAMEI_ENV
2040 while (dec_VGLinkH > 0) {
2041 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2042 Log("idec failed on link table, errno = %d\n", errno);
2046 while (dec_VGLinkH < 0) {
2047 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2048 Log("iinc failed on link table, errno = %d\n", errno);
2055 /* Directory consistency checks on the rw volume */
2057 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2058 IH_RELEASE(salvinfo->VGLinkH);
2060 if (canfork && !debug) {
/* Inspect the volume info header of each of the nVols volumes without doing
 * a full salvage. A header passes when it reads back at full size from
 * vs->header.volumeInfo, carries VOLUMEINFOMAGIC, has dontSalvage equal to
 * DONT_SALVAGE and has needsSalvaged and destroyMe both zero. A passing
 * header that is still marked inUse gets inUse cleared and inService set,
 * and is written back with IH_IWRITE.
 * A first entry with no special inodes in a group of more than one volume
 * is treated as a phantom rw volume.
 * NOTE(review): the return values are not visible in this listing. */
2067 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2069 /* Check headers BEFORE forking */
2073 for (i = 0; i < nVols; i++) {
2074 struct VolumeSummary *vs = isp[i].volSummary;
2075 VolumeDiskData volHeader;
2077 /* Don't salvage just because phantom rw volume is there... */
2078 /* (If a read-only volume exists, read/write inodes must also exist) */
2079 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2083 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2084 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2085 == sizeof(volHeader)
2086 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2087 && volHeader.dontSalvage == DONT_SALVAGE
2088 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2089 if (volHeader.inUse != 0) {
2090 volHeader.inUse = 0;
2091 volHeader.inService = 1;
2093 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2094 != sizeof(volHeader)) {
2110 /* SalvageVolumeHeaderFile
2112 * Salvage the top level V*.vol header file. Make sure the special files
2113 * exist and that there are no duplicates.
2115 * Calls SalvageHeader for each possible type of volume special file.
/* Rebuild and, if needed, rewrite the V*.vol header of one volume from its
 * special inodes. Outline of the visible steps:
 *  - a skip array with one slot per special inode is allocated; if that
 *    fails the salvage continues without duplicate recovery;
 *  - when a volume summary exists, special inodes already referenced by its
 *    header are remembered in goodspecial[];
 *  - tempHeader is reset and stamped with VOLUMEHEADERMAGIC,
 *    VOLUMEHEADERVERSION, the volume id and the RW volume id;
 *  - adjacent special inodes of the same type are reported as duplicates;
 *    goodspecial[] is used to choose between them, otherwise the salvage of
 *    the volume is reported as aborted;
 *  - each special inode of a valid, non-obsolete type is recorded in
 *    tempHeader through stuff[]; its linkCount is decremented when check is
 *    0 and the type is not VI_LINKTABLE;
 *  - SalvageHeader is run for every special inode type, after the link
 *    table slot has been pointed at salvinfo->VGLinkH when that is valid;
 *  - if no volume summary exists one is allocated and
 *    VCreateVolumeDiskHeader is selected; otherwise the stored header is
 *    compared with tempHeader and VWriteVolumeDiskHeader is selected;
 *  - tempHeader is copied into the summary, converted with
 *    VolumeHeaderToDisk and written through the selected function, and the
 *    volumeInfoHandle of the summary is initialised.
 *
 * NOTE(review): the calloc result for isp->volSummary is used without a
 * visible NULL check, and headerName (64 bytes) is filled with strcpy from
 * isp->volSummary->fileName - confirm both against the full file. */
2119 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2120 struct ViceInodeInfo *inodes, int RW,
2121 int check, int *deleteMe)
2124 struct ViceInodeInfo *ip;
2125 int allinodesobsolete = 1;
2126 struct VolumeDiskHeader diskHeader;
2127 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2129 struct VolumeHeader tempHeader;
2130 struct afs_inode_info stuff[MAXINODETYPE];
2132 /* keeps track of special inodes that are probably 'good'; they are
2133 * referenced in the vol header, and are included in the given inodes
2138 } goodspecial[MAXINODETYPE];
2143 memset(goodspecial, 0, sizeof(goodspecial));
2145 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2147 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2149 Log("cannot allocate memory for inode skip array when salvaging "
2150 "volume %lu; not performing duplicate special inode recovery\n",
2151 afs_printable_uint32_lu(isp->volumeId));
2152 /* still try to perform the salvage; the skip array only does anything
2153 * if we detect duplicate special inodes */
2156 init_inode_info(&tempHeader, stuff);
2159 * First, look at the special inodes and see if any are referenced by
2160 * the existing volume header. If we find duplicate special inodes, we
2161 * can use this information to use the referenced inode (it's more
2162 * likely to be the 'good' one), and throw away the duplicates.
2164 if (isp->volSummary && skip) {
2165 /* use tempHeader, so we can use the stuff[] array to easily index
2166 * into the isp->volSummary special inodes */
2167 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2169 for (i = 0; i < isp->nSpecialInodes; i++) {
2170 ip = &inodes[isp->index + i];
2171 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2172 /* will get taken care of in a later loop */
2175 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2176 goodspecial[ip->u.special.type-1].valid = 1;
2177 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2182 memset(&tempHeader, 0, sizeof(tempHeader));
2183 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2184 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2185 tempHeader.id = isp->volumeId;
2186 tempHeader.parent = isp->RWvolumeId;
2188 /* Check for duplicates (inodes are sorted by type field) */
2189 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2190 ip = &inodes[isp->index + i];
2191 if (ip->u.special.type == (ip + 1)->u.special.type) {
2192 afs_ino_str_t stmp1, stmp2;
2194 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2195 /* Will be caught in the loop below */
2199 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2200 ip->u.special.type, isp->volumeId,
2201 PrintInode(stmp1, ip->inodeNumber),
2202 PrintInode(stmp2, (ip+1)->inodeNumber));
2204 if (skip && goodspecial[ip->u.special.type-1].valid) {
2205 Inode gi = goodspecial[ip->u.special.type-1].inode;
2208 Log("using special inode referenced by vol header (%s)\n",
2209 PrintInode(stmp1, gi));
2212 /* the volume header references some special inode of
2213 * this type in the inodes array; are we it? */
2214 if (ip->inodeNumber != gi) {
2216 } else if ((ip+1)->inodeNumber != gi) {
2217 /* in case this is the last iteration; we need to
2218 * make sure we check ip+1, too */
2223 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2231 for (i = 0; i < isp->nSpecialInodes; i++) {
2233 ip = &inodes[isp->index + i];
2234 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2236 Log("Rubbish header inode %s of type %d\n",
2237 PrintInode(stmp, ip->inodeNumber),
2238 ip->u.special.type);
2244 Log("Rubbish header inode %s of type %d; deleted\n",
2245 PrintInode(stmp, ip->inodeNumber),
2246 ip->u.special.type);
2247 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2248 if (skip && skip[i]) {
2249 if (orphans == ORPH_REMOVE) {
2250 Log("Removing orphan special inode %s of type %d\n",
2251 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2254 Log("Ignoring orphan special inode %s of type %d\n",
2255 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2256 /* fall through to the ip->linkCount--; line below */
2259 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2260 allinodesobsolete = 0;
2262 if (!check && ip->u.special.type != VI_LINKTABLE)
2263 ip->linkCount--; /* Keep the inode around */
2271 if (allinodesobsolete) {
2278 salvinfo->VGLinkH_cnt++; /* one for every header. */
2280 if (!RW && !check && isp->volSummary) {
2281 ClearROInUseBit(isp->volSummary);
2285 for (i = 0; i < MAXINODETYPE; i++) {
2286 if (stuff[i].inodeType == VI_LINKTABLE) {
2287 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2288 * And we may have recreated the link table earlier, so set the
2289 * RW header as well.
2291 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2292 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2296 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2300 if (isp->volSummary == NULL) {
2302 char headerName[64];
2303 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2304 (void)afs_snprintf(path, sizeof path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, headerName);
2306 Log("No header file for volume %u\n", isp->volumeId);
2310 Log("No header file for volume %u; %screating %s\n",
2311 isp->volumeId, (Testing ? "it would have been " : ""),
2313 isp->volSummary = calloc(1, sizeof(struct VolumeSummary));
2314 isp->volSummary->fileName = ToString(headerName);
2316 writefunc = VCreateVolumeDiskHeader;
2319 char headerName[64];
2320 /* hack: these two fields are obsolete... */
2321 isp->volSummary->header.volumeAcl = 0;
2322 isp->volSummary->header.volumeMountTable = 0;
2325 (&isp->volSummary->header, &tempHeader,
2326 sizeof(struct VolumeHeader))) {
2327 /* We often remove the name before calling us, so we make a fake one up */
2328 if (isp->volSummary->fileName) {
2329 strcpy(headerName, isp->volSummary->fileName);
2331 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2332 isp->volSummary->fileName = ToString(headerName);
2334 (void)afs_snprintf(path, sizeof path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, headerName);
2336 Log("Header file %s is damaged or no longer valid%s\n", path,
2337 (check ? "" : "; repairing"));
2341 writefunc = VWriteVolumeDiskHeader;
2345 memcpy(&isp->volSummary->header, &tempHeader,
2346 sizeof(struct VolumeHeader));
2349 Log("It would have written a new header file for volume %u\n",
2353 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2354 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2356 Log("Error %ld writing volume header file for volume %lu\n",
2357 afs_printable_int32_ld(code),
2358 afs_printable_uint32_lu(diskHeader.id));
2363 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2364 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader: check, and when allowed repair, one special inode of a
 * volume header.
 *
 * salvinfo - salvage job state; fileSysDevice and fileSysPath are used to
 *            create and open the inode, and VolInfo is filled in when the
 *            inode type is VI_VOLINFO.
 * sp       - describes the special inode: sp->inode points at the inode
 *            number, and inodeType, description, size and stamp are read.
 * isp      - supplies volumeId, RWvolumeId and maxUniquifier.
 * check    - check-only mode; the message below shows that recreating a
 *            header in this mode is treated as an internal error.
 * deleteMe - NOTE(review): not referenced on any line visible here;
 *            presumably set on the destroyMe path -- confirm.
 *
 * Visible steps: a zero inode number is logged and an inode is created
 * with IH_CREATE; the inode is opened, sp->size bytes are read and the
 * magic is compared with sp->stamp.magic; when recreate is set and Testing
 * is not, the file is truncated and either a fresh volume info record
 * (named bogus.<volumeId>, not in service, not blessed) or only sp->stamp
 * is written. For VI_VOLINFO the record is copied to salvinfo->VolInfo and
 * its update or creation time is logged.
 *
 * The caller visible in this file compares the result with -1 when check
 * is set; the return statements themselves are not visible here.
 */
2369 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2370 struct InodeSummary *isp, int check, int *deleteMe)
2373 VolumeDiskData volumeInfo;
2374 struct versionStamp fileHeader;
2383 #ifndef AFS_NAMEI_ENV
2384 if (sp->inodeType == VI_LINKTABLE)
2387 if (*(sp->inode) == 0) {
2389 Log("Missing inode in volume header (%s)\n", sp->description);
2393 Log("Missing inode in volume header (%s); %s\n", sp->description,
2394 (Testing ? "it would have recreated it" : "recreating"));
2397 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2398 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2399 if (!VALID_INO(*(sp->inode)))
2401 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2402 sp->description, errno);
2407 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2408 fdP = IH_OPEN(specH);
2409 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2410 /* bail out early and destroy the volume */
2412 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2419 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2420 sp->description, errno);
2423 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2424 || header.fileHeader.magic != sp->stamp.magic)) {
2426 Log("Part of the header (%s) is corrupted\n", sp->description);
2427 FDH_REALLYCLOSE(fdP);
2431 Log("Part of the header (%s) is corrupted; recreating\n",
2434 /* header can be garbage; make sure we don't read garbage data from
2436 memset(&header, 0, sizeof(header));
2438 if (sp->inodeType == VI_VOLINFO
2439 && header.volumeInfo.destroyMe == DESTROY_ME) {
2442 FDH_REALLYCLOSE(fdP);
2446 if (recreate && !Testing) {
2449 ("Internal error: recreating volume header (%s) in check mode\n",
2451 nBytes = FDH_TRUNC(fdP, 0);
2453 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2454 sp->description, errno);
2456 /* The following code should be moved into vutil.c */
2457 if (sp->inodeType == VI_VOLINFO) {
2459 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2460 header.volumeInfo.stamp = sp->stamp;
2461 header.volumeInfo.id = isp->volumeId;
2462 header.volumeInfo.parentId = isp->RWvolumeId;
/* NOTE(review): unbounded sprintf; the formatted text is short (bogus. plus
 * a decimal id), but the size of volumeInfo.name is not visible here --
 * confirm it is large enough. */
2463 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2464 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2465 isp->volumeId, isp->volumeId);
2466 header.volumeInfo.inService = 0;
2467 header.volumeInfo.blessed = 0;
2468 /* The + 1000 is a hack in case there are any files out in venus caches */
2469 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2470 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2471 header.volumeInfo.needsCallback = 0;
2472 gettimeofday(&tp, 0);
2473 header.volumeInfo.creationDate = tp.tv_sec;
2475 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2476 sizeof(header.volumeInfo), 0);
2477 if (nBytes != sizeof(header.volumeInfo)) {
2480 ("Unable to write volume header file (%s) (errno = %d)\n",
2481 sp->description, errno);
2482 Abort("Unable to write entire volume header file (%s)\n",
2486 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2487 if (nBytes != sizeof(sp->stamp)) {
2490 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2491 sp->description, errno);
2493 ("Unable to write entire version stamp in volume header file (%s)\n",
2498 FDH_REALLYCLOSE(fdP);
2500 if (sp->inodeType == VI_VOLINFO) {
2501 salvinfo->VolInfo = header.volumeInfo;
2505 if (salvinfo->VolInfo.updateDate) {
2506 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2508 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2509 salvinfo->VolInfo.id,
2510 (Testing ? "it would have been " : ""), update);
2512 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2514 Log("%s (%u) not updated (created %s)\n",
2515 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
/*
 * SalvageVnodes: salvage the small and then the large vnode index of one
 * volume of a volume group.
 *
 * rwIsp   - inode summary of the read-write volume; the inode range
 *           (ioffset and nInodes) is always computed from it.
 * thisIsp - inode summary of the volume being salvaged; its volSummary
 *           header supplies the two index inodes. RW is true when this is
 *           the same object as rwIsp.
 * inodes  - inode array; the entries starting at ioffset, which skips the
 *           special inodes, are handed to SalvageIndex.
 * check   - passed through to SalvageIndex. In check mode a result of -1
 *           for the small index is tested before the large index is
 *           processed (the action taken is not visible here).
 *
 * Returns 0 only when both SalvageIndex results are 0, otherwise -1.
 */
2525 SalvageVnodes(struct SalvInfo *salvinfo,
2526 struct InodeSummary *rwIsp,
2527 struct InodeSummary *thisIsp,
2528 struct ViceInodeInfo *inodes, int check)
2530 int ilarge, ismall, ioffset, RW, nInodes;
2531 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2534 RW = (rwIsp == thisIsp);
2535 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2537 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2538 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2539 if (check && ismall == -1)
2542 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2543 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2544 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex: reconcile every vnode of one vnode index file with the
 * inodes found on disk for the volume.
 *
 * salvinfo   - salvage job state; VolumeChanged is set when a vnode is
 *              written back.
 * ino        - inode of the index file, opened under
 *              volSummary->header.parent.
 * class      - vnode class (the caller in this file passes vSmall and
 *              vLarge); selects the VnodeClassInfo entry that gives the
 *              record size and magic.
 * RW         - nonzero when the volume is the read-write volume.
 * ip,nInodes - inode list of the volume. The loops below only move forward
 *              while comparing vnode numbers, so the list is expected to be
 *              ordered by vnode number -- NOTE(review): the sort is done
 *              elsewhere, confirm.
 * check      - check-only mode; asserted never to coincide with a changed
 *              vnode.
 *
 * For each vnode whose type is not vNull the visible code: clears vnodes
 * with inode number 0 or with a bad magic, picks the matching inode,
 * repairs uniquifier, data version, inode number and length from that
 * inode, decrements the link count of the inode that is kept, and clears
 * vnodes that have no matching inode. A changed vnode is written back with
 * IH_IWRITE unless Testing is set.
 *
 * Return statements are not visible here; the caller treats 0 as success
 * and tests for -1 in check mode.
 */
2548 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2549 struct ViceInodeInfo *ip, int nInodes,
2550 struct VolumeSummary *volSummary, int check)
2552 char buf[SIZEOF_LARGEDISKVNODE];
2553 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2555 StreamHandle_t *file;
2556 struct VnodeClassInfo *vcp;
2558 afs_sfsize_t nVnodes;
2559 afs_fsize_t vnodeLength;
2561 afs_ino_str_t stmp1, stmp2;
2565 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2566 fdP = IH_OPEN(handle);
2567 osi_Assert(fdP != NULL);
2568 file = FDH_FDOPEN(fdP, "r+");
2569 osi_Assert(file != NULL);
2570 vcp = &VnodeClassInfo[class];
2571 size = OS_SIZE(fdP->fd_fd);
2572 osi_Assert(size != -1);
/* The first record-sized slot of the index file is not a vnode: the count
 * excludes it and the stream is positioned at vcp->diskSize below. */
2573 nVnodes = (size / vcp->diskSize) - 1;
2575 osi_Assert((nVnodes + 1) * vcp->diskSize == size);
2576 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
2580 for (vnodeIndex = 0;
2581 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2582 nVnodes--, vnodeIndex++) {
2583 if (vnode->type != vNull) {
2584 int vnodeChanged = 0;
2585 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2586 if (VNDISK_GET_INO(vnode) == 0) {
2588 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2589 memset(vnode, 0, vcp->diskSize);
2593 if (vcp->magic != vnode->vnodeMagic) {
2594 /* bad magic #, probably partially created vnode */
2596 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2597 vnodeNumber, afs_printable_uint32_lu(vnode->vnodeMagic),
2598 afs_printable_uint32_lu(vcp->magic));
2599 memset(vnode, 0, vcp->diskSize);
2603 Log("Partially allocated vnode %d deleted.\n",
2605 memset(vnode, 0, vcp->diskSize);
2609 /* ****** Should do a bit more salvage here: e.g. make sure
2610 * vnode type matches what it should be given the index */
2611 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2612 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2613 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2614 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2621 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2622 /* The following doesn't work, because the version number
2623 * is not maintained correctly by the file server */
2624 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2625 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2627 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2633 /* For RW volume, look for vnode with matching inode number;
2634 * if no such match, take the first determined by our sort
2636 struct ViceInodeInfo *lip = ip;
2637 int lnInodes = nInodes;
2639 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2640 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2649 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2650 /* "Matching" inode */
2654 vu = vnode->uniquifier;
2655 iu = ip->u.vnode.vnodeUniquifier;
2656 vd = vnode->dataVersion;
2657 id = ip->u.vnode.inodeDataVersion;
2659 * Because of the possibility of the uniquifier overflows (> 4M)
2660 * we compare them modulo the low 22-bits; we shouldn't worry
2661 * about mismatching since they shouldn't to many old
2662 * uniquifiers of the same vnode...
2664 if (IUnique(vu) != IUnique(iu)) {
2666 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2669 vnode->uniquifier = iu;
2670 #ifdef AFS_3DISPARES
2671 vnode->dataVersion = (id >= vd ?
2674 1887437 ? vd : id) :
2677 1887437 ? id : vd));
2679 #if defined(AFS_SGI_EXMAG)
2680 vnode->dataVersion = (id >= vd ?
2683 15099494 ? vd : id) :
2686 15099494 ? id : vd));
2688 vnode->dataVersion = (id > vd ? id : vd);
2689 #endif /* AFS_SGI_EXMAG */
2690 #endif /* AFS_3DISPARES */
2693 /* don't bother checking for vd > id any more, since
2694 * partial file transfers always result in this state,
2695 * and you can't do much else anyway (you've already
2696 * found the best data you can) */
2697 #ifdef AFS_3DISPARES
2698 if (!vnodeIsDirectory(vnodeNumber)
2699 && ((vd < id && (id - vd) < 1887437)
2700 || ((vd > id && (vd - id) > 1887437)))) {
2702 #if defined(AFS_SGI_EXMAG)
2703 if (!vnodeIsDirectory(vnodeNumber)
2704 && ((vd < id && (id - vd) < 15099494)
2705 || ((vd > id && (vd - id) > 15099494)))) {
2707 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2708 #endif /* AFS_SGI_EXMAG */
2711 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2712 vnode->dataVersion = id;
2717 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2720 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2722 VNDISK_SET_INO(vnode, ip->inodeNumber);
2727 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2729 VNDISK_SET_INO(vnode, ip->inodeNumber);
2732 VNDISK_GET_LEN(vnodeLength, vnode);
2733 if (ip->byteCount != vnodeLength) {
2736 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2741 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2742 VNDISK_SET_LEN(vnode, ip->byteCount);
2746 ip->linkCount--; /* Keep the inode around */
2749 } else { /* no matching inode */
2751 if (VNDISK_GET_INO(vnode) != 0
2752 || vnode->type == vDirectory) {
2753 /* No matching inode--get rid of the vnode */
2755 if (VNDISK_GET_INO(vnode)) {
2757 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2761 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2766 if (VNDISK_GET_INO(vnode)) {
2768 time_t serverModifyTime = vnode->serverModifyTime;
2769 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2773 time_t serverModifyTime = vnode->serverModifyTime;
2774 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2777 memset(vnode, 0, vcp->diskSize);
2780 /* Should not reach here because we checked for
2781 * (inodeNumber == 0) above. And where we zero the vnode,
2782 * we also goto vnodeDone.
2786 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2790 } /* VNDISK_GET_INO(vnode) != 0 */
2792 osi_Assert(!(vnodeChanged && check));
2793 if (vnodeChanged && !Testing) {
2794 osi_Assert(IH_IWRITE
2795 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2796 (char *)vnode, vcp->diskSize)
2798 salvinfo->VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber: look up the in-memory essence of a vnode.
 *
 * The vnode number selects a class (vnodeIdToClass) and an index within
 * that class (vnodeIdToBitNumber). Returns a pointer into
 * salvinfo->vnodeInfo[class].vnodes, or NULL when the index is not below
 * nVnodes for that class.
 */
2809 struct VnodeEssence *
2810 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2813 struct VnodeInfo *vip;
2816 class = vnodeIdToClass(vnodeNumber);
2817 vip = &salvinfo->vnodeInfo[class];
2818 offset = vnodeIdToBitNumber(vnodeNumber);
2819 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite: give a directory a private copy of its data before the
 * salvager modifies it.
 *
 * salvinfo - salvage job state (device, path, large vnode index handle).
 * dir      - directory summary; dir->dirHandle is re-pointed at the copy.
 *
 * dir->copied and Testing are tested first; the statement taken when
 * either is set is not visible here (presumably an early return).
 * Otherwise the visible steps are: flush with DFlush, read the directory
 * vnode from the large vnode index, create a new inode with IH_CREATE
 * (vnode.dataVersion is increased inside that call), copy the old inode
 * into the new one with CopyInode, store the new inode number in the vnode
 * and write the vnode back, then rebuild dir->dirHandle on the new inode.
 * The visible error checks are all osi_Assert calls.
 */
2823 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2825 /* Copy the directory unconditionally if we are going to change it:
2826 * not just if was cloned.
2828 struct VnodeDiskObject vnode;
2829 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2830 Inode oldinode, newinode;
2833 if (dir->copied || Testing)
2835 DFlush(); /* Well justified paranoia... */
2838 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2839 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2841 osi_Assert(code == sizeof(vnode));
2842 oldinode = VNDISK_GET_INO(&vnode);
2843 /* Increment the version number by a whole lot to avoid problems with
2844 * clients that were promised new version numbers--but the file server
2845 * crashed before the versions were written to disk.
2848 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2849 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2851 osi_Assert(VALID_INO(newinode));
2852 osi_Assert(CopyInode(salvinfo->fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2854 VNDISK_SET_INO(&vnode, newinode);
2856 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2857 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2859 osi_Assert(code == sizeof(vnode));
2861 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2862 salvinfo->fileSysDevice, newinode,
2863 &salvinfo->VolumeChanged);
2864 /* Don't delete the original inode right away, because the directory is
2865 * still being scanned.
2871 * This function should either successfully create a new dir, or give up
2872 * and leave things the way they were. In particular, if it fails to write
2873 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage: rebuild a damaged directory into a freshly created inode
 * and switch the directory vnode over to it.
 *
 * salvinfo - salvage job state.
 * dir      - directory summary; on success dir->dirHandle becomes the
 *            handle of the rebuilt directory.
 *
 * Visible steps: read the directory vnode from the large vnode index;
 * create a new inode with IH_CREATE (vnode.dataVersion is increased inside
 * that call); compute parentUnique, which defaults to 1 and is taken from
 * the parent essence when one is found; run DirSalvage from the old
 * handle into the new one. If that fails the new inode is released with
 * IH_DEC and the failure is logged. The result is then verified with
 * DirOK; on failure the new inode is released. On success the vnode gets
 * the new inode number and length and is written back, the device is
 * synced, the old directory file is reopened and really closed, the old
 * inode is released, and dir->dirHandle is replaced.
 */
2877 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2879 struct VnodeDiskObject vnode;
2880 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2881 Inode oldinode, newinode;
2886 afs_int32 parentUnique = 1;
2887 struct VnodeEssence *vnodeEssence;
2892 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2894 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2895 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2897 osi_Assert(lcode == sizeof(vnode));
2898 oldinode = VNDISK_GET_INO(&vnode);
2899 /* Increment the version number by a whole lot to avoid problems with
2900 * clients that were promised new version numbers--but the file server
2901 * crashed before the versions were written to disk.
2904 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2905 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2907 osi_Assert(VALID_INO(newinode));
2908 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2909 &salvinfo->VolumeChanged);
2911 /* Assign . and .. vnode numbers from dir and vnode.parent.
2912 * The uniquifier for . is in the vnode.
2913 * The uniquifier for .. might be set to a bogus value of 1 and
2914 * the salvager will later clean it up.
2916 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2917 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2920 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2922 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2927 /* didn't really build the new directory properly, let's just give up. */
2928 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2929 Log("Directory salvage returned code %d, continuing.\n", code);
2931 Log("also failed to decrement link count on new inode");
2935 Log("Checking the results of the directory salvage...\n");
2936 if (!DirOK(&newdir)) {
2937 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2938 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2939 osi_Assert(code == 0);
2943 VNDISK_SET_INO(&vnode, newinode);
2944 length = Length(&newdir);
2945 VNDISK_SET_LEN(&vnode, length);
2947 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2948 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2950 osi_Assert(lcode == sizeof(vnode));
2953 nt_sync(salvinfo->fileSysDevice);
2955 sync(); /* this is slow, but hopefully rarely called. We don't have
2956 * an open FD on the file itself to fsync.
2960 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
2962 /* make sure old directory file is really closed */
2963 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2964 FDH_REALLYCLOSE(fdP);
2966 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2967 osi_Assert(code == 0);
2968 dir->dirHandle = newdir;
2972 * arguments for JudgeEntry.
/* Bundle handed to JudgeEntry through its untyped arock argument. */
2974 struct judgeEntry_params {
2975 struct DirSummary *dir; /**< directory we're examining entries in */
2976 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/*
 * JudgeEntry: examine one directory entry and repair or delete it.
 *
 * arock       - really a struct judgeEntry_params pointer carrying the
 *               directory summary and the SalvInfo of the current job.
 * name        - entry name.
 * vnodeNumber - vnode the entry refers to.
 * A further parameter, used below as unique, is declared on a signature
 * line that is not visible here.
 *
 * Visible checks, in order:
 *  - an entry whose vnode number has no essence is logged and removed;
 *  - without AFS_NAMEI_ENV, an entry whose vnode has inode number 0 is
 *    removed;
 *  - an even vnode number whose essence has no count, unique or mode bits
 *    (and Showmode is off) is reported as invalid;
 *  - a unique mismatch is repaired by deleting the entry and creating it
 *    again with the unique of the vnode; todelete is set when the vnode
 *    unique is zero or the directory is orphaned;
 *  - the dot and dot-dot entries are replaced when they do not match the
 *    directory or its parent;
 *  - names starting with .__afs are removed and the vnode is marked
 *    unclaimed and todelete;
 *  - any other entry goes through suid/sgid, mount point and root owner
 *    reporting, has its name recorded for large vnodes, and is either
 *    claimed by this directory or removed when another directory owns it.
 * Each visible removal or rewrite calls CopyOnWrite on the directory
 * first. The link count kept in the essence is decremented at the end.
 *
 * NOTE(review): presumably invoked once per entry by a directory
 * enumeration routine; the caller is not visible here -- confirm.
 */
2980 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
2983 struct judgeEntry_params *params = arock;
2984 struct DirSummary *dir = params->dir;
2985 struct SalvInfo *salvinfo = params->salvinfo;
2986 struct VnodeEssence *vnodeEssence;
2987 afs_int32 dirOrphaned, todelete;
2989 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
2991 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
2992 if (vnodeEssence == NULL) {
2994 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2997 CopyOnWrite(salvinfo, dir);
2998 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3003 #ifndef AFS_NAMEI_ENV
3004 /* On AIX machines, don't allow entries to point to inode 0. That is a special
3005 * mount inode for the partition. If this inode were deleted, it would crash
3008 if (vnodeEssence->InodeNumber == 0) {
3009 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
3011 CopyOnWrite(salvinfo, dir);
3012 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3019 if (!(vnodeNumber & 1) && !Showmode
3020 && !(vnodeEssence->count || vnodeEssence->unique
3021 || vnodeEssence->modeBits)) {
3022 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
3023 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3024 vnodeNumber, unique,
3025 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3029 CopyOnWrite(salvinfo, dir);
3030 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3036 /* Check if the Uniquifiers match. If not, change the directory entry
3037 * so its unique matches the vnode unique. Delete if the unique is zero
3038 * or if the directory is orphaned.
3040 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3041 if (!vnodeEssence->unique
3042 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3043 /* This is an orphaned directory. Don't delete the . or ..
3044 * entry. Otherwise, it will get created in the next
3045 * salvage and deleted again here. So Just skip it.
3050 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3053 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3057 fid.Vnode = vnodeNumber;
3058 fid.Unique = vnodeEssence->unique;
3059 CopyOnWrite(salvinfo, dir);
3060 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3062 osi_Assert(Create(&dir->dirHandle, name, &fid) == 0);
3065 return 0; /* no need to continue */
3068 if (strcmp(name, ".") == 0) {
3069 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3072 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3074 CopyOnWrite(salvinfo, dir);
3075 osi_Assert(Delete(&dir->dirHandle, ".") == 0);
3076 fid.Vnode = dir->vnodeNumber;
3077 fid.Unique = dir->unique;
3078 osi_Assert(Create(&dir->dirHandle, ".", &fid) == 0);
3081 vnodeNumber = fid.Vnode; /* Get the new Essence */
3082 unique = fid.Unique;
3083 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3086 } else if (strcmp(name, "..") == 0) {
3089 struct VnodeEssence *dotdot;
3090 pa.Vnode = dir->parent;
3091 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3092 osi_Assert(dotdot != NULL); /* XXX Should not be assert */
3093 pa.Unique = dotdot->unique;
3095 pa.Vnode = dir->vnodeNumber;
3096 pa.Unique = dir->unique;
3098 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3100 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3102 CopyOnWrite(salvinfo, dir);
3103 osi_Assert(Delete(&dir->dirHandle, "..") == 0);
3104 osi_Assert(Create(&dir->dirHandle, "..", &pa) == 0);
3107 vnodeNumber = pa.Vnode; /* Get the new Essence */
3109 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3111 dir->haveDotDot = 1;
3112 } else if (strncmp(name, ".__afs", 6) == 0) {
3114 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3117 CopyOnWrite(salvinfo, dir);
3118 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3120 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3121 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3124 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3125 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3126 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3127 && !(vnodeEssence->modeBits & 0111)) {
3128 afs_sfsize_t nBytes;
3134 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3135 vnodeEssence->InodeNumber);
3138 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3142 size = FDH_SIZE(fdP);
3144 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3145 FDH_REALLYCLOSE(fdP);
3152 nBytes = FDH_PREAD(fdP, buf, size, 0);
3153 if (nBytes == size) {
/* NOTE(review): buf[strlen(buf)-1] indexes before buf when buf holds an
 * empty string. Whether a zero size is rejected by the size check above,
 * and whether buf is terminated after the read, is not visible here --
 * confirm. */
3155 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3156 Log("Volume %u (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3157 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3158 Testing ? "would convert" : "converted");
3159 vnodeEssence->modeBits |= 0111;
3160 vnodeEssence->changed = 1;
3161 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3162 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3163 dir->name ? dir->name : "??", name, buf);
3165 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3166 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3168 FDH_REALLYCLOSE(fdP);
3171 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3172 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3173 if (vnodeIdToClass(vnodeNumber) == vLarge
3174 && vnodeEssence->name == NULL) {
3176 if ((n = (char *)malloc(strlen(name) + 1)))
3178 vnodeEssence->name = n;
3181 /* The directory entry points to the vnode. Check to see if the
3182 * vnode points back to the directory. If not, then let the
3183 * directory claim it (else it might end up orphaned). Vnodes
3184 * already claimed by another directory are deleted from this
3185 * directory: hardlinks to the same vnode are not allowed
3186 * from different directories.
3188 if (vnodeEssence->parent != dir->vnodeNumber) {
3189 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3190 /* Vnode does not point back to this directory.
3191 * Orphaned dirs cannot claim a file (it may belong to
3192 * another non-orphaned dir).
3195 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3197 vnodeEssence->parent = dir->vnodeNumber;
3198 vnodeEssence->changed = 1;
3200 /* Vnode was claimed by another directory */
3203 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3204 } else if (vnodeNumber == 1) {
3205 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3207 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3211 CopyOnWrite(salvinfo, dir);
3212 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3217 /* This directory claims the vnode */
3218 vnodeEssence->claimed = 1;
3220 vnodeEssence->count--;
/*
 * DistilVnodeEssence: read one vnode index file and build the in-memory
 * VnodeEssence table for that vnode class.
 *
 * salvinfo - salvage job state; vnodeInfo[class] is filled in.
 * rwVId    - volume id under which the index inode is opened.
 * class    - vnode class; selects salvinfo->vnodeInfo[class] and the
 *            record size.
 * ino      - inode of the index file.
 * maxu     - in/out: raised to the largest uniquifier seen.
 *
 * Side effects visible here: vip->handle is initialised, vip->nVnodes is
 * derived from the file size, vip->vnodes (and vip->inodes for vLarge) are
 * allocated with calloc, and volumeBlockCount and nAllocatedVnodes are
 * accumulated. A directory vnode found outside the large index is taken
 * out of the allocated count and an attempt is made to zero it on disk;
 * for the large index the inode number is recorded in vip->inodes.
 */
3225 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3226 VnodeClass class, Inode ino, Unique * maxu)
3228 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3229 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3230 char buf[SIZEOF_LARGEDISKVNODE];
3231 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3233 StreamHandle_t *file;
3238 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3239 fdP = IH_OPEN(vip->handle);
3240 osi_Assert(fdP != NULL);
3241 file = FDH_FDOPEN(fdP, "r+");
3242 osi_Assert(file != NULL);
3243 size = OS_SIZE(fdP->fd_fd);
3244 osi_Assert(size != -1);
3245 vip->nVnodes = (size / vcp->diskSize) - 1;
3246 if (vip->nVnodes > 0) {
3247 osi_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3248 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
3249 osi_Assert((vip->vnodes = (struct VnodeEssence *)
3250 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3251 if (class == vLarge) {
3252 osi_Assert((vip->inodes = (Inode *)
3253 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3262 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3263 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3264 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3265 nVnodes--, vnodeIndex++) {
3266 if (vnode->type != vNull) {
3267 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3268 afs_fsize_t vnodeLength;
3269 vip->nAllocatedVnodes++;
3270 vep->count = vnode->linkCount;
3271 VNDISK_GET_LEN(vnodeLength, vnode);
3272 vep->blockCount = nBlocks(vnodeLength);
3273 vip->volumeBlockCount += vep->blockCount;
3274 vep->parent = vnode->parent;
3275 vep->unique = vnode->uniquifier;
3276 if (*maxu < vnode->uniquifier)
3277 *maxu = vnode->uniquifier;
3278 vep->modeBits = vnode->modeBits;
3279 vep->InodeNumber = VNDISK_GET_INO(vnode);
3280 vep->type = vnode->type;
3281 vep->author = vnode->author;
3282 vep->owner = vnode->owner;
3283 vep->group = vnode->group;
3284 if (vnode->type == vDirectory) {
3285 if (class != vLarge) {
3286 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3287 vip->nAllocatedVnodes--;
/* NOTE(review): vnode is a pointer (declared above), so sizeof(vnode) is
 * the size of a pointer, not of the disk record, and &vnode is the address
 * of the pointer variable rather than of buf. As written, only the first
 * pointer-sized bytes of the record are cleared and the bytes written to
 * the index are the pointer value. Presumably vnode and vcp->diskSize were
 * intended -- confirm and fix. */
3288 memset(vnode, 0, sizeof(vnode));
3289 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3290 vnodeIndexOffset(vcp, vnodeNumber),
3291 (char *)&vnode, sizeof(vnode));
3292 salvinfo->VolumeChanged = 1;
3294 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName: build the path of a vnode by recursing up its parent chain.
 *
 * salvinfo - salvage job state, used to look up parent essences.
 * vnode    - vnode number (not referenced on the lines visible here).
 * vp       - essence of that vnode; parent and name are read.
 * path     - output buffer, declared on a signature line not visible here.
 *
 * When vp has a parent and a name, the parent essence is found, and the
 * recursive call for the parent returns nonzero, OS_DIRSEP and vp->name
 * are appended to path with strcat.
 * NOTE(review): no length check is visible at these strcat calls. The
 * caller visible in this file passes a MAXPATHLEN buffer; confirm that a
 * deep or long-named chain cannot exceed it.
 */
3303 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3306 struct VnodeEssence *parentvp;
3312 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3313 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3314 strcat(path, OS_DIRSEP);
3315 strcat(path, vp->name);
3321 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3322 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
* Returns 1 (orphaned) for vnode zero, for a vnode that has no essence,
* and for a vnode that is not claimed; returns 0 for the root directory
* vnode; otherwise the answer is that of the parent vnode, found by
* recursion.
* NOTE(review): the recursion follows vep->parent with no visible depth
* limit; confirm that a loop among claimed parents cannot occur.
3325 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3327 struct VnodeEssence *vep;
3330 return (1); /* Vnode zero does not exist */
3332 return (0); /* The root dir vnode is always claimed */
3333 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3334 if (!vep || !vep->claimed)
3335 return (1); /* Vnode is not claimed - it is orphaned */
3337 return (IsVnodeOrphaned(salvinfo, vep->parent));
3341 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3342 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3343 struct DirSummary *rootdir, int *rootdirfound)
3345 static struct DirSummary dir;
3346 static struct DirHandle dirHandle;
3347 struct VnodeEssence *parent;
3348 static char path[MAXPATHLEN];
3351 if (dirVnodeInfo->vnodes[i].salvaged)
3352 return; /* already salvaged */
3355 dirVnodeInfo->vnodes[i].salvaged = 1;
3357 if (dirVnodeInfo->inodes[i] == 0)
3358 return; /* Not allocated to a directory */
3360 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3361 if (dirVnodeInfo->vnodes[i].parent) {
3362 Log("Bad parent, vnode 1; %s...\n",
3363 (Testing ? "skipping" : "salvaging"));
3364 dirVnodeInfo->vnodes[i].parent = 0;
3365 dirVnodeInfo->vnodes[i].changed = 1;
3368 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3369 if (parent && parent->salvaged == 0)
3370 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3371 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3372 rootdir, rootdirfound);
3375 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3376 dir.unique = dirVnodeInfo->vnodes[i].unique;
3379 dir.parent = dirVnodeInfo->vnodes[i].parent;
3380 dir.haveDot = dir.haveDotDot = 0;
3381 dir.ds_linkH = alinkH;
3382 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3383 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
3385 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3388 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3389 (Testing ? "skipping" : "salvaging"));
3392 CopyAndSalvage(salvinfo, &dir);
3394 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3397 dirHandle = dir.dirHandle;
3400 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3401 &dirVnodeInfo->vnodes[i], path);
3404 /* If enumeration failed for random reasons, we will probably delete
3405 * too much stuff, so we guard against this instead.