2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
89 #include <afs/procmgmt.h>
92 #ifdef HAVE_SYS_FILE_H
93 # include <sys/file.h>
97 #include <WINNT/afsevent.h>
100 #define WCOREDUMP(x) ((x) & 0200)
103 #include <afs/afsint.h>
104 #include <afs/afs_assert.h>
105 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
106 #if defined(AFS_VFSINCL_ENV)
107 #include <sys/vnode.h>
109 #include <sys/fs/ufs_inode.h>
111 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
112 #include <ufs/ufs/dinode.h>
113 #include <ufs/ffs/fs.h>
115 #include <ufs/inode.h>
118 #else /* AFS_VFSINCL_ENV */
120 #include <ufs/inode.h>
121 #else /* AFS_OSF_ENV */
122 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_ARM_DARWIN_ENV)
123 #include <sys/inode.h>
126 #endif /* AFS_VFSINCL_ENV */
127 #endif /* AFS_SGI_ENV */
130 #include <sys/lockf.h>
133 #include <checklist.h>
135 #if defined(AFS_SGI_ENV)
138 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
140 #include <sys/mnttab.h>
141 #include <sys/mntent.h>
146 #endif /* AFS_SGI_ENV */
147 #endif /* AFS_HPUX_ENV */
151 #include <afs/osi_inode.h>
155 #include <afs/afsutil.h>
156 #include <afs/fileutil.h>
161 #include <afs/afssyscalls.h>
165 #include "partition.h"
166 #include "daemon_com.h"
168 #include "volume_inline.h"
169 #include "salvsync.h"
170 #include "viceinode.h"
172 #include "volinodes.h" /* header magic number, etc. stuff */
173 #include "vol-salvage.h"
175 #include "vol_internal.h"
177 #include <afs/prs_fs.h>
179 #ifdef FSSYNC_BUILD_CLIENT
180 #include "vg_cache.h"
188 extern void *calloc();
190 static char *TimeStamp(time_t clock, int precision);
193 int debug; /* -d flag */
194 extern int Testing; /* -n flag */
195 int ListInodeOption; /* -i flag */
196 int ShowRootFiles; /* -r flag */
197 int RebuildDirs; /* -sal flag */
198 int Parallel = 4; /* -para X flag */
199 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
200 int forceR = 0; /* -b flag */
201 int ShowLog = 0; /* -showlog flag */
202 int ShowSuid = 0; /* -showsuid flag */
203 int ShowMounts = 0; /* -showmounts flag */
204 int orphans = ORPH_IGNORE; /* -orphans option */
209 int useSyslog = 0; /* -syslog flag */
210 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
219 #define MAXPARALLEL 32
221 int OKToZap; /* -o flag */
222 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
223 * in the volume header */
225 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
227 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
230 * information that is 'global' to a particular salvage job.
233 Device fileSysDevice; /**< The device number of the current partition
235 char fileSysPath[8]; /**< The path of the mounted partition currently
236 * being salvaged, i.e. the directory containing
237 * the volume headers */
238 char *fileSysPathName; /**< NT needs this to make name pretty log. */
239 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
240 int VGLinkH_cnt; /**< # of references to lnk handle. */
241 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
244 char *fileSysDeviceName; /**< The block device where the file system being
245 * salvaged was mounted */
246 char *filesysfulldev;
248 int VolumeChanged; /**< Set by any routine which would change the
249 * volume in a way which would require callbacks
250 * to be broken if the volume was put back on
251 * on line by an active file server */
253 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
254 * header dealt with */
256 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
257 FD_t inodeFd; /**< File descriptor for inode file */
259 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
260 int nVolumes; /**< Number of volumes (read-write and read-only)
261 * in volume summary */
262 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
265 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
266 * vnodes in the volume that
267 * we are currently looking
269 int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
270 * to contact the fileserver over FSYNC */
277 /* Forward declarations */
278 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
279 static int AskVolumeSummary(struct SalvInfo *salvinfo,
280 VolumeId singleVolumeNumber);
281 static void MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId);
283 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
284 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
285 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
287 /* Uniquifier stored in the Inode */
292 return (u & 0x3fffff);
294 #if defined(AFS_SGI_EXMAG)
295 return (u & SGI_UNIQMASK);
298 #endif /* AFS_SGI_EXMAG */
305 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
307 return 0; /* otherwise may be transient, e.g. EMFILE */
312 char *save_args[MAX_ARGS];
314 extern pthread_t main_thread;
315 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
319 * Get the salvage lock if not already held. Hold until process exits.
321 * @param[in] locktype READ_LOCK or WRITE_LOCK
/*
 * Common worker for ObtainSalvageLock() and ObtainSharedSalvageLock().
 *
 * Initialises a VLockFile for AFSDIR_SERVER_SLVGLOCK_FILEPATH and asks
 * VLockFileLock() for it in the mode given by locktype.  The two message
 * fragments below show that a failed attempt is reported either as
 * "another salvager running" or as a generic error carrying a code.
 *
 * NOTE(review): the declarations of code/offset/nonblock and the branches
 * that select between the two messages are not visible in this view --
 * confirm what happens after the message is emitted.
 */
324 _ObtainSalvageLock(int locktype)
326 struct VLockFile salvageLock;
331 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
333 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
336 "salvager: There appears to be another salvager running! "
341 "salvager: Error %d trying to acquire salvage lock! "
/* Take the salvage lock in WRITE_LOCK mode; thin wrapper over
 * _ObtainSalvageLock(). */
347 ObtainSalvageLock(void)
349 _ObtainSalvageLock(WRITE_LOCK);
/* Take the salvage lock in READ_LOCK mode; thin wrapper over
 * _ObtainSalvageLock(). */
352 ObtainSharedSalvageLock(void)
354 _ObtainSalvageLock(READ_LOCK);
358 #ifdef AFS_SGI_XFS_IOPS_ENV
/* Check if the given partition is mounted. For XFS, the root inode is not a
 * constant. So we check the hard way.
 *
 * Scans the mount table named by MOUNTED and compares each entry's mount
 * directory with part.
 *
 * part: mount point to look for (compared with mnt_dir by exact match).
 *
 * Returns 1 when an entry with that mount directory exists, 0 otherwise.
 * (The previous "mntent ? 1 : 1" returned 1 unconditionally, so the check
 * could never fail.)  Asserts if the mount table cannot be opened.
 */
int
IsPartitionMounted(char *part)
{
    FILE *mntfp;
    struct mntent *mntent;
    int found = 0;

    /* Keep the side effect outside the assertion macro. */
    mntfp = setmntent(MOUNTED, "r");
    osi_Assert(mntfp != NULL);

    while ((mntent = getmntent(mntfp)) != NULL) {
        if (strcmp(part, mntent->mnt_dir) == 0) {
            found = 1;
            break;
        }
    }
    endmntent(mntfp);

    return found;
}
378 /* Check if the given inode is the root of the filesystem. */
379 #ifndef AFS_SGI_XFS_IOPS_ENV
/*
 * Report whether a stat result describes the root of a filesystem.
 *
 * status: stat data for the directory being examined.
 *
 * Returns non-zero when status->st_ino equals ROOTINODE, zero otherwise.
 * This variant is only compiled when AFS_SGI_XFS_IOPS_ENV is not defined.
 */
381 IsRootInode(struct afs_stat_st *status)
384 * The root inode is not a fixed value in XFS partitions. So we need to
385 * see if the partition is in the list of mounted partitions. This only
386 * affects the SalvageFileSys path, so we check there.
388 return (status->st_ino == ROOTINODE);
393 #ifndef AFS_NAMEI_ENV
394 /* We don't want to salvage big files filesystems, since we can't put volumes on
398 CheckIfBigFilesFS(char *mountPoint, char *devName)
400 struct superblock fs;
403 if (strncmp(devName, "/dev/", 5)) {
404 (void)sprintf(name, "/dev/%s", devName);
406 (void)strcpy(name, devName);
409 if (ReadSuper(&fs, name) < 0) {
410 Log("Unable to read superblock. Not salvaging partition %s.\n",
414 if (IsBigFilesFileSystem(&fs)) {
415 Log("Partition %s is a big files filesystem, not salvaging.\n",
425 #define HDSTR "\\Device\\Harddisk"
426 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
/*
 * Windows variant of SameDisk(): decide whether two partitions live on the
 * same physical disk.
 *
 * Each partition's devName is resolved with QueryDosDevice(); a warning is
 * logged when a resolved name does not begin with HDSTR.  The final result
 * is a case-insensitive comparison of the two resolved names over
 * RES_LEN - 1 characters (non-zero means "same disk").
 *
 * NOTE(review): the values returned on the QueryDosDevice() failure and
 * unexpected-prefix paths, and the role of dowarn, are not visible in this
 * view -- confirm against the full source.
 */
428 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
434 static int dowarn = 1;
436 if (!QueryDosDevice(p1->devName, res1, RES_LEN - 1))
438 if (strncmp(res1, HDSTR, HDLEN)) {
441 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
442 res1, HDSTR, p1->devName);
445 if (!QueryDosDevice(p2->devName, res2, RES_LEN - 1))
447 if (strncmp(res2, HDSTR, HDLEN)) {
450 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
451 res2, HDSTR, p2->devName);
455 return (0 == _strnicmp(res1, res2, RES_LEN - 1));
458 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
461 /* This assumes that two partitions with the same device number divided by
462 * PartsPerDisk are on the same disk.
/*
 * Schedule the salvage of one partition, keeping several partitions on
 * different disks in flight at once.
 *
 * partP: partition to salvage, or NULL to mean "wait for every outstanding
 *        job and then gather the per-job logs".
 *
 * Behaviour visible in this function:
 *  - a struct job is allocated for a non-NULL partP and numbered from the
 *    static jobcount;
 *  - when debug is set or Parallel == 1 the partition is handed straight to
 *    SalvageFileSys();
 *  - jobs[] holds one chain per disk being salvaged; a partition on a disk
 *    that already has a chain (per SameDisk()) is linked in via nextjob;
 *  - when numjobs has reached Parallel, or when draining, wait() is used to
 *    reap a finished job, a core dump is reported, and the next partition
 *    of that disk (or the pending job) is started in the freed slot;
 *  - a started job writes to AFSDIR_SERVER_SLVGLOG_FILEPATH.<jobnumb> and
 *    runs SalvageFileSys1() (on NT, nt_SalvagePartition() is used);
 *  - after draining, unless useSyslog is set, each per-job log is read back
 *    line by line and then unlinked.
 *
 * NOTE(review): jobs, numjobs and jobcount are function-level statics, so
 * the routine keeps state between calls.
 */
465 SalvageFileSysParallel(struct DiskPartition64 *partP)
468 struct DiskPartition64 *partP;
469 int pid; /* Pid for this job */
470 int jobnumb; /* Log file job number */
471 struct job *nextjob; /* Next partition on disk to salvage */
473 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
474 struct job *thisjob = 0;
475 static int numjobs = 0;
476 static int jobcount = 0;
482 char logFileName[256];
486 /* We have a partition to salvage. Copy it into thisjob */
487 thisjob = (struct job *)malloc(sizeof(struct job));
489 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
492 memset(thisjob, 0, sizeof(struct job));
493 thisjob->partP = partP;
494 thisjob->jobnumb = jobcount;
496 } else if (jobcount == 0) {
497 /* We are asking to wait for all jobs (partp == 0), yet we never
500 Log("No file system partitions named %s* found; not salvaged\n",
501 VICE_PARTITION_PREFIX);
505 if (debug || Parallel == 1) {
507 SalvageFileSys(thisjob->partP, 0);
514 /* Check to see if thisjob is for a disk that we are already
515 * salvaging. If it is, link it in as the next job to do. The
516 * jobs array has 1 entry per disk being salvages. numjobs is
517 * the total number of disks currently being salvaged. In
518 * order to keep thejobs array compact, when a disk is
519 * completed, the hightest element in the jobs array is moved
520 * down to now open slot.
522 for (j = 0; j < numjobs; j++) {
523 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
524 /* On same disk, add it to this list and return */
525 thisjob->nextjob = jobs[j]->nextjob;
526 jobs[j]->nextjob = thisjob;
533 /* Loop until we start thisjob or until all existing jobs are finished */
/* A NULL partP means "drain": keep looping while any job is still running. */
534 while (thisjob || (!partP && (numjobs > 0))) {
535 startjob = -1; /* No new job to start */
537 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
538 /* Either the max jobs are running or we have to wait for all
539 * the jobs to finish. In either case, we wait for at least one
540 * job to finish. When it's done, clean up after it.
542 pid = wait(&wstatus);
543 osi_Assert(pid != -1);
544 for (j = 0; j < numjobs; j++) { /* Find which job it is */
545 if (pid == jobs[j]->pid)
548 osi_Assert(j < numjobs);
549 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
550 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
553 numjobs--; /* job no longer running */
554 oldjob = jobs[j]; /* remember */
555 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
556 free(oldjob); /* free the old job */
558 /* If there is another partition on the disk to salvage, then
559 * say we will start it (startjob). If not, then put thisjob there
560 * and say we will start it.
562 if (jobs[j]) { /* Another partitions to salvage */
563 startjob = j; /* Will start it */
564 } else { /* There is not another partition to salvage */
566 jobs[j] = thisjob; /* Add thisjob */
568 startjob = j; /* Will start it */
570 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
571 startjob = -1; /* Don't start it - already running */
575 /* We don't have to wait for a job to complete */
577 jobs[numjobs] = thisjob; /* Add this job */
579 startjob = numjobs; /* Will start it */
583 /* Start up a new salvage job on a partition in job slot "startjob" */
584 if (startjob != -1) {
586 Log("Starting salvage of file system partition %s\n",
587 jobs[startjob]->partP->name);
589 /* For NT, we not only fork, but re-exec the salvager. Pass in the
590 * commands and pass the child job number via the data path.
593 nt_SalvagePartition(jobs[startjob]->partP->name,
594 jobs[startjob]->jobnumb);
595 jobs[startjob]->pid = pid;
600 jobs[startjob]->pid = pid;
606 for (fd = 0; fd < 16; fd++)
613 openlog("salvager", LOG_PID, useSyslogFacility);
617 snprintf(logFileName, sizeof logFileName, "%s.%d",
618 AFSDIR_SERVER_SLVGLOG_FILEPATH,
619 jobs[startjob]->jobnumb);
620 logFile = afs_fopen(logFileName, "w");
625 SalvageFileSys1(jobs[startjob]->partP, 0);
630 } /* while ( thisjob || (!partP && numjobs > 0) ) */
632 /* If waited for all jobs to complete, now collect log files and return */
634 if (!useSyslog) /* if syslogging - no need to collect */
637 for (i = 0; i < jobcount; i++) {
638 snprintf(logFileName, sizeof logFileName, "%s.%d",
639 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
640 if ((passLog = afs_fopen(logFileName, "r"))) {
641 while (fgets(buf, sizeof(buf), passLog)) {
646 (void)unlink(logFileName);
/*
 * Salvage a partition, or a single volume on it, running the real work in
 * a separate process when that is possible.
 *
 * partP:              partition to salvage.
 * singleVolumeNumber: volume to restrict the salvage to, passed through
 *                     unchanged to SalvageFileSys1().
 *
 * SalvageFileSys1() is executed when forking is unavailable (!canfork),
 * when debug is set, or in the process for which Fork() returns 0.
 * Wait("SalvageFileSys") is issued on the remaining path -- presumably the
 * parent collecting the child; the surrounding else branch is not visible
 * in this view.
 */
655 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
657 if (!canfork || debug || Fork() == 0) {
658 SalvageFileSys1(partP, singleVolumeNumber);
659 if (canfork && !debug) {
664 Wait("SalvageFileSys");
/*
 * Split a device path at its last OS_DIRSEPC.
 *
 * pbuffer: NUL-terminated device path.  On success it is overwritten in
 *          place with the component that follows the last separator.
 * wpath:   receives the part of the path that precedes the last separator;
 *          the caller must supply room for at least strlen(pbuffer) bytes.
 *
 * Returns pbuffer on success.  Returns NULL, leaving both buffers
 * untouched, when the path contains no separator.
 *
 * The split is done directly on the caller's buffers: there is no
 * fixed-size scratch copy to overflow, and the tail is shifted with
 * memmove() because source and destination overlap.
 */
char *
get_DevName(char *pbuffer, char *wpath)
{
    char *sep;
    size_t dirlen;

    sep = strrchr(pbuffer, OS_DIRSEPC);
    if (sep == NULL)
        return NULL;

    /* Everything before the separator is the directory part. */
    dirlen = (size_t)(sep - pbuffer);
    memcpy(wpath, pbuffer, dirlen);
    wpath[dirlen] = '\0';

    /* Slide the final component (including its NUL) to the front. */
    memmove(pbuffer, sep + 1, strlen(sep + 1) + 1);
    return pbuffer;
}
/*
 * Salvage one partition, or the volume group of a single volume on it.
 *
 * partP:              partition to work on.
 * singleVolumeNumber: non-zero to salvage only that volume's group;
 *                     zero to salvage every volume on the partition.
 *
 * Steps visible in this function:
 *  1. A local SalvInfo is zeroed and filled with the partition, its device
 *     number, its path and its device name.
 *  2. The run is aborted once tries exceeds VOL_MAX_CHECKOUT_RETRIES.
 *  3. For a single volume the fileserver is contacted (VConnectFS, unless
 *     running as salvageServer), the volume is taken offline with
 *     AskOffline() and, on demand-attach builds, locked with LockVolume().
 *     Otherwise useFSYNC is cleared and the partition is locked.
 *  4. ForceSalvage is taken from UseTheForceLuke().
 *  5. Left-over "salvage.inodes." / "salvage.temp." files in the partition
 *     directory are reported for removal.
 *  6. A temporary inode list file is created and unlinked immediately while
 *     the descriptor stays open.
 *  7. GetInodeSummary() fills it; on failure for a single volume the volume
 *     is handed back with AskDelete().
 *  8. With ListInodeOption the list is printed and, for a single volume,
 *     MaybeAskOnline() is called.
 *  9. GetVolumeSummary() is matched against the inode summary; header files
 *     without inodes go to DeleteExtraVolumeHeaderFile() and each volume
 *     group is passed to SalvageVolumeGroup().
 * 10. For a whole-partition run the FORCESALVAGE marker is removed; for a
 *     single-volume run (when not Testing) surviving volumes are brought
 *     back with AskOnline(), the requested volume first.
 * 11. Completion is logged and the inode file descriptor is closed.
 *
 * NOTE(review): tmpDevName and wpath are 100-byte statics filled with an
 * unbounded strcpy() from partP->devName, and npath is built with
 * strcpy()/strcat(); confirm the source lengths are bounded elsewhere.
 */
687 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
690 char inodeListPath[256];
691 FD_t inodeFile = INVALID_FD;
692 static char tmpDevName[100];
693 static char wpath[100];
694 struct VolumeSummary *vsp, *esp;
698 struct SalvInfo l_salvinfo;
699 struct SalvInfo *salvinfo = &l_salvinfo;
/* Begin with every SalvInfo field cleared. */
702 memset(salvinfo, 0, sizeof(*salvinfo));
705 if (inodeFile != INVALID_FD) {
707 inodeFile = INVALID_FD;
709 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
710 Abort("Raced too many times with fileserver restarts while trying to "
711 "checkout/lock volumes; Aborted\n");
713 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
715 /* unlock all previous volume locks, since we're about to lock them
717 VLockFileReinit(&partP->volLockFile);
719 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
/* Record which partition this job is working on. */
721 salvinfo->fileSysPartition = partP;
722 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
723 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
726 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
727 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
728 name = partP->devName;
730 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
731 strcpy(tmpDevName, partP->devName);
732 name = get_DevName(tmpDevName, wpath);
733 salvinfo->fileSysDeviceName = name;
734 salvinfo->filesysfulldev = wpath;
737 if (singleVolumeNumber) {
738 #if !(defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL))
739 /* only non-DAFS locks the partition when salvaging a single volume;
740 * DAFS will lock the individual volumes in the VG */
741 VLockPartition(partP->name);
742 #endif /* !(AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL) */
746 /* salvageserver already setup fssync conn for us */
747 if ((programType != salvageServer) && !VConnectFS()) {
748 Abort("Couldn't connect to file server\n");
751 salvinfo->useFSYNC = 1;
752 AskOffline(salvinfo, singleVolumeNumber);
753 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
754 if (LockVolume(salvinfo, singleVolumeNumber)) {
757 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
760 salvinfo->useFSYNC = 0;
761 VLockPartition(partP->name);
765 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
768 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
769 partP->name, name, (Testing ? "(READONLY mode)" : ""));
771 Log("***Forced salvage of all volumes on this partition***\n");
776 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
783 osi_Assert((dirp = opendir(salvinfo->fileSysPath)) != NULL);
784 while ((dp = readdir(dirp))) {
785 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
786 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
788 Log("Removing old salvager temp files %s\n", dp->d_name);
789 strcpy(npath, salvinfo->fileSysPath);
790 strcat(npath, OS_DIRSEP);
791 strcat(npath, dp->d_name);
797 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
799 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
800 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
802 snprintf(inodeListPath, 255, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
806 inodeFile = OS_OPEN(inodeListPath, O_RDWR|O_TRUNC|O_CREAT, 0666);
807 if (inodeFile == INVALID_FD) {
808 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
811 /* Using nt_unlink here since we're really using the delete on close
812 * semantics of unlink. In most places in the salvager, we really do
813 * mean to unlink the file at that point. Those places have been
814 * modified to actually do that so that the NT crt can be used there.
816 * jaltman - On NT delete on close cannot be applied to a file while the
817 * process has an open file handle that does not have DELETE file
818 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
819 * delete privileges. As a result the nt_unlink() call will always
822 code = nt_unlink(inodeListPath);
824 code = unlink(inodeListPath);
827 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
830 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
832 if (singleVolumeNumber) {
833 /* the volume group -- let alone the volume -- does not exist,
834 * but we checked it out, so give it back to the fileserver */
835 AskDelete(salvinfo, singleVolumeNumber);
839 salvinfo->inodeFd = inodeFile;
840 if (salvinfo->inodeFd == INVALID_FD)
841 Abort("Temporary file %s is missing...\n", inodeListPath);
842 OS_SEEK(salvinfo->inodeFd, 0L, SEEK_SET);
843 if (ListInodeOption) {
844 PrintInodeList(salvinfo);
845 if (singleVolumeNumber) {
846 /* We've checked out the volume from the fileserver, and we need
847 * to give it back. We don't know if the volume exists or not,
848 * so we don't know whether to AskOnline or not. Try to determine
849 * if the volume exists by trying to read the volume header, and
850 * AskOnline if it is readable. */
851 MaybeAskOnline(salvinfo, singleVolumeNumber);
855 /* enumerate volumes in the partition.
856 * figure out sets of read-only + rw volumes.
857 * salvage each set, read-only volumes first, then read-write.
858 * Fix up inodes on last volume in set (whether it is read-write
861 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
865 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
866 i < salvinfo->nVolumesInInodeFile; i = j) {
867 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
869 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
871 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
872 struct VolumeSummary *tsp;
873 /* Scan volume list (from partition root directory) looking for the
874 * current rw volume number in the volume list from the inode scan.
875 * If there is one here that is not in the inode volume list,
877 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
879 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
881 /* Now match up the volume summary info from the root directory with the
882 * entry in the volume list obtained from scanning inodes */
883 salvinfo->inodeSummary[j].volSummary = NULL;
884 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
885 if (tsp->header.id == vid) {
886 salvinfo->inodeSummary[j].volSummary = tsp;
892 /* Salvage the group of volumes (several read-only + 1 read/write)
893 * starting with the current read-only volume we're looking at.
895 SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
898 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
899 for (; vsp < esp; vsp++) {
901 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
904 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
905 RemoveTheForce(salvinfo->fileSysPath);
907 if (!Testing && singleVolumeNumber) {
909 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
910 /* unlock vol headers so the fs can attach them when we AskOnline */
911 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
912 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
914 /* Step through the volumeSummary list and set all volumes on-line.
915 * Most volumes were taken off-line in GetVolumeSummary.
916 * If a volume was deleted, don't tell the fileserver anything, since
917 * we already told the fileserver the volume was deleted back when we
918 * we destroyed the volume header.
919 * Also, make sure we bring the singleVolumeNumber back online first.
922 for (j = 0; j < salvinfo->nVolumes; j++) {
923 if (salvinfo->volumeSummaryp[j].header.id == singleVolumeNumber) {
925 if (!salvinfo->volumeSummaryp[j].deleted) {
926 AskOnline(salvinfo, singleVolumeNumber);
932 /* If singleVolumeNumber is not in our volumeSummary, it means that
933 * at least one other volume in the VG is on the partition, but the
934 * RW volume is not. We've already AskOffline'd it by now, though,
935 * so make sure we don't still have the volume checked out. */
936 AskDelete(salvinfo, singleVolumeNumber);
939 for (j = 0; j < salvinfo->nVolumes; j++) {
940 if (salvinfo->volumeSummaryp[j].header.id != singleVolumeNumber) {
941 if (!salvinfo->volumeSummaryp[j].deleted) {
942 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
948 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
949 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
952 OS_CLOSE(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
/*
 * Dispose of a volume header file that has no inode data behind it.
 *
 * salvinfo: per-job salvage state; supplies the partition path, the
 *           partition handle and the useFSYNC flag.
 * vsp:      summary entry of the orphaned header (fileName, header.id and
 *           header.parent are read).
 *
 * The header's path is formed from fileSysPath and vsp->fileName and the
 * finding is logged.  The header is destroyed with
 * VDestroyVolumeDiskHeader(), the path is unlinked (ENOENT is accepted),
 * and when useFSYNC is set the volume is reported with AskDelete().
 *
 * NOTE(review): path is filled by an unbounded sprintf() and its
 * declaration is not visible here -- confirm the buffer can hold
 * fileSysPath + separator + fileName.  The log text implies the destructive
 * steps are skipped under Testing; that guard is not visible either.
 */
956 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
959 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, vsp->fileName);
962 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
965 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
967 Log("Error %ld destroying volume disk header for volume %lu\n",
968 afs_printable_int32_ld(code),
969 afs_printable_uint32_lu(vsp->header.id));
972 /* make sure we actually delete the fileName file; ENOENT
973 * is fine, since VDestroyVolumeDiskHeader probably already
975 if (unlink(path) && errno != ENOENT) {
976 Log("Unable to unlink %s (errno = %d)\n", path, errno);
978 if (salvinfo->useFSYNC) {
979 AskDelete(salvinfo, vsp->header.id);
/*
 * qsort() comparator for struct ViceInodeInfo records; GetInodeSummary()
 * uses it to order the inode table.
 *
 * _p1, _p2: pointers to the two records being compared.
 *
 * Ordering visible in the code:
 *  - If either record is a special inode (vnodeNumber == INODESPECIAL) the
 *    records are first ordered by read-write volume id, taken from
 *    special.parentId for a special inode and from vnode.volumeId
 *    otherwise.  Further tests then separate special from ordinary inodes
 *    and order special inodes by volume id and special.type.
 *  - Otherwise records are ordered by volumeId, then by vnodeNumber.
 *  - Remaining ties are examined by vnodeUniquifier and then by
 *    inodeDataVersion, with the sense reversed so that the preferred inode
 *    of several similar ones sorts first.
 *  - Under AFS_3DISPARES / AFS_SGI_EXMAG the uniquifier and data-version
 *    tests carry an extra "above 90% versus below 10% of the range" check,
 *    presumably to cope with counter wrap-around -- TODO confirm.
 *
 * NOTE(review): the individual return statements are not visible in this
 * view, so the exact sign returned by each branch should be checked
 * against the full source.
 */
987 CompareInodes(const void *_p1, const void *_p2)
989 const struct ViceInodeInfo *p1 = _p1;
990 const struct ViceInodeInfo *p2 = _p2;
/* Special inodes take the path below; ordinary pairs skip to the volumeId test. */
991 if (p1->u.vnode.vnodeNumber == INODESPECIAL
992 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
993 VolumeId p1rwid, p2rwid;
995 (p1->u.vnode.vnodeNumber ==
996 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
998 (p2->u.vnode.vnodeNumber ==
999 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1000 if (p1rwid < p2rwid)
1002 if (p1rwid > p2rwid)
1004 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1005 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
1006 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1007 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1008 if (p1->u.vnode.volumeId == p1rwid)
1010 if (p2->u.vnode.volumeId == p2rwid)
1012 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1014 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1015 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1016 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1018 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1020 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1022 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1024 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1026 /* The following tests are reversed, so that the most desirable
1027 * of several similar inodes comes first */
1028 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1029 #ifdef AFS_3DISPARES
1030 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1031 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1034 #ifdef AFS_SGI_EXMAG
1035 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1036 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1041 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1042 #ifdef AFS_3DISPARES
1043 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1044 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1047 #ifdef AFS_SGI_EXMAG
1048 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1049 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1054 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1055 #ifdef AFS_3DISPARES
1056 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1057 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1060 #ifdef AFS_SGI_EXMAG
1061 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1062 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1067 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1068 #ifdef AFS_3DISPARES
1069 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1070 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1073 #ifdef AFS_SGI_EXMAG
1074 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1075 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/*
 * Summarise the leading run of inode records that belong to one volume.
 *
 * ip:        first record of the run (records are expected to be sorted so
 *            that a volume's inodes are contiguous -- GetInodeSummary()
 *            sorts with CompareInodes before calling this).
 * maxInodes: upper bound on how many records may be examined.
 * summary:   receives volumeId, RWvolumeId, nInodes, nSpecialInodes and
 *            maxUniquifier for the run.
 *
 * The scan stops at the first record whose volumeId differs from the first
 * one or when maxInodes records have been seen.  RWvolumeId starts as the
 * volume's own id and is replaced by special.parentId when a special inode
 * is encountered; maxUniquifier tracks the largest vnodeUniquifier compared.
 *
 * NOTE(review): the increments of n/nSpecial and the advance of ip are not
 * visible in this view.
 */
1084 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1085 struct InodeSummary *summary)
1087 VolumeId volume = ip->u.vnode.volumeId;
1088 VolumeId rwvolume = volume;
1093 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1095 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1097 rwvolume = ip->u.special.parentId;
1098 /* This isn't quite right, as there could (in error) be different
1099 * parent inodes in different special vnodes */
1101 if (maxunique < ip->u.vnode.vnodeUniquifier)
1102 maxunique = ip->u.vnode.vnodeUniquifier;
/* Publish the totals gathered for this volume. */
1106 summary->volumeId = volume;
1107 summary->RWvolumeId = rwvolume;
1108 summary->nInodes = n;
1109 summary->nSpecialInodes = nSpecial;
1110 summary->maxUniquifier = maxunique;
/*
 * Inode filter used when only one volume is being salvaged;
 * GetInodeSummary() hands it to ListViceInodes() in that case.
 *
 * inodeinfo:          inode record under consideration.
 * singleVolumeNumber: id of the volume being salvaged.
 * rock:               opaque callback argument; not referenced in the
 *                     visible body.
 *
 * Returns non-zero when the inode belongs to singleVolumeNumber: special
 * inodes are matched on special.parentId, all others on vnode.volumeId.
 */
1114 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1116 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1117 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1118 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1123 * Collect list of inodes in file named by path. If a truly fatal error,
1124 * unlink the file and abort. For lesser errors, return -1. The file will
1125 * be unlinked by the caller.
1128 GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolumeNumber)
1132 struct ViceInodeInfo *ip, *ip_save;
1133 struct InodeSummary summary;
1134 char summaryFileName[50];
1135 FD_t summaryFile = INVALID_FD;
1137 char *dev = salvinfo->fileSysPath;
1138 char *wpath = salvinfo->fileSysPath;
1140 char *dev = salvinfo->fileSysDeviceName;
1141 char *wpath = salvinfo->filesysfulldev;
1143 char *part = salvinfo->fileSysPath;
1146 afs_sfsize_t st_size;
1148 /* This file used to come from vfsck; cobble it up ourselves now... */
1150 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1151 singleVolumeNumber ? OnlyOneVolume : 0,
1152 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1154 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1157 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1159 if (forceSal && !ForceSalvage) {
1160 Log("***Forced salvage of all volumes on this partition***\n");
1163 OS_SEEK(inodeFile, 0L, SEEK_SET);
1164 salvinfo->inodeFd = inodeFile;
1165 if (salvinfo->inodeFd == INVALID_FD ||
1166 (st_size = OS_SIZE(salvinfo->inodeFd)) == -1) {
1167 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1169 tdir = (tmpdir ? tmpdir : part);
1171 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1172 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp."));
1174 snprintf(summaryFileName, sizeof summaryFileName,
1175 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1177 summaryFile = OS_OPEN(summaryFileName, O_RDWR|O_APPEND|O_CREAT, 0666);
1178 if (summaryFile == INVALID_FD) {
1179 Abort("Unable to create inode summary file\n");
1183 /* Using nt_unlink here since we're really using the delete on close
1184 * semantics of unlink. In most places in the salvager, we really do
1185 * mean to unlink the file at that point. Those places have been
1186 * modified to actually do that so that the NT crt can be used there.
1188 * jaltman - As commented elsewhere, this cannot work because fopen()
1189 * does not open files with DELETE and FILE_SHARE_DELETE.
1191 code = nt_unlink(summaryFileName);
1193 code = unlink(summaryFileName);
1196 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1199 if (!canfork || debug || Fork() == 0) {
1200 int nInodes = st_size / sizeof(struct ViceInodeInfo);
1202 OS_CLOSE(summaryFile);
1203 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1204 RemoveTheForce(salvinfo->fileSysPath);
1206 struct VolumeSummary *vsp;
1209 GetVolumeSummary(salvinfo, singleVolumeNumber);
1211 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1213 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1216 Log("%s vice inodes on %s; not salvaged\n",
1217 singleVolumeNumber ? "No applicable" : "No", dev);
1220 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1222 OS_CLOSE(summaryFile);
1224 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1227 if (OS_READ(salvinfo->inodeFd, ip, st_size) != st_size) {
1228 OS_CLOSE(summaryFile);
1229 Abort("Unable to read inode table; %s not salvaged\n", dev);
1231 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1232 if (OS_SEEK(salvinfo->inodeFd, 0, SEEK_SET) == -1
1233 || OS_WRITE(salvinfo->inodeFd, ip, st_size) != st_size) {
1234 OS_CLOSE(summaryFile);
1235 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1240 CountVolumeInodes(ip, nInodes, &summary);
1241 if (OS_WRITE(summaryFile, &summary, sizeof(summary)) != sizeof(summary)) {
1242 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1243 OS_CLOSE(summaryFile);
1246 summary.index += (summary.nInodes);
1247 nInodes -= summary.nInodes;
1248 ip += summary.nInodes;
1251 ip = ip_save = NULL;
1252 /* Following fflush is not fclose, because if it was debug mode would not work */
1253 if (OS_SYNC(summaryFile) == -1) {
1254 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1255 OS_CLOSE(summaryFile);
1258 if (canfork && !debug) {
1263 if (Wait("Inode summary") == -1) {
1264 OS_CLOSE(summaryFile);
1265 Exit(1); /* salvage of this partition aborted */
1269 st_size = OS_SIZE(summaryFile);
1270 osi_Assert(st_size >= 0);
1273 salvinfo->inodeSummary = (struct InodeSummary *)malloc(st_size);
1274 osi_Assert(salvinfo->inodeSummary != NULL);
1275 /* For GNU we need to do lseek to get the file pointer moved. */
1276 osi_Assert(OS_SEEK(summaryFile, 0, SEEK_SET) == 0);
1277 ret = OS_READ(summaryFile, salvinfo->inodeSummary, st_size);
1278 osi_Assert(ret == st_size);
1280 salvinfo->nVolumesInInodeFile = st_size / sizeof(struct InodeSummary);
1281 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1282 salvinfo->inodeSummary[i].volSummary = NULL;
1284 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)st_size);
1285 OS_CLOSE(summaryFile);
1289 /* Comparison routine for volume sort.
1290 This is set up so that a read-write volume comes immediately before
1291 any read-only clones of that volume */
// Comparator with the qsort callback signature, applied to struct
// VolumeSummary elements. Ordering visible here:
//   1. by header.parent, ascending;
//   2. within one parent, entries whose header.id equals header.parent
//      (the read-write volume) are tested for before the final tie-break;
//   3. otherwise by header.id, ascending (both entries are read-only).
// NOTE(review): the statements run by the two rw-volume tests are not
// visible in this extract; presumably they return -1 and 1 so that the
// read-write volume sorts immediately before its clones -- confirm against
// the full file.
1293 CompareVolumes(const void *_p1, const void *_p2)
1295 const struct VolumeSummary *p1 = _p1;
1296 const struct VolumeSummary *p2 = _p2;
1297 if (p1->header.parent != p2->header.parent)
1298 return p1->header.parent < p2->header.parent ? -1 : 1;
1299 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1301 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1303 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1307 * Gleans volumeSummary information by asking the fileserver
1309 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1310 * salvaging a whole partition
1312 * @return whether we obtained the volume summary information or not
1313 * @retval 0 success; we obtained the volume summary information
1314 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1316 * @retval 1 we did not get the volume summary information; either the
1317 * fileserver responded with an error, or we are not supposed to
1318 * ask the fileserver for the information (e.g. we are salvaging
1319 * the entire partition or we are not the salvageserver)
1321 * @note for non-DAFS, always returns 1
// Asks the fileserver, through FSYNC_VGCQuery, for the members of the volume
// group that contains singleVolumeNumber and builds salvinfo->volumeSummaryp
// and salvinfo->nVolumes from the reply.
// The query path is compiled only when both FSSYNC_BUILD_CLIENT and
// AFS_DEMAND_ATTACH_FS are defined, and is entered only when programType is
// salvageServer and singleVolumeNumber is non-zero.
// Return contract, as stated by the header comment that precedes this
// function: 0 when the summary was obtained, -1 when a fileserver restart was
// raced, 1 when the summary was not obtained.
// NOTE(review): the return statements and several guards are not visible in
// this extract; the nesting described by the comments below should be
// confirmed against the full file.
1324 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1327 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1328 if (programType == salvageServer) {
1329 if (singleVolumeNumber) {
1330 FSSYNC_VGQry_response_t q_res;
1332 struct VolumeSummary *vsp;
1334 struct VolumeDiskHeader diskHdr;
1336 memset(&res, 0, sizeof(res));
1338 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1341 * We must wait for the partition to finish scanning before
1342 * can continue, since we will not know if we got the entire
1343 * VG membership unless the partition is fully scanned.
1344 * We could, in theory, just scan the partition ourselves if
1345 * the VG cache is not ready, but we would be doing the exact
1346 * same scan the fileserver is doing; it will almost always
1347 * be faster to wait for the fileserver. The only exceptions
1348 * are if the partition does not take very long to scan, and
1349 * in that case it's fast either way, so who cares?
1351 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1352 Log("waiting for fileserver to finish scanning partition %s...\n",
1353 salvinfo->fileSysPartition->name);
1355 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1356 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1357 * just so small partitions don't need to wait over 10
1358 * seconds every time, and large partitions are generally
1359 * polled only once every ten seconds. */
1360 sleep((i > 10) ? (i = 10) : i);
1362 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1366 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1367 /* This can happen if there's no header for the volume
1368 * we're salvaging, or no headers exist for the VG (if
1369 * we're salvaging an RW). Act as if we got a response
1370 * with no VG members. The headers may be created during
1371 * salvaging, if there are inodes in this VG. */
1373 memset(&q_res, 0, sizeof(q_res));
1374 q_res.rw = singleVolumeNumber;
1378 Log("fileserver refused VGCQuery request for volume %lu on "
1379 "partition %s, code %ld reason %ld\n",
1380 afs_printable_uint32_lu(singleVolumeNumber),
1381 salvinfo->fileSysPartition->name,
1382 afs_printable_int32_ld(code),
1383 afs_printable_int32_ld(res.hdr.reason));
// A reply whose rw id differs from singleVolumeNumber means the request named
// a clone: the request is linked to the RW volume group (only when
// SALVSYNC_BUILD_CLIENT is defined) and the process exits with
// SALSRV_EXIT_VOLGROUP_LINK.
1387 if (q_res.rw != singleVolumeNumber) {
1388 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1389 afs_printable_uint32_lu(singleVolumeNumber),
1390 afs_printable_uint32_lu(q_res.rw));
1391 #ifdef SALVSYNC_BUILD_CLIENT
1392 if (SALVSYNC_LinkVolume(q_res.rw,
1394 salvinfo->fileSysPartition->name,
1396 Log("schedule request failed\n");
1398 #endif /* SALVSYNC_BUILD_CLIENT */
1399 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1402 salvinfo->volumeSummaryp = calloc(VOL_VG_MAX_VOLS, sizeof(struct VolumeSummary));
1403 osi_Assert(salvinfo->volumeSummaryp != NULL);
1405 salvinfo->nVolumes = 0;
1406 vsp = salvinfo->volumeSummaryp;
// Walk the child ids of the reply. Every child other than singleVolumeNumber
// is taken offline and locked; then its disk header is read and converted
// into the summary slot vsp, and nVolumes is incremented.
// NOTE(review): the action taken for a zero child id, for a LockVolume
// failure and for an unreadable header, as well as the advance of vsp, are
// not visible in this extract.
1408 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1409 char name[VMAXPATHLEN];
1411 if (!q_res.children[i]) {
1415 /* AskOffline for singleVolumeNumber was called much earlier */
1416 if (q_res.children[i] != singleVolumeNumber) {
1417 AskOffline(salvinfo, q_res.children[i]);
1418 if (LockVolume(salvinfo, q_res.children[i])) {
1424 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1426 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1427 afs_printable_uint32_lu(q_res.children[i]));
1432 DiskToVolumeHeader(&vsp->header, &diskHdr);
1433 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1434 vsp->fileName = ToString(name);
1435 salvinfo->nVolumes++;
1439 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1444 Log("Cannot get volume summary from fileserver; falling back to scanning "
1445 "entire partition\n");
1448 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
1453 * count how many volume headers are found by VWalkVolumeHeaders.
1455 * @param[in] dp the disk partition (unused)
1456 * @param[in] name full path to the .vol header (unused)
1457 * @param[in] hdr the header data (unused)
1458 * @param[in] last whether this is the last try or not (unused)
1459 * @param[in] rock actually an afs_int32*; the running count of how many
1460 * volumes we have found
// Header callback handed to VWalkVolumeHeaders by GetVolumeSummary for the
// counting pass. rock is reinterpreted as an afs_int32 pointer (nvols); dp,
// name, hdr and last are not referenced in the visible lines.
// NOTE(review): the statement that updates *nvols and the return value are
// not visible in this extract; presumably the count is incremented once per
// header -- confirm against the full file.
1465 CountHeader(struct DiskPartition64 *dp, const char *name,
1466 struct VolumeDiskHeader *hdr, int last, void *rock)
1468 afs_int32 *nvols = (afs_int32 *)rock;
1474 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
// Context object passed as the rock pointer to the RecordHeader and
// UnlinkHeader callbacks; both cast rock back to this type.
// GetVolumeSummary fills in singleVolumeNumber, vsp, nVolumes, totalVolumes
// and salvinfo before walking the volume headers, and copies nVolumes back
// into salvinfo afterwards.
1477 struct SalvageScanParams {
1478 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1479 * vol id of the VG we're salvaging */
1480 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1481 * we're filling in */
1482 afs_int32 nVolumes; /**< # of vols we've encountered */
1483 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1484 * # of vols we've alloc'd memory for) */
1485 int retry; /**< do we need to retry vol lock/checkout? */
1486 struct SalvInfo *salvinfo; /**< salvage job info */
1490 * records volume summary info found from VWalkVolumeHeaders.
1492 * Found volumes are also taken offline if they are in the specific volume
1493 * group we are looking for.
1495 * @param[in] dp the disk partition
1496 * @param[in] name full path to the .vol header
1497 * @param[in] hdr the header data
1498 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1499 * @param[in] rock actually a struct SalvageScanParams*, containing the
1500 * information needed to record the volume summary data
1502 * @return operation status
1504 * @retval -1 volume locking raced with fileserver restart; checking out
1505 * and locking volumes needs to be retried
1506 * @retval 1 volume header is mis-named and should be deleted
// Header callback handed to VWalkVolumeHeaders by GetVolumeSummary; records
// one volume header into the summary array reached through rock (a struct
// SalvageScanParams pointer).
// Visible steps:
//  - convert the on-disk header into a local summary with DiskToVolumeHeader;
//  - when singleVolumeNumber is this header id but not its parent (a clone):
//    for programType salvageServer, link the request to the parent volume
//    group (only with SALVSYNC_BUILD_CLIENT) and Exit with
//    SALSRV_EXIT_VOLGROUP_LINK; a separate message reports that a read-only
//    volume is not salvaged;
//  - for headers relevant to this salvage (partition salvage, or id or parent
//    equal to singleVolumeNumber): compare the base name found by strrchr
//    with the VFORMAT name expected for header.id; take the volume offline
//    unless it is singleVolumeNumber itself; lock it when a demand-attach
//    build flag is defined; store the file name in the summary;
//  - Abort when params->nVolumes exceeds params->totalVolumes;
//  - copy the summary into params->vsp.
// Return contract per the header comment preceding this function: -1 asks the
// caller to retry volume checkout and locking, 1 marks a mis-named header.
// NOTE(review): the declaration and assignment of badname, the return
// statements, and the updates of params->nVolumes and params->vsp are not
// visible in this extract.
1509 RecordHeader(struct DiskPartition64 *dp, const char *name,
1510 struct VolumeDiskHeader *hdr, int last, void *rock)
1512 char nameShouldBe[64];
1513 struct SalvageScanParams *params;
1514 struct VolumeSummary summary;
1515 VolumeId singleVolumeNumber;
1516 struct SalvInfo *salvinfo;
1518 params = (struct SalvageScanParams *)rock;
1520 singleVolumeNumber = params->singleVolumeNumber;
1521 salvinfo = params->salvinfo;
1523 DiskToVolumeHeader(&summary.header, hdr);
1525 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1526 && summary.header.parent != singleVolumeNumber) {
1528 if (programType == salvageServer) {
1529 #ifdef SALVSYNC_BUILD_CLIENT
1530 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1531 summary.header.id, summary.header.parent);
1532 if (SALVSYNC_LinkVolume(summary.header.parent,
1536 Log("schedule request failed\n");
1539 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1542 Log("%u is a read-only volume; not salvaged\n",
1543 singleVolumeNumber);
1548 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1549 || summary.header.parent == singleVolumeNumber) {
1551 /* check if the header file is incorrectly named */
1553 const char *base = strrchr(name, OS_DIRSEPC);
1560 snprintf(nameShouldBe, sizeof nameShouldBe,
1561 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1564 if (strcmp(nameShouldBe, base)) {
1565 /* .vol file has wrong name; retry/delete */
1569 if (!badname || last) {
1570 /* only offline the volume if the header is good, or if this is
1571 * the last try looking at it; avoid AskOffline'ing the same vol
1574 if (singleVolumeNumber
1575 && summary.header.id != singleVolumeNumber) {
1576 /* don't offline singleVolumeNumber; we already did that
1579 AskOffline(salvinfo, summary.header.id);
1581 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
1583 /* don't lock the volume if the header is bad, since we're
1584 * about to delete it anyway. */
1585 if (LockVolume(salvinfo, summary.header.id)) {
1590 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
1594 if (last && !Showmode) {
1595 Log("Volume header file %s is incorrectly named (should be %s "
1596 "not %s); %sdeleted (it will be recreated later, if "
1597 "necessary)\n", name, nameShouldBe, base,
1598 (Testing ? "it would have been " : ""));
1603 summary.fileName = ToString(base);
1606 if (params->nVolumes > params->totalVolumes) {
1607 /* We found more volumes than we found on the first partition walk;
1608 * apparently something created a volume while we were
1609 * partition-salvaging, or we found more than 20 vols when salvaging a
1610 * particular volume. Abort if we detect this, since other programs
1611 * supposed to not touch the partition while it is partition-salvaging,
1612 * and we shouldn't find more than 20 vols in a VG.
1614 Abort("Found %ld vol headers, but should have found at most %ld! "
1615 "Make sure the volserver/fileserver are not running at the "
1616 "same time as a partition salvage\n",
1617 afs_printable_int32_ld(params->nVolumes),
1618 afs_printable_int32_ld(params->totalVolumes));
1621 memcpy(params->vsp, &summary, sizeof(summary));
1629 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1631 * If the header could not be read in at all, the header is always unlinked.
1632 * If instead RecordHeader said the header was bad (that is, the header file
1633 * is mis-named), we only unlink if we are doing a partition salvage, as
1634 * opposed to salvaging a specific volume group.
1636 * @param[in] dp the disk partition
1637 * @param[in] name full path to the .vol header
1638 * @param[in] hdr header data, or NULL if the header could not be read
1639 * @param[in] rock actually a struct SalvageScanParams*, with some information
// Second callback handed to VWalkVolumeHeaders by GetVolumeSummary. Decides
// whether the header file called name should be removed and, when the local
// flag dounlink is set, calls unlink(name), logging errno if that fails.
// Two cases are visible:
//  - the header could not be read at all: it is logged as not being a
//    legitimate volume header file;
//  - a header was read but rejected, and this is a partition salvage
//    (params->singleVolumeNumber is zero).
// NOTE(review): the declaration of dounlink and every assignment to it are
// not visible in this extract, and neither is any handling of Testing beyond
// the wording of the log message -- confirm against the full file.
1643 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1644 struct VolumeDiskHeader *hdr, void *rock)
1646 struct SalvageScanParams *params;
1649 params = (struct SalvageScanParams *)rock;
1652 /* no header; header is too bogus to read in at all */
1654 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1660 } else if (!params->singleVolumeNumber) {
1661 /* We were able to read in a header, but RecordHeader said something
1662 * was wrong with it. We only unlink those if we are doing a partition
1669 if (dounlink && unlink(name)) {
1670 Log("Error %d while trying to unlink %s\n", errno, name);
1675 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1676 * the fileserver for VG information, or by scanning the /vicepX partition.
1678 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1679 * are salvaging, or 0 if this is a partition
1682 * @return operation status
1684 * @retval -1 we raced with a fileserver restart; checking out and locking
1685 * volumes must be retried
// Populates salvinfo->volumeSummaryp and salvinfo->nVolumes.
// The fileserver is asked first through AskVolumeSummary; the existing
// comments below describe the early exits for a successful answer and for a
// required retry. Otherwise the partition is scanned:
//  - for a partition salvage (singleVolumeNumber == 0) the headers are first
//    counted with the CountHeader callback; for a single volume group the
//    array is sized to VOL_VG_MAX_VOLS;
//  - the summary array is allocated with calloc and asserted non-NULL;
//  - a struct SalvageScanParams is filled in and its address is passed as the
//    rock of a second VWalkVolumeHeaders pass that uses RecordHeader and
//    UnlinkHeader;
//  - the recorded count is copied to salvinfo->nVolumes and the array sorted.
// Fix: the rock argument of the second walk had been corrupted into a
// pilcrow followed by ms; it is restored to the address of the local params.
// NOTE(review): the tests on code, the return statements and the comparator
// argument of qsort are not visible in this extract.
1688 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1690 afs_int32 nvols = 0;
1691 struct SalvageScanParams params;
1694 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1696 /* we successfully got the vol information from the fileserver; no
1697 * need to scan the partition */
1701 /* we need to retry volume checkout */
1705 if (!singleVolumeNumber) {
1706 /* Count how many volumes we have in /vicepX */
1707 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1710 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1715 nvols = VOL_VG_MAX_VOLS;
1718 salvinfo->volumeSummaryp = calloc(nvols, sizeof(struct VolumeSummary));
1719 osi_Assert(salvinfo->volumeSummaryp != NULL);
1721 params.singleVolumeNumber = singleVolumeNumber;
1722 params.vsp = salvinfo->volumeSummaryp;
1723 params.nVolumes = 0;
1724 params.totalVolumes = nvols;
1726 params.salvinfo = salvinfo;
1728 /* walk the partition directory of volume headers and record the info
1729 * about them; unlinking invalid headers */
1730 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1731 UnlinkHeader, &params);
1733 /* we apparently need to retry checking-out/locking volumes */
1737 Abort("Failed to get volume header summary\n");
1739 salvinfo->nVolumes = params.nVolumes;
1741 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1747 /* Find the link table. This should be associated with the RW volume or, if
1748 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
// Searches the special inodes of a volume group for the link table.
// For each of the nVols summaries, the first nSpecialInodes records starting
// at allInodes + isp[i].index are examined, and the inodeNumber of the first
// record whose special type is VI_LINKTABLE is returned.
// NOTE(review): the value returned when no link table inode is found is not
// visible in this extract; the caller tests the result with VALID_INO.
1751 FindLinkHandle(struct InodeSummary *isp, int nVols,
1752 struct ViceInodeInfo *allInodes)
1755 struct ViceInodeInfo *ip;
1757 for (i = 0; i < nVols; i++) {
1758 ip = allInodes + isp[i].index;
1759 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1760 if (ip[j].u.special.type == VI_LINKTABLE)
1761 return ip[j].inodeNumber;
// Ensures a link table inode exists for the volume group and writes a fresh
// header into it.
//  - when ino is not valid, IH_CREATE is invoked for an INODESPECIAL inode of
//    type VI_LINKTABLE; a still-invalid ino is reported as an allocation
//    failure;
//  - salvinfo->VGLinkH is initialised for that inode and opened;
//  - the file is truncated to sizeof(version) + sizeof(short) and a
//    versionStamp carrying LINKTABLEMAGIC and LINKTABLEVERSION is written at
//    offset 0;
//  - the descriptor is closed with FDH_REALLYCLOSE and, when isp->volSummary
//    is set, its header.linkTable is updated to ino.
// NOTE(review): the assignment that receives the IH_CREATE result, and the
// test on the IH_OPEN result, are not visible in this extract.
// NOTE(review): the Abort that follows FDH_PWRITE reuses the truncate wording
// in its message; a write-specific message may have been intended.
1768 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1770 struct versionStamp version;
1773 if (!VALID_INO(ino))
1775 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
1776 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1777 if (!VALID_INO(ino))
1779 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1780 isp->RWvolumeId, errno);
1781 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1782 fdP = IH_OPEN(salvinfo->VGLinkH);
1784 Abort("Can't open link table for volume %u (error = %d)\n",
1785 isp->RWvolumeId, errno);
1787 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1788 Abort("Can't truncate link table for volume %u (error = %d)\n",
1789 isp->RWvolumeId, errno);
1791 version.magic = LINKTABLEMAGIC;
1792 version.version = LINKTABLEVERSION;
1794 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1796 Abort("Can't truncate link table for volume %u (error = %d)\n",
1797 isp->RWvolumeId, errno);
1799 FDH_REALLYCLOSE(fdP);
1801 /* If the volume summary exits (i.e., the V*.vol header file exists),
1802 * then set this inode there as well.
1804 if (isp->volSummary)
1805 isp->volSummary->header.linkTable = ino;
// Body of a thread entry routine: the opaque arg is reinterpreted as an
// SVGParms_t and its three fields are forwarded to DoSalvageVolumeGroup.
// NOTE(review): the signature line is not visible in this extract; this is
// presumably nt_SVG, the routine SalvageVolumeGroup passes to pthread_create.
1814 SVGParms_t *parms = (SVGParms_t *) arg;
1815 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
// Entry point for salvaging one volume group (isp, nVols).
// Visible steps:
//  - reset the per-volume-group fields of salvinfo (VolumeChanged, VGLinkH,
//    VGLinkH_cnt, VolInfo);
//  - pack isp, nVols and salvinfo into parms;
//  - initialise a thread attribute object, mark it joinable, start a thread
//    running nt_SVG with parms, and join it; each failing pthread call is
//    logged with the RW volume id.
// The trailing endif names AFS_NT40_ENV, so at least part of this body is
// conditionally compiled.
// NOTE(review): the matching ifdef and any alternative branch, as well as the
// declarations of code, tid and parms, are not visible in this extract.
1820 SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1823 pthread_attr_t tattr;
1827 /* Initialize per volume global variables, even if later code does so */
1828 salvinfo->VolumeChanged = 0;
1829 salvinfo->VGLinkH = NULL;
1830 salvinfo->VGLinkH_cnt = 0;
1831 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1833 parms.svgp_inodeSummaryp = isp;
1834 parms.svgp_count = nVols;
1835 parms.svgp_salvinfo = salvinfo;
1836 code = pthread_attr_init(&tattr);
1838 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1842 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1844 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1847 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1849 Log("Failed to create thread to salvage volume group %u\n",
1853 (void)pthread_join(tid, NULL);
1855 #endif /* AFS_NT40_ENV */
// Salvages one volume group: isp points at nVols consecutive InodeSummary
// entries. The group is considered to have a read-write volume when the first
// entry has volumeId equal to RWvolumeId and at least one special inode.
// Visible phases:
//  - when ShowMounts is off, or on without a read-write volume, and
//    ForceSalvage is off, QuickCheck is consulted;
//  - when canfork is set and debug is not, the parent of Fork waits for the
//    child and the child carries on;
//  - the ViceInodeInfo records of the group are read from salvinfo->inodeFd;
//    allInodes is biased by isp->index so isp[i].index can index it directly;
//  - under AFS_NAMEI_ENV the link table is located and opened; when it is
//    missing or cannot be opened it is recreated and the non-special inodes
//    get a link count of 1 through namei_SetLinkCount;
//  - volumes are processed from the last entry down to salvageTo, each with a
//    check pass (check == 1) followed by a repair pass (check == 0) over the
//    header file and the vnodes, calling MaybeZapVolume on failure;
//  - on-disk link counts are adjusted with IH_DEC and IH_INC according to the
//    residual ip->linkCount values, the link table inode being handled last;
//  - SalvageVolume runs on the group and salvinfo->VGLinkH is released.
// NOTE(review): the malloc result stored in inodes is used on the very next
// line for pointer arithmetic, with no NULL check visible.
// NOTE(review): several guards, returns, loop exits and closing braces are
// not visible in this extract, so the exact nesting of the phases should be
// confirmed against the full file.
1858 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1860 struct ViceInodeInfo *inodes, *allInodes, *ip;
1861 int i, totalInodes, size, salvageTo;
1865 int dec_VGLinkH = 0;
1867 FdHandle_t *fdP = NULL;
1869 salvinfo->VGLinkH_cnt = 0;
1870 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1871 && isp->nSpecialInodes > 0);
1872 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1873 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1876 if (ShowMounts && !haveRWvolume)
1878 if (canfork && !debug && Fork() != 0) {
1879 (void)Wait("Salvage volume group");
1882 for (i = 0, totalInodes = 0; i < nVols; i++)
1883 totalInodes += isp[i].nInodes;
1884 size = totalInodes * sizeof(struct ViceInodeInfo);
1885 inodes = (struct ViceInodeInfo *)malloc(size);
1886 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1887 * for the partition, if all the inodes
1888 * had been read into memory */
1890 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1892 osi_Assert(OS_READ(salvinfo->inodeFd, inodes, size) == size);
1894 /* Don't try to salvage a read write volume if there isn't one on this
1896 salvageTo = haveRWvolume ? 0 : 1;
1898 #ifdef AFS_NAMEI_ENV
1899 ino = FindLinkHandle(isp, nVols, allInodes);
1900 if (VALID_INO(ino)) {
1901 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1902 fdP = IH_OPEN(salvinfo->VGLinkH);
1904 if (!VALID_INO(ino) || fdP == NULL) {
1905 Log("%s link table for volume %u.\n",
1906 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1908 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1911 struct ViceInodeInfo *ip;
1912 CreateLinkTable(salvinfo, isp, ino);
1913 fdP = IH_OPEN(salvinfo->VGLinkH);
1914 /* Sync fake 1 link counts to the link table, now that it exists */
1916 for (i = 0; i < nVols; i++) {
1917 ip = allInodes + isp[i].index;
1918 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1919 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1926 FDH_REALLYCLOSE(fdP);
1928 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1931 /* Salvage in reverse order--read/write volume last; this way any
1932 * Inodes not referenced by the time we salvage the read/write volume
1933 * can be picked up by the read/write volume */
1934 /* ACTUALLY, that's not done right now--the inodes just vanish */
1935 for (i = nVols - 1; i >= salvageTo; i--) {
1937 struct InodeSummary *lisp = &isp[i];
1938 #ifdef AFS_NAMEI_ENV
1939 /* If only the RO is present on this partition, the link table
1940 * shows up as a RW volume special file. Need to make sure the
1941 * salvager doesn't try to salvage the non-existent RW.
1943 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1944 /* If this only special inode is the link table, continue */
1945 if (inodes->u.special.type == VI_LINKTABLE) {
1952 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1953 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1954 /* Check inodes twice. The second time do things seriously. This
1955 * way the whole RO volume can be deleted, below, if anything goes wrong */
1956 for (check = 1; check >= 0; check--) {
1958 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
1960 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
1961 if (rw && deleteMe) {
1962 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1963 * volume won't be called */
1969 if (rw && check == 1)
1971 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
1972 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
1978 /* Fix actual inode counts */
1981 Log("totalInodes %d\n",totalInodes);
1982 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1983 static int TraceBadLinkCounts = 0;
1984 #ifdef AFS_NAMEI_ENV
1985 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
1986 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
1987 VGLinkH_p1 = ip->u.param[0];
1988 continue; /* Deal with this last. */
1991 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1992 TraceBadLinkCounts--; /* Limit reports, per volume */
1993 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
1995 while (ip->linkCount > 0) {
1996 /* below used to assert, not break */
1998 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1999 Log("idec failed. inode %s errno %d\n",
2000 PrintInode(stmp, ip->inodeNumber), errno);
2006 while (ip->linkCount < 0) {
2007 /* these used to be asserts */
2009 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2010 Log("iinc failed. inode %s errno %d\n",
2011 PrintInode(stmp, ip->inodeNumber), errno);
2018 #ifdef AFS_NAMEI_ENV
2019 while (dec_VGLinkH > 0) {
2020 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2021 Log("idec failed on link table, errno = %d\n", errno);
2025 while (dec_VGLinkH < 0) {
2026 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2027 Log("iinc failed on link table, errno = %d\n", errno);
2034 /* Directory consistency checks on the rw volume */
2036 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2037 IH_RELEASE(salvinfo->VGLinkH);
2039 if (canfork && !debug) {
// Inspects the volume info header of each of the nVols volumes; the first
// comment in the body states that this happens before forking.
// For each summary with index i:
//  - the i == 0 entry is special-cased when it has no special inodes and
//    nVols > 1 (a phantom read-write volume, per the existing comment);
//  - the VolumeDiskData is read through IH_IREAD from the inode named by
//    vs->header.volumeInfo; the read must return sizeof(volHeader) and the
//    data must carry VOLUMEINFOMAGIC, dontSalvage == DONT_SALVAGE,
//    needsSalvaged == 0 and destroyMe == 0;
//  - a header passing those tests with inUse set gets inUse cleared and
//    inService set, and is written back with IH_IWRITE.
// NOTE(review): the return statements, the handling of a NULL volSummary and
// the release of handle h are not visible in this extract. The caller tests
// the result as a boolean together with !ForceSalvage; presumably non-zero
// means the group needs no salvage -- confirm against the full file.
2046 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2048 /* Check headers BEFORE forking */
2052 for (i = 0; i < nVols; i++) {
2053 struct VolumeSummary *vs = isp[i].volSummary;
2054 VolumeDiskData volHeader;
2056 /* Don't salvage just because phantom rw volume is there... */
2057 /* (If a read-only volume exists, read/write inodes must also exist) */
2058 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2062 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2063 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2064 == sizeof(volHeader)
2065 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2066 && volHeader.dontSalvage == DONT_SALVAGE
2067 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2068 if (volHeader.inUse != 0) {
2069 volHeader.inUse = 0;
2070 volHeader.inService = 1;
2072 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2073 != sizeof(volHeader)) {
2089 /* SalvageVolumeHeaderFile
2091 * Salvage the top level V*.vol header file. Make sure the special files
2092 * exist and that there are no duplicates.
2094 * Calls SalvageHeader for each possible type of volume special file.
// Validates and rebuilds the V*.vol header of one volume (isp) from its
// special inodes. RW is non-zero for the read-write volume, check selects the
// check pass, and deleteMe is forwarded to SalvageHeader.
// Visible phases:
//  - allocate a skip array with one entry per special inode; a failed
//    allocation is logged and duplicate recovery is then not performed;
//  - when a volume summary exists, note in goodspecial[] which special inodes
//    are already referenced by its header;
//  - restart tempHeader with only the magic, version, id and parent fields;
//  - detect adjacent special inodes of the same type; the one referenced by
//    the existing header is preferred, otherwise the log reports that the
//    salvage of the volume is aborted;
//  - walk the special inodes: out-of-range types are reported as rubbish,
//    skipped duplicates are treated as orphans (removed when orphans equals
//    ORPH_REMOVE), and the rest are recorded into tempHeader through stuff[];
//  - call SalvageHeader for every inode type, first forcing the link table
//    entry to salvinfo->VGLinkH->ih_ino when that inode is valid;
//  - when isp->volSummary is NULL a summary is allocated and the header is to
//    be created with VCreateVolumeDiskHeader; otherwise, when the existing
//    header is found damaged or no longer valid, it is to be rewritten with
//    VWriteVolumeDiskHeader;
//  - finally isp->volSummary->volumeInfoHandle is initialised.
// NOTE(review): the calloc result stored in isp->volSummary is dereferenced
// on the next visible line without a NULL check.
// NOTE(review): strcpy copies isp->volSummary->fileName into a 64-byte
// headerName with no visible bound; confirm the name can never be longer.
// NOTE(review): the return statements, the release of skip, the header
// comparison call and several guards are not visible in this extract.
2098 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2099 struct ViceInodeInfo *inodes, int RW,
2100 int check, int *deleteMe)
2103 struct ViceInodeInfo *ip;
2104 int allinodesobsolete = 1;
2105 struct VolumeDiskHeader diskHeader;
2106 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2108 struct VolumeHeader tempHeader;
2109 struct afs_inode_info stuff[MAXINODETYPE];
2111 /* keeps track of special inodes that are probably 'good'; they are
2112 * referenced in the vol header, and are included in the given inodes
2117 } goodspecial[MAXINODETYPE];
2122 memset(goodspecial, 0, sizeof(goodspecial));
2124 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2126 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2128 Log("cannot allocate memory for inode skip array when salvaging "
2129 "volume %lu; not performing duplicate special inode recovery\n",
2130 afs_printable_uint32_lu(isp->volumeId));
2131 /* still try to perform the salvage; the skip array only does anything
2132 * if we detect duplicate special inodes */
2135 init_inode_info(&tempHeader, stuff);
2138 * First, look at the special inodes and see if any are referenced by
2139 * the existing volume header. If we find duplicate special inodes, we
2140 * can use this information to use the referenced inode (it's more
2141 * likely to be the 'good' one), and throw away the duplicates.
2143 if (isp->volSummary && skip) {
2144 /* use tempHeader, so we can use the stuff[] array to easily index
2145 * into the isp->volSummary special inodes */
2146 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2148 for (i = 0; i < isp->nSpecialInodes; i++) {
2149 ip = &inodes[isp->index + i];
2150 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2151 /* will get taken care of in a later loop */
2154 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2155 goodspecial[ip->u.special.type-1].valid = 1;
2156 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2161 memset(&tempHeader, 0, sizeof(tempHeader));
2162 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2163 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2164 tempHeader.id = isp->volumeId;
2165 tempHeader.parent = isp->RWvolumeId;
2167 /* Check for duplicates (inodes are sorted by type field) */
2168 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2169 ip = &inodes[isp->index + i];
2170 if (ip->u.special.type == (ip + 1)->u.special.type) {
2171 afs_ino_str_t stmp1, stmp2;
2173 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2174 /* Will be caught in the loop below */
2178 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2179 ip->u.special.type, isp->volumeId,
2180 PrintInode(stmp1, ip->inodeNumber),
2181 PrintInode(stmp2, (ip+1)->inodeNumber));
2183 if (skip && goodspecial[ip->u.special.type-1].valid) {
2184 Inode gi = goodspecial[ip->u.special.type-1].inode;
2187 Log("using special inode referenced by vol header (%s)\n",
2188 PrintInode(stmp1, gi));
2191 /* the volume header references some special inode of
2192 * this type in the inodes array; are we it? */
2193 if (ip->inodeNumber != gi) {
2195 } else if ((ip+1)->inodeNumber != gi) {
2196 /* in case this is the last iteration; we need to
2197 * make sure we check ip+1, too */
2202 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2210 for (i = 0; i < isp->nSpecialInodes; i++) {
2212 ip = &inodes[isp->index + i];
2213 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2215 Log("Rubbish header inode %s of type %d\n",
2216 PrintInode(stmp, ip->inodeNumber),
2217 ip->u.special.type);
2223 Log("Rubbish header inode %s of type %d; deleted\n",
2224 PrintInode(stmp, ip->inodeNumber),
2225 ip->u.special.type);
2226 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2227 if (skip && skip[i]) {
2228 if (orphans == ORPH_REMOVE) {
2229 Log("Removing orphan special inode %s of type %d\n",
2230 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2233 Log("Ignoring orphan special inode %s of type %d\n",
2234 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2235 /* fall through to the ip->linkCount--; line below */
2238 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2239 allinodesobsolete = 0;
2241 if (!check && ip->u.special.type != VI_LINKTABLE)
2242 ip->linkCount--; /* Keep the inode around */
2250 if (allinodesobsolete) {
2257 salvinfo->VGLinkH_cnt++; /* one for every header. */
2259 if (!RW && !check && isp->volSummary) {
2260 ClearROInUseBit(isp->volSummary);
2264 for (i = 0; i < MAXINODETYPE; i++) {
2265 if (stuff[i].inodeType == VI_LINKTABLE) {
2266 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2267 * And we may have recreated the link table earlier, so set the
2268 * RW header as well.
2270 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2271 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2275 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2279 if (isp->volSummary == NULL) {
2281 char headerName[64];
2282 snprintf(headerName, sizeof headerName, VFORMAT,
2283 afs_printable_uint32_lu(isp->volumeId));
2284 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2285 salvinfo->fileSysPath, headerName);
2287 Log("No header file for volume %u\n", isp->volumeId);
2291 Log("No header file for volume %u; %screating %s\n",
2292 isp->volumeId, (Testing ? "it would have been " : ""),
2294 isp->volSummary = calloc(1, sizeof(struct VolumeSummary));
2295 isp->volSummary->fileName = ToString(headerName);
2297 writefunc = VCreateVolumeDiskHeader;
2300 char headerName[64];
2301 /* hack: these two fields are obsolete... */
2302 isp->volSummary->header.volumeAcl = 0;
2303 isp->volSummary->header.volumeMountTable = 0;
2306 (&isp->volSummary->header, &tempHeader,
2307 sizeof(struct VolumeHeader))) {
2308 /* We often remove the name before calling us, so we make a fake one up */
2309 if (isp->volSummary->fileName) {
2310 strcpy(headerName, isp->volSummary->fileName);
2312 snprintf(headerName, sizeof headerName, VFORMAT,
2313 afs_printable_uint32_lu(isp->volumeId));
2314 isp->volSummary->fileName = ToString(headerName);
2316 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2317 salvinfo->fileSysPath, headerName);
2319 Log("Header file %s is damaged or no longer valid%s\n", path,
2320 (check ? "" : "; repairing"));
2324 writefunc = VWriteVolumeDiskHeader;
2328 memcpy(&isp->volSummary->header, &tempHeader,
2329 sizeof(struct VolumeHeader));
2332 Log("It would have written a new header file for volume %u\n",
2336 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2337 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2339 Log("Error %ld writing volume header file for volume %lu\n",
2340 afs_printable_int32_ld(code),
2341 afs_printable_uint32_lu(diskHeader.id));
2346 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2347 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader -- verify, and when permitted recreate, one special
 * (volume header) inode described by sp.
 *
 * salvinfo  salvage job state; supplies fileSysDevice and fileSysPath and
 *           receives a copy of the volume info in salvinfo->VolInfo.
 * sp        describes the special inode: sp->inode points at the inode
 *           number being checked, sp->inodeType, sp->size and sp->stamp
 *           say what is expected, sp->description is used in messages.
 * isp       summary of the volume (volumeId, RWvolumeId, maxUniquifier).
 * check     non-zero appears to mean report only: recreating a header
 *           part while check is set is reported as an internal error.
 * deleteMe  presumably set when the volume should be destroyed; the
 *           assignment is not visible in this listing -- TODO confirm.
 *
 * Steps visible here: a zero inode number is reported and, outside check
 * mode, a new special inode is requested with IH_CREATE; the inode is
 * opened, sp->size bytes are read and the magic number is compared with
 * sp->stamp.magic; a damaged part is truncated and rewritten only when
 * Testing is off.  VI_VOLINFO gets a fresh volume info record named
 * bogus.<volumeId> with inService and blessed cleared and a uniquifier of
 * maxUniquifier + 1 + 1000; every other type gets just its version stamp.
 * For VI_VOLINFO the result is copied into salvinfo->VolInfo and the
 * update or creation time is logged.
 *
 * The caller compares the result against -1; the return statements
 * themselves are not visible in this listing.
 */
2352 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2353 struct InodeSummary *isp, int check, int *deleteMe)
2356 VolumeDiskData volumeInfo;
2357 struct versionStamp fileHeader;
2366 #ifndef AFS_NAMEI_ENV
2367 if (sp->inodeType == VI_LINKTABLE)
2370 if (*(sp->inode) == 0) {
2372 Log("Missing inode in volume header (%s)\n", sp->description);
2376 Log("Missing inode in volume header (%s); %s\n", sp->description,
2377 (Testing ? "it would have recreated it" : "recreating"));
2380 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2381 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2382 if (!VALID_INO(*(sp->inode)))
2384 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2385 sp->description, errno);
2390 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2391 fdP = IH_OPEN(specH);
2392 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2393 /* bail out early and destroy the volume */
2395 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2402 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2403 sp->description, errno);
2406 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2407 || header.fileHeader.magic != sp->stamp.magic)) {
2409 Log("Part of the header (%s) is corrupted\n", sp->description);
2410 FDH_REALLYCLOSE(fdP);
2414 Log("Part of the header (%s) is corrupted; recreating\n",
2417 /* header can be garbage; make sure we don't read garbage data from
2419 memset(&header, 0, sizeof(header));
2421 if (sp->inodeType == VI_VOLINFO
2422 && header.volumeInfo.destroyMe == DESTROY_ME) {
2425 FDH_REALLYCLOSE(fdP);
2429 if (recreate && !Testing) {
2432 ("Internal error: recreating volume header (%s) in check mode\n",
2434 nBytes = FDH_TRUNC(fdP, 0);
2436 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2437 sp->description, errno);
2439 /* The following code should be moved into vutil.c */
2440 if (sp->inodeType == VI_VOLINFO) {
2442 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2443 header.volumeInfo.stamp = sp->stamp;
2444 header.volumeInfo.id = isp->volumeId;
2445 header.volumeInfo.parentId = isp->RWvolumeId;
2446 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2447 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2448 isp->volumeId, isp->volumeId);
2449 header.volumeInfo.inService = 0;
2450 header.volumeInfo.blessed = 0;
2451 /* The + 1000 is a hack in case there are any files out in venus caches */
2452 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2453 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2454 header.volumeInfo.needsCallback = 0;
2455 gettimeofday(&tp, 0);
2456 header.volumeInfo.creationDate = tp.tv_sec;
2458 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2459 sizeof(header.volumeInfo), 0);
2460 if (nBytes != sizeof(header.volumeInfo)) {
2463 ("Unable to write volume header file (%s) (errno = %d)\n",
2464 sp->description, errno);
2465 Abort("Unable to write entire volume header file (%s)\n",
2469 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2470 if (nBytes != sizeof(sp->stamp)) {
2473 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2474 sp->description, errno);
2476 ("Unable to write entire version stamp in volume header file (%s)\n",
2481 FDH_REALLYCLOSE(fdP);
2483 if (sp->inodeType == VI_VOLINFO) {
2484 salvinfo->VolInfo = header.volumeInfo;
2488 if (salvinfo->VolInfo.updateDate) {
2489 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2491 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2492 salvinfo->VolInfo.id,
2493 (Testing ? "it would have been " : ""), update);
2495 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2497 Log("%s (%u) not updated (created %s)\n",
2498 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
/*
 * SalvageVnodes -- salvage the small and the large vnode index of one
 * volume against the data inodes of its volume group.
 *
 * rwIsp    summary of the read-write volume; its index, nInodes and
 *          nSpecialInodes locate the data inodes inside the inodes array
 *          (the special inodes at the front are skipped).
 * thisIsp  summary of the volume whose indexes are salvaged; RW is true
 *          when it is the same object as rwIsp.
 * inodes   array holding the inode records of the volume group.
 * check    passed through to SalvageIndex; when set and the small index
 *          reports -1 an early exit is taken (the statement itself is not
 *          visible in this listing).
 *
 * Returns 0 only when both SalvageIndex calls returned 0, otherwise -1.
 */
2508 SalvageVnodes(struct SalvInfo *salvinfo,
2509 struct InodeSummary *rwIsp,
2510 struct InodeSummary *thisIsp,
2511 struct ViceInodeInfo *inodes, int check)
2513 int ilarge, ismall, ioffset, RW, nInodes;
2514 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2517 RW = (rwIsp == thisIsp);
2518 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2520 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2521 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2522 if (check && ismall == -1)
2525 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2526 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2527 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex -- reconcile one vnode index file with the list of data
 * inodes found on disk for the volume.
 *
 * salvinfo    salvage job state; VolumeChanged is set when a vnode is
 *             rewritten.
 * ino         inode number of the vnode index file to walk.
 * class       vnode class of that index (selects VnodeClassInfo[class]).
 * RW          non-zero when the read-write volume itself is salvaged.
 * ip, nInodes the inode list and the number of entries left in it; the
 *             loops below compare ip->u.vnode.vnodeNumber with each vnode
 *             number in increasing order, so the list is presumably
 *             sorted by vnode number -- TODO confirm against the caller.
 * volSummary  supplies header.parent for the index handle.
 * check       non-zero means nothing may be changed; the function asserts
 *             that no vnode was modified while check is set.
 *
 * The index file size must be an exact multiple of the on-disk vnode size
 * (asserted); the first slot is skipped with STREAM_ASEEK.  For every
 * allocated vnode the visible code zeroes entries with a zero inode
 * number or a bad magic number, then looks for the inode with the same
 * vnode number and repairs uniquifier, data version, inode number and
 * length from it; a vnode whose inode is missing is zeroed.  Changed
 * vnodes are written back with IH_IWRITE unless Testing is set.
 *
 * The return statements are not visible in this listing; SalvageVnodes
 * treats 0 as success and -1 as failure.
 */
2531 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2532 struct ViceInodeInfo *ip, int nInodes,
2533 struct VolumeSummary *volSummary, int check)
2535 char buf[SIZEOF_LARGEDISKVNODE];
2536 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2538 StreamHandle_t *file;
2539 struct VnodeClassInfo *vcp;
2541 afs_sfsize_t nVnodes;
2542 afs_fsize_t vnodeLength;
2544 afs_ino_str_t stmp1, stmp2;
2548 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2549 fdP = IH_OPEN(handle);
2550 osi_Assert(fdP != NULL);
2551 file = FDH_FDOPEN(fdP, "r+");
2552 osi_Assert(file != NULL);
2553 vcp = &VnodeClassInfo[class];
2554 size = OS_SIZE(fdP->fd_fd);
2555 osi_Assert(size != -1);
2556 nVnodes = (size / vcp->diskSize) - 1;
2558 osi_Assert((nVnodes + 1) * vcp->diskSize == size);
2559 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
2563 for (vnodeIndex = 0;
2564 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2565 nVnodes--, vnodeIndex++) {
2566 if (vnode->type != vNull) {
2567 int vnodeChanged = 0;
2568 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2569 if (VNDISK_GET_INO(vnode) == 0) {
2571 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2572 memset(vnode, 0, vcp->diskSize);
2576 if (vcp->magic != vnode->vnodeMagic) {
2577 /* bad magic #, probably partially created vnode */
2579 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2580 vnodeNumber, afs_printable_uint32_lu(vnode->vnodeMagic),
2581 afs_printable_uint32_lu(vcp->magic));
2582 memset(vnode, 0, vcp->diskSize);
2586 Log("Partially allocated vnode %d deleted.\n",
2588 memset(vnode, 0, vcp->diskSize);
2592 /* ****** Should do a bit more salvage here: e.g. make sure
2593 * vnode type matches what it should be given the index */
2594 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2595 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2596 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2597 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2604 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2605 /* The following doesn't work, because the version number
2606 * is not maintained correctly by the file server */
2607 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2608 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2610 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2616 /* For RW volume, look for vnode with matching inode number;
2617 * if no such match, take the first determined by our sort
2619 struct ViceInodeInfo *lip = ip;
2620 int lnInodes = nInodes;
2622 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2623 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2632 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2633 /* "Matching" inode */
2637 vu = vnode->uniquifier;
2638 iu = ip->u.vnode.vnodeUniquifier;
2639 vd = vnode->dataVersion;
2640 id = ip->u.vnode.inodeDataVersion;
2642 * Because of the possibility of the uniquifier overflows (> 4M)
2643 * we compare them modulo the low 22-bits; we shouldn't worry
2644 * about mismatching since they shouldn't to many old
2645 * uniquifiers of the same vnode...
2647 if (IUnique(vu) != IUnique(iu)) {
2649 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2652 vnode->uniquifier = iu;
2653 #ifdef AFS_3DISPARES
2654 vnode->dataVersion = (id >= vd ?
2657 1887437 ? vd : id) :
2660 1887437 ? id : vd));
2662 #if defined(AFS_SGI_EXMAG)
2663 vnode->dataVersion = (id >= vd ?
2666 15099494 ? vd : id) :
2669 15099494 ? id : vd));
2671 vnode->dataVersion = (id > vd ? id : vd);
2672 #endif /* AFS_SGI_EXMAG */
2673 #endif /* AFS_3DISPARES */
2676 /* don't bother checking for vd > id any more, since
2677 * partial file transfers always result in this state,
2678 * and you can't do much else anyway (you've already
2679 * found the best data you can) */
2680 #ifdef AFS_3DISPARES
2681 if (!vnodeIsDirectory(vnodeNumber)
2682 && ((vd < id && (id - vd) < 1887437)
2683 || ((vd > id && (vd - id) > 1887437)))) {
2685 #if defined(AFS_SGI_EXMAG)
2686 if (!vnodeIsDirectory(vnodeNumber)
2687 && ((vd < id && (id - vd) < 15099494)
2688 || ((vd > id && (vd - id) > 15099494)))) {
2690 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2691 #endif /* AFS_SGI_EXMAG */
2694 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2695 vnode->dataVersion = id;
2700 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2703 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2705 VNDISK_SET_INO(vnode, ip->inodeNumber);
2710 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2712 VNDISK_SET_INO(vnode, ip->inodeNumber);
2715 VNDISK_GET_LEN(vnodeLength, vnode);
2716 if (ip->byteCount != vnodeLength) {
2719 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2724 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2725 VNDISK_SET_LEN(vnode, ip->byteCount);
2729 ip->linkCount--; /* Keep the inode around */
2732 } else { /* no matching inode */
2734 if (VNDISK_GET_INO(vnode) != 0
2735 || vnode->type == vDirectory) {
2736 /* No matching inode--get rid of the vnode */
2738 if (VNDISK_GET_INO(vnode)) {
2740 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2744 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2749 if (VNDISK_GET_INO(vnode)) {
2751 time_t serverModifyTime = vnode->serverModifyTime;
2752 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2756 time_t serverModifyTime = vnode->serverModifyTime;
2757 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2760 memset(vnode, 0, vcp->diskSize);
2763 /* Should not reach here because we checked for
2764 * (inodeNumber == 0) above. And where we zero the vnode,
2765 * we also goto vnodeDone.
2769 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2773 } /* VNDISK_GET_INO(vnode) != 0 */
2775 osi_Assert(!(vnodeChanged && check));
2776 if (vnodeChanged && !Testing) {
2777 osi_Assert(IH_IWRITE
2778 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2779 (char *)vnode, vcp->diskSize)
2781 salvinfo->VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber -- look up the in-memory VnodeEssence for a vnode
 * number.
 *
 * The vnode number selects the class table in salvinfo->vnodeInfo and a
 * bit number inside it.  Returns NULL when that bit number is at or past
 * vip->nVnodes, otherwise a pointer into vip->vnodes.
 */
2792 struct VnodeEssence *
2793 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2796 struct VnodeInfo *vip;
2799 class = vnodeIdToClass(vnodeNumber);
2800 vip = &salvinfo->vnodeInfo[class];
2801 offset = vnodeIdToBitNumber(vnodeNumber);
2802 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite -- give the directory a private copy of its data inode
 * before it is modified.
 *
 * An early exit is taken when dir->copied is already set or when Testing
 * is on (the exit statement itself is not visible in this listing).
 * Otherwise the directory vnode is read from the large vnode index, a new
 * inode is created with IH_CREATE using an increased data version, the
 * old inode contents are copied into it with CopyInode, the vnode is
 * pointed at the new inode and written back, and dir->dirHandle is
 * re-aimed at the new inode.  The read, create, copy and write steps are
 * each checked with osi_Assert.  Per the closing comment, the old inode
 * is deliberately not released here.
 */
2806 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2808 /* Copy the directory unconditionally if we are going to change it:
2809 * not just if was cloned.
2811 struct VnodeDiskObject vnode;
2812 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2813 Inode oldinode, newinode;
2816 if (dir->copied || Testing)
2818 DFlush(); /* Well justified paranoia... */
2821 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2822 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2824 osi_Assert(code == sizeof(vnode));
2825 oldinode = VNDISK_GET_INO(&vnode);
2826 /* Increment the version number by a whole lot to avoid problems with
2827 * clients that were promised new version numbers--but the file server
2828 * crashed before the versions were written to disk.
2831 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2832 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2834 osi_Assert(VALID_INO(newinode));
2835 osi_Assert(CopyInode(salvinfo->fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2837 VNDISK_SET_INO(&vnode, newinode);
2839 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2840 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2842 osi_Assert(code == sizeof(vnode));
2844 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2845 salvinfo->fileSysDevice, newinode,
2846 &salvinfo->VolumeChanged);
2847 /* Don't delete the original inode right away, because the directory is
2848 * still being scanned.
2854 * This function should either successfully create a new dir, or give up
2855 * and leave things the way they were. In particular, if it fails to write
2856 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage -- rebuild a damaged directory into a fresh inode and
 * switch the directory over to it only when the rebuild checks out.
 *
 * Visible sequence: read the directory vnode from the large vnode index;
 * create a new inode with an increased data version; run DirSalvage from
 * the old directory handle into the new one, using the vnode parent (or
 * the directory itself when there is no parent) for the dot-dot entry and
 * a parent uniquifier that defaults to 1; if DirSalvage fails, drop the
 * new inode with IH_DEC and give up; if DirOK rejects the new directory,
 * drop the new inode and keep the old one; otherwise store the new inode
 * number and length in the vnode, write the vnode back, sync, make sure
 * the old directory file is closed, release the old inode with IH_DEC and
 * replace dir->dirHandle with the new handle.
 */
2860 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2862 struct VnodeDiskObject vnode;
2863 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2864 Inode oldinode, newinode;
2869 afs_int32 parentUnique = 1;
2870 struct VnodeEssence *vnodeEssence;
2875 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2877 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2878 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2880 osi_Assert(lcode == sizeof(vnode));
2881 oldinode = VNDISK_GET_INO(&vnode);
2882 /* Increment the version number by a whole lot to avoid problems with
2883 * clients that were promised new version numbers--but the file server
2884 * crashed before the versions were written to disk.
2887 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2888 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2890 osi_Assert(VALID_INO(newinode));
2891 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2892 &salvinfo->VolumeChanged);
2894 /* Assign . and .. vnode numbers from dir and vnode.parent.
2895 * The uniquifier for . is in the vnode.
2896 * The uniquifier for .. might be set to a bogus value of 1 and
2897 * the salvager will later clean it up.
2899 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2900 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2903 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2905 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2910 /* didn't really build the new directory properly, let's just give up. */
2911 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2912 Log("Directory salvage returned code %d, continuing.\n", code);
2914 Log("also failed to decrement link count on new inode");
2918 Log("Checking the results of the directory salvage...\n");
2919 if (!DirOK(&newdir)) {
2920 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2921 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2922 osi_Assert(code == 0);
2926 VNDISK_SET_INO(&vnode, newinode);
2927 length = Length(&newdir);
2928 VNDISK_SET_LEN(&vnode, length);
2930 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2931 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2933 osi_Assert(lcode == sizeof(vnode));
2936 nt_sync(salvinfo->fileSysDevice);
2938 sync(); /* this is slow, but hopefully rarely called. We don't have
2939 * an open FD on the file itself to fsync.
2943 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
2945 /* make sure old directory file is really closed */
2946 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2947 FDH_REALLYCLOSE(fdP);
2949 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2950 osi_Assert(code == 0);
2951 dir->dirHandle = newdir;
2955 * arguments for JudgeEntry.
/* Handed to EnumerateDir as the opaque argument and unpacked again at the
 * top of JudgeEntry. */
2957 struct judgeEntry_params {
2958 struct DirSummary *dir; /**< directory we're examining entries in */
2959 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/*
 * JudgeEntry -- per-entry callback used while enumerating a directory
 * during salvage; decides whether one directory entry is kept, repaired
 * or deleted.
 *
 * arock        a struct judgeEntry_params carrying the directory summary
 *              and the salvage job state.
 * name         name of the directory entry.
 * vnodeNumber  vnode number recorded in the entry.
 * unique       uniquifier recorded in the entry (the parameter line is
 *              not visible in this listing; the name is taken from its
 *              uses below).
 *
 * Decisions visible in the body, in order:
 *  - an entry whose vnode number has no VnodeEssence is logged and
 *    deleted;
 *  - without AFS_NAMEI_ENV, an entry whose vnode has inode number 0 is
 *    deleted;
 *  - an entry for an even vnode number whose essence has no count, unique
 *    or mode bits is reported as invalid and may be deleted;
 *  - when the entry uniquifier differs from the vnode uniquifier the
 *    entry is deleted and, unless it is to be dropped, re-created with
 *    the vnode uniquifier;
 *  - the dot and dot-dot entries are rewritten when they do not point
 *    where the directory summary says they should;
 *  - names starting with .__afs are deleted and the vnode is marked
 *    unclaimed and todelete;
 *  - optional reporting of suid/sgid files, root-owned files and mount
 *    points; a symlink without execute bits whose contents do not look
 *    like a mount point gets mode bits 0111 added;
 *  - finally the parent linkage is checked: an unclaimed vnode is adopted
 *    by this directory, an entry for a vnode claimed elsewhere is
 *    deleted, and a surviving entry marks the vnode claimed and
 *    decrements its expected link count.
 *
 * Every directory modification is preceded by CopyOnWrite and checked
 * with osi_Assert.  Only one return statement (returning 0) is visible in
 * this listing.
 */
2963 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
2966 struct judgeEntry_params *params = arock;
2967 struct DirSummary *dir = params->dir;
2968 struct SalvInfo *salvinfo = params->salvinfo;
2969 struct VnodeEssence *vnodeEssence;
2970 afs_int32 dirOrphaned, todelete;
2972 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
2974 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
2975 if (vnodeEssence == NULL) {
2977 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2980 CopyOnWrite(salvinfo, dir);
2981 osi_Assert(Delete(&dir->dirHandle, name) == 0);
2986 #ifndef AFS_NAMEI_ENV
2987 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2988 * mount inode for the partition. If this inode were deleted, it would crash
2991 if (vnodeEssence->InodeNumber == 0) {
2992 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2994 CopyOnWrite(salvinfo, dir);
2995 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3002 if (!(vnodeNumber & 1) && !Showmode
3003 && !(vnodeEssence->count || vnodeEssence->unique
3004 || vnodeEssence->modeBits)) {
3005 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
3006 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3007 vnodeNumber, unique,
3008 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3012 CopyOnWrite(salvinfo, dir);
3013 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3019 /* Check if the Uniquifiers match. If not, change the directory entry
3020 * so its unique matches the vnode unique. Delete if the unique is zero
3021 * or if the directory is orphaned.
3023 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3024 if (!vnodeEssence->unique
3025 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3026 /* This is an orphaned directory. Don't delete the . or ..
3027 * entry. Otherwise, it will get created in the next
3028 * salvage and deleted again here. So Just skip it.
3033 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3036 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3040 fid.Vnode = vnodeNumber;
3041 fid.Unique = vnodeEssence->unique;
3042 CopyOnWrite(salvinfo, dir);
3043 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3045 osi_Assert(Create(&dir->dirHandle, name, &fid) == 0);
3048 return 0; /* no need to continue */
3051 if (strcmp(name, ".") == 0) {
3052 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3055 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3057 CopyOnWrite(salvinfo, dir);
3058 osi_Assert(Delete(&dir->dirHandle, ".") == 0);
3059 fid.Vnode = dir->vnodeNumber;
3060 fid.Unique = dir->unique;
3061 osi_Assert(Create(&dir->dirHandle, ".", &fid) == 0);
3064 vnodeNumber = fid.Vnode; /* Get the new Essence */
3065 unique = fid.Unique;
3066 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3069 } else if (strcmp(name, "..") == 0) {
3072 struct VnodeEssence *dotdot;
3073 pa.Vnode = dir->parent;
3074 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3075 osi_Assert(dotdot != NULL); /* XXX Should not be assert */
3076 pa.Unique = dotdot->unique;
3078 pa.Vnode = dir->vnodeNumber;
3079 pa.Unique = dir->unique;
3081 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3083 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3085 CopyOnWrite(salvinfo, dir);
3086 osi_Assert(Delete(&dir->dirHandle, "..") == 0);
3087 osi_Assert(Create(&dir->dirHandle, "..", &pa) == 0);
3090 vnodeNumber = pa.Vnode; /* Get the new Essence */
3092 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3094 dir->haveDotDot = 1;
3095 } else if (strncmp(name, ".__afs", 6) == 0) {
3097 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3100 CopyOnWrite(salvinfo, dir);
3101 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3103 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3104 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3107 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3108 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3109 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3110 && !(vnodeEssence->modeBits & 0111)) {
3111 afs_sfsize_t nBytes;
3117 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3118 vnodeEssence->InodeNumber);
3121 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3125 size = FDH_SIZE(fdP);
3127 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3128 FDH_REALLYCLOSE(fdP);
3135 nBytes = FDH_PREAD(fdP, buf, size, 0);
3136 if (nBytes == size) {
3138 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3139 Log("Volume %u (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3140 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3141 Testing ? "would convert" : "converted");
3142 vnodeEssence->modeBits |= 0111;
3143 vnodeEssence->changed = 1;
3144 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3145 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3146 dir->name ? dir->name : "??", name, buf);
3148 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3149 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3151 FDH_REALLYCLOSE(fdP);
3154 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3155 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3156 if (vnodeIdToClass(vnodeNumber) == vLarge
3157 && vnodeEssence->name == NULL) {
3159 if ((n = (char *)malloc(strlen(name) + 1)))
3161 vnodeEssence->name = n;
3164 /* The directory entry points to the vnode. Check to see if the
3165 * vnode points back to the directory. If not, then let the
3166 * directory claim it (else it might end up orphaned). Vnodes
3167 * already claimed by another directory are deleted from this
3168 * directory: hardlinks to the same vnode are not allowed
3169 * from different directories.
3171 if (vnodeEssence->parent != dir->vnodeNumber) {
3172 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3173 /* Vnode does not point back to this directory.
3174 * Orphaned dirs cannot claim a file (it may belong to
3175 * another non-orphaned dir).
3178 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3180 vnodeEssence->parent = dir->vnodeNumber;
3181 vnodeEssence->changed = 1;
3183 /* Vnode was claimed by another directory */
3186 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3187 } else if (vnodeNumber == 1) {
3188 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3190 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3194 CopyOnWrite(salvinfo, dir);
3195 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3200 /* This directory claims the vnode */
3201 vnodeEssence->claimed = 1;
3203 vnodeEssence->count--;
/*
 * DistilVnodeEssence -- read one vnode index file and build the in-memory
 * VnodeEssence table for that vnode class in salvinfo->vnodeInfo[class].
 *
 * rwVId  volume id used for the index file handle.
 * class  vnode class being read (selects the table and the disk size).
 * ino    inode number of the vnode index file.
 * maxu   in/out: raised to the largest uniquifier seen in the index.
 *
 * The number of vnodes is the file size divided by the on-disk vnode size
 * minus one (the first slot is skipped with STREAM_ASEEK); the size must
 * be an exact multiple (asserted).  A zeroed VnodeEssence array is
 * allocated, plus an Inode array for the large class.  For every vnode
 * that is not vNull the link count, block count, parent, uniquifier, mode
 * bits, inode number, type, author, owner and group are copied, and the
 * block and allocated-vnode totals are accumulated.  A directory vnode
 * found outside the large index is discarded; in the large index its
 * inode number is recorded in vip->inodes.
 */
3208 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3209 VnodeClass class, Inode ino, Unique * maxu)
3211 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3212 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3213 char buf[SIZEOF_LARGEDISKVNODE];
3214 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3216 StreamHandle_t *file;
3221 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3222 fdP = IH_OPEN(vip->handle);
3223 osi_Assert(fdP != NULL);
3224 file = FDH_FDOPEN(fdP, "r+");
3225 osi_Assert(file != NULL);
3226 size = OS_SIZE(fdP->fd_fd);
3227 osi_Assert(size != -1);
3228 vip->nVnodes = (size / vcp->diskSize) - 1;
3229 if (vip->nVnodes > 0) {
3230 osi_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3231 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
3232 osi_Assert((vip->vnodes = (struct VnodeEssence *)
3233 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3234 if (class == vLarge) {
3235 osi_Assert((vip->inodes = (Inode *)
3236 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3245 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3246 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3247 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3248 nVnodes--, vnodeIndex++) {
3249 if (vnode->type != vNull) {
3250 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3251 afs_fsize_t vnodeLength;
3252 vip->nAllocatedVnodes++;
3253 vep->count = vnode->linkCount;
3254 VNDISK_GET_LEN(vnodeLength, vnode);
3255 vep->blockCount = nBlocks(vnodeLength);
3256 vip->volumeBlockCount += vep->blockCount;
3257 vep->parent = vnode->parent;
3258 vep->unique = vnode->uniquifier;
3259 if (*maxu < vnode->uniquifier)
3260 *maxu = vnode->uniquifier;
3261 vep->modeBits = vnode->modeBits;
3262 vep->InodeNumber = VNDISK_GET_INO(vnode);
3263 vep->type = vnode->type;
3264 vep->author = vnode->author;
3265 vep->owner = vnode->owner;
3266 vep->group = vnode->group;
3267 if (vnode->type == vDirectory) {
3268 if (class != vLarge) {
3269 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3270 vip->nAllocatedVnodes--;
/* NOTE(review): vnode is a pointer into buf, so sizeof(vnode) below is
 * the size of a pointer, and (char *)&vnode is the address of the pointer
 * variable rather than of the vnode data.  This looks unintended
 * (vcp->diskSize and (char *)vnode seem to be what is wanted) -- confirm
 * before changing. */
3271 memset(vnode, 0, sizeof(vnode));
3272 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3273 vnodeIndexOffset(vcp, vnodeNumber),
3274 (char *)&vnode, sizeof(vnode));
3275 salvinfo->VolumeChanged = 1;
3277 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName -- build the path name of a directory vnode by recursing
 * through its parents.
 *
 * When vp has a parent and a name, the parent has a VnodeEssence, and the
 * recursive call for the parent succeeds, OS_DIRSEP and vp->name are
 * appended to path.  The remaining branches and the return values are not
 * visible in this listing; the recursive call is used as a truth value.
 *
 * NOTE(review): the strcat calls do not check the remaining room in path;
 * the buffer size is chosen by the caller -- confirm it is large enough
 * for the deepest path.
 */
3286 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3289 struct VnodeEssence *parentvp;
3295 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3296 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3297 strcat(path, OS_DIRSEP);
3298 strcat(path, vp->name);
/*
 * IsVnodeOrphaned -- returns 1 when the vnode is orphaned, 0 when it is
 * not.  Per the comments below, vnode zero counts as orphaned and the root
 * directory vnode never does (the tests for those two cases are not
 * visible in this listing).  Any other vnode is orphaned when it has no
 * VnodeEssence or is not claimed; otherwise the answer is that of its
 * parent, found by recursion.
 */
3304 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3305 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
3308 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3310 struct VnodeEssence *vep;
3313 return (1); /* Vnode zero does not exist */
3315 return (0); /* The root dir vnode is always claimed */
3316 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3317 if (!vep || !vep->claimed)
3318 return (1); /* Vnode is not claimed - it is orphaned */
3320 return (IsVnodeOrphaned(salvinfo, vep->parent));
3324 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3325 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3326 struct DirSummary *rootdir, int *rootdirfound)
3328 static struct DirSummary dir;
3329 static struct DirHandle dirHandle;
3330 struct VnodeEssence *parent;
3331 static char path[MAXPATHLEN];
3334 if (dirVnodeInfo->vnodes[i].salvaged)
3335 return; /* already salvaged */
3338 dirVnodeInfo->vnodes[i].salvaged = 1;
3340 if (dirVnodeInfo->inodes[i] == 0)
3341 return; /* Not allocated to a directory */
3343 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3344 if (dirVnodeInfo->vnodes[i].parent) {
3345 Log("Bad parent, vnode 1; %s...\n",
3346 (Testing ? "skipping" : "salvaging"));
3347 dirVnodeInfo->vnodes[i].parent = 0;
3348 dirVnodeInfo->vnodes[i].changed = 1;
3351 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3352 if (parent && parent->salvaged == 0)
3353 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3354 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3355 rootdir, rootdirfound);
3358 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3359 dir.unique = dirVnodeInfo->vnodes[i].unique;
3362 dir.parent = dirVnodeInfo->vnodes[i].parent;
3363 dir.haveDot = dir.haveDotDot = 0;
3364 dir.ds_linkH = alinkH;
3365 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3366 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
3368 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3371 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3372 (Testing ? "skipping" : "salvaging"));
3375 CopyAndSalvage(salvinfo, &dir);
3377 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3380 dirHandle = dir.dirHandle;
3383 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3384 &dirVnodeInfo->vnodes[i], path);
3387 /* If enumeration failed for random reasons, we will probably delete
3388 * too much stuff, so we guard against this instead.
3390 struct judgeEntry_params judge_params;
3391 judge_params.salvinfo = salvinfo;
3392 judge_params.dir = &dir;
3394 osi_Assert(EnumerateDir(&dirHandle, JudgeEntry, &judge_params) == 0);
3397 /* Delete the old directory if it was copied in order to salvage.
3398 * CopyOnWrite has written the new inode # to the disk, but we still
3399 * have the old one in our local structure here. Thus, we idec the