2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
89 #include <afs/procmgmt.h>
92 #ifdef HAVE_SYS_FILE_H
93 # include <sys/file.h>
97 #include <WINNT/afsevent.h>
100 #define WCOREDUMP(x) ((x) & 0200)
103 #include <afs/afsint.h>
104 #include <afs/afs_assert.h>
105 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
106 #if defined(AFS_VFSINCL_ENV)
107 #include <sys/vnode.h>
109 #include <sys/fs/ufs_inode.h>
111 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
112 #include <ufs/ufs/dinode.h>
113 #include <ufs/ffs/fs.h>
115 #include <ufs/inode.h>
118 #else /* AFS_VFSINCL_ENV */
120 #include <ufs/inode.h>
121 #else /* AFS_OSF_ENV */
122 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_DARWIN_ENV)
123 #include <sys/inode.h>
126 #endif /* AFS_VFSINCL_ENV */
127 #endif /* AFS_SGI_ENV */
130 #include <sys/lockf.h>
133 #include <checklist.h>
135 #if defined(AFS_SGI_ENV)
138 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
140 #include <sys/mnttab.h>
141 #include <sys/mntent.h>
146 #endif /* AFS_SGI_ENV */
147 #endif /* AFS_HPUX_ENV */
151 #include <afs/osi_inode.h>
155 #include <afs/afsutil.h>
156 #include <afs/fileutil.h>
161 #include <afs/afssyscalls.h>
165 #include "partition.h"
166 #include "daemon_com.h"
167 #include "daemon_com_inline.h"
169 #include "volume_inline.h"
170 #include "salvsync.h"
171 #include "viceinode.h"
173 #include "volinodes.h" /* header magic number, etc. stuff */
174 #include "vol-salvage.h"
176 #include "vol_internal.h"
178 #include <afs/prs_fs.h>
180 #ifdef FSSYNC_BUILD_CLIENT
181 #include "vg_cache.h"
189 extern void *calloc();
191 static char *TimeStamp(time_t clock, int precision);
194 int debug; /* -d flag */
195 extern int Testing; /* -n flag */
196 int ListInodeOption; /* -i flag */
197 int ShowRootFiles; /* -r flag */
198 int RebuildDirs; /* -sal flag */
199 int Parallel = 4; /* -para X flag */
200 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
201 int forceR = 0; /* -b flag */
202 int ShowLog = 0; /* -showlog flag */
203 int ShowSuid = 0; /* -showsuid flag */
204 int ShowMounts = 0; /* -showmounts flag */
205 int orphans = ORPH_IGNORE; /* -orphans option */
210 int useSyslog = 0; /* -syslog flag */
211 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
220 #define MAXPARALLEL 32
222 int OKToZap; /* -o flag */
223 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
224 * in the volume header */
226 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
228 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
231 * information that is 'global' to a particular salvage job.
234 Device fileSysDevice; /**< The device number of the current partition
236 char fileSysPath[8]; /**< The path of the mounted partition currently
237 * being salvaged, i.e. the directory containing
238 * the volume headers */
239 char *fileSysPathName; /**< NT needs this to make name pretty log. */
240 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
241 int VGLinkH_cnt; /**< # of references to lnk handle. */
242 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
245 char *fileSysDeviceName; /**< The block device where the file system being
246 * salvaged was mounted */
247 char *filesysfulldev;
249 int VolumeChanged; /**< Set by any routine which would change the
250 * volume in a way which would require callbacks
251 * to be broken if the volume was put back
252 * on line by an active file server */
254 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
255 * header dealt with */
257 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
258 FD_t inodeFd; /**< File descriptor for inode file */
260 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
261 int nVolumes; /**< Number of volumes (read-write and read-only)
262 * in volume summary */
263 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
266 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
267 * vnodes in the volume that
268 * we are currently looking
270 int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
271 * to contact the fileserver over FSYNC */
278 /* Forward declarations */
279 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
280 static int AskVolumeSummary(struct SalvInfo *salvinfo,
281 VolumeId singleVolumeNumber);
282 static void MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId);
283 static void AskError(struct SalvInfo *salvinfo, VolumeId volumeId);
285 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
286 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
287 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
289 /* Uniquifier stored in the Inode */
294 return (u & 0x3fffff);
296 #if defined(AFS_SGI_EXMAG)
297 return (u & SGI_UNIQMASK);
300 #endif /* AFS_SGI_EXMAG */
307 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
309 return 0; /* otherwise may be transient, e.g. EMFILE */
314 char *save_args[MAX_ARGS];
316 extern pthread_t main_thread;
317 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
321 * Get the salvage lock if not already held. Hold until process exits.
323 * @param[in] locktype READ_LOCK or WRITE_LOCK
326 _ObtainSalvageLock(int locktype)
328 struct VLockFile salvageLock;
333 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
335 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
338 "salvager: There appears to be another salvager running! "
343 "salvager: Error %d trying to acquire salvage lock! "
349 ObtainSalvageLock(void)
351 _ObtainSalvageLock(WRITE_LOCK);
354 ObtainSharedSalvageLock(void)
356 _ObtainSalvageLock(READ_LOCK);
360 #ifdef AFS_SGI_XFS_IOPS_ENV
361 /* Check if the given partition is mounted. For XFS, the root inode is not a
362 * constant. So we check the hard way.
365 IsPartitionMounted(char *part)
368 struct mntent *mntent;
370 osi_Assert(mntfp = setmntent(MOUNTED, "r"));
371 while (mntent = getmntent(mntfp)) {
372 if (!strcmp(part, mntent->mnt_dir))
377 return mntent ? 1 : 1;
380 /* Check if the given inode is the root of the filesystem. */
381 #ifndef AFS_SGI_XFS_IOPS_ENV
383 IsRootInode(struct afs_stat_st *status)
386 * The root inode is not a fixed value in XFS partitions. So we need to
387 * see if the partition is in the list of mounted partitions. This only
388 * affects the SalvageFileSys path, so we check there.
390 return (status->st_ino == ROOTINODE);
395 #ifndef AFS_NAMEI_ENV
396 /* We don't want to salvage big files filesystems, since we can't put volumes on
400 CheckIfBigFilesFS(char *mountPoint, char *devName)
402 struct superblock fs;
405 if (strncmp(devName, "/dev/", 5)) {
406 (void)sprintf(name, "/dev/%s", devName);
408 (void)strcpy(name, devName);
411 if (ReadSuper(&fs, name) < 0) {
412 Log("Unable to read superblock. Not salvaging partition %s.\n",
416 if (IsBigFilesFileSystem(&fs)) {
417 Log("Partition %s is a big files filesystem, not salvaging.\n",
427 #define HDSTR "\\Device\\Harddisk"
428 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
430 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
436 static int dowarn = 1;
438 if (!QueryDosDevice(p1->devName, res1, RES_LEN - 1))
440 if (strncmp(res1, HDSTR, HDLEN)) {
443 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
444 res1, HDSTR, p1->devName);
447 if (!QueryDosDevice(p2->devName, res2, RES_LEN - 1))
449 if (strncmp(res2, HDSTR, HDLEN)) {
452 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
453 res2, HDSTR, p2->devName);
457 return (0 == _strnicmp(res1, res2, RES_LEN - 1));
460 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
463 /* This assumes that two partitions with the same device number divided by
464 * PartsPerDisk are on the same disk.
467 SalvageFileSysParallel(struct DiskPartition64 *partP)
470 struct DiskPartition64 *partP;
471 int pid; /* Pid for this job */
472 int jobnumb; /* Log file job number */
473 struct job *nextjob; /* Next partition on disk to salvage */
475 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
476 struct job *thisjob = 0;
477 static int numjobs = 0;
478 static int jobcount = 0;
484 char logFileName[256];
488 /* We have a partition to salvage. Copy it into thisjob */
489 thisjob = (struct job *)malloc(sizeof(struct job));
491 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
494 memset(thisjob, 0, sizeof(struct job));
495 thisjob->partP = partP;
496 thisjob->jobnumb = jobcount;
498 } else if (jobcount == 0) {
499 /* We are asking to wait for all jobs (partp == 0), yet we never
502 Log("No file system partitions named %s* found; not salvaged\n",
503 VICE_PARTITION_PREFIX);
507 if (debug || Parallel == 1) {
509 SalvageFileSys(thisjob->partP, 0);
516 /* Check to see if thisjob is for a disk that we are already
517 * salvaging. If it is, link it in as the next job to do. The
518 * jobs array has 1 entry per disk being salvaged. numjobs is
519 * the total number of disks currently being salvaged. In
520 * order to keep the jobs array compact, when a disk is
521 * completed, the highest element in the jobs array is moved
522 * down to the now open slot.
524 for (j = 0; j < numjobs; j++) {
525 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
526 /* On same disk, add it to this list and return */
527 thisjob->nextjob = jobs[j]->nextjob;
528 jobs[j]->nextjob = thisjob;
535 /* Loop until we start thisjob or until all existing jobs are finished */
536 while (thisjob || (!partP && (numjobs > 0))) {
537 startjob = -1; /* No new job to start */
539 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
540 /* Either the max jobs are running or we have to wait for all
541 * the jobs to finish. In either case, we wait for at least one
542 * job to finish. When it's done, clean up after it.
544 pid = wait(&wstatus);
545 osi_Assert(pid != -1);
546 for (j = 0; j < numjobs; j++) { /* Find which job it is */
547 if (pid == jobs[j]->pid)
550 osi_Assert(j < numjobs);
551 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
552 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
555 numjobs--; /* job no longer running */
556 oldjob = jobs[j]; /* remember */
557 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
558 free(oldjob); /* free the old job */
560 /* If there is another partition on the disk to salvage, then
561 * say we will start it (startjob). If not, then put thisjob there
562 * and say we will start it.
564 if (jobs[j]) { /* Another partitions to salvage */
565 startjob = j; /* Will start it */
566 } else { /* There is not another partition to salvage */
568 jobs[j] = thisjob; /* Add thisjob */
570 startjob = j; /* Will start it */
572 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
573 startjob = -1; /* Don't start it - already running */
577 /* We don't have to wait for a job to complete */
579 jobs[numjobs] = thisjob; /* Add this job */
581 startjob = numjobs; /* Will start it */
585 /* Start up a new salvage job on a partition in job slot "startjob" */
586 if (startjob != -1) {
588 Log("Starting salvage of file system partition %s\n",
589 jobs[startjob]->partP->name);
591 /* For NT, we not only fork, but re-exec the salvager. Pass in the
592 * commands and pass the child job number via the data path.
595 nt_SalvagePartition(jobs[startjob]->partP->name,
596 jobs[startjob]->jobnumb);
597 jobs[startjob]->pid = pid;
602 jobs[startjob]->pid = pid;
608 for (fd = 0; fd < 16; fd++)
615 openlog("salvager", LOG_PID, useSyslogFacility);
619 snprintf(logFileName, sizeof logFileName, "%s.%d",
620 AFSDIR_SERVER_SLVGLOG_FILEPATH,
621 jobs[startjob]->jobnumb);
622 logFile = afs_fopen(logFileName, "w");
627 SalvageFileSys1(jobs[startjob]->partP, 0);
632 } /* while ( thisjob || (!partP && numjobs > 0) ) */
634 /* If waited for all jobs to complete, now collect log files and return */
636 if (!useSyslog) /* if syslogging - no need to collect */
639 for (i = 0; i < jobcount; i++) {
640 snprintf(logFileName, sizeof logFileName, "%s.%d",
641 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
642 if ((passLog = afs_fopen(logFileName, "r"))) {
643 while (fgets(buf, sizeof(buf), passLog)) {
648 (void)unlink(logFileName);
657 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
659 if (!canfork || debug || Fork() == 0) {
660 SalvageFileSys1(partP, singleVolumeNumber);
661 if (canfork && !debug) {
666 Wait("SalvageFileSys");
670 get_DevName(char *pbuffer, char *wpath)
672 char pbuf[128], *ptr;
673 strcpy(pbuf, pbuffer);
674 ptr = (char *)strrchr(pbuf, OS_DIRSEPC);
680 ptr = (char *)strrchr(pbuffer, OS_DIRSEPC);
682 strcpy(pbuffer, ptr + 1);
689 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
692 char inodeListPath[256];
693 FD_t inodeFile = INVALID_FD;
694 static char tmpDevName[100];
695 static char wpath[100];
696 struct VolumeSummary *vsp, *esp;
700 struct SalvInfo l_salvinfo;
701 struct SalvInfo *salvinfo = &l_salvinfo;
704 memset(salvinfo, 0, sizeof(*salvinfo));
707 if (inodeFile != INVALID_FD) {
709 inodeFile = INVALID_FD;
711 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
712 Abort("Raced too many times with fileserver restarts while trying to "
713 "checkout/lock volumes; Aborted\n");
715 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
717 /* unlock all previous volume locks, since we're about to lock them
719 VLockFileReinit(&partP->volLockFile);
721 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
723 salvinfo->fileSysPartition = partP;
724 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
725 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
728 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
729 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
730 name = partP->devName;
732 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
733 strcpy(tmpDevName, partP->devName);
734 name = get_DevName(tmpDevName, wpath);
735 salvinfo->fileSysDeviceName = name;
736 salvinfo->filesysfulldev = wpath;
739 if (singleVolumeNumber) {
740 #if !(defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL))
741 /* only non-DAFS locks the partition when salvaging a single volume;
742 * DAFS will lock the individual volumes in the VG */
743 VLockPartition(partP->name);
744 #endif /* !(AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL) */
748 /* salvageserver already setup fssync conn for us */
749 if ((programType != salvageServer) && !VConnectFS()) {
750 Abort("Couldn't connect to file server\n");
753 salvinfo->useFSYNC = 1;
754 AskOffline(salvinfo, singleVolumeNumber);
755 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
756 if (LockVolume(salvinfo, singleVolumeNumber)) {
759 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
762 salvinfo->useFSYNC = 0;
763 VLockPartition(partP->name);
767 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
770 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
771 partP->name, name, (Testing ? "(READONLY mode)" : ""));
773 Log("***Forced salvage of all volumes on this partition***\n");
778 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
785 osi_Assert((dirp = opendir(salvinfo->fileSysPath)) != NULL);
786 while ((dp = readdir(dirp))) {
787 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
788 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
790 Log("Removing old salvager temp files %s\n", dp->d_name);
791 strcpy(npath, salvinfo->fileSysPath);
792 strcat(npath, OS_DIRSEP);
793 strcat(npath, dp->d_name);
799 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
801 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
802 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
804 snprintf(inodeListPath, 255, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
808 inodeFile = OS_OPEN(inodeListPath, O_RDWR|O_TRUNC|O_CREAT, 0666);
809 if (inodeFile == INVALID_FD) {
810 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
813 /* Using nt_unlink here since we're really using the delete on close
814 * semantics of unlink. In most places in the salvager, we really do
815 * mean to unlink the file at that point. Those places have been
816 * modified to actually do that so that the NT crt can be used there.
818 * jaltman - On NT delete on close cannot be applied to a file while the
819 * process has an open file handle that does not have DELETE file
820 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
821 * delete privileges. As a result the nt_unlink() call will always
824 code = nt_unlink(inodeListPath);
826 code = unlink(inodeListPath);
829 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
832 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
836 salvinfo->inodeFd = inodeFile;
837 if (salvinfo->inodeFd == INVALID_FD)
838 Abort("Temporary file %s is missing...\n", inodeListPath);
839 OS_SEEK(salvinfo->inodeFd, 0L, SEEK_SET);
840 if (ListInodeOption) {
841 PrintInodeList(salvinfo);
842 if (singleVolumeNumber) {
843 /* We've checked out the volume from the fileserver, and we need
844 * to give it back. We don't know if the volume exists or not,
845 * so we don't know whether to AskOnline or not. Try to determine
846 * if the volume exists by trying to read the volume header, and
847 * AskOnline if it is readable. */
848 MaybeAskOnline(salvinfo, singleVolumeNumber);
852 /* enumerate volumes in the partition.
853 * figure out sets of read-only + rw volumes.
854 * salvage each set, read-only volumes first, then read-write.
855 * Fix up inodes on last volume in set (whether it is read-write
858 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
862 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
863 i < salvinfo->nVolumesInInodeFile; i = j) {
864 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
866 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
868 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
869 struct VolumeSummary *tsp;
870 /* Scan volume list (from partition root directory) looking for the
871 * current rw volume number in the volume list from the inode scan.
872 * If there is one here that is not in the inode volume list,
874 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
876 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
878 /* Now match up the volume summary info from the root directory with the
879 * entry in the volume list obtained from scanning inodes */
880 salvinfo->inodeSummary[j].volSummary = NULL;
881 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
882 if (tsp->header.id == vid) {
883 salvinfo->inodeSummary[j].volSummary = tsp;
889 /* Salvage the group of volumes (several read-only + 1 read/write)
890 * starting with the current read-only volume we're looking at.
892 SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
895 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
896 for (; vsp < esp; vsp++) {
898 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
901 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
902 RemoveTheForce(salvinfo->fileSysPath);
904 if (!Testing && singleVolumeNumber) {
906 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
907 /* unlock vol headers so the fs can attach them when we AskOnline */
908 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
909 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
911 /* Step through the volumeSummary list and set all volumes on-line.
912 * Most volumes were taken off-line in GetVolumeSummary.
913 * If a volume was deleted, don't tell the fileserver anything, since
914 * we already told the fileserver the volume was deleted back when we
915 * destroyed the volume header.
916 * Also, make sure we bring the singleVolumeNumber back online first.
919 for (j = 0; j < salvinfo->nVolumes; j++) {
920 if (salvinfo->volumeSummaryp[j].header.id == singleVolumeNumber) {
922 if (!salvinfo->volumeSummaryp[j].deleted) {
923 AskOnline(salvinfo, singleVolumeNumber);
929 /* If singleVolumeNumber is not in our volumeSummary, it means that
930 * at least one other volume in the VG is on the partition, but the
931 * RW volume is not. We've already AskOffline'd it by now, though,
932 * so make sure we don't still have the volume checked out. */
933 AskDelete(salvinfo, singleVolumeNumber);
936 for (j = 0; j < salvinfo->nVolumes; j++) {
937 if (salvinfo->volumeSummaryp[j].header.id != singleVolumeNumber) {
938 if (!salvinfo->volumeSummaryp[j].deleted) {
939 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
945 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
946 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
949 OS_CLOSE(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
953 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
956 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, vsp->fileName);
959 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
962 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
964 Log("Error %ld destroying volume disk header for volume %lu\n",
965 afs_printable_int32_ld(code),
966 afs_printable_uint32_lu(vsp->header.id));
969 /* make sure we actually delete the fileName file; ENOENT
970 * is fine, since VDestroyVolumeDiskHeader probably already
972 if (unlink(path) && errno != ENOENT) {
973 Log("Unable to unlink %s (errno = %d)\n", path, errno);
975 if (salvinfo->useFSYNC) {
976 AskDelete(salvinfo, vsp->header.id);
984 CompareInodes(const void *_p1, const void *_p2)
986 const struct ViceInodeInfo *p1 = _p1;
987 const struct ViceInodeInfo *p2 = _p2;
988 if (p1->u.vnode.vnodeNumber == INODESPECIAL
989 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
990 VolumeId p1rwid, p2rwid;
992 (p1->u.vnode.vnodeNumber ==
993 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
995 (p2->u.vnode.vnodeNumber ==
996 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1001 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1002 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
1003 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1004 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1005 if (p1->u.vnode.volumeId == p1rwid)
1007 if (p2->u.vnode.volumeId == p2rwid)
1009 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1011 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1012 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1013 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1015 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1017 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1019 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1021 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1023 /* The following tests are reversed, so that the most desirable
1024 * of several similar inodes comes first */
1025 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1026 #ifdef AFS_3DISPARES
1027 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1028 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1031 #ifdef AFS_SGI_EXMAG
1032 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1033 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1038 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1039 #ifdef AFS_3DISPARES
1040 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1041 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1044 #ifdef AFS_SGI_EXMAG
1045 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1046 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1051 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1052 #ifdef AFS_3DISPARES
1053 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1054 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1057 #ifdef AFS_SGI_EXMAG
1058 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1059 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1064 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1065 #ifdef AFS_3DISPARES
1066 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1067 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1070 #ifdef AFS_SGI_EXMAG
1071 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1072 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1081 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1082 struct InodeSummary *summary)
1084 VolumeId volume = ip->u.vnode.volumeId;
1085 VolumeId rwvolume = volume;
1090 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1092 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1094 rwvolume = ip->u.special.parentId;
1095 /* This isn't quite right, as there could (in error) be different
1096 * parent inodes in different special vnodes */
1098 if (maxunique < ip->u.vnode.vnodeUniquifier)
1099 maxunique = ip->u.vnode.vnodeUniquifier;
1103 summary->volumeId = volume;
1104 summary->RWvolumeId = rwvolume;
1105 summary->nInodes = n;
1106 summary->nSpecialInodes = nSpecial;
1107 summary->maxUniquifier = maxunique;
1111 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1113 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1114 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1115 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1120 * Collect list of inodes in file named by path. If a truly fatal error,
1121 * unlink the file and abort. For lesser errors, return -1. The file will
1122 * be unlinked by the caller.
1125 GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolumeNumber)
1129 struct ViceInodeInfo *ip, *ip_save;
1130 struct InodeSummary summary;
1131 char summaryFileName[50];
1132 FD_t summaryFile = INVALID_FD;
1134 char *dev = salvinfo->fileSysPath;
1135 char *wpath = salvinfo->fileSysPath;
1137 char *dev = salvinfo->fileSysDeviceName;
1138 char *wpath = salvinfo->filesysfulldev;
1140 char *part = salvinfo->fileSysPath;
1145 afs_sfsize_t st_size;
1147 /* This file used to come from vfsck; cobble it up ourselves now... */
1149 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1150 singleVolumeNumber ? OnlyOneVolume : 0,
1151 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1153 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1157 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1159 if (forceSal && !ForceSalvage) {
1160 Log("***Forced salvage of all volumes on this partition***\n");
1163 OS_SEEK(inodeFile, 0L, SEEK_SET);
1164 salvinfo->inodeFd = inodeFile;
1165 if (salvinfo->inodeFd == INVALID_FD ||
1166 (st_size = OS_SIZE(salvinfo->inodeFd)) == -1) {
1167 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1169 tdir = (tmpdir ? tmpdir : part);
1171 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1172 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp."));
1174 snprintf(summaryFileName, sizeof summaryFileName,
1175 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1177 summaryFile = OS_OPEN(summaryFileName, O_RDWR|O_APPEND|O_CREAT, 0666);
1178 if (summaryFile == INVALID_FD) {
1179 Abort("Unable to create inode summary file\n");
1183 /* Using nt_unlink here since we're really using the delete on close
1184 * semantics of unlink. In most places in the salvager, we really do
1185 * mean to unlink the file at that point. Those places have been
1186 * modified to actually do that so that the NT crt can be used there.
1188 * jaltman - As commented elsewhere, this cannot work because fopen()
1189 * does not open files with DELETE and FILE_SHARE_DELETE.
1191 code = nt_unlink(summaryFileName);
1193 code = unlink(summaryFileName);
1196 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1199 if (!canfork || debug || Fork() == 0) {
1200 int nInodes = st_size / sizeof(struct ViceInodeInfo);
1202 OS_CLOSE(summaryFile);
1203 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1204 RemoveTheForce(salvinfo->fileSysPath);
1206 struct VolumeSummary *vsp;
1210 GetVolumeSummary(salvinfo, singleVolumeNumber);
1212 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1213 if (vsp->fileName) {
1214 if (vsp->header.id == singleVolumeNumber) {
1217 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1223 MaybeAskOnline(salvinfo, singleVolumeNumber);
1225 /* make sure we get rid of stray .vol headers, even if
1226 * they're not in our volume summary (might happen if
1227 * e.g. something else created them and they're not in the
1228 * fileserver VGC) */
1229 VDestroyVolumeDiskHeader(salvinfo->fileSysPartition,
1230 singleVolumeNumber, 0 /*parent*/);
1231 AskDelete(salvinfo, singleVolumeNumber);
1235 Log("%s vice inodes on %s; not salvaged\n",
1236 singleVolumeNumber ? "No applicable" : "No", dev);
1241 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1243 OS_CLOSE(summaryFile);
1245 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1248 if (OS_READ(salvinfo->inodeFd, ip, st_size) != st_size) {
1249 OS_CLOSE(summaryFile);
1250 Abort("Unable to read inode table; %s not salvaged\n", dev);
1252 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1253 if (OS_SEEK(salvinfo->inodeFd, 0, SEEK_SET) == -1
1254 || OS_WRITE(salvinfo->inodeFd, ip, st_size) != st_size) {
1255 OS_CLOSE(summaryFile);
1256 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1261 CountVolumeInodes(ip, nInodes, &summary);
1262 if (OS_WRITE(summaryFile, &summary, sizeof(summary)) != sizeof(summary)) {
1263 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1264 OS_CLOSE(summaryFile);
1268 summary.index += (summary.nInodes);
1269 nInodes -= summary.nInodes;
1270 ip += summary.nInodes;
1273 ip = ip_save = NULL;
1274 /* Following fflush is not fclose, because if it was debug mode would not work */
1275 if (OS_SYNC(summaryFile) == -1) {
1276 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1277 OS_CLOSE(summaryFile);
1281 if (canfork && !debug) {
1286 if (Wait("Inode summary") == -1) {
1287 OS_CLOSE(summaryFile);
1288 Exit(1); /* salvage of this partition aborted */
1292 st_size = OS_SIZE(summaryFile);
1293 osi_Assert(st_size >= 0);
1296 salvinfo->inodeSummary = (struct InodeSummary *)malloc(st_size);
1297 osi_Assert(salvinfo->inodeSummary != NULL);
1298 /* For GNU we need to do lseek to get the file pointer moved. */
1299 osi_Assert(OS_SEEK(summaryFile, 0, SEEK_SET) == 0);
1300 ret = OS_READ(summaryFile, salvinfo->inodeSummary, st_size);
1301 osi_Assert(ret == st_size);
1303 salvinfo->nVolumesInInodeFile = st_size / sizeof(struct InodeSummary);
1304 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1305 salvinfo->inodeSummary[i].volSummary = NULL;
1307 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)st_size);
1308 OS_CLOSE(summaryFile);
1311 if (retcode && singleVolumeNumber && !deleted) {
1312 AskError(salvinfo, singleVolumeNumber);
/*
 * CompareVolumes: comparison callback with the qsort()-style
 * (const void *, const void *) signature; both arguments are read as
 * struct VolumeSummary.
 *
 * Ordering visible in this listing:
 *   1. differing header.parent values sort ascending by parent;
 *   2. within one parent, each side is tested for being the read-write
 *      volume (header.id == header.parent);
 *   3. two read-only volumes sort ascending by header.id.
 *
 * NOTE(review): the statements executed by the two "is rw volume" tests are
 * not visible here; going by the description below they presumably place
 * the read-write volume first -- confirm against the full source.
 * NOTE(review): none of the visible return statements yields 0, so entries
 * with identical parent and id are never reported as equal.
 */
1318 /* Comparison routine for volume sort.
1319 This is setup so that a read-write volume comes immediately before
1320 any read-only clones of that volume */
1322 CompareVolumes(const void *_p1, const void *_p2)
1324 const struct VolumeSummary *p1 = _p1;
1325 const struct VolumeSummary *p2 = _p2;
1326 if (p1->header.parent != p2->header.parent)
1327 return p1->header.parent < p2->header.parent ? -1 : 1;
1328 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1330 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1332 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
/*
 * AskVolumeSummary: fill salvinfo->volumeSummaryp / salvinfo->nVolumes from
 * the fileserver's volume-group (VG) membership data.
 *
 * The visible body is compiled only when both FSSYNC_BUILD_CLIENT and
 * AFS_DEMAND_ATTACH_FS are defined, and runs only when programType is
 * salvageServer and singleVolumeNumber is non-zero. Visible steps:
 *   - query the VG with FSYNC_VGCQuery(); while the answer is SYNC_FAILED
 *     with reason FSYNC_PART_SCANNING, sleep and query again. The sleep
 *     starts at 1 second and is capped at 10 seconds.
 *   - SYNC_FAILED with reason FSYNC_UNKNOWN_VOLID is handled by zeroing the
 *     response and setting its rw id to singleVolumeNumber (an empty VG).
 *   - other refusals are logged together with code and reason.
 *   - if the VG's rw id differs from singleVolumeNumber the request named a
 *     clone: this is logged, SALVSYNC_LinkVolume() is called when
 *     SALVSYNC_BUILD_CLIENT is defined, and the process exits with
 *     SALSRV_EXIT_VOLGROUP_LINK.
 *   - VOL_VG_MAX_VOLS zeroed VolumeSummary slots are allocated; the child
 *     ids are then walked. Children other than singleVolumeNumber get
 *     AskOffline() and LockVolume(); each child's disk header is read with
 *     VReadVolumeDiskHeader(), converted with DiskToVolumeHeader(), and its
 *     external name stored as fileName.
 *   - the collected summaries are sorted with qsort().
 *   - a fallback message about scanning the entire partition is logged on
 *     a path whose condition is not visible here.
 *
 * NOTE(review): short lines (braces, return statements, the declarations of
 * code, res and i, the handling of a zero child id and of a failed
 * LockVolume()) are not visible in this listing, so the return values
 * described below cannot be tied to specific statements here.
 */
1336 * Gleans volumeSummary information by asking the fileserver
1338 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1339 * salvaging a whole partition
1341 * @return whether we obtained the volume summary information or not
1342 * @retval 0 success; we obtained the volume summary information
1343 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1345 * @retval 1 we did not get the volume summary information; either the
1346 * fileserver responded with an error, or we are not supposed to
1347 * ask the fileserver for the information (e.g. we are salvaging
1348 * the entire partition or we are not the salvageserver)
1350 * @note for non-DAFS, always returns 1
1353 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1356 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1357 if (programType == salvageServer) {
1358 if (singleVolumeNumber) {
1359 FSSYNC_VGQry_response_t q_res;
1361 struct VolumeSummary *vsp;
1363 struct VolumeDiskHeader diskHdr;
1365 memset(&res, 0, sizeof(res));
1367 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1370 * We must wait for the partition to finish scanning before
1371 * can continue, since we will not know if we got the entire
1372 * VG membership unless the partition is fully scanned.
1373 * We could, in theory, just scan the partition ourselves if
1374 * the VG cache is not ready, but we would be doing the exact
1375 * same scan the fileserver is doing; it will almost always
1376 * be faster to wait for the fileserver. The only exceptions
1377 * are if the partition does not take very long to scan, and
1378 * in that case it's fast either way, so who cares?
1380 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1381 Log("waiting for fileserver to finish scanning partition %s...\n",
1382 salvinfo->fileSysPartition->name);
1384 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1385 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1386 * just so small partitions don't need to wait over 10
1387 * seconds every time, and large partitions are generally
1388 * polled only once every ten seconds. */
1389 sleep((i > 10) ? (i = 10) : i);
1391 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1395 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1396 /* This can happen if there's no header for the volume
1397 * we're salvaging, or no headers exist for the VG (if
1398 * we're salvaging an RW). Act as if we got a response
1399 * with no VG members. The headers may be created during
1400 * salvaging, if there are inodes in this VG. */
1402 memset(&q_res, 0, sizeof(q_res));
1403 q_res.rw = singleVolumeNumber;
1407 Log("fileserver refused VGCQuery request for volume %lu on "
1408 "partition %s, code %ld reason %ld\n",
1409 afs_printable_uint32_lu(singleVolumeNumber),
1410 salvinfo->fileSysPartition->name,
1411 afs_printable_int32_ld(code),
1412 afs_printable_int32_ld(res.hdr.reason));
1416 if (q_res.rw != singleVolumeNumber) {
1417 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1418 afs_printable_uint32_lu(singleVolumeNumber),
1419 afs_printable_uint32_lu(q_res.rw));
1420 #ifdef SALVSYNC_BUILD_CLIENT
1421 if (SALVSYNC_LinkVolume(q_res.rw,
1423 salvinfo->fileSysPartition->name,
1425 Log("schedule request failed\n");
1427 #endif /* SALVSYNC_BUILD_CLIENT */
1428 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1431 salvinfo->volumeSummaryp = calloc(VOL_VG_MAX_VOLS, sizeof(struct VolumeSummary));
1432 osi_Assert(salvinfo->volumeSummaryp != NULL);
1434 salvinfo->nVolumes = 0;
1435 vsp = salvinfo->volumeSummaryp;
1437 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1438 char name[VMAXPATHLEN];
1440 if (!q_res.children[i]) {
1444 /* AskOffline for singleVolumeNumber was called much earlier */
1445 if (q_res.children[i] != singleVolumeNumber) {
1446 AskOffline(salvinfo, q_res.children[i]);
1447 if (LockVolume(salvinfo, q_res.children[i])) {
1453 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1455 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1456 afs_printable_uint32_lu(q_res.children[i]));
1461 DiskToVolumeHeader(&vsp->header, &diskHdr);
1462 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1463 vsp->fileName = ToString(name);
1464 salvinfo->nVolumes++;
1468 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1473 Log("Cannot get volume summary from fileserver; falling back to scanning "
1474 "entire partition\n");
1477 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
/*
 * CountHeader: header-walk callback (GetVolumeSummary passes it to
 * VWalkVolumeHeaders) used to count volume headers. The rock argument is
 * read as a pointer to an afs_int32 running count; dp, name, hdr and last
 * are documented below as unused.
 *
 * NOTE(review): the statement that updates the count and the return value
 * are not visible in this listing -- presumably a simple increment; confirm
 * against the full source.
 */
1482 * count how many volume headers are found by VWalkVolumeHeaders.
1484 * @param[in] dp the disk partition (unused)
1485 * @param[in] name full path to the .vol header (unused)
1486 * @param[in] hdr the header data (unused)
1487 * @param[in] last whether this is the last try or not (unused)
1488 * @param[in] rock actually an afs_int32*; the running count of how many
1489 * volumes we have found
1494 CountHeader(struct DiskPartition64 *dp, const char *name,
1495 struct VolumeDiskHeader *hdr, int last, void *rock)
1497 afs_int32 *nvols = (afs_int32 *)rock;
/*
 * struct SalvageScanParams: state handed through the opaque rock pointer to
 * the header-walk callbacks. RecordHeader and UnlinkHeader both cast their
 * rock argument to this type, and GetVolumeSummary fills one in before
 * walking the partition.
 */
1503 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
1506 struct SalvageScanParams {
1507 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1508 * vol id of the VG we're salvaging */
1509 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1510 * we're filling in */
1511 afs_int32 nVolumes; /**< # of vols we've encountered */
1512 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1513 * # of vols we've alloc'd memory for) */
1514 int retry; /**< do we need to retry vol lock/checkout? */
1515 struct SalvInfo *salvinfo; /**< salvage job info */
/*
 * RecordHeader: header-walk callback that copies one volume header into the
 * summary array described by the SalvageScanParams passed as rock.
 *
 * Visible steps:
 *   - the disk header is converted into a local VolumeSummary with
 *     DiskToVolumeHeader().
 *   - if a single volume is being salvaged and this header is that volume
 *     but its parent is a different id, the request named a clone. Under
 *     programType == salvageServer this is logged, SALVSYNC_LinkVolume() is
 *     called for the parent (when SALVSYNC_BUILD_CLIENT is defined) and the
 *     process exits with SALSRV_EXIT_VOLGROUP_LINK; a separate visible
 *     message reports "is a read-only volume; not salvaged".
 *   - for a partition salvage, or when the header's id or parent equals
 *     singleVolumeNumber: the file's base name is compared with the name
 *     generated from VFORMAT and the header id. When the name is good, or
 *     on the last try, volumes other than singleVolumeNumber are taken
 *     offline with AskOffline(), and under AFS_DEMAND_ATTACH_FS or
 *     AFS_DEMAND_ATTACH_UTIL the volume is locked with LockVolume(). A
 *     mis-named header is logged on the last try unless Showmode is set.
 *     The base name is stored as summary.fileName.
 *   - Abort() is called when params->nVolumes exceeds params->totalVolumes;
 *     otherwise the local summary is copied into *params->vsp.
 *
 * NOTE(review): the lines that set badname, advance params->nVolumes and
 * params->vsp, set params->retry, and return are not visible in this
 * listing; the return codes documented below cannot be tied to specific
 * statements here.
 */
1519 * records volume summary info found from VWalkVolumeHeaders.
1521 * Found volumes are also taken offline if they are in the specific volume
1522 * group we are looking for.
1524 * @param[in] dp the disk partition
1525 * @param[in] name full path to the .vol header
1526 * @param[in] hdr the header data
1527 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1528 * @param[in] rock actually a struct SalvageScanParams*, containing the
1529 * information needed to record the volume summary data
1531 * @return operation status
1533 * @retval -1 volume locking raced with fileserver restart; checking out
1534 * and locking volumes needs to be retried
1535 * @retval 1 volume header is mis-named and should be deleted
1538 RecordHeader(struct DiskPartition64 *dp, const char *name,
1539 struct VolumeDiskHeader *hdr, int last, void *rock)
1541 char nameShouldBe[64];
1542 struct SalvageScanParams *params;
1543 struct VolumeSummary summary;
1544 VolumeId singleVolumeNumber;
1545 struct SalvInfo *salvinfo;
1547 params = (struct SalvageScanParams *)rock;
1549 singleVolumeNumber = params->singleVolumeNumber;
1550 salvinfo = params->salvinfo;
1552 DiskToVolumeHeader(&summary.header, hdr);
1554 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1555 && summary.header.parent != singleVolumeNumber) {
1557 if (programType == salvageServer) {
1558 #ifdef SALVSYNC_BUILD_CLIENT
1559 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1560 summary.header.id, summary.header.parent);
1561 if (SALVSYNC_LinkVolume(summary.header.parent,
1565 Log("schedule request failed\n");
1568 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1571 Log("%u is a read-only volume; not salvaged\n",
1572 singleVolumeNumber);
1577 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1578 || summary.header.parent == singleVolumeNumber) {
1580 /* check if the header file is incorrectly named */
1582 const char *base = strrchr(name, OS_DIRSEPC);
1589 snprintf(nameShouldBe, sizeof nameShouldBe,
1590 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1593 if (strcmp(nameShouldBe, base)) {
1594 /* .vol file has wrong name; retry/delete */
1598 if (!badname || last) {
1599 /* only offline the volume if the header is good, or if this is
1600 * the last try looking at it; avoid AskOffline'ing the same vol
1603 if (singleVolumeNumber
1604 && summary.header.id != singleVolumeNumber) {
1605 /* don't offline singleVolumeNumber; we already did that
1608 AskOffline(salvinfo, summary.header.id);
1610 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
1612 /* don't lock the volume if the header is bad, since we're
1613 * about to delete it anyway. */
1614 if (LockVolume(salvinfo, summary.header.id)) {
1619 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
1623 if (last && !Showmode) {
1624 Log("Volume header file %s is incorrectly named (should be %s "
1625 "not %s); %sdeleted (it will be recreated later, if "
1626 "necessary)\n", name, nameShouldBe, base,
1627 (Testing ? "it would have been " : ""));
1632 summary.fileName = ToString(base);
1635 if (params->nVolumes > params->totalVolumes) {
1636 /* We found more volumes than we found on the first partition walk;
1637 * apparently something created a volume while we were
1638 * partition-salvaging, or we found more than 20 vols when salvaging a
1639 * particular volume. Abort if we detect this, since other programs
1640 * supposed to not touch the partition while it is partition-salvaging,
1641 * and we shouldn't find more than 20 vols in a VG.
1643 Abort("Found %ld vol headers, but should have found at most %ld! "
1644 "Make sure the volserver/fileserver are not running at the "
1645 "same time as a partition salvage\n",
1646 afs_printable_int32_ld(params->nVolumes),
1647 afs_printable_int32_ld(params->totalVolumes));
1650 memcpy(params->vsp, &summary, sizeof(summary));
/*
 * UnlinkHeader: header-walk callback that decides whether a rejected .vol
 * header file is removed from disk.
 *
 * Visible steps:
 *   - rock is cast to struct SalvageScanParams.
 *   - when the header could not be read at all, a "not a legitimate volume
 *     header file" message is logged; its wording changes when Testing is
 *     set.
 *   - otherwise, the branch for params->singleVolumeNumber == 0 (partition
 *     salvage) covers headers that RecordHeader rejected.
 *   - the file is removed with unlink(name) only when dounlink is non-zero;
 *     an unlink() failure is logged with errno.
 *
 * NOTE(review): the declaration of dounlink and every assignment to it are
 * not visible in this listing, so the exact conditions under which a file
 * is removed (including any effect of Testing) must be confirmed against
 * the full source.
 */
1658 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1660 * If the header could not be read in at all, the header is always unlinked.
1661 * If instead RecordHeader said the header was bad (that is, the header file
1662 * is mis-named), we only unlink if we are doing a partition salvage, as
1663 * opposed to salvaging a specific volume group.
1665 * @param[in] dp the disk partition
1666 * @param[in] name full path to the .vol header
1667 * @param[in] hdr header data, or NULL if the header could not be read
1668 * @param[in] rock actually a struct SalvageScanParams*, with some information
1672 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1673 struct VolumeDiskHeader *hdr, void *rock)
1675 struct SalvageScanParams *params;
1678 params = (struct SalvageScanParams *)rock;
1681 /* no header; header is too bogus to read in at all */
1683 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1689 } else if (!params->singleVolumeNumber) {
1690 /* We were able to read in a header, but RecordHeader said something
1691 * was wrong with it. We only unlink those if we are doing a partition
1698 if (dounlink && unlink(name)) {
1699 Log("Error %d while trying to unlink %s\n", errno, name);
/*
 * GetVolumeSummary: populate salvinfo->volumeSummaryp and
 * salvinfo->nVolumes.
 *
 * Visible steps:
 *   - AskVolumeSummary() is tried first; per the comments below, one outcome
 *     means the fileserver supplied the data and another means volume
 *     checkout must be retried.
 *   - for a partition salvage (singleVolumeNumber == 0) the headers are
 *     counted with a first VWalkVolumeHeaders() pass using CountHeader;
 *     Abort() is called if the directory cannot be read. For a single
 *     volume group the capacity is VOL_VG_MAX_VOLS.
 *   - a zeroed array of nvols VolumeSummary entries is allocated and
 *     asserted non-NULL.
 *   - a second VWalkVolumeHeaders() pass runs RecordHeader / UnlinkHeader
 *     with a SalvageScanParams rock to fill the array and drop bad headers.
 *   - the recorded count is stored in salvinfo->nVolumes and the array is
 *     sorted with qsort().
 *
 * Fix in this revision: the rock argument of the second walk had been
 * mangled into a pilcrow followed by "ms" (an HTML-entity decoding
 * artifact); it is restored to the address of the local params structure,
 * which is the struct SalvageScanParams that RecordHeader and UnlinkHeader
 * expect.
 *
 * NOTE(review): the conditions guarding the early returns, the
 * initialisation of params.retry and the final return are not visible in
 * this listing.
 */
1704 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1705 * the fileserver for VG information, or by scanning the /vicepX partition.
1707 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1708 * are salvaging, or 0 if this is a partition
1711 * @return operation status
1713 * @retval -1 we raced with a fileserver restart; checking out and locking
1714 * volumes must be retried
1717 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1719 afs_int32 nvols = 0;
1720 struct SalvageScanParams params;
1723 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1725 /* we successfully got the vol information from the fileserver; no
1726 * need to scan the partition */
1730 /* we need to retry volume checkout */
1734 if (!singleVolumeNumber) {
1735 /* Count how many volumes we have in /vicepX */
1736 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1739 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1744 nvols = VOL_VG_MAX_VOLS;
1747 salvinfo->volumeSummaryp = calloc(nvols, sizeof(struct VolumeSummary));
1748 osi_Assert(salvinfo->volumeSummaryp != NULL);
1750 params.singleVolumeNumber = singleVolumeNumber;
1751 params.vsp = salvinfo->volumeSummaryp;
1752 params.nVolumes = 0;
1753 params.totalVolumes = nvols;
1755 params.salvinfo = salvinfo;
1757 /* walk the partition directory of volume headers and record the info
1758 * about them; unlinking invalid headers */
1759 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1760 UnlinkHeader, &params);
1762 /* we apparently need to retry checking-out/locking volumes */
1766 Abort("Failed to get volume header summary\n");
1768 salvinfo->nVolumes = params.nVolumes;
1770 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
/*
 * FindLinkHandle: search the special inodes of a volume group for its link
 * table.
 *
 * isp       array of nVols inode summaries belonging to one volume group
 * nVols     number of entries in isp
 * allInodes base pointer such that allInodes + isp[i].index is the first
 *           inode of volume i
 *
 * For each volume, the first isp[i].nSpecialInodes entries are examined and
 * the inodeNumber of the first one whose u.special.type is VI_LINKTABLE is
 * returned.
 *
 * NOTE(review): the return type and the value returned when no link table
 * is found are not visible in this listing.
 */
1776 /* Find the link table. This should be associated with the RW volume or, if
1777 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
1780 FindLinkHandle(struct InodeSummary *isp, int nVols,
1781 struct ViceInodeInfo *allInodes)
1784 struct ViceInodeInfo *ip;
1786 for (i = 0; i < nVols; i++) {
1787 ip = allInodes + isp[i].index;
1788 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1789 if (ip[j].u.special.type == VI_LINKTABLE)
1790 return ip[j].inodeNumber;
/*
 * CreateLinkTable: make sure the volume group has an initialised link table
 * and leave salvinfo->VGLinkH referring to it.
 *
 * salvinfo  salvage job state; VGLinkH is (re)initialised here
 * isp       inode summary of the volume group; volumeId and RWvolumeId are
 *           used when creating the inode, and volSummary (if present) gets
 *           its header.linkTable field updated
 * ino       link table inode to use; when it is not a valid inode, a new
 *           special inode of type VI_LINKTABLE is created with IH_CREATE
 *
 * Visible steps: create the inode if needed (Abort on failure), IH_INIT and
 * IH_OPEN the handle (Abort if it cannot be opened), truncate the file to
 * sizeof(struct versionStamp) + sizeof(short), write a version stamp
 * carrying LINKTABLEMAGIC / LINKTABLEVERSION at offset 0, close the
 * descriptor, and record the inode in the volume header summary.
 *
 * Fix in this revision: a failed FDH_PWRITE used to abort with the message
 * copied from the truncate failure ("Can't truncate link table"); it now
 * reports a write failure so the two errors can be told apart.
 *
 * NOTE(review): the assignment target of IH_CREATE and the comparison that
 * completes the FDH_PWRITE test are on lines not visible in this listing.
 */
1797 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1799 struct versionStamp version;
1802 if (!VALID_INO(ino))
1804 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
1805 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1806 if (!VALID_INO(ino))
1808 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1809 isp->RWvolumeId, errno);
1810 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1811 fdP = IH_OPEN(salvinfo->VGLinkH);
1813 Abort("Can't open link table for volume %u (error = %d)\n",
1814 isp->RWvolumeId, errno);
1816 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1817 Abort("Can't truncate link table for volume %u (error = %d)\n",
1818 isp->RWvolumeId, errno);
1820 version.magic = LINKTABLEMAGIC;
1821 version.version = LINKTABLEVERSION;
1823 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1825 Abort("Can't write link table for volume %u (error = %d)\n",
1826 isp->RWvolumeId, errno);
1828 FDH_REALLYCLOSE(fdP);
1830 /* If the volume summary exists (i.e., the V*.vol header file exists),
1831 * then set this inode there as well.
1833 if (isp->volSummary)
1834 isp->volSummary->header.linkTable = ino;
/*
 * NOTE(review): these two lines are the visible body of a routine whose
 * definition line is missing from this listing. SalvageVolumeGroup passes
 * nt_SVG to pthread_create() together with a pointer to an SVGParms_t, so
 * this is presumably the nt_SVG thread entry point -- confirm against the
 * full source.
 *
 * The argument is read as an SVGParms_t and its three members (salvage
 * info, inode summary pointer, volume count) are forwarded to
 * DoSalvageVolumeGroup().
 */
1843 SVGParms_t *parms = (SVGParms_t *) arg;
1844 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * SalvageVolumeGroup: entry point for salvaging one volume group.
 *
 * salvinfo  salvage job state; the per-volume-group fields VolumeChanged,
 *           VGLinkH, VGLinkH_cnt and VolInfo are reset here
 * isp       first inode summary of the volume group
 * nVols     number of summaries in the group
 *
 * In the visible code the work is run on a separate joinable pthread: the
 * three arguments are packed into parms, a thread is created with nt_SVG as
 * its start routine, and pthread_join() waits for it. Failures of
 * pthread_attr_init(), pthread_attr_setdetachstate() and pthread_create()
 * are logged.
 *
 * NOTE(review): the trailing #endif names AFS_NT40_ENV, so the threaded
 * path is presumably compiled only for that environment; the matching
 * conditional, the alternative branch and the declarations of tid, parms
 * and code are not visible in this listing.
 */
1849 SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1852 pthread_attr_t tattr;
1856 /* Initialize per volume global variables, even if later code does so */
1857 salvinfo->VolumeChanged = 0;
1858 salvinfo->VGLinkH = NULL;
1859 salvinfo->VGLinkH_cnt = 0;
1860 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1862 parms.svgp_inodeSummaryp = isp;
1863 parms.svgp_count = nVols;
1864 parms.svgp_salvinfo = salvinfo;
1865 code = pthread_attr_init(&tattr);
1867 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1871 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1873 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1876 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1878 Log("Failed to create thread to salvage volume group %u\n",
1882 (void)pthread_join(tid, NULL);
1884 #endif /* AFS_NT40_ENV */
/*
 * DoSalvageVolumeGroup: salvage every volume of one volume group.
 *
 * salvinfo  salvage job state (inode file descriptor, link table handle,
 *           device and path of the partition)
 * isp       inode summaries of the group; isp[0] is tested for being the
 *           read-write volume
 * nVols     number of summaries in isp
 *
 * Visible steps:
 *   - haveRWvolume is set when isp[0] is the read-write volume
 *     (volumeId == RWvolumeId) and has at least one special inode.
 *   - unless ShowMounts is set and a read-write volume is present,
 *     QuickCheck() is consulted when ForceSalvage is off.
 *   - when canfork is set and debug is not, Fork() is called and the
 *     non-zero side waits with Wait("Salvage volume group").
 *   - the inode counts of all volumes are summed, a buffer of that many
 *     ViceInodeInfo records is allocated, and the records are read from
 *     salvinfo->inodeFd starting at record isp->index. allInodes is set to
 *     inodes - isp->index so that summary indexes, which are relative to
 *     the whole partition, can be applied directly.
 *   - salvageTo is 0 when a read-write volume is present and 1 otherwise.
 *   - under AFS_NAMEI_ENV the link table is located with FindLinkHandle()
 *     and opened; if it is missing or cannot be opened it is recreated via
 *     CreateLinkTable() and every non-special inode gets its link count
 *     set with namei_SetLinkCount(fdP, inode, 1, 1).
 *   - volumes are processed from the last summary down to salvageTo. For
 *     each, a check pass (check == 1) and then a real pass (check == 0)
 *     run SalvageVolumeHeaderFile() and SalvageVnodes(); failures go to
 *     MaybeZapVolume().
 *   - afterwards each inode with a positive remaining linkCount is passed
 *     to IH_DEC and each with a negative one to IH_INC; under
 *     AFS_NAMEI_ENV the link table inode itself is adjusted last.
 *   - SalvageVolume() is then called for directory checks and the link
 *     table handle is released with IH_RELEASE.
 *
 * NOTE(review): no NULL check of the malloc() result is visible between
 * the allocation and its first use. Many short lines (returns, braces,
 * linkCount adjustments inside the IH_DEC / IH_INC loops, the declaration
 * of rw, check, deleteMe, ino, j, stmp and VGLinkH_p1) are missing from
 * this listing, so loop termination and exit paths must be confirmed
 * against the full source.
 */
1887 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1889 struct ViceInodeInfo *inodes, *allInodes, *ip;
1890 int i, totalInodes, size, salvageTo;
1894 int dec_VGLinkH = 0;
1896 FdHandle_t *fdP = NULL;
1898 salvinfo->VGLinkH_cnt = 0;
1899 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1900 && isp->nSpecialInodes > 0);
1901 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1902 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1905 if (ShowMounts && !haveRWvolume)
1907 if (canfork && !debug && Fork() != 0) {
1908 (void)Wait("Salvage volume group");
1911 for (i = 0, totalInodes = 0; i < nVols; i++)
1912 totalInodes += isp[i].nInodes;
1913 size = totalInodes * sizeof(struct ViceInodeInfo);
1914 inodes = (struct ViceInodeInfo *)malloc(size);
1915 allInodes = inodes - isp->index; /* this would the base of all the inodes
1916 * for the partition, if all the inodes
1917 * had been read into memory */
1919 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1921 osi_Assert(OS_READ(salvinfo->inodeFd, inodes, size) == size);
1923 /* Don't try to salvage a read write volume if there isn't one on this
1925 salvageTo = haveRWvolume ? 0 : 1;
1927 #ifdef AFS_NAMEI_ENV
1928 ino = FindLinkHandle(isp, nVols, allInodes);
1929 if (VALID_INO(ino)) {
1930 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1931 fdP = IH_OPEN(salvinfo->VGLinkH);
1933 if (!VALID_INO(ino) || fdP == NULL) {
1934 Log("%s link table for volume %u.\n",
1935 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1937 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1940 struct ViceInodeInfo *ip;
1941 CreateLinkTable(salvinfo, isp, ino);
1942 fdP = IH_OPEN(salvinfo->VGLinkH);
1943 /* Sync fake 1 link counts to the link table, now that it exists */
1945 for (i = 0; i < nVols; i++) {
1946 ip = allInodes + isp[i].index;
1947 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1948 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1955 FDH_REALLYCLOSE(fdP);
1957 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1960 /* Salvage in reverse order--read/write volume last; this way any
1961 * Inodes not referenced by the time we salvage the read/write volume
1962 * can be picked up by the read/write volume */
1963 /* ACTUALLY, that's not done right now--the inodes just vanish */
1964 for (i = nVols - 1; i >= salvageTo; i--) {
1966 struct InodeSummary *lisp = &isp[i];
1967 #ifdef AFS_NAMEI_ENV
1968 /* If only the RO is present on this partition, the link table
1969 * shows up as a RW volume special file. Need to make sure the
1970 * salvager doesn't try to salvage the non-existent RW.
1972 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1973 /* If this only special inode is the link table, continue */
1974 if (inodes->u.special.type == VI_LINKTABLE) {
1981 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1982 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1983 /* Check inodes twice. The second time do things seriously. This
1984 * way the whole RO volume can be deleted, below, if anything goes wrong */
1985 for (check = 1; check >= 0; check--) {
1987 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
1989 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
1990 if (rw && deleteMe) {
1991 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1992 * volume won't be called */
1998 if (rw && check == 1)
2000 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
2001 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
2007 /* Fix actual inode counts */
2010 Log("totalInodes %d\n",totalInodes);
2011 for (ip = inodes; totalInodes; ip++, totalInodes--) {
2012 static int TraceBadLinkCounts = 0;
2013 #ifdef AFS_NAMEI_ENV
2014 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
2015 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
2016 VGLinkH_p1 = ip->u.param[0];
2017 continue; /* Deal with this last. */
2020 if (ip->linkCount != 0 && TraceBadLinkCounts) {
2021 TraceBadLinkCounts--; /* Limit reports, per volume */
2022 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
2024 while (ip->linkCount > 0) {
2025 /* below used to assert, not break */
2027 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2028 Log("idec failed. inode %s errno %d\n",
2029 PrintInode(stmp, ip->inodeNumber), errno);
2035 while (ip->linkCount < 0) {
2036 /* these used to be asserts */
2038 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2039 Log("iinc failed. inode %s errno %d\n",
2040 PrintInode(stmp, ip->inodeNumber), errno);
2047 #ifdef AFS_NAMEI_ENV
2048 while (dec_VGLinkH > 0) {
2049 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2050 Log("idec failed on link table, errno = %d\n", errno);
2054 while (dec_VGLinkH < 0) {
2055 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2056 Log("iinc failed on link table, errno = %d\n", errno);
2063 /* Directory consistency checks on the rw volume */
2065 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2066 IH_RELEASE(salvinfo->VGLinkH);
2068 if (canfork && !debug) {
/*
 * QuickCheck: inspect the on-disk volume info of every volume in a group
 * before any forking takes place; DoSalvageVolumeGroup consults the result
 * when ForceSalvage is off.
 *
 * salvinfo  salvage job state (supplies the device for the inode handle)
 * isp       inode summaries of the volume group
 * nVols     number of summaries in isp
 *
 * Visible steps, per volume:
 *   - a first entry with no special inodes in a group of more than one
 *     volume (a "phantom" read-write volume) is special-cased.
 *   - a handle on the volume info inode is initialised from the volume
 *     summary header and a VolumeDiskData record is read from offset 0.
 *   - the record is accepted when the full size was read, the magic is
 *     VOLUMEINFOMAGIC, dontSalvage equals DONT_SALVAGE, and both
 *     needsSalvaged and destroyMe are 0.
 *   - for an accepted record with inUse set, inUse is cleared, inService is
 *     set to 1 and the record is written back; a short write is detected.
 *
 * NOTE(review): vs is dereferenced after being loaded from
 * isp[i].volSummary; any NULL check, the handling of rejected records and
 * all return statements are on lines not visible in this listing.
 */
2075 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2077 /* Check headers BEFORE forking */
2081 for (i = 0; i < nVols; i++) {
2082 struct VolumeSummary *vs = isp[i].volSummary;
2083 VolumeDiskData volHeader;
2085 /* Don't salvage just because phantom rw volume is there... */
2086 /* (If a read-only volume exists, read/write inodes must also exist) */
2087 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2091 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2092 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2093 == sizeof(volHeader)
2094 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2095 && volHeader.dontSalvage == DONT_SALVAGE
2096 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2097 if (volHeader.inUse != 0) {
2098 volHeader.inUse = 0;
2099 volHeader.inService = 1;
2101 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2102 != sizeof(volHeader)) {
/*
 * SalvageVolumeHeaderFile: rebuild the in-memory volume header of one
 * volume from its special inodes and rewrite the V*.vol file if it differs.
 *
 * salvinfo  salvage job state (partition, device, link table handle)
 * isp       inode summary of the volume being checked
 * inodes    base pointer such that inodes[isp->index + i] is the i-th
 *           special inode of this volume
 * RW        non-zero when the volume is the read-write volume
 * check     non-zero for the check-only pass, 0 for the repairing pass
 * deleteMe  passed through to SalvageHeader()
 *
 * Visible steps:
 *   - a skip array with one slot per special inode is allocated; if that
 *     fails a message is logged and duplicate recovery is not attempted.
 *   - when a volume summary exists, special inodes whose number matches the
 *     one referenced by the existing header are remembered in goodspecial.
 *   - tempHeader is reset and given the magic, version, id and parent.
 *   - adjacent special inodes of the same type are reported as duplicates;
 *     the one referenced by the header is preferred, otherwise the salvage
 *     of the volume is reported as aborted.
 *   - each special inode is then classified: out-of-range types are
 *     reported as rubbish, skipped orphans are removed or ignored according
 *     to the orphans setting, and the rest are recorded in tempHeader. On
 *     the repairing pass linkCount is decremented for every kept inode that
 *     is not the link table.
 *   - SalvageHeader() is called for every inode type, with the link table
 *     entry first forced to the current VGLinkH inode when that is valid.
 *   - if no volume summary exists one is allocated and the header will be
 *     created with VCreateVolumeDiskHeader; if one exists but differs from
 *     tempHeader it will be rewritten with VWriteVolumeDiskHeader.
 *   - tempHeader is copied into the summary, written through writefunc, and
 *     the volume info handle is initialised.
 *
 * Fixes in this revision:
 *   - the freshly calloc'ed isp->volSummary is asserted non-NULL before it
 *     is dereferenced, matching the other allocations in this file.
 *   - the existing header file name is copied into the 64-byte headerName
 *     buffer with snprintf() instead of an unbounded strcpy().
 *
 * NOTE(review): return statements, the handling of allinodesobsolete, the
 * conditions around Testing / check, and the declarations of i, skip, code,
 * path and stmp are on lines not visible in this listing.
 */
2118 /* SalvageVolumeHeaderFile
2120 * Salvage the top level V*.vol header file. Make sure the special files
2121 * exist and that there are no duplicates.
2123 * Calls SalvageHeader for each possible type of volume special file.
2127 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2128 struct ViceInodeInfo *inodes, int RW,
2129 int check, int *deleteMe)
2132 struct ViceInodeInfo *ip;
2133 int allinodesobsolete = 1;
2134 struct VolumeDiskHeader diskHeader;
2135 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2137 struct VolumeHeader tempHeader;
2138 struct afs_inode_info stuff[MAXINODETYPE];
2140 /* keeps track of special inodes that are probably 'good'; they are
2141 * referenced in the vol header, and are included in the given inodes
2146 } goodspecial[MAXINODETYPE];
2151 memset(goodspecial, 0, sizeof(goodspecial));
2153 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2155 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2157 Log("cannot allocate memory for inode skip array when salvaging "
2158 "volume %lu; not performing duplicate special inode recovery\n",
2159 afs_printable_uint32_lu(isp->volumeId));
2160 /* still try to perform the salvage; the skip array only does anything
2161 * if we detect duplicate special inodes */
2164 init_inode_info(&tempHeader, stuff);
2167 * First, look at the special inodes and see if any are referenced by
2168 * the existing volume header. If we find duplicate special inodes, we
2169 * can use this information to use the referenced inode (it's more
2170 * likely to be the 'good' one), and throw away the duplicates.
2172 if (isp->volSummary && skip) {
2173 /* use tempHeader, so we can use the stuff[] array to easily index
2174 * into the isp->volSummary special inodes */
2175 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2177 for (i = 0; i < isp->nSpecialInodes; i++) {
2178 ip = &inodes[isp->index + i];
2179 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2180 /* will get taken care of in a later loop */
2183 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2184 goodspecial[ip->u.special.type-1].valid = 1;
2185 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2190 memset(&tempHeader, 0, sizeof(tempHeader));
2191 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2192 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2193 tempHeader.id = isp->volumeId;
2194 tempHeader.parent = isp->RWvolumeId;
2196 /* Check for duplicates (inodes are sorted by type field) */
2197 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2198 ip = &inodes[isp->index + i];
2199 if (ip->u.special.type == (ip + 1)->u.special.type) {
2200 afs_ino_str_t stmp1, stmp2;
2202 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2203 /* Will be caught in the loop below */
2207 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2208 ip->u.special.type, isp->volumeId,
2209 PrintInode(stmp1, ip->inodeNumber),
2210 PrintInode(stmp2, (ip+1)->inodeNumber));
2212 if (skip && goodspecial[ip->u.special.type-1].valid) {
2213 Inode gi = goodspecial[ip->u.special.type-1].inode;
2216 Log("using special inode referenced by vol header (%s)\n",
2217 PrintInode(stmp1, gi));
2220 /* the volume header references some special inode of
2221 * this type in the inodes array; are we it? */
2222 if (ip->inodeNumber != gi) {
2224 } else if ((ip+1)->inodeNumber != gi) {
2225 /* in case this is the last iteration; we need to
2226 * make sure we check ip+1, too */
2231 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2239 for (i = 0; i < isp->nSpecialInodes; i++) {
2241 ip = &inodes[isp->index + i];
2242 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2244 Log("Rubbish header inode %s of type %d\n",
2245 PrintInode(stmp, ip->inodeNumber),
2246 ip->u.special.type);
2252 Log("Rubbish header inode %s of type %d; deleted\n",
2253 PrintInode(stmp, ip->inodeNumber),
2254 ip->u.special.type);
2255 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2256 if (skip && skip[i]) {
2257 if (orphans == ORPH_REMOVE) {
2258 Log("Removing orphan special inode %s of type %d\n",
2259 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2262 Log("Ignoring orphan special inode %s of type %d\n",
2263 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2264 /* fall through to the ip->linkCount--; line below */
2267 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2268 allinodesobsolete = 0;
2270 if (!check && ip->u.special.type != VI_LINKTABLE)
2271 ip->linkCount--; /* Keep the inode around */
2279 if (allinodesobsolete) {
2286 salvinfo->VGLinkH_cnt++; /* one for every header. */
2288 if (!RW && !check && isp->volSummary) {
2289 ClearROInUseBit(isp->volSummary);
2293 for (i = 0; i < MAXINODETYPE; i++) {
2294 if (stuff[i].inodeType == VI_LINKTABLE) {
2295 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2296 * And we may have recreated the link table earlier, so set the
2297 * RW header as well.
2299 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2300 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2304 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2308 if (isp->volSummary == NULL) {
2310 char headerName[64];
2311 snprintf(headerName, sizeof headerName, VFORMAT,
2312 afs_printable_uint32_lu(isp->volumeId));
2313 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2314 salvinfo->fileSysPath, headerName);
2316 Log("No header file for volume %u\n", isp->volumeId);
2320 Log("No header file for volume %u; %screating %s\n",
2321 isp->volumeId, (Testing ? "it would have been " : ""),
2323 isp->volSummary = calloc(1, sizeof(struct VolumeSummary));
/* the summary is dereferenced on the next line; fail loudly on allocation failure */
osi_Assert(isp->volSummary != NULL);
2324 isp->volSummary->fileName = ToString(headerName);
2326 writefunc = VCreateVolumeDiskHeader;
2329 char headerName[64];
2330 /* hack: these two fields are obsolete... */
2331 isp->volSummary->header.volumeAcl = 0;
2332 isp->volSummary->header.volumeMountTable = 0;
2335 (&isp->volSummary->header, &tempHeader,
2336 sizeof(struct VolumeHeader))) {
2337 /* We often remove the name before calling us, so we make a fake one up */
2338 if (isp->volSummary->fileName) {
2339 snprintf(headerName, sizeof headerName, "%s", isp->volSummary->fileName);
2341 snprintf(headerName, sizeof headerName, VFORMAT,
2342 afs_printable_uint32_lu(isp->volumeId));
2343 isp->volSummary->fileName = ToString(headerName);
2345 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2346 salvinfo->fileSysPath, headerName);
2348 Log("Header file %s is damaged or no longer valid%s\n", path,
2349 (check ? "" : "; repairing"));
2353 writefunc = VWriteVolumeDiskHeader;
2357 memcpy(&isp->volSummary->header, &tempHeader,
2358 sizeof(struct VolumeHeader));
2361 Log("It would have written a new header file for volume %u\n",
2365 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2366 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2368 Log("Error %ld writing volume header file for volume %lu\n",
2369 afs_printable_int32_ld(code),
2370 afs_printable_uint32_lu(diskHeader.id));
2375 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2376 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader: validate, and where needed recreate, one special inode
 * (one "part") of a volume header.
 *
 * salvinfo - salvage job state; fileSysDevice and fileSysPath are used to
 *            create and open the inode, and VolInfo receives the volume
 *            info when sp describes the VI_VOLINFO part.
 * sp       - description of the part: the inode slot (*sp->inode), its
 *            inodeType, expected size and stamp, and a description string
 *            used in log messages.
 * isp      - summary of the volume (volumeId, RWvolumeId, maxUniquifier).
 * check    - check-only flag supplied by the caller.
 * deleteMe - int pointer supplied by the caller; it is not referenced on
 *            the lines visible in this listing.
 *
 * NOTE(review): this listing elides some source lines (declarations,
 * braces, branch conditions and return statements), so the exact control
 * flow and the return values cannot be confirmed from here.
 */
2381 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2382 struct InodeSummary *isp, int check, int *deleteMe)
2385 VolumeDiskData volumeInfo;
2386 struct versionStamp fileHeader;
/* The VI_LINKTABLE test below is only compiled when AFS_NAMEI_ENV is not defined. */
2395 #ifndef AFS_NAMEI_ENV
2396 if (sp->inodeType == VI_LINKTABLE)
/* No inode is recorded for this part: log it and create a new special inode. */
2399 if (*(sp->inode) == 0) {
2401 Log("Missing inode in volume header (%s)\n", sp->description);
2405 Log("Missing inode in volume header (%s); %s\n", sp->description,
2406 (Testing ? "it would have recreated it" : "recreating"));
2409 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2410 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2411 if (!VALID_INO(*(sp->inode)))
2413 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2414 sp->description, errno);
/* Open the special inode for this part of the header. */
2419 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2420 fdP = IH_OPEN(specH);
2421 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2422 /* bail out early and destroy the volume */
2424 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2431 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2432 sp->description, errno);
2435 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2436 || header.fileHeader.magic != sp->stamp.magic)) {
2438 Log("Part of the header (%s) is corrupted\n", sp->description);
2439 FDH_REALLYCLOSE(fdP);
2443 Log("Part of the header (%s) is corrupted; recreating\n",
2446 /* header can be garbage; make sure we don't read garbage data from
2448 memset(&header, 0, sizeof(header));
2450 if (sp->inodeType == VI_VOLINFO
2451 && header.volumeInfo.destroyMe == DESTROY_ME) {
2454 FDH_REALLYCLOSE(fdP);
2458 if (recreate && !Testing) {
2461 ("Internal error: recreating volume header (%s) in check mode\n",
2463 nBytes = FDH_TRUNC(fdP, 0);
2465 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2466 sp->description, errno);
2468 /* The following code should be moved into vutil.c */
/*
 * Volume info part: build a minimal replacement from what is known about
 * the volume (ids, a "bogus.<id>" name, out of service, not blessed) and
 * write it at offset 0.
 */
2469 if (sp->inodeType == VI_VOLINFO) {
2471 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2472 header.volumeInfo.stamp = sp->stamp;
2473 header.volumeInfo.id = isp->volumeId;
2474 header.volumeInfo.parentId = isp->RWvolumeId;
2475 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2476 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2477 isp->volumeId, isp->volumeId);
2478 header.volumeInfo.inService = 0;
2479 header.volumeInfo.blessed = 0;
2480 /* The + 1000 is a hack in case there are any files out in venus caches */
2481 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2482 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2483 header.volumeInfo.needsCallback = 0;
2484 gettimeofday(&tp, NULL);
2485 header.volumeInfo.creationDate = tp.tv_sec;
2487 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2488 sizeof(header.volumeInfo), 0);
2489 if (nBytes != sizeof(header.volumeInfo)) {
2492 ("Unable to write volume header file (%s) (errno = %d)\n",
2493 sp->description, errno);
2494 Abort("Unable to write entire volume header file (%s)\n",
/* Here only the version stamp is written at offset 0 (presumably the branch for the other parts; the else line is elided). */
2498 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2499 if (nBytes != sizeof(sp->stamp)) {
2502 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2503 sp->description, errno);
2505 ("Unable to write entire version stamp in volume header file (%s)\n",
2510 FDH_REALLYCLOSE(fdP);
/*
 * For the volume info part, copy the header into salvinfo->VolInfo and log
 * when the volume was last updated, or when it was created if it has no
 * update date.
 */
2512 if (sp->inodeType == VI_VOLINFO) {
2513 salvinfo->VolInfo = header.volumeInfo;
2517 if (salvinfo->VolInfo.updateDate) {
2518 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2520 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2521 salvinfo->VolInfo.id,
2522 (Testing ? "it would have been " : ""), update);
2524 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2526 Log("%s (%u) not updated (created %s)\n",
2527 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
/*
 * SalvageVnodes: run SalvageIndex() over the small and then the large
 * vnode index of the volume described by thisIsp.
 *
 * salvinfo - salvage job state, passed through to SalvageIndex().
 * rwIsp    - inode summary whose index, nInodes and nSpecialInodes select
 *            the slice of the inode array handed to SalvageIndex().
 * thisIsp  - inode summary of the volume being salvaged; its volSummary
 *            header names the two index inodes.
 * inodes   - inode array that rwIsp->index refers into.
 * check    - passed through to SalvageIndex().
 *
 * Returns 0 only when both ismall and ilarge are 0, otherwise -1.
 * NOTE(review): the assignments to ismall and ilarge are on lines elided
 * from this listing; presumably they receive the SalvageIndex() results.
 */
2537 SalvageVnodes(struct SalvInfo *salvinfo,
2538 struct InodeSummary *rwIsp,
2539 struct InodeSummary *thisIsp,
2540 struct ViceInodeInfo *inodes, int check)
2542 int ilarge, ismall, ioffset, RW, nInodes;
2543 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
/* RW is non-zero when the volume being salvaged is the one described by rwIsp. */
2546 RW = (rwIsp == thisIsp);
/* Count of inodes that follow the special inodes. */
2547 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2549 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2550 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2551 if (check && ismall == -1)
2554 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2555 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2556 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex: walk one vnode index file and reconcile each allocated
 * vnode with the inodes found for the volume.
 *
 * salvinfo   - salvage job state; VolumeChanged is set when a vnode is
 *              rewritten.
 * ino        - inode of the vnode index file.
 * class      - vnode class; selects the VnodeClassInfo entry that supplies
 *              diskSize and magic.
 * RW         - flag from the caller (SalvageVnodes sets it when the volume
 *              is the read/write one).
 * ip/nInodes - inodes to match against; the loops below compare
 *              ip->u.vnode.vnodeNumber with the current vnode number, so
 *              the list is assumed to be ordered by vnode number.
 * volSummary - supplies header.parent for the index handle.
 * check      - check-only flag; the function asserts that no vnode was
 *              changed while check is set.
 *
 * NOTE(review): many source lines are elided from this listing (braces,
 * branch conditions, loop bodies and the return statements). The caller
 * in this file compares the result with 0 and -1.
 */
2560 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2561 struct ViceInodeInfo *ip, int nInodes,
2562 struct VolumeSummary *volSummary, int check)
2564 char buf[SIZEOF_LARGEDISKVNODE];
2565 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2567 StreamHandle_t *file;
2568 struct VnodeClassInfo *vcp;
2570 afs_sfsize_t nVnodes;
2571 afs_fsize_t vnodeLength;
2573 afs_ino_str_t stmp1, stmp2;
/*
 * Open the index file and size it in diskSize units. The file must be an
 * exact multiple of diskSize; the first diskSize bytes are skipped before
 * reading vnodes.
 */
2577 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2578 fdP = IH_OPEN(handle);
2579 osi_Assert(fdP != NULL);
2580 file = FDH_FDOPEN(fdP, "r+");
2581 osi_Assert(file != NULL);
2582 vcp = &VnodeClassInfo[class];
2583 size = OS_SIZE(fdP->fd_fd);
2584 osi_Assert(size != -1);
2585 nVnodes = (size / vcp->diskSize) - 1;
2587 osi_Assert((nVnodes + 1) * vcp->diskSize == size);
2588 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
/* Read and examine one vnode per iteration; vNull entries are ignored. */
2592 for (vnodeIndex = 0;
2593 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2594 nVnodes--, vnodeIndex++) {
2595 if (vnode->type != vNull) {
2596 int vnodeChanged = 0;
2597 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
/* A vnode that records inode number 0 is cleared. */
2598 if (VNDISK_GET_INO(vnode) == 0) {
2600 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2601 memset(vnode, 0, vcp->diskSize);
2605 if (vcp->magic != vnode->vnodeMagic) {
2606 /* bad magic #, probably partially created vnode */
2608 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2609 vnodeNumber, afs_printable_uint32_lu(vnode->vnodeMagic),
2610 afs_printable_uint32_lu(vcp->magic));
2611 memset(vnode, 0, vcp->diskSize);
2615 Log("Partially allocated vnode %d deleted.\n",
2617 memset(vnode, 0, vcp->diskSize);
2621 /* ****** Should do a bit more salvage here: e.g. make sure
2622 * vnode type matches what it should be given the index */
2623 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2624 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2625 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2626 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2633 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2634 /* The following doesn't work, because the version number
2635 * is not maintained correctly by the file server */
2636 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2637 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2639 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2645 /* For RW volume, look for vnode with matching inode number;
2646 * if no such match, take the first determined by our sort
2648 struct ViceInodeInfo *lip = ip;
2649 int lnInodes = nInodes;
2651 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2652 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2661 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2662 /* "Matching" inode */
2666 vu = vnode->uniquifier;
2667 iu = ip->u.vnode.vnodeUniquifier;
2668 vd = vnode->dataVersion;
2669 id = ip->u.vnode.inodeDataVersion;
2671 * Because of the possibility of the uniquifier overflows (> 4M)
2672 * we compare them modulo the low 22-bits; we shouldn't worry
2673 * about mismatching since they shouldn't to many old
2674 * uniquifiers of the same vnode...
2676 if (IUnique(vu) != IUnique(iu)) {
2678 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2681 vnode->uniquifier = iu;
2682 #ifdef AFS_3DISPARES
2683 vnode->dataVersion = (id >= vd ?
2686 1887437 ? vd : id) :
2689 1887437 ? id : vd));
2691 #if defined(AFS_SGI_EXMAG)
2692 vnode->dataVersion = (id >= vd ?
2695 15099494 ? vd : id) :
2698 15099494 ? id : vd));
2700 vnode->dataVersion = (id > vd ? id : vd);
2701 #endif /* AFS_SGI_EXMAG */
2702 #endif /* AFS_3DISPARES */
2705 /* don't bother checking for vd > id any more, since
2706 * partial file transfers always result in this state,
2707 * and you can't do much else anyway (you've already
2708 * found the best data you can) */
2709 #ifdef AFS_3DISPARES
2710 if (!vnodeIsDirectory(vnodeNumber)
2711 && ((vd < id && (id - vd) < 1887437)
2712 || ((vd > id && (vd - id) > 1887437)))) {
2714 #if defined(AFS_SGI_EXMAG)
2715 if (!vnodeIsDirectory(vnodeNumber)
2716 && ((vd < id && (id - vd) < 15099494)
2717 || ((vd > id && (vd - id) > 15099494)))) {
2719 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2720 #endif /* AFS_SGI_EXMAG */
2723 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2724 vnode->dataVersion = id;
/* Make the vnode refer to the inode that was selected above. */
2729 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2732 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2734 VNDISK_SET_INO(vnode, ip->inodeNumber);
2739 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2741 VNDISK_SET_INO(vnode, ip->inodeNumber);
/* The length stored in the vnode is replaced by the byte count of the inode when they differ. */
2744 VNDISK_GET_LEN(vnodeLength, vnode);
2745 if (ip->byteCount != vnodeLength) {
2748 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2753 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2754 VNDISK_SET_LEN(vnode, ip->byteCount);
2758 ip->linkCount--; /* Keep the inode around */
2761 } else { /* no matching inode */
2763 if (VNDISK_GET_INO(vnode) != 0
2764 || vnode->type == vDirectory) {
2765 /* No matching inode--get rid of the vnode */
2767 if (VNDISK_GET_INO(vnode)) {
2769 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2773 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2778 if (VNDISK_GET_INO(vnode)) {
2780 time_t serverModifyTime = vnode->serverModifyTime;
2781 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2785 time_t serverModifyTime = vnode->serverModifyTime;
2786 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2789 memset(vnode, 0, vcp->diskSize);
2792 /* Should not reach here because we checked for
2793 * (inodeNumber == 0) above. And where we zero the vnode,
2794 * we also goto vnodeDone.
2798 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2802 } /* VNDISK_GET_INO(vnode) != 0 */
/*
 * A vnode must never be modified in check mode. Otherwise a changed vnode
 * is written back to its slot in the index unless Testing is set.
 */
2804 osi_Assert(!(vnodeChanged && check));
2805 if (vnodeChanged && !Testing) {
2806 osi_Assert(IH_IWRITE
2807 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2808 (char *)vnode, vcp->diskSize)
2810 salvinfo->VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber: look up the in-memory VnodeEssence for a vnode number.
 *
 * The vnode number is mapped to its class (vnodeIdToClass) and to its bit
 * number (vnodeIdToBitNumber); the bit number indexes the vnodes array of
 * salvinfo->vnodeInfo[class].
 *
 * Returns a pointer to the entry, or NULL when the bit number is not
 * below nVnodes for that class.
 */
2821 struct VnodeEssence *
2822 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2825 struct VnodeInfo *vip;
2828 class = vnodeIdToClass(vnodeNumber);
2829 vip = &salvinfo->vnodeInfo[class];
2830 offset = vnodeIdToBitNumber(vnodeNumber);
2831 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite: give a directory a fresh inode before it is changed.
 *
 * salvinfo - salvage job state (large vnode index handle, device, path,
 *            VolumeChanged flag).
 * dir      - directory being examined; its dirHandle is re-targeted at
 *            the new inode.
 *
 * Visible steps: flush the directory buffers (DFlush), read the directory
 * vnode from the large vnode index, create a new inode with IH_CREATE,
 * copy the old inode into it with CopyInode, store the new inode number
 * in the vnode, write the vnode back, and point dir->dirHandle at the new
 * inode. Each step is asserted to succeed.
 *
 * NOTE(review): the statement guarded by "dir->copied || Testing" and the
 * function tail are on lines elided from this listing; presumably the
 * guard returns early and dir->copied is set at the end.
 */
2835 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2837 /* Copy the directory unconditionally if we are going to change it:
2838 * not just if was cloned.
2840 struct VnodeDiskObject vnode;
2841 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2842 Inode oldinode, newinode;
2845 if (dir->copied || Testing)
2847 DFlush(); /* Well justified paranoia... */
2850 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2851 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2853 osi_Assert(code == sizeof(vnode));
2854 oldinode = VNDISK_GET_INO(&vnode);
2855 /* Increment the version number by a whole lot to avoid problems with
2856 * clients that were promised new version numbers--but the file server
2857 * crashed before the versions were written to disk.
2860 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2861 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2863 osi_Assert(VALID_INO(newinode));
2864 osi_Assert(CopyInode(salvinfo->fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2866 VNDISK_SET_INO(&vnode, newinode);
2868 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2869 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2871 osi_Assert(code == sizeof(vnode));
2873 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2874 salvinfo->fileSysDevice, newinode,
2875 &salvinfo->VolumeChanged);
2876 /* Don't delete the original inode right away, because the directory is
2877 * still being scanned.
2883 * This function should either successfully create a new dir, or give up
2884 * and leave things the way they were. In particular, if it fails to write
2885 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage: rebuild a damaged directory into a new inode and switch
 * the directory over to it.
 *
 * salvinfo - salvage job state (large vnode index handle, device, path,
 *            VolumeChanged flag).
 * dir      - directory to rebuild; on success dir->dirHandle is replaced
 *            by the handle of the rebuilt directory.
 *
 * Visible steps: read the directory vnode, create a new inode, run
 * DirSalvage() from the old directory into the new one, verify the result
 * with DirOK(), then store the new inode number and length in the vnode,
 * write the vnode back, sync, and drop the link on the old inode. When
 * DirSalvage() or DirOK() fails the new inode is released with IH_DEC()
 * and the old directory is kept.
 *
 * NOTE(review): several source lines (declarations, branch conditions and
 * returns) are elided from this listing.
 */
2889 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2891 struct VnodeDiskObject vnode;
2892 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2893 Inode oldinode, newinode;
2898 afs_int32 parentUnique = 1;
2899 struct VnodeEssence *vnodeEssence;
2904 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2906 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2907 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2909 osi_Assert(lcode == sizeof(vnode));
2910 oldinode = VNDISK_GET_INO(&vnode);
2911 /* Increment the version number by a whole lot to avoid problems with
2912 * clients that were promised new version numbers--but the file server
2913 * crashed before the versions were written to disk.
2916 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2917 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2919 osi_Assert(VALID_INO(newinode));
2920 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2921 &salvinfo->VolumeChanged);
2923 /* Assign . and .. vnode numbers from dir and vnode.parent.
2924 * The uniquifier for . is in the vnode.
2925 * The uniquifier for .. might be set to a bogus value of 1 and
2926 * the salvager will later clean it up.
2928 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2929 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2932 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2934 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2939 /* didn't really build the new directory properly, let's just give up. */
2940 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2941 Log("Directory salvage returned code %d, continuing.\n", code);
2943 Log("also failed to decrement link count on new inode\n");
2947 Log("Checking the results of the directory salvage...\n");
2948 if (!DirOK(&newdir)) {
2949 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2950 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2951 osi_Assert(code == 0);
2955 VNDISK_SET_INO(&vnode, newinode);
2956 length = Length(&newdir);
2957 VNDISK_SET_LEN(&vnode, length);
2959 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2960 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2962 osi_Assert(lcode == sizeof(vnode));
2965 nt_sync(salvinfo->fileSysDevice);
2967 sync(); /* this is slow, but hopefully rarely called. We don't have
2968 * an open FD on the file itself to fsync.
2972 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
2974 /* make sure old directory file is really closed */
2975 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2976 FDH_REALLYCLOSE(fdP);
2978 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2979 osi_Assert(code == 0);
2980 dir->dirHandle = newdir;
2984 * arguments for JudgeEntry.
/* JudgeEntry() receives a pointer to this struct as its arock argument and reads both members. */
2986 struct judgeEntry_params {
2987 struct DirSummary *dir; /**< directory we're examining entries in */
2988 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/*
 * JudgeEntry: examine one directory entry during directory salvage and
 * repair or delete it when it is inconsistent with the vnode it names.
 *
 * arock       - struct judgeEntry_params carrying the directory summary
 *               and the SalvInfo for the job.
 * name        - name of the entry.
 * vnodeNumber - vnode number the entry refers to.
 * The rest of the parameter list is on a line elided from this listing;
 * the body also uses a value named unique for the entry.
 *
 * Visible checks, in order: the vnode number must map to a known vnode;
 * the uniquifier of the entry must match the vnode; "." and ".." must
 * name this directory and its parent; names beginning with ".__afs" are
 * deleted and their vnode marked for deletion; for other entries, symlinks
 * with no execute mode bits are read and validated as mount points, and
 * the vnode is either claimed by this directory or the entry is deleted.
 * Every modification of the directory is preceded by CopyOnWrite().
 *
 * NOTE(review): many source lines (braces, branch conditions such as the
 * Testing checks, and most return statements) are elided from this
 * listing.
 */
2992 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
2995 struct judgeEntry_params *params = arock;
2996 struct DirSummary *dir = params->dir;
2997 struct SalvInfo *salvinfo = params->salvinfo;
2998 struct VnodeEssence *vnodeEssence;
2999 afs_int32 dirOrphaned, todelete;
/* Whether the directory itself is orphaned affects the delete and claim decisions below. */
3001 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
/* An entry whose vnode number has no VnodeEssence is deleted from the directory. */
3003 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3004 if (vnodeEssence == NULL) {
3006 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
3009 CopyOnWrite(salvinfo, dir);
3010 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3015 #ifndef AFS_NAMEI_ENV
3016 /* On AIX machines, don't allow entries to point to inode 0. That is a special
3017 * mount inode for the partition. If this inode were deleted, it would crash
3020 if (vnodeEssence->InodeNumber == 0) {
3021 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
3023 CopyOnWrite(salvinfo, dir);
3024 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3031 if (!(vnodeNumber & 1) && !Showmode
3032 && !(vnodeEssence->count || vnodeEssence->unique
3033 || vnodeEssence->modeBits)) {
3034 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
3035 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3036 vnodeNumber, unique,
3037 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3041 CopyOnWrite(salvinfo, dir);
3042 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3048 /* Check if the Uniquifiers match. If not, change the directory entry
3049 * so its unique matches the vnode unique. Delete if the unique is zero
3050 * or if the directory is orphaned.
3052 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3053 if (!vnodeEssence->unique
3054 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3055 /* This is an orphaned directory. Don't delete the . or ..
3056 * entry. Otherwise, it will get created in the next
3057 * salvage and deleted again here. So Just skip it.
3062 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3065 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3069 fid.Vnode = vnodeNumber;
3070 fid.Unique = vnodeEssence->unique;
3071 CopyOnWrite(salvinfo, dir);
3072 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3074 osi_Assert(Create(&dir->dirHandle, name, &fid) == 0);
3077 return 0; /* no need to continue */
/* The "." entry must name this directory (vnode number and unique); otherwise it is recreated. */
3080 if (strcmp(name, ".") == 0) {
3081 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3084 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3086 CopyOnWrite(salvinfo, dir);
3087 osi_Assert(Delete(&dir->dirHandle, ".") == 0);
3088 fid.Vnode = dir->vnodeNumber;
3089 fid.Unique = dir->unique;
3090 osi_Assert(Create(&dir->dirHandle, ".", &fid) == 0);
3093 vnodeNumber = fid.Vnode; /* Get the new Essence */
3094 unique = fid.Unique;
3095 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3098 } else if (strcmp(name, "..") == 0) {
3101 struct VnodeEssence *dotdot;
3102 pa.Vnode = dir->parent;
3103 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3104 osi_Assert(dotdot != NULL); /* XXX Should not be assert */
3105 pa.Unique = dotdot->unique;
3107 pa.Vnode = dir->vnodeNumber;
3108 pa.Unique = dir->unique;
3110 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3112 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3114 CopyOnWrite(salvinfo, dir);
3115 osi_Assert(Delete(&dir->dirHandle, "..") == 0);
3116 osi_Assert(Create(&dir->dirHandle, "..", &pa) == 0);
3119 vnodeNumber = pa.Vnode; /* Get the new Essence */
3121 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3123 dir->haveDotDot = 1;
3124 } else if (strncmp(name, ".__afs", 6) == 0) {
3126 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3129 CopyOnWrite(salvinfo, dir);
3130 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3132 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3133 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
/* Ordinary entries: optional reporting of suid/sgid files, then mount point validation. */
3136 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3137 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/*
 * A symlink with none of the 0111 mode bits set is read as a mount point.
 * Its contents must start with '#' or '%' and end with '.'; otherwise the
 * 0111 bits are set on the vnode essence, which the log message describes
 * as converting it to a symbolic link.
 */
3138 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3139 && !(vnodeEssence->modeBits & 0111)) {
3140 afs_sfsize_t nBytes;
3146 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3147 vnodeEssence->InodeNumber);
3150 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3154 size = FDH_SIZE(fdP);
3156 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3157 FDH_REALLYCLOSE(fdP);
3164 nBytes = FDH_PREAD(fdP, buf, size, 0);
3165 if (nBytes == size) {
3167 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3168 Log("Volume %u (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3169 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3170 Testing ? "would convert" : "converted");
3171 vnodeEssence->modeBits |= 0111;
3172 vnodeEssence->changed = 1;
3173 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3174 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3175 dir->name ? dir->name : "??", name, buf);
3177 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3178 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3180 FDH_REALLYCLOSE(fdP);
3183 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3184 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/* Remember the entry name for large-class vnodes that have no name recorded yet. */
3185 if (vnodeIdToClass(vnodeNumber) == vLarge
3186 && vnodeEssence->name == NULL) {
3188 if ((n = (char *)malloc(strlen(name) + 1)))
3190 vnodeEssence->name = n;
3193 /* The directory entry points to the vnode. Check to see if the
3194 * vnode points back to the directory. If not, then let the
3195 * directory claim it (else it might end up orphaned). Vnodes
3196 * already claimed by another directory are deleted from this
3197 * directory: hardlinks to the same vnode are not allowed
3198 * from different directories.
3200 if (vnodeEssence->parent != dir->vnodeNumber) {
3201 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3202 /* Vnode does not point back to this directory.
3203 * Orphaned dirs cannot claim a file (it may belong to
3204 * another non-orphaned dir).
3207 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3209 vnodeEssence->parent = dir->vnodeNumber;
3210 vnodeEssence->changed = 1;
3212 /* Vnode was claimed by another directory */
3215 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3216 } else if (vnodeNumber == 1) {
3217 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3219 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3223 CopyOnWrite(salvinfo, dir);
3224 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3229 /* This directory claims the vnode */
3230 vnodeEssence->claimed = 1;
/* One directory reference to the vnode has now been accounted for. */
3232 vnodeEssence->count--;
/*
 * DistilVnodeEssence: read one vnode index file and build the in-memory
 * summary (array of struct VnodeEssence) used by the later salvage passes.
 *
 * salvinfo - salvage job state; salvinfo->vnodeInfo[class] is filled in
 *            (handle, nVnodes, vnodes, inodes, volumeBlockCount,
 *            nAllocatedVnodes).
 * rwVId    - volume id used when initialising the index handle.
 * class    - vnode class being read; selects the VnodeClassInfo entry.
 * ino      - inode of the vnode index file.
 * maxu     - in/out: raised to the largest uniquifier seen.
 *
 * For every allocated vnode the link count, block count, parent, unique,
 * mode bits, inode number, type, author, owner and group are recorded.
 * For the large class the inode number of each directory is also stored in
 * vip->inodes. A directory vnode found in an index of any other class is
 * cleared on disk and no longer counted as allocated.
 *
 * NOTE(review): some source lines (declarations, else branches, braces and
 * the function tail) are elided from this listing.
 */
3237 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3238 VnodeClass class, Inode ino, Unique * maxu)
3240 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3241 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3242 char buf[SIZEOF_LARGEDISKVNODE];
3243 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3245 StreamHandle_t *file;
/*
 * Open the index and size it in diskSize units; the first diskSize bytes
 * are skipped before reading vnodes.
 */
3250 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3251 fdP = IH_OPEN(vip->handle);
3252 osi_Assert(fdP != NULL);
3253 file = FDH_FDOPEN(fdP, "r+");
3254 osi_Assert(file != NULL);
3255 size = OS_SIZE(fdP->fd_fd);
3256 osi_Assert(size != -1);
3257 vip->nVnodes = (size / vcp->diskSize) - 1;
3258 if (vip->nVnodes > 0) {
3259 osi_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3260 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
3261 osi_Assert((vip->vnodes = (struct VnodeEssence *)
3262 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3263 if (class == vLarge) {
3264 osi_Assert((vip->inodes = (Inode *)
3265 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3274 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
/* Summarise every allocated vnode in the index. */
3275 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3276 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3277 nVnodes--, vnodeIndex++) {
3278 if (vnode->type != vNull) {
3279 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3280 afs_fsize_t vnodeLength;
3281 vip->nAllocatedVnodes++;
3282 vep->count = vnode->linkCount;
3283 VNDISK_GET_LEN(vnodeLength, vnode);
3284 vep->blockCount = nBlocks(vnodeLength);
3285 vip->volumeBlockCount += vep->blockCount;
3286 vep->parent = vnode->parent;
3287 vep->unique = vnode->uniquifier;
3288 if (*maxu < vnode->uniquifier)
3289 *maxu = vnode->uniquifier;
3290 vep->modeBits = vnode->modeBits;
3291 vep->InodeNumber = VNDISK_GET_INO(vnode);
3292 vep->type = vnode->type;
3293 vep->author = vnode->author;
3294 vep->owner = vnode->owner;
3295 vep->group = vnode->group;
3296 if (vnode->type == vDirectory) {
3297 if (class != vLarge) {
3298 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3299 vip->nAllocatedVnodes--;
/*
 * vnode is a pointer into buf, so sizeof(vnode) is only the size of a
 * pointer and &vnode is the address of the pointer itself. Clear and
 * write back the whole on-disk vnode (vcp->diskSize bytes), the same way
 * SalvageIndex() clears and rewrites a vnode.
 */
3300 memset(vnode, 0, vcp->diskSize);
3301 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3302 vnodeIndexOffset(vcp, vnodeNumber),
3303 (char *)vnode, vcp->diskSize);
3304 salvinfo->VolumeChanged = 1;
3306 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName: build the path name of a vnode into path.
 *
 * When the vnode has a parent and a recorded name, and the parent can be
 * looked up, the function first recurses to build the path of the parent
 * and then appends OS_DIRSEP and vp->name.
 *
 * NOTE(review): the rest of the parameter list, the base case and the
 * return statements are on lines elided from this listing. strcat() does
 * no bounds checking, so path must be large enough for the full name;
 * its size is not visible here.
 */
3315 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3318 struct VnodeEssence *parentvp;
3324 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3325 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3326 strcat(path, OS_DIRSEP);
3327 strcat(path, vp->name);
/*
 * IsVnodeOrphaned: report whether a vnode is orphaned.
 *
 * Returns 1 (orphaned) when the vnode has no VnodeEssence or is not
 * claimed; otherwise the answer is that of its parent, found by
 * recursion. Two early returns, annotated "Vnode zero does not exist"
 * (1) and "The root dir vnode is always claimed" (0), end the recursion;
 * their conditions are on lines elided from this listing.
 */
3333 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3334 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
3337 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3339 struct VnodeEssence *vep;
3342 return (1); /* Vnode zero does not exist */
3344 return (0); /* The root dir vnode is always claimed */
3345 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3346 if (!vep || !vep->claimed)
3347 return (1); /* Vnode is not claimed - it is orphaned */
/* This vnode is claimed, so the result depends on its parent chain. */
3349 return (IsVnodeOrphaned(salvinfo, vep->parent));
3353 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3354 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3355 struct DirSummary *rootdir, int *rootdirfound)
3357 static struct DirSummary dir;
3358 static struct DirHandle dirHandle;
3359 struct VnodeEssence *parent;
3360 static char path[MAXPATHLEN];
3363 if (dirVnodeInfo->vnodes[i].salvaged)
3364 return; /* already salvaged */
3367 dirVnodeInfo->vnodes[i].salvaged = 1;
3369 if (dirVnodeInfo->inodes[i] == 0)
3370 return; /* Not allocated to a directory */
3372 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3373 if (dirVnodeInfo->vnodes[i].parent) {
3374 Log("Bad parent, vnode 1; %s...\n",
3375 (Testing ? "skipping" : "salvaging"));
3376 dirVnodeInfo->vnodes[i].parent = 0;
3377 dirVnodeInfo->vnodes[i].changed = 1;
3380 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3381 if (parent && parent->salvaged == 0)
3382 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3383 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3384 rootdir, rootdirfound);
3387 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3388 dir.unique = dirVnodeInfo->vnodes[i].unique;
3391 dir.parent = dirVnodeInfo->vnodes[i].parent;
3392 dir.haveDot = dir.haveDotDot = 0;
3393 dir.ds_linkH = alinkH;
3394 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3395 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
3397 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHa