2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
89 #include <afs/procmgmt.h>
93 #include <sys/param.h>
97 #endif /* ITIMER_REAL */
103 #include <sys/stat.h>
108 #include <WINNT/afsevent.h>
111 #define WCOREDUMP(x) ((x) & 0200)
114 #include <afs/afsint.h>
115 #include <afs/afs_assert.h>
116 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
117 #if defined(AFS_VFSINCL_ENV)
118 #include <sys/vnode.h>
120 #include <sys/fs/ufs_inode.h>
122 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
123 #include <ufs/ufs/dinode.h>
124 #include <ufs/ffs/fs.h>
126 #include <ufs/inode.h>
129 #else /* AFS_VFSINCL_ENV */
131 #include <ufs/inode.h>
132 #else /* AFS_OSF_ENV */
133 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_ARM_DARWIN_ENV)
134 #include <sys/inode.h>
137 #endif /* AFS_VFSINCL_ENV */
138 #endif /* AFS_SGI_ENV */
141 #include <sys/lockf.h>
145 #include <checklist.h>
147 #if defined(AFS_SGI_ENV)
152 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
155 #include <sys/mnttab.h>
156 #include <sys/mntent.h>
161 #endif /* AFS_SGI_ENV */
162 #endif /* AFS_HPUX_ENV */
167 #include <afs/osi_inode.h>
171 #include <afs/afsutil.h>
172 #include <afs/fileutil.h>
173 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
181 #include <afs/afssyscalls.h>
185 #include "partition.h"
186 #include "daemon_com.h"
188 #include "volume_inline.h"
189 #include "salvsync.h"
190 #include "viceinode.h"
192 #include "volinodes.h" /* header magic number, etc. stuff */
193 #include "vol-salvage.h"
195 #include "vol_internal.h"
197 #include <afs/prs_fs.h>
199 #ifdef FSSYNC_BUILD_CLIENT
200 #include "vg_cache.h"
208 extern void *calloc();
210 static char *TimeStamp(time_t clock, int precision);
213 int debug; /* -d flag */
214 extern int Testing; /* -n flag */
215 int ListInodeOption; /* -i flag */
216 int ShowRootFiles; /* -r flag */
217 int RebuildDirs; /* -sal flag */
218 int Parallel = 4; /* -para X flag */
219 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
220 int forceR = 0; /* -b flag */
221 int ShowLog = 0; /* -showlog flag */
222 int ShowSuid = 0; /* -showsuid flag */
223 int ShowMounts = 0; /* -showmounts flag */
224 int orphans = ORPH_IGNORE; /* -orphans option */
229 int useSyslog = 0; /* -syslog flag */
230 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
239 #define MAXPARALLEL 32
241 int OKToZap; /* -o flag */
242 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
243 * in the volume header */
245 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
247 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
250 * information that is 'global' to a particular salvage job.
253 Device fileSysDevice; /**< The device number of the current partition
255 char fileSysPath[8]; /**< The path of the mounted partition currently
256 * being salvaged, i.e. the directory containing
257 * the volume headers */
258 char *fileSysPathName; /**< NT needs this to make name pretty log. */
259 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
260 int VGLinkH_cnt; /**< # of references to lnk handle. */
261 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
264 char *fileSysDeviceName; /**< The block device where the file system being
265 * salvaged was mounted */
266 char *filesysfulldev;
268 int VolumeChanged; /**< Set by any routine which would change the
269 * volume in a way which would require callbacks
270 * to be broken if the volume was put back on
271 * on line by an active file server */
273 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
274 * header dealt with */
276 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
277 FD_t inodeFd; /**< File descriptor for inode file */
279 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
280 int nVolumes; /**< Number of volumes (read-write and read-only)
281 * in volume summary */
282 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
285 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
286 * vnodes in the volume that
287 * we are currently looking
289 int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
290 * to contact the fileserver over FSYNC */
297 /* Forward declarations */
298 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
299 static int AskVolumeSummary(struct SalvInfo *salvinfo,
300 VolumeId singleVolumeNumber);
301 static void MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId);
303 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
304 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
305 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
307 /* Uniquifier stored in the Inode */
312 return (u & 0x3fffff);
314 #if defined(AFS_SGI_EXMAG)
315 return (u & SGI_UNIQMASK);
318 #endif /* AFS_SGI_EXMAG */
325 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
327 return 0; /* otherwise may be transient, e.g. EMFILE */
332 char *save_args[MAX_ARGS];
334 extern pthread_t main_thread;
335 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
339 * Get the salvage lock if not already held. Hold until process exits.
341 * @param[in] locktype READ_LOCK or WRITE_LOCK
/*
 * _ObtainSalvageLock: initializes a VLockFile on
 * AFSDIR_SERVER_SLVGLOCK_FILEPATH and tries to lock it with VLockFileLock(),
 * passing the caller's lock type through unchanged.
 *
 * The two string fragments below are the start of the failure messages: one
 * for "another salvager is running" and one reporting an error code.
 * NOTE(review): the statements that test `code`, and whatever is done after
 * a failure, are not visible in this excerpt -- confirm against the full file.
 */
344 _ObtainSalvageLock(int locktype)
346 struct VLockFile salvageLock;
351 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
353 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
356 "salvager: There appears to be another salvager running! "
361 "salvager: Error %d trying to acquire salvage lock! "
/* Acquire the salvage lock by calling _ObtainSalvageLock() with WRITE_LOCK. */
367 ObtainSalvageLock(void)
369 _ObtainSalvageLock(WRITE_LOCK);
/* Acquire the salvage lock by calling _ObtainSalvageLock() with READ_LOCK. */
372 ObtainSharedSalvageLock(void)
374 _ObtainSalvageLock(READ_LOCK);
378 #ifdef AFS_SGI_XFS_IOPS_ENV
379 /* Check if the given partition is mounted. For XFS, the root inode is not a
380 * constant. So we check the hard way.
/*
 * IsPartitionMounted: scan the MOUNTED table with setmntent()/getmntent()
 * looking for an entry whose mnt_dir equals `part`.
 *
 * Returns 1 when a matching entry was found and 0 when the table was
 * exhausted without a match. The previous "? 1 : 1" returned 1 in both
 * cases, so the scan had no effect on the result.
 * NOTE(review): callers that relied on the old always-1 result will now see
 * 0 for unmounted partitions -- confirm that is the intended behavior.
 */
383 IsPartitionMounted(char *part)
386 struct mntent *mntent;
388 osi_Assert(mntfp = setmntent(MOUNTED, "r"));
389 while (mntent = getmntent(mntfp)) {
390 if (!strcmp(part, mntent->mnt_dir))
395 return mntent ? 1 : 0;
398 /* Check if the given inode is the root of the filesystem. */
399 #ifndef AFS_SGI_XFS_IOPS_ENV
/*
 * IsRootInode: returns non-zero when status->st_ino equals ROOTINODE.
 * This definition is compiled only when AFS_SGI_XFS_IOPS_ENV is not defined.
 */
401 IsRootInode(struct afs_stat_st *status)
404 * The root inode is not a fixed value in XFS partitions. So we need to
405 * see if the partition is in the list of mounted partitions. This only
406 * affects the SalvageFileSys path, so we check there.
408 return (status->st_ino == ROOTINODE);
/*
 * CheckIfBigFilesFS: builds a device path in `name` (prefixing "/dev/" when
 * devName does not already start with it), reads the superblock from that
 * path with ReadSuper(), and tests it with IsBigFilesFileSystem(). A message
 * is logged when the superblock cannot be read or the filesystem is a
 * big-files filesystem.
 * NOTE(review): the declaration of `name` and the return statements are not
 * visible in this excerpt; the sprintf/strcpy into `name` are unbounded, so
 * confirm the buffer is large enough for devName.
 */
413 #ifndef AFS_NAMEI_ENV
414 /* We don't want to salvage big files filesystems, since we can't put volumes on
418 CheckIfBigFilesFS(char *mountPoint, char *devName)
420 struct superblock fs;
423 if (strncmp(devName, "/dev/", 5)) {
424 (void)sprintf(name, "/dev/%s", devName);
426 (void)strcpy(name, devName);
429 if (ReadSuper(&fs, name) < 0) {
430 Log("Unable to read superblock. Not salvaging partition %s.\n",
434 if (IsBigFilesFileSystem(&fs)) {
435 Log("Partition %s is a big files filesystem, not salvaging.\n",
445 #define HDSTR "\\Device\\Harddisk"
446 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
/*
 * SameDisk (QueryDosDevice variant): resolves each partition's devName with
 * QueryDosDevice() into res1/res2, logs a warning when a result does not
 * start with HDSTR, and finally reports whether the two resolved names are
 * equal, compared case-insensitively over at most RES_LEN - 1 characters.
 * NOTE(review): `dowarn` is a static flag, presumably used to limit how
 * often the warning is logged; the code that tests or clears it, and the
 * values returned on the failure paths, are not visible in this excerpt.
 */
448 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
454 static int dowarn = 1;
456 if (!QueryDosDevice(p1->devName, res1, RES_LEN - 1))
458 if (strncmp(res1, HDSTR, HDLEN)) {
461 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
462 res1, HDSTR, p1->devName);
465 if (!QueryDosDevice(p2->devName, res2, RES_LEN - 1))
467 if (strncmp(res2, HDSTR, HDLEN)) {
470 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
471 res2, HDSTR, p2->devName);
475 return (0 == _strnicmp(res1, res2, RES_LEN - 1));
/*
 * Macro form of SameDisk: two partitions are treated as sharing a disk when
 * their device numbers, integer-divided by PartsPerDisk, are equal.
 * NOTE(review): presumably the alternative to the QueryDosDevice-based
 * function; the selecting preprocessor lines are not visible here.
 */
478 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
/*
 * SalvageFileSysParallel: schedule the salvage of partP alongside salvages
 * of other partitions.
 *
 * Each request is wrapped in a `struct job`. The static jobs[] array holds
 * one entry per disk being worked on; a job whose partition is on the same
 * disk as an existing entry (per SameDisk) is chained through nextjob rather
 * than given its own slot. When numjobs has reached Parallel, or when partP
 * is NULL and jobs are still outstanding, wait() is used to collect a
 * finished child before anything new is started.
 *
 * When debug is set or Parallel == 1, SalvageFileSys() is called directly on
 * the partition instead.
 *
 * A NULL partP asks to wait for all jobs; afterwards, unless useSyslog is
 * set, the per-job log files (AFSDIR_SERVER_SLVGLOG_FILEPATH.<jobnumb>) are
 * read back line by line and unlinked.
 * NOTE(review): where those lines are written to is not visible in this
 * excerpt.
 */
481 /* This assumes that two partitions with the same device number divided by
482 * PartsPerDisk are on the same disk.
485 SalvageFileSysParallel(struct DiskPartition64 *partP)
488 struct DiskPartition64 *partP;
489 int pid; /* Pid for this job */
490 int jobnumb; /* Log file job number */
491 struct job *nextjob; /* Next partition on disk to salvage */
493 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
494 struct job *thisjob = 0;
495 static int numjobs = 0;
496 static int jobcount = 0;
502 char logFileName[256];
506 /* We have a partition to salvage. Copy it into thisjob */
507 thisjob = (struct job *)malloc(sizeof(struct job));
509 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
512 memset(thisjob, 0, sizeof(struct job));
513 thisjob->partP = partP;
514 thisjob->jobnumb = jobcount;
516 } else if (jobcount == 0) {
517 /* We are asking to wait for all jobs (partp == 0), yet we never
520 Log("No file system partitions named %s* found; not salvaged\n",
521 VICE_PARTITION_PREFIX);
525 if (debug || Parallel == 1) {
527 SalvageFileSys(thisjob->partP, 0);
534 /* Check to see if thisjob is for a disk that we are already
535 * salvaging. If it is, link it in as the next job to do. The
536 * jobs array has 1 entry per disk being salvaged. numjobs is
537 * the total number of disks currently being salvaged. In
538 * order to keep the jobs array compact, when a disk is
539 * completed, the highest element in the jobs array is moved
540 * down to now open slot.
542 for (j = 0; j < numjobs; j++) {
543 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
544 /* On same disk, add it to this list and return */
545 thisjob->nextjob = jobs[j]->nextjob;
546 jobs[j]->nextjob = thisjob;
553 /* Loop until we start thisjob or until all existing jobs are finished */
554 while (thisjob || (!partP && (numjobs > 0))) {
555 startjob = -1; /* No new job to start */
557 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
558 /* Either the max jobs are running or we have to wait for all
559 * the jobs to finish. In either case, we wait for at least one
560 * job to finish. When it's done, clean up after it.
562 pid = wait(&wstatus);
563 osi_Assert(pid != -1);
564 for (j = 0; j < numjobs; j++) { /* Find which job it is */
565 if (pid == jobs[j]->pid)
568 osi_Assert(j < numjobs);
569 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
570 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
573 numjobs--; /* job no longer running */
574 oldjob = jobs[j]; /* remember */
575 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
576 free(oldjob); /* free the old job */
578 /* If there is another partition on the disk to salvage, then
579 * say we will start it (startjob). If not, then put thisjob there
580 * and say we will start it.
582 if (jobs[j]) { /* Another partitions to salvage */
583 startjob = j; /* Will start it */
584 } else { /* There is not another partition to salvage */
586 jobs[j] = thisjob; /* Add thisjob */
588 startjob = j; /* Will start it */
590 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
591 startjob = -1; /* Don't start it - already running */
595 /* We don't have to wait for a job to complete */
597 jobs[numjobs] = thisjob; /* Add this job */
599 startjob = numjobs; /* Will start it */
603 /* Start up a new salvage job on a partition in job slot "startjob" */
604 if (startjob != -1) {
606 Log("Starting salvage of file system partition %s\n",
607 jobs[startjob]->partP->name);
609 /* For NT, we not only fork, but re-exec the salvager. Pass in the
610 * commands and pass the child job number via the data path.
613 nt_SalvagePartition(jobs[startjob]->partP->name,
614 jobs[startjob]->jobnumb);
615 jobs[startjob]->pid = pid;
620 jobs[startjob]->pid = pid;
626 for (fd = 0; fd < 16; fd++)
633 openlog("salvager", LOG_PID, useSyslogFacility);
637 snprintf(logFileName, sizeof logFileName, "%s.%d",
638 AFSDIR_SERVER_SLVGLOG_FILEPATH,
639 jobs[startjob]->jobnumb);
640 logFile = afs_fopen(logFileName, "w");
645 SalvageFileSys1(jobs[startjob]->partP, 0);
650 } /* while ( thisjob || (!partP && numjobs > 0) ) */
652 /* If waited for all jobs to complete, now collect log files and return */
654 if (!useSyslog) /* if syslogging - no need to collect */
/* Per-job log files are named with the job numbers 0 .. jobcount-1. */
657 for (i = 0; i < jobcount; i++) {
658 snprintf(logFileName, sizeof logFileName, "%s.%d",
659 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
660 if ((passLog = afs_fopen(logFileName, "r"))) {
661 while (fgets(buf, sizeof(buf), passLog)) {
666 (void)unlink(logFileName);
/*
 * SalvageFileSys: run SalvageFileSys1() on partP/singleVolumeNumber.
 *
 * The work is done in the current process when canfork is false or debug is
 * set; otherwise Fork() is called and the branch where it returns 0 does the
 * work. Wait("SalvageFileSys") is called to collect the child.
 * NOTE(review): the body of the `canfork && !debug` branch and the else that
 * presumably encloses the Wait() call are not visible in this excerpt.
 */
675 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
677 if (!canfork || debug || Fork() == 0) {
678 SalvageFileSys1(partP, singleVolumeNumber);
679 if (canfork && !debug) {
684 Wait("SalvageFileSys");
/*
 * get_DevName: split the device path in pbuffer at its last OS_DIRSEPC.
 *
 * A copy of pbuffer is made in the local pbuf[128] and searched for the last
 * separator; pbuffer itself is then searched the same way and overwritten in
 * place with the text that follows the separator.
 *
 * The in-place move uses memmove() because source (ptr + 1) and destination
 * (pbuffer) lie in the same buffer; strcpy() on overlapping objects is
 * undefined behavior.
 *
 * NOTE(review): the handling of `wpath`, the checks on `ptr` and the return
 * statements are not visible in this excerpt. The copy into pbuf is an
 * unbounded strcpy(); callers must pass a path shorter than 128 bytes.
 */
688 get_DevName(char *pbuffer, char *wpath)
690 char pbuf[128], *ptr;
691 strcpy(pbuf, pbuffer);
692 ptr = (char *)strrchr(pbuf, OS_DIRSEPC);
698 ptr = (char *)strrchr(pbuffer, OS_DIRSEPC);
700 memmove(pbuffer, ptr + 1, strlen(ptr + 1) + 1);
/*
 * SalvageFileSys1: salvage one partition, or -- when singleVolumeNumber is
 * non-zero -- only the volume group that volume belongs to.
 *
 * Steps visible in this excerpt, in order:
 *  - zero a local SalvInfo and record the partition's device, path and name;
 *  - for a single volume: connect to the fileserver (unless running as the
 *    salvageserver), set useFSYNC, AskOffline() the volume and, on DAFS
 *    builds, LockVolume() it; otherwise clear useFSYNC and lock the whole
 *    partition with VLockPartition();
 *  - remove leftover "salvage.inodes." and "salvage.temp." files from the
 *    partition directory;
 *  - create the inode list file under tmpdir (or the partition path) and
 *    unlink it right away;
 *  - call GetInodeSummary() and GetVolumeSummary(), pair each inode-summary
 *    entry with its volume header, and call SalvageVolumeGroup() once per
 *    read-write volume id;
 *  - delete volume header files that have no matching inodes;
 *  - after a non-Testing single-volume salvage, AskOnline() the volumes that
 *    were not deleted, starting with singleVolumeNumber.
 *
 * Abort() is called when volume checkout is retried more than
 * VOL_MAX_CHECKOUT_RETRIES times, when the fileserver cannot be reached, or
 * when the inode list file cannot be created.
 * NOTE(review): the retry label/goto and several branch bodies are not
 * visible in this excerpt.
 */
707 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
710 char inodeListPath[256];
711 FD_t inodeFile = INVALID_FD;
712 static char tmpDevName[100];
713 static char wpath[100];
714 struct VolumeSummary *vsp, *esp;
718 struct SalvInfo l_salvinfo;
719 struct SalvInfo *salvinfo = &l_salvinfo;
722 memset(salvinfo, 0, sizeof(*salvinfo));
725 if (inodeFile != INVALID_FD) {
727 inodeFile = INVALID_FD;
729 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
730 Abort("Raced too many times with fileserver restarts while trying to "
731 "checkout/lock volumes; Aborted\n");
733 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
735 /* unlock all previous volume locks, since we're about to lock them
737 VLockFileReinit(&partP->volLockFile);
739 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
/* Record the partition, its device number and its path in the salvage state. */
741 salvinfo->fileSysPartition = partP;
742 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
743 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
746 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
747 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
748 name = partP->devName;
750 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
751 strcpy(tmpDevName, partP->devName);
752 name = get_DevName(tmpDevName, wpath);
753 salvinfo->fileSysDeviceName = name;
754 salvinfo->filesysfulldev = wpath;
757 if (singleVolumeNumber) {
758 #if !(defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL))
759 /* only non-DAFS locks the partition when salvaging a single volume;
760 * DAFS will lock the individual volumes in the VG */
761 VLockPartition(partP->name);
762 #endif /* !(AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL) */
766 /* salvageserver already setup fssync conn for us */
767 if ((programType != salvageServer) && !VConnectFS()) {
768 Abort("Couldn't connect to file server\n");
771 salvinfo->useFSYNC = 1;
772 AskOffline(salvinfo, singleVolumeNumber);
773 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
774 if (LockVolume(salvinfo, singleVolumeNumber)) {
777 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
780 salvinfo->useFSYNC = 0;
781 VLockPartition(partP->name);
785 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
788 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
789 partP->name, name, (Testing ? "(READONLY mode)" : ""));
791 Log("***Forced salvage of all volumes on this partition***\n");
796 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
803 osi_Assert((dirp = opendir(salvinfo->fileSysPath)) != NULL);
804 while ((dp = readdir(dirp))) {
805 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
806 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
808 Log("Removing old salvager temp files %s\n", dp->d_name);
809 strcpy(npath, salvinfo->fileSysPath);
810 strcat(npath, OS_DIRSEP);
811 strcat(npath, dp->d_name);
817 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
819 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
820 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
822 snprintf(inodeListPath, 255, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
/* Create (or truncate) the inode list file that is handed to GetInodeSummary(). */
826 inodeFile = OS_OPEN(inodeListPath, O_RDWR|O_TRUNC|O_CREAT, 0666);
827 if (inodeFile == INVALID_FD) {
828 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
831 /* Using nt_unlink here since we're really using the delete on close
832 * semantics of unlink. In most places in the salvager, we really do
833 * mean to unlink the file at that point. Those places have been
834 * modified to actually do that so that the NT crt can be used there.
836 * jaltman - On NT delete on close cannot be applied to a file while the
837 * process has an open file handle that does not have DELETE file
838 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
839 * delete privileges. As a result the nt_unlink() call will always
842 code = nt_unlink(inodeListPath);
844 code = unlink(inodeListPath);
847 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
850 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
852 if (singleVolumeNumber) {
853 /* the volume group -- let alone the volume -- does not exist,
854 * but we checked it out, so give it back to the fileserver */
855 AskDelete(salvinfo, singleVolumeNumber);
859 salvinfo->inodeFd = inodeFile;
860 if (salvinfo->inodeFd == INVALID_FD)
861 Abort("Temporary file %s is missing...\n", inodeListPath);
862 OS_SEEK(salvinfo->inodeFd, 0L, SEEK_SET);
863 if (ListInodeOption) {
864 PrintInodeList(salvinfo);
865 if (singleVolumeNumber) {
866 /* We've checked out the volume from the fileserver, and we need
867 * to give it back. We don't know if the volume exists or not,
868 * so we don't know whether to AskOnline or not. Try to determine
869 * if the volume exists by trying to read the volume header, and
870 * AskOnline if it is readable. */
871 MaybeAskOnline(salvinfo, singleVolumeNumber);
875 /* enumerate volumes in the partition.
876 * figure out sets of read-only + rw volumes.
877 * salvage each set, read-only volumes first, then read-write.
878 * Fix up inodes on last volume in set (whether it is read-write
881 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
885 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
886 i < salvinfo->nVolumesInInodeFile; i = j) {
887 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
889 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
891 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
892 struct VolumeSummary *tsp;
893 /* Scan volume list (from partition root directory) looking for the
894 * current rw volume number in the volume list from the inode scan.
895 * If there is one here that is not in the inode volume list,
897 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
899 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
901 /* Now match up the volume summary info from the root directory with the
902 * entry in the volume list obtained from scanning inodes */
903 salvinfo->inodeSummary[j].volSummary = NULL;
904 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
905 if (tsp->header.id == vid) {
906 salvinfo->inodeSummary[j].volSummary = tsp;
912 /* Salvage the group of volumes (several read-only + 1 read/write)
913 * starting with the current read-only volume we're looking at.
915 SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
918 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
919 for (; vsp < esp; vsp++) {
921 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
924 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
925 RemoveTheForce(salvinfo->fileSysPath);
/* After a non-Testing single-volume salvage, give the volumes back to the fileserver. */
927 if (!Testing && singleVolumeNumber) {
929 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
930 /* unlock vol headers so the fs can attach them when we AskOnline */
931 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
932 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
934 /* Step through the volumeSummary list and set all volumes on-line.
935 * Most volumes were taken off-line in GetVolumeSummary.
936 * If a volume was deleted, don't tell the fileserver anything, since
937 * we already told the fileserver the volume was deleted back when we
938 * destroyed the volume header.
939 * Also, make sure we bring the singleVolumeNumber back online first.
942 for (j = 0; j < salvinfo->nVolumes; j++) {
943 if (salvinfo->volumeSummaryp[j].header.id == singleVolumeNumber) {
945 if (!salvinfo->volumeSummaryp[j].deleted) {
946 AskOnline(salvinfo, singleVolumeNumber);
952 /* If singleVolumeNumber is not in our volumeSummary, it means that
953 * at least one other volume in the VG is on the partition, but the
954 * RW volume is not. We've already AskOffline'd it by now, though,
955 * so make sure we don't still have the volume checked out. */
956 AskDelete(salvinfo, singleVolumeNumber);
959 for (j = 0; j < salvinfo->nVolumes; j++) {
960 if (salvinfo->volumeSummaryp[j].header.id != singleVolumeNumber) {
961 if (!salvinfo->volumeSummaryp[j].deleted) {
962 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
968 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
969 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
972 OS_CLOSE(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
/*
 * DeleteExtraVolumeHeaderFile: dispose of a volume header file that has no
 * associated data.
 *
 * Builds the header's path from salvinfo->fileSysPath and vsp->fileName,
 * logs that the header is being (or, when Testing, would have been) deleted,
 * calls VDestroyVolumeDiskHeader() for the volume id/parent, unlinks the
 * path (ENOENT is tolerated), and, when salvinfo->useFSYNC is set, calls
 * AskDelete() for the volume id.
 * NOTE(review): the conditions guarding these steps (for example a Testing
 * check around the destructive calls) are not visible in this excerpt.
 */
976 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
979 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, vsp->fileName);
982 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
985 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
987 Log("Error %ld destroying volume disk header for volume %lu\n",
988 afs_printable_int32_ld(code),
989 afs_printable_uint32_lu(vsp->header.id));
992 /* make sure we actually delete the fileName file; ENOENT
993 * is fine, since VDestroyVolumeDiskHeader probably already
995 if (unlink(path) && errno != ENOENT) {
996 Log("Unable to unlink %s (errno = %d)\n", path, errno);
998 if (salvinfo->useFSYNC) {
999 AskDelete(salvinfo, vsp->header.id);
// CompareInodes: comparison callback over two struct ViceInodeInfo records
// (it is passed to qsort() by GetInodeSummary).
//
// When either record is a special inode (vnodeNumber == INODESPECIAL) the
// records are first ordered by read-write volume id: parentId for a special
// inode, volumeId otherwise. Ties are then resolved using the volume id and,
// for two special inodes of the same volume, the special type.
// Ordinary inodes are ordered by volumeId, then vnodeNumber, and then by
// vnodeUniquifier and inodeDataVersion with the comparison reversed so the
// preferred inode sorts first.
// NOTE(review): most return statements are not visible in this excerpt; the
// AFS_3DISPARES / AFS_SGI_EXMAG branches compare against 90% and 10% of the
// field range, presumably to handle counter wrap-around -- confirm.
1007 CompareInodes(const void *_p1, const void *_p2)
1009 const struct ViceInodeInfo *p1 = _p1;
1010 const struct ViceInodeInfo *p2 = _p2;
1011 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1012 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
1013 VolumeId p1rwid, p2rwid;
1015 (p1->u.vnode.vnodeNumber ==
1016 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
1018 (p2->u.vnode.vnodeNumber ==
1019 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1020 if (p1rwid < p2rwid)
1022 if (p1rwid > p2rwid)
1024 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1025 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
1026 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1027 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1028 if (p1->u.vnode.volumeId == p1rwid)
1030 if (p2->u.vnode.volumeId == p2rwid)
1032 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1034 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1035 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1036 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1038 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1040 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1042 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1044 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1046 /* The following tests are reversed, so that the most desirable
1047 * of several similar inodes comes first */
1048 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1049 #ifdef AFS_3DISPARES
1050 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1051 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1054 #ifdef AFS_SGI_EXMAG
1055 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1056 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1061 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1062 #ifdef AFS_3DISPARES
1063 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1064 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1067 #ifdef AFS_SGI_EXMAG
1068 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1069 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1074 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1075 #ifdef AFS_3DISPARES
1076 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1077 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1080 #ifdef AFS_SGI_EXMAG
1081 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1082 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1087 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1088 #ifdef AFS_3DISPARES
1089 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1090 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1093 #ifdef AFS_SGI_EXMAG
1094 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1095 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/*
 * CountVolumeInodes: summarize the leading run of entries in `ip` that share
 * the first entry's volumeId, looking at no more than maxInodes entries.
 *
 * For special inodes (vnodeNumber == INODESPECIAL) the read-write volume id
 * is taken from parentId; without any, it stays equal to the volume id. The
 * largest vnodeUniquifier seen is tracked in maxunique. The results are
 * stored in summary->volumeId, RWvolumeId, nInodes, nSpecialInodes and
 * maxUniquifier.
 * NOTE(review): the counter updates, the pointer advance and the exact
 * branch that updates maxunique are not visible in this excerpt.
 */
1104 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1105 struct InodeSummary *summary)
1107 VolumeId volume = ip->u.vnode.volumeId;
1108 VolumeId rwvolume = volume;
1113 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1115 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1117 rwvolume = ip->u.special.parentId;
1118 /* This isn't quite right, as there could (in error) be different
1119 * parent inodes in different special vnodes */
1121 if (maxunique < ip->u.vnode.vnodeUniquifier)
1122 maxunique = ip->u.vnode.vnodeUniquifier;
1126 summary->volumeId = volume;
1127 summary->RWvolumeId = rwvolume;
1128 summary->nInodes = n;
1129 summary->nSpecialInodes = nSpecial;
1130 summary->maxUniquifier = maxunique;
/*
 * OnlyOneVolume: inode filter predicate (passed to ListViceInodes by
 * GetInodeSummary when a single volume is being salvaged).
 *
 * Returns non-zero when the inode belongs to singleVolumeNumber: special
 * inodes are matched on parentId, all others on volumeId. `rock` is not
 * referenced in the visible lines.
 */
1134 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1136 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1137 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1138 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1143 * Collect list of inodes in file named by path. If a truly fatal error,
1144 unlink the file and abort. For lesser errors, return -1. The file will
1145 * be unlinked by the caller.
1148 GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolumeNumber)
1152 struct ViceInodeInfo *ip, *ip_save;
1153 struct InodeSummary summary;
1154 char summaryFileName[50];
1155 FD_t summaryFile = INVALID_FD;
1157 char *dev = salvinfo->fileSysPath;
1158 char *wpath = salvinfo->fileSysPath;
1160 char *dev = salvinfo->fileSysDeviceName;
1161 char *wpath = salvinfo->filesysfulldev;
1163 char *part = salvinfo->fileSysPath;
1166 afs_sfsize_t st_size;
1168 /* This file used to come from vfsck; cobble it up ourselves now... */
1170 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1171 singleVolumeNumber ? OnlyOneVolume : 0,
1172 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1174 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1177 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1179 if (forceSal && !ForceSalvage) {
1180 Log("***Forced salvage of all volumes on this partition***\n");
1183 OS_SEEK(inodeFile, 0L, SEEK_SET);
1184 salvinfo->inodeFd = inodeFile;
1185 if (salvinfo->inodeFd == INVALID_FD ||
1186 (st_size = OS_SIZE(salvinfo->inodeFd)) == -1) {
1187 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1189 tdir = (tmpdir ? tmpdir : part);
1191 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1192 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp."));
1194 snprintf(summaryFileName, sizeof summaryFileName,
1195 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1197 summaryFile = OS_OPEN(summaryFileName, O_RDWR|O_APPEND|O_CREAT, 0666);
1198 if (summaryFile == INVALID_FD) {
1199 Abort("Unable to create inode summary file\n");
1203 /* Using nt_unlink here since we're really using the delete on close
1204 * semantics of unlink. In most places in the salvager, we really do
1205 * mean to unlink the file at that point. Those places have been
1206 * modified to actually do that so that the NT crt can be used there.
1208 * jaltman - As commented elsewhere, this cannot work because fopen()
1209 * does not open files with DELETE and FILE_SHARE_DELETE.
1211 code = nt_unlink(summaryFileName);
1213 code = unlink(summaryFileName);
1216 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1219 if (!canfork || debug || Fork() == 0) {
1220 int nInodes = st_size / sizeof(struct ViceInodeInfo);
1222 OS_CLOSE(summaryFile);
1223 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1224 RemoveTheForce(salvinfo->fileSysPath);
1226 struct VolumeSummary *vsp;
1229 GetVolumeSummary(salvinfo, singleVolumeNumber);
1231 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1233 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1236 Log("%s vice inodes on %s; not salvaged\n",
1237 singleVolumeNumber ? "No applicable" : "No", dev);
1240 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1242 OS_CLOSE(summaryFile);
1244 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1247 if (OS_READ(salvinfo->inodeFd, ip, st_size) != st_size) {
1248 OS_CLOSE(summaryFile);
1249 Abort("Unable to read inode table; %s not salvaged\n", dev);
1251 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1252 if (OS_SEEK(salvinfo->inodeFd, 0, SEEK_SET) == -1
1253 || OS_WRITE(salvinfo->inodeFd, ip, st_size) != st_size) {
1254 OS_CLOSE(summaryFile);
1255 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1260 CountVolumeInodes(ip, nInodes, &summary);
1261 if (OS_WRITE(summaryFile, &summary, sizeof(summary)) != sizeof(summary)) {
1262 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1263 OS_CLOSE(summaryFile);
1266 summary.index += (summary.nInodes);
1267 nInodes -= summary.nInodes;
1268 ip += summary.nInodes;
1271 ip = ip_save = NULL;
1272 /* Following fflush is not fclose, because if it was debug mode would not work */
1273 if (OS_SYNC(summaryFile) == -1) {
1274 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1275 OS_CLOSE(summaryFile);
1278 if (canfork && !debug) {
1283 if (Wait("Inode summary") == -1) {
1284 OS_CLOSE(summaryFile);
1285 Exit(1); /* salvage of this partition aborted */
1289 st_size = OS_SIZE(summaryFile);
1290 osi_Assert(st_size >= 0);
1293 salvinfo->inodeSummary = (struct InodeSummary *)malloc(st_size);
1294 osi_Assert(salvinfo->inodeSummary != NULL);
1295 /* For GNU we need to do lseek to get the file pointer moved. */
1296 osi_Assert(OS_SEEK(summaryFile, 0, SEEK_SET) == 0);
1297 ret = OS_READ(summaryFile, salvinfo->inodeSummary, st_size);
1298 osi_Assert(ret == st_size);
1300 salvinfo->nVolumesInInodeFile = st_size / sizeof(struct InodeSummary);
1301 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1302 salvinfo->inodeSummary[i].volSummary = NULL;
1304 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)st_size);
1305 OS_CLOSE(summaryFile);
/*
 * CompareVolumes: ordering callback over two struct VolumeSummary objects.
 * Both arguments arrive as const void pointers, the shape qsort expects.
 *
 * Visible ordering rules:
 *   1. different header.parent: the smaller parent sorts first;
 *   2. same parent: an entry whose header.id equals its header.parent is
 *      tested for next, first for p1 and then for p2;
 *   3. otherwise the smaller header.id sorts first.
 *
 * NOTE(review): the statements executed when the two tests in rule 2
 * succeed are not present in this listing. Going by the description that
 * follows, they should place the read-write volume ahead of its clones --
 * confirm against the full file.
 */
1309 /* Comparison routine for volume sort.
1310 This is setup so that a read-write volume comes immediately before
1311 any read-only clones of that volume */
1313 CompareVolumes(const void *_p1, const void *_p2)
1315 const struct VolumeSummary *p1 = _p1;
1316 const struct VolumeSummary *p2 = _p2;
1317 if (p1->header.parent != p2->header.parent)
1318 return p1->header.parent < p2->header.parent ? -1 : 1;
1319 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1321 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1323 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
/*
 * AskVolumeSummary: try to build salvinfo->volumeSummaryp for one volume
 * group from a fileserver VG query instead of scanning the partition.
 *
 * What the visible code does, in order (all of it sits inside the
 * FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS conditional and is reached
 * only when programType is salvageServer and singleVolumeNumber is
 * non-zero):
 *   - FSYNC_VGCQuery is issued for (partition name, singleVolumeNumber).
 *   - While the answer is SYNC_FAILED with reason FSYNC_PART_SCANNING the
 *     query is repeated after a sleep that starts at 1 second, grows by one
 *     each round and is capped at 10.
 *   - SYNC_FAILED with reason FSYNC_UNKNOWN_VOLID is turned into a zeroed
 *     response whose rw field is singleVolumeNumber.
 *   - A refusal message carrying the code and reason is logged on a further
 *     failure path (its controlling condition is not in this listing).
 *   - If the reported rw id differs from singleVolumeNumber, a salvage of
 *     the whole group is requested through SALVSYNC_LinkVolume (when
 *     SALVSYNC_BUILD_CLIENT is defined) and Exit(SALSRV_EXIT_VOLGROUP_LINK)
 *     is called.
 *   - Otherwise VOL_VG_MAX_VOLS summaries are obtained from calloc
 *     (asserted non-NULL). For each q_res.children[] slot, ids other than
 *     singleVolumeNumber are passed to AskOffline and LockVolume, the disk
 *     header is read and converted, the external name is stored in
 *     fileName and salvinfo->nVolumes is incremented. The array is then
 *     handed to qsort.
 *   - When none of that applies, a falling-back message is logged.
 *
 * NOTE(review): this listing omits a number of lines of the function
 * (return statements, loop-control statements and braces among them), so
 * the return values are documented only by the original parameter/return
 * notes that follow -- verify against the full file.
 */
1327 * Gleans volumeSummary information by asking the fileserver
1329 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1330 * salvaging a whole partition
1332 * @return whether we obtained the volume summary information or not
1333 * @retval 0 success; we obtained the volume summary information
1334 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1336 * @retval 1 we did not get the volume summary information; either the
1337 * fileserver responded with an error, or we are not supposed to
1338 * ask the fileserver for the information (e.g. we are salvaging
1339 * the entire partition or we are not the salvageserver)
1341 * @note for non-DAFS, always returns 1
1344 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1347 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1348 if (programType == salvageServer) {
1349 if (singleVolumeNumber) {
1350 FSSYNC_VGQry_response_t q_res;
1352 struct VolumeSummary *vsp;
1354 struct VolumeDiskHeader diskHdr;
1356 memset(&res, 0, sizeof(res));
1358 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1361 * We must wait for the partition to finish scanning before
1362 * can continue, since we will not know if we got the entire
1363 * VG membership unless the partition is fully scanned.
1364 * We could, in theory, just scan the partition ourselves if
1365 * the VG cache is not ready, but we would be doing the exact
1366 * same scan the fileserver is doing; it will almost always
1367 * be faster to wait for the fileserver. The only exceptions
1368 * are if the partition does not take very long to scan, and
1369 * in that case it's fast either way, so who cares?
1371 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1372 Log("waiting for fileserver to finish scanning partition %s...\n",
1373 salvinfo->fileSysPartition->name);
1375 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1376 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1377 * just so small partitions don't need to wait over 10
1378 * seconds every time, and large partitions are generally
1379 * polled only once every ten seconds. */
1380 sleep((i > 10) ? (i = 10) : i);
1382 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1386 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1387 /* This can happen if there's no header for the volume
1388 * we're salvaging, or no headers exist for the VG (if
1389 * we're salvaging an RW). Act as if we got a response
1390 * with no VG members. The headers may be created during
1391 * salvaging, if there are inodes in this VG. */
1393 memset(&q_res, 0, sizeof(q_res));
1394 q_res.rw = singleVolumeNumber;
1398 Log("fileserver refused VGCQuery request for volume %lu on "
1399 "partition %s, code %ld reason %ld\n",
1400 afs_printable_uint32_lu(singleVolumeNumber),
1401 salvinfo->fileSysPartition->name,
1402 afs_printable_int32_ld(code),
1403 afs_printable_int32_ld(res.hdr.reason));
1407 if (q_res.rw != singleVolumeNumber) {
1408 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1409 afs_printable_uint32_lu(singleVolumeNumber),
1410 afs_printable_uint32_lu(q_res.rw));
1411 #ifdef SALVSYNC_BUILD_CLIENT
1412 if (SALVSYNC_LinkVolume(q_res.rw,
1414 salvinfo->fileSysPartition->name,
1416 Log("schedule request failed\n");
1418 #endif /* SALVSYNC_BUILD_CLIENT */
1419 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1422 salvinfo->volumeSummaryp = calloc(VOL_VG_MAX_VOLS, sizeof(struct VolumeSummary));
1423 osi_Assert(salvinfo->volumeSummaryp != NULL);
1425 salvinfo->nVolumes = 0;
1426 vsp = salvinfo->volumeSummaryp;
1428 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1429 char name[VMAXPATHLEN];
1431 if (!q_res.children[i]) {
1435 /* AskOffline for singleVolumeNumber was called much earlier */
1436 if (q_res.children[i] != singleVolumeNumber) {
1437 AskOffline(salvinfo, q_res.children[i]);
1438 if (LockVolume(salvinfo, q_res.children[i])) {
1444 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1446 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1447 afs_printable_uint32_lu(q_res.children[i]));
1452 DiskToVolumeHeader(&vsp->header, &diskHdr);
1453 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1454 vsp->fileName = ToString(name);
1455 salvinfo->nVolumes++;
1459 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1464 Log("Cannot get volume summary from fileserver; falling back to scanning "
1465 "entire partition\n");
1468 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
/*
 * CountHeader: header-walk callback used for counting. GetVolumeSummary
 * passes it to VWalkVolumeHeaders on its partition-salvage path (the rock
 * argument of that call is not visible in this listing). The only visible
 * statement reinterprets rock as a pointer to an afs_int32 counter; dp,
 * name, hdr and last are described as unused in the notes that follow.
 *
 * NOTE(review): the statement that updates the counter and the return
 * statement are not present in this listing -- confirm against the full
 * file.
 */
1473 * count how many volume headers are found by VWalkVolumeHeaders.
1475 * @param[in] dp the disk partition (unused)
1476 * @param[in] name full path to the .vol header (unused)
1477 * @param[in] hdr the header data (unused)
1478 * @param[in] last whether this is the last try or not (unused)
1479 * @param[in] rock actually an afs_int32*; the running count of how many
1480 * volumes we have found
1485 CountHeader(struct DiskPartition64 *dp, const char *name,
1486 struct VolumeDiskHeader *hdr, int last, void *rock)
1488 afs_int32 *nvols = (afs_int32 *)rock;
/*
 * struct SalvageScanParams: state shared between GetVolumeSummary and the
 * header-walk callbacks. GetVolumeSummary assigns singleVolumeNumber, vsp,
 * nVolumes, totalVolumes and salvinfo before walking the partition;
 * RecordHeader and UnlinkHeader cast their rock argument back to this
 * type. RecordHeader compares nVolumes against totalVolumes and copies
 * each accepted summary to the slot vsp points at.
 */
1494 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
1497 struct SalvageScanParams {
1498 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1499 * vol id of the VG we're salvaging */
1500 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1501 * we're filling in */
1502 afs_int32 nVolumes; /**< # of vols we've encountered */
1503 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1504 * # of vols we've alloc'd memory for) */
1505 int retry; /**< do we need to retry vol lock/checkout? */
1506 struct SalvInfo *salvinfo; /**< salvage job info */
/*
 * RecordHeader: header-walk callback that captures one volume header into
 * the summary array described by the SalvageScanParams passed as rock.
 *
 * Visible flow:
 *   - hdr is converted with DiskToVolumeHeader into a local summary.
 *   - If a single volume was requested and this header is that volume but
 *     its parent id differs (the requested volume is a clone): under
 *     salvageServer a group salvage is requested via SALVSYNC_LinkVolume
 *     (SALVSYNC_BUILD_CLIENT builds) and Exit(SALSRV_EXIT_VOLGROUP_LINK)
 *     is called; the other visible path logs that the volume is read-only
 *     and not salvaged.
 *   - For a partition salvage, or when the header id or parent equals the
 *     requested volume, the part of name after the last OS_DIRSEPC is
 *     compared with the VFORMAT rendering of the header id.
 *   - When the name is good, or this is the last attempt: in single-volume
 *     mode a header other than the requested volume goes through
 *     AskOffline, and in AFS_DEMAND_ATTACH_FS / AFS_DEMAND_ATTACH_UTIL
 *     builds LockVolume is called (per the inline comment, not for a badly
 *     named header).
 *   - On the last attempt, with Showmode off, the misnaming message is
 *     logged (the enclosing bad-name test is not in this listing).
 *   - The base name is stored in summary.fileName, params->nVolumes is
 *     checked against params->totalVolumes (Abort when exceeded) and the
 *     summary is copied to params->vsp.
 *
 * NOTE(review): the lines that set badname, advance nVolumes and vsp, and
 * return a status are not present in this listing -- confirm against the
 * full file.
 */
1510 * records volume summary info found from VWalkVolumeHeaders.
1512 * Found volumes are also taken offline if they are in the specific volume
1513 * group we are looking for.
1515 * @param[in] dp the disk partition
1516 * @param[in] name full path to the .vol header
1517 * @param[in] hdr the header data
1518 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1519 * @param[in] rock actually a struct SalvageScanParams*, containing the
1520 * information needed to record the volume summary data
1522 * @return operation status
1524 * @retval -1 volume locking raced with fileserver restart; checking out
1525 * and locking volumes needs to be retried
1526 * @retval 1 volume header is mis-named and should be deleted
1529 RecordHeader(struct DiskPartition64 *dp, const char *name,
1530 struct VolumeDiskHeader *hdr, int last, void *rock)
1532 char nameShouldBe[64];
1533 struct SalvageScanParams *params;
1534 struct VolumeSummary summary;
1535 VolumeId singleVolumeNumber;
1536 struct SalvInfo *salvinfo;
1538 params = (struct SalvageScanParams *)rock;
1540 singleVolumeNumber = params->singleVolumeNumber;
1541 salvinfo = params->salvinfo;
1543 DiskToVolumeHeader(&summary.header, hdr);
1545 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1546 && summary.header.parent != singleVolumeNumber) {
1548 if (programType == salvageServer) {
1549 #ifdef SALVSYNC_BUILD_CLIENT
1550 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1551 summary.header.id, summary.header.parent);
1552 if (SALVSYNC_LinkVolume(summary.header.parent,
1556 Log("schedule request failed\n");
1559 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1562 Log("%u is a read-only volume; not salvaged\n",
1563 singleVolumeNumber);
1568 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1569 || summary.header.parent == singleVolumeNumber) {
1571 /* check if the header file is incorrectly named */
1573 const char *base = strrchr(name, OS_DIRSEPC);
1580 snprintf(nameShouldBe, sizeof nameShouldBe,
1581 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1584 if (strcmp(nameShouldBe, base)) {
1585 /* .vol file has wrong name; retry/delete */
1589 if (!badname || last) {
1590 /* only offline the volume if the header is good, or if this is
1591 * the last try looking at it; avoid AskOffline'ing the same vol
1594 if (singleVolumeNumber
1595 && summary.header.id != singleVolumeNumber) {
1596 /* don't offline singleVolumeNumber; we already did that
1599 AskOffline(salvinfo, summary.header.id);
1601 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
1603 /* don't lock the volume if the header is bad, since we're
1604 * about to delete it anyway. */
1605 if (LockVolume(salvinfo, summary.header.id)) {
1610 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
1614 if (last && !Showmode) {
1615 Log("Volume header file %s is incorrectly named (should be %s "
1616 "not %s); %sdeleted (it will be recreated later, if "
1617 "necessary)\n", name, nameShouldBe, base,
1618 (Testing ? "it would have been " : ""));
1623 summary.fileName = ToString(base);
1626 if (params->nVolumes > params->totalVolumes) {
1627 /* We found more volumes than we found on the first partition walk;
1628 * apparently something created a volume while we were
1629 * partition-salvaging, or we found more than 20 vols when salvaging a
1630 * particular volume. Abort if we detect this, since other programs
1631 * supposed to not touch the partition while it is partition-salvaging,
1632 * and we shouldn't find more than 20 vols in a VG.
1634 Abort("Found %ld vol headers, but should have found at most %ld! "
1635 "Make sure the volserver/fileserver are not running at the "
1636 "same time as a partition salvage\n",
1637 afs_printable_int32_ld(params->nVolumes),
1638 afs_printable_int32_ld(params->totalVolumes));
1641 memcpy(params->vsp, &summary, sizeof(summary));
/*
 * UnlinkHeader: header-walk callback that removes a volume header file the
 * walk has flagged. Two cases are distinguished in the visible code: the
 * header could not be read at all (a not-legitimate message is logged), or
 * a header was read and params->singleVolumeNumber is zero, i.e. a
 * partition salvage. The file is removed with unlink(name) only when the
 * local dounlink flag is set; an unlink failure is logged with errno.
 *
 * NOTE(review): the assignments to dounlink and the test that selects the
 * no-header case are not present in this listing -- confirm against the
 * full file.
 */
1649 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1651 * If the header could not be read in at all, the header is always unlinked.
1652 * If instead RecordHeader said the header was bad (that is, the header file
1653 * is mis-named), we only unlink if we are doing a partition salvage, as
1654 * opposed to salvaging a specific volume group.
1656 * @param[in] dp the disk partition
1657 * @param[in] name full path to the .vol header
1658 * @param[in] hdr header data, or NULL if the header could not be read
1659 * @param[in] rock actually a struct SalvageScanParams*, with some information
1663 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1664 struct VolumeDiskHeader *hdr, void *rock)
1666 struct SalvageScanParams *params;
1669 params = (struct SalvageScanParams *)rock;
1672 /* no header; header is too bogus to read in at all */
1674 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1680 } else if (!params->singleVolumeNumber) {
1681 /* We were able to read in a header, but RecordHeader said something
1682 * was wrong with it. We only unlink those if we are doing a partition
1689 if (dounlink && unlink(name)) {
1690 Log("Error %d while trying to unlink %s\n", errno, name);
/*
 * GetVolumeSummary: fill salvinfo->volumeSummaryp and salvinfo->nVolumes.
 *
 * Visible flow:
 *   - AskVolumeSummary is tried first; per the inline comments one outcome
 *     means the fileserver supplied everything and another means volume
 *     checkout must be retried (the tests on code are not in this listing).
 *   - Partition salvage (singleVolumeNumber == 0): the headers are walked
 *     once with CountHeader, and a failure there aborts with a
 *     cannot-read-directory message. On the other visible path nvols is
 *     set to VOL_VG_MAX_VOLS.
 *   - nvols summaries are obtained from calloc (asserted non-NULL) and a
 *     SalvageScanParams is pointed at them.
 *   - The headers are walked again with RecordHeader and UnlinkHeader;
 *     apart from the retry case a failure aborts.
 *   - salvinfo->nVolumes is copied from params and the array is passed to
 *     qsort.
 *
 * NOTE(review): in this listing the last argument of the second
 * VWalkVolumeHeaders call reads as a paragraph sign followed by ms. That
 * looks like an encoding corruption of the address-of operator applied to
 * params; restore it before building.
 */
1695 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1696 * the fileserver for VG information, or by scanning the /vicepX partition.
1698 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1699 * are salvaging, or 0 if this is a partition
1702 * @return operation status
1704 * @retval -1 we raced with a fileserver restart; checking out and locking
1705 * volumes must be retried
1708 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1710 afs_int32 nvols = 0;
1711 struct SalvageScanParams params;
1714 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1716 /* we successfully got the vol information from the fileserver; no
1717 * need to scan the partition */
1721 /* we need to retry volume checkout */
1725 if (!singleVolumeNumber) {
1726 /* Count how many volumes we have in /vicepX */
1727 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1730 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1735 nvols = VOL_VG_MAX_VOLS;
1738 salvinfo->volumeSummaryp = calloc(nvols, sizeof(struct VolumeSummary));
1739 osi_Assert(salvinfo->volumeSummaryp != NULL);
1741 params.singleVolumeNumber = singleVolumeNumber;
1742 params.vsp = salvinfo->volumeSummaryp;
1743 params.nVolumes = 0;
1744 params.totalVolumes = nvols;
1746 params.salvinfo = salvinfo;
1748 /* walk the partition directory of volume headers and record the info
1749 * about them; unlinking invalid headers */
1750 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1751 UnlinkHeader, ¶ms);
1753 /* we apparently need to retry checking-out/locking volumes */
1757 Abort("Failed to get volume header summary\n");
1759 salvinfo->nVolumes = params.nVolumes;
1761 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
/*
 * FindLinkHandle: search the special inodes of every volume in the group
 * for one whose u.special.type is VI_LINKTABLE and return its inodeNumber.
 * For volume i the scan covers the first isp[i].nSpecialInodes entries
 * starting at allInodes + isp[i].index.
 *
 * NOTE(review): the value returned when no link table inode is found is
 * not present in this listing; the caller in this file tests the result
 * with VALID_INO.
 */
1767 /* Find the link table. This should be associated with the RW volume or, if
1768 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
1771 FindLinkHandle(struct InodeSummary *isp, int nVols,
1772 struct ViceInodeInfo *allInodes)
1775 struct ViceInodeInfo *ip;
1777 for (i = 0; i < nVols; i++) {
1778 ip = allInodes + isp[i].index;
1779 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1780 if (ip[j].u.special.type == VI_LINKTABLE)
1781 return ip[j].inodeNumber;
/*
 * CreateLinkTable: make sure the volume group has a link table inode and
 * write a fresh version stamp into it.
 *
 *   - If the ino passed in is not VALID_INO, IH_CREATE is invoked for an
 *     INODESPECIAL / VI_LINKTABLE inode; if the inode is still not valid
 *     the unable-to-allocate message is issued.
 *   - salvinfo->VGLinkH is initialised for that inode and opened.
 *   - The file is truncated to sizeof(version) + sizeof(short) bytes and a
 *     versionStamp carrying LINKTABLEMAGIC / LINKTABLEVERSION is written
 *     at offset 0.
 *   - The descriptor is released with FDH_REALLYCLOSE and, when
 *     isp->volSummary is set, its header.linkTable is updated to ino.
 *   Failures to open, truncate or write go to Abort.
 *
 * NOTE(review): the Abort text that follows the FDH_PWRITE call says
 * truncate, the same wording as the FDH_TRUNC failure just before it; it
 * was probably meant to say write -- confirm before changing.
 */
1788 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1790 struct versionStamp version;
1793 if (!VALID_INO(ino))
1795 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
1796 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1797 if (!VALID_INO(ino))
1799 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1800 isp->RWvolumeId, errno);
1801 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1802 fdP = IH_OPEN(salvinfo->VGLinkH);
1804 Abort("Can't open link table for volume %u (error = %d)\n",
1805 isp->RWvolumeId, errno);
1807 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1808 Abort("Can't truncate link table for volume %u (error = %d)\n",
1809 isp->RWvolumeId, errno);
1811 version.magic = LINKTABLEMAGIC;
1812 version.version = LINKTABLEVERSION;
1814 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1816 Abort("Can't truncate link table for volume %u (error = %d)\n",
1817 isp->RWvolumeId, errno);
1819 FDH_REALLYCLOSE(fdP);
1821 /* If the volume summary exits (i.e., the V*.vol header file exists),
1822 * then set this inode there as well.
1824 if (isp->volSummary)
1825 isp->volSummary->header.linkTable = ino;
/*
 * Body of a thread start routine. The signature line is not part of this
 * listing; SalvageVolumeGroup hands a routine named nt_SVG plus a pointer
 * to its parms to pthread_create, which matches the cast here -- TODO
 * confirm the name. arg is treated as an SVGParms_t and its three fields
 * (salvinfo, inode summary pointer, count) are forwarded to
 * DoSalvageVolumeGroup.
 */
1834 SVGParms_t *parms = (SVGParms_t *) arg;
1835 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * SalvageVolumeGroup: entry point for salvaging one volume group.
 *
 * The visible code resets the per-group fields of salvinfo (VolumeChanged,
 * VGLinkH, VGLinkH_cnt, VolInfo), packs isp, nVols and salvinfo into parms,
 * and starts nt_SVG on a pthread created with the JOINABLE detach state,
 * then waits for it with pthread_join. Failures of pthread_attr_init,
 * pthread_attr_setdetachstate and pthread_create are logged.
 *
 * NOTE(review): the preprocessor lines that open the conditional closed by
 * the AFS_NT40_ENV endif, and whatever the other branch does, are not in
 * this listing -- confirm which statements are platform specific.
 */
1840 SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1843 pthread_attr_t tattr;
1847 /* Initialize per volume global variables, even if later code does so */
1848 salvinfo->VolumeChanged = 0;
1849 salvinfo->VGLinkH = NULL;
1850 salvinfo->VGLinkH_cnt = 0;
1851 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1853 parms.svgp_inodeSummaryp = isp;
1854 parms.svgp_count = nVols;
1855 parms.svgp_salvinfo = salvinfo;
1856 code = pthread_attr_init(&tattr);
1858 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1862 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1864 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1867 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1869 Log("Failed to create thread to salvage volume group %u\n",
1873 (void)pthread_join(tid, NULL);
1875 #endif /* AFS_NT40_ENV */
/*
 * DoSalvageVolumeGroup: salvage the nVols volumes whose inode summaries
 * start at isp.
 *
 * Visible flow:
 *   - haveRWvolume is set when the first summary is its own RW volume
 *     (volumeId == RWvolumeId) and has at least one special inode.
 *   - Unless ShowMounts is on with an RW volume present, QuickCheck is
 *     consulted when ForceSalvage is off.
 *   - With canfork set and debug off, Fork is called; the branch taken
 *     when it returns non-zero calls Wait.
 *   - The inode records of the whole group are read from salvinfo->inodeFd
 *     into one array obtained from malloc. allInodes is that array rebased
 *     by isp->index so that the index fields of the summaries can be
 *     applied to it directly.
 *   - AFS_NAMEI_ENV: the link table inode is looked up with FindLinkHandle
 *     and opened. When it is missing or cannot be opened a message is
 *     logged, and the visible non-Testing path calls CreateLinkTable,
 *     reopens the handle and gives every non-special inode a link count
 *     of 1 through namei_SetLinkCount.
 *   - Volumes are processed from the last summary down to salvageTo, each
 *     in two passes (check = 1, then check = 0) through
 *     SalvageVolumeHeaderFile and SalvageVnodes, with MaybeZapVolume
 *     called when either reports failure.
 *   - For every inode record, IH_DEC is called while linkCount is positive
 *     and IH_INC while it is negative; under AFS_NAMEI_ENV the link table
 *     inode is held back and adjusted after the loop.
 *   - SalvageVolume performs the directory checks and VGLinkH is released.
 *
 * NOTE(review): no NULL test on the malloc result is visible before the
 * array is filled by OS_READ; the listing skips lines around that point,
 * so confirm against the full file. The statements that update linkCount
 * inside the two while loops are likewise not shown here.
 */
1878 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1880 struct ViceInodeInfo *inodes, *allInodes, *ip;
1881 int i, totalInodes, size, salvageTo;
1885 int dec_VGLinkH = 0;
1887 FdHandle_t *fdP = NULL;
1889 salvinfo->VGLinkH_cnt = 0;
1890 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1891 && isp->nSpecialInodes > 0);
1892 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1893 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1896 if (ShowMounts && !haveRWvolume)
1898 if (canfork && !debug && Fork() != 0) {
1899 (void)Wait("Salvage volume group");
1902 for (i = 0, totalInodes = 0; i < nVols; i++)
1903 totalInodes += isp[i].nInodes;
1904 size = totalInodes * sizeof(struct ViceInodeInfo);
1905 inodes = (struct ViceInodeInfo *)malloc(size);
1906 allInodes = inodes - isp->index; /* this would the base of all the inodes
1907 * for the partition, if all the inodes
1908 * had been read into memory */
1910 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1912 osi_Assert(OS_READ(salvinfo->inodeFd, inodes, size) == size);
1914 /* Don't try to salvage a read write volume if there isn't one on this
1916 salvageTo = haveRWvolume ? 0 : 1;
1918 #ifdef AFS_NAMEI_ENV
1919 ino = FindLinkHandle(isp, nVols, allInodes);
1920 if (VALID_INO(ino)) {
1921 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1922 fdP = IH_OPEN(salvinfo->VGLinkH);
1924 if (!VALID_INO(ino) || fdP == NULL) {
1925 Log("%s link table for volume %u.\n",
1926 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1928 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1931 struct ViceInodeInfo *ip;
1932 CreateLinkTable(salvinfo, isp, ino);
1933 fdP = IH_OPEN(salvinfo->VGLinkH);
1934 /* Sync fake 1 link counts to the link table, now that it exists */
1936 for (i = 0; i < nVols; i++) {
1937 ip = allInodes + isp[i].index;
1938 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1939 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1946 FDH_REALLYCLOSE(fdP);
1948 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1951 /* Salvage in reverse order--read/write volume last; this way any
1952 * Inodes not referenced by the time we salvage the read/write volume
1953 * can be picked up by the read/write volume */
1954 /* ACTUALLY, that's not done right now--the inodes just vanish */
1955 for (i = nVols - 1; i >= salvageTo; i--) {
1957 struct InodeSummary *lisp = &isp[i];
1958 #ifdef AFS_NAMEI_ENV
1959 /* If only the RO is present on this partition, the link table
1960 * shows up as a RW volume special file. Need to make sure the
1961 * salvager doesn't try to salvage the non-existent RW.
1963 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1964 /* If this only special inode is the link table, continue */
1965 if (inodes->u.special.type == VI_LINKTABLE) {
1972 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1973 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1974 /* Check inodes twice. The second time do things seriously. This
1975 * way the whole RO volume can be deleted, below, if anything goes wrong */
1976 for (check = 1; check >= 0; check--) {
1978 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
1980 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
1981 if (rw && deleteMe) {
1982 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1983 * volume won't be called */
1989 if (rw && check == 1)
1991 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
1992 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
1998 /* Fix actual inode counts */
2001 Log("totalInodes %d\n",totalInodes);
2002 for (ip = inodes; totalInodes; ip++, totalInodes--) {
2003 static int TraceBadLinkCounts = 0;
2004 #ifdef AFS_NAMEI_ENV
2005 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
2006 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
2007 VGLinkH_p1 = ip->u.param[0];
2008 continue; /* Deal with this last. */
2011 if (ip->linkCount != 0 && TraceBadLinkCounts) {
2012 TraceBadLinkCounts--; /* Limit reports, per volume */
2013 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
2015 while (ip->linkCount > 0) {
2016 /* below used to assert, not break */
2018 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2019 Log("idec failed. inode %s errno %d\n",
2020 PrintInode(stmp, ip->inodeNumber), errno);
2026 while (ip->linkCount < 0) {
2027 /* these used to be asserts */
2029 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2030 Log("iinc failed. inode %s errno %d\n",
2031 PrintInode(stmp, ip->inodeNumber), errno);
2038 #ifdef AFS_NAMEI_ENV
2039 while (dec_VGLinkH > 0) {
2040 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2041 Log("idec failed on link table, errno = %d\n", errno);
2045 while (dec_VGLinkH < 0) {
2046 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2047 Log("iinc failed on link table, errno = %d\n", errno);
2054 /* Directory consistency checks on the rw volume */
2056 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2057 IH_RELEASE(salvinfo->VGLinkH);
2059 if (canfork && !debug) {
/*
 * QuickCheck: look at the volume info header of each volume in the group
 * without doing a full salvage. DoSalvageVolumeGroup calls it only when
 * ForceSalvage is off.
 *
 * Visible per-volume steps:
 *   - the first entry is special-cased when it has no special inodes and
 *     the group holds more than one volume (a phantom RW volume, per the
 *     inline comment);
 *   - an inode handle is set up for vs->header.volumeInfo and a
 *     VolumeDiskData is read from offset 0;
 *   - the header is accepted when the read returned the full size,
 *     stamp.magic is VOLUMEINFOMAGIC, dontSalvage equals DONT_SALVAGE and
 *     both needsSalvaged and destroyMe are 0;
 *   - for an accepted header with inUse set, inUse is cleared, inService is
 *     set to 1 and the header is written back with IH_IWRITE.
 *
 * NOTE(review): the return statements, and what happens when a header is
 * rejected or the write-back fails, are not present in this listing --
 * confirm against the full file.
 */
2066 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2068 /* Check headers BEFORE forking */
2072 for (i = 0; i < nVols; i++) {
2073 struct VolumeSummary *vs = isp[i].volSummary;
2074 VolumeDiskData volHeader;
2076 /* Don't salvage just because phantom rw volume is there... */
2077 /* (If a read-only volume exists, read/write inodes must also exist) */
2078 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2082 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2083 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2084 == sizeof(volHeader)
2085 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2086 && volHeader.dontSalvage == DONT_SALVAGE
2087 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2088 if (volHeader.inUse != 0) {
2089 volHeader.inUse = 0;
2090 volHeader.inService = 1;
2092 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2093 != sizeof(volHeader)) {
/*
 * SalvageVolumeHeaderFile: rebuild the in-memory volume header for one
 * volume from its special inodes and, when needed, write the header file.
 *
 * Visible flow:
 *   - goodspecial[] is zeroed and a skip array with one slot per special
 *     inode is allocated and zeroed; if that allocation fails a message is
 *     logged and the salvage continues without duplicate recovery.
 *   - init_inode_info ties the stuff[] table to tempHeader.
 *   - When a volume summary exists (and skip was allocated), its header is
 *     copied into tempHeader and every special inode whose number equals
 *     the one the header references for that type is recorded in
 *     goodspecial[].
 *   - tempHeader is then cleared and given the magic, version, id and
 *     parent values.
 *   - Adjacent special inodes of the same type are reported as duplicates;
 *     with a goodspecial entry the referenced inode is preferred,
 *     otherwise the cannot-determine message is logged.
 *   - Each special inode is classified: out-of-range types are reported as
 *     rubbish; for non-obsolete types a skipped inode is reported as an
 *     orphan (wording depends on orphans == ORPH_REMOVE), any other is
 *     stored through stuff[].inode and clears allinodesobsolete. Outside
 *     check mode linkCount is decremented for types other than
 *     VI_LINKTABLE.
 *   - salvinfo->VGLinkH_cnt is incremented, ClearROInUseBit is applied to
 *     a non-RW volume summary outside check mode, and SalvageHeader is run
 *     for every entry of stuff[] (the link table entry first receives
 *     VGLinkH->ih_ino when that is valid).
 *   - Without a volume summary one is created with calloc and writefunc
 *     becomes VCreateVolumeDiskHeader; with one, the two obsolete fields
 *     are zeroed, the header is compared with tempHeader and on the
 *     damaged path writefunc becomes VWriteVolumeDiskHeader.
 *   - tempHeader is copied into the summary, converted with
 *     VolumeHeaderToDisk and written through writefunc; finally
 *     volumeInfoHandle is initialised.
 *
 * NOTE(review): the calloc result for isp->volSummary is dereferenced on
 * the very next line without a NULL test. The strcpy into the 64-byte
 * headerName takes its length from isp->volSummary->fileName with no
 * visible bound -- presumably names follow VFORMAT, verify. Return
 * statements and several conditions are not present in this listing.
 */
2109 /* SalvageVolumeHeaderFile
2111 * Salvage the top level V*.vol header file. Make sure the special files
2112 * exist and that there are no duplicates.
2114 * Calls SalvageHeader for each possible type of volume special file.
2118 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2119 struct ViceInodeInfo *inodes, int RW,
2120 int check, int *deleteMe)
2123 struct ViceInodeInfo *ip;
2124 int allinodesobsolete = 1;
2125 struct VolumeDiskHeader diskHeader;
2126 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2128 struct VolumeHeader tempHeader;
2129 struct afs_inode_info stuff[MAXINODETYPE];
2131 /* keeps track of special inodes that are probably 'good'; they are
2132 * referenced in the vol header, and are included in the given inodes
2137 } goodspecial[MAXINODETYPE];
2142 memset(goodspecial, 0, sizeof(goodspecial));
2144 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2146 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2148 Log("cannot allocate memory for inode skip array when salvaging "
2149 "volume %lu; not performing duplicate special inode recovery\n",
2150 afs_printable_uint32_lu(isp->volumeId));
2151 /* still try to perform the salvage; the skip array only does anything
2152 * if we detect duplicate special inodes */
2155 init_inode_info(&tempHeader, stuff);
2158 * First, look at the special inodes and see if any are referenced by
2159 * the existing volume header. If we find duplicate special inodes, we
2160 * can use this information to use the referenced inode (it's more
2161 * likely to be the 'good' one), and throw away the duplicates.
2163 if (isp->volSummary && skip) {
2164 /* use tempHeader, so we can use the stuff[] array to easily index
2165 * into the isp->volSummary special inodes */
2166 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2168 for (i = 0; i < isp->nSpecialInodes; i++) {
2169 ip = &inodes[isp->index + i];
2170 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2171 /* will get taken care of in a later loop */
2174 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2175 goodspecial[ip->u.special.type-1].valid = 1;
2176 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2181 memset(&tempHeader, 0, sizeof(tempHeader));
2182 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2183 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2184 tempHeader.id = isp->volumeId;
2185 tempHeader.parent = isp->RWvolumeId;
2187 /* Check for duplicates (inodes are sorted by type field) */
2188 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2189 ip = &inodes[isp->index + i];
2190 if (ip->u.special.type == (ip + 1)->u.special.type) {
2191 afs_ino_str_t stmp1, stmp2;
2193 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2194 /* Will be caught in the loop below */
2198 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2199 ip->u.special.type, isp->volumeId,
2200 PrintInode(stmp1, ip->inodeNumber),
2201 PrintInode(stmp2, (ip+1)->inodeNumber));
2203 if (skip && goodspecial[ip->u.special.type-1].valid) {
2204 Inode gi = goodspecial[ip->u.special.type-1].inode;
2207 Log("using special inode referenced by vol header (%s)\n",
2208 PrintInode(stmp1, gi));
2211 /* the volume header references some special inode of
2212 * this type in the inodes array; are we it? */
2213 if (ip->inodeNumber != gi) {
2215 } else if ((ip+1)->inodeNumber != gi) {
2216 /* in case this is the last iteration; we need to
2217 * make sure we check ip+1, too */
2222 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2230 for (i = 0; i < isp->nSpecialInodes; i++) {
2232 ip = &inodes[isp->index + i];
2233 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2235 Log("Rubbish header inode %s of type %d\n",
2236 PrintInode(stmp, ip->inodeNumber),
2237 ip->u.special.type);
2243 Log("Rubbish header inode %s of type %d; deleted\n",
2244 PrintInode(stmp, ip->inodeNumber),
2245 ip->u.special.type);
2246 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2247 if (skip && skip[i]) {
2248 if (orphans == ORPH_REMOVE) {
2249 Log("Removing orphan special inode %s of type %d\n",
2250 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2253 Log("Ignoring orphan special inode %s of type %d\n",
2254 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2255 /* fall through to the ip->linkCount--; line below */
2258 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2259 allinodesobsolete = 0;
2261 if (!check && ip->u.special.type != VI_LINKTABLE)
2262 ip->linkCount--; /* Keep the inode around */
2270 if (allinodesobsolete) {
2277 salvinfo->VGLinkH_cnt++; /* one for every header. */
2279 if (!RW && !check && isp->volSummary) {
2280 ClearROInUseBit(isp->volSummary);
2284 for (i = 0; i < MAXINODETYPE; i++) {
2285 if (stuff[i].inodeType == VI_LINKTABLE) {
2286 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2287 * And we may have recreated the link table earlier, so set the
2288 * RW header as well.
2290 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2291 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2295 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2299 if (isp->volSummary == NULL) {
2301 char headerName[64];
2302 snprintf(headerName, sizeof headerName, VFORMAT,
2303 afs_printable_uint32_lu(isp->volumeId));
2304 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2305 salvinfo->fileSysPath, headerName);
2307 Log("No header file for volume %u\n", isp->volumeId);
2311 Log("No header file for volume %u; %screating %s\n",
2312 isp->volumeId, (Testing ? "it would have been " : ""),
2314 isp->volSummary = calloc(1, sizeof(struct VolumeSummary));
2315 isp->volSummary->fileName = ToString(headerName);
2317 writefunc = VCreateVolumeDiskHeader;
2320 char headerName[64];
2321 /* hack: these two fields are obsolete... */
2322 isp->volSummary->header.volumeAcl = 0;
2323 isp->volSummary->header.volumeMountTable = 0;
2326 (&isp->volSummary->header, &tempHeader,
2327 sizeof(struct VolumeHeader))) {
2328 /* We often remove the name before calling us, so we make a fake one up */
2329 if (isp->volSummary->fileName) {
2330 strcpy(headerName, isp->volSummary->fileName);
2332 snprintf(headerName, sizeof headerName, VFORMAT,
2333 afs_printable_uint32_lu(isp->volumeId));
2334 isp->volSummary->fileName = ToString(headerName);
2336 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2337 salvinfo->fileSysPath, headerName);
2339 Log("Header file %s is damaged or no longer valid%s\n", path,
2340 (check ? "" : "; repairing"));
2344 writefunc = VWriteVolumeDiskHeader;
2348 memcpy(&isp->volSummary->header, &tempHeader,
2349 sizeof(struct VolumeHeader));
2352 Log("It would have written a new header file for volume %u\n",
2356 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2357 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2359 Log("Error %ld writing volume header file for volume %lu\n",
2360 afs_printable_int32_ld(code),
2361 afs_printable_uint32_lu(diskHeader.id));
2366 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2367 isp->volSummary->header.volumeInfo);
// Check one special inode of a volume header and, unless only checking,
// recreate it when it is missing or its contents are corrupted.
//
// salvinfo - salvage job state; the fileSys* fields locate the partition and
//            VolInfo receives the volume info when sp is VI_VOLINFO.
// sp       - describes the special inode: inodeType, description, stamp,
//            size, and a pointer (sp->inode) to its inode number.
// isp      - summary of the volume being salvaged (volumeId, RWvolumeId,
//            maxUniquifier are read here).
// check    - nonzero when the caller only wants problems reported.
// deleteMe - NOTE(review): presumably set when the volume info carries the
//            DESTROY_ME mark; the assignment is not visible here -- confirm.
//
// NOTE(review): the return statements are not visible in this excerpt; a
// caller in this file compares the result against -1.
2372 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2373 struct InodeSummary *isp, int check, int *deleteMe)
2376 VolumeDiskData volumeInfo;
2377 struct versionStamp fileHeader;
2386 #ifndef AFS_NAMEI_ENV
2387 if (sp->inodeType == VI_LINKTABLE)
// A zero inode number means the header has no inode of this type yet.
2390 if (*(sp->inode) == 0) {
2392 Log("Missing inode in volume header (%s)\n", sp->description);
2396 Log("Missing inode in volume header (%s); %s\n", sp->description,
2397 (Testing ? "it would have recreated it" : "recreating"));
2400 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2401 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2402 if (!VALID_INO(*(sp->inode)))
2404 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2405 sp->description, errno);
// Open the special inode so that its contents can be validated.
2410 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2411 fdP = IH_OPEN(specH);
2412 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2413 /* bail out early and destroy the volume */
2415 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2422 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2423 sp->description, errno);
2426 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2427 || header.fileHeader.magic != sp->stamp.magic)) {
2429 Log("Part of the header (%s) is corrupted\n", sp->description);
2430 FDH_REALLYCLOSE(fdP);
2434 Log("Part of the header (%s) is corrupted; recreating\n",
2437 /* header can be garbage; make sure we don't read garbage data from
2439 memset(&header, 0, sizeof(header));
2441 if (sp->inodeType == VI_VOLINFO
2442 && header.volumeInfo.destroyMe == DESTROY_ME) {
2445 FDH_REALLYCLOSE(fdP);
// Rewrite the file contents from scratch; this is never done in Testing mode.
2449 if (recreate && !Testing) {
2452 ("Internal error: recreating volume header (%s) in check mode\n",
2454 nBytes = FDH_TRUNC(fdP, 0);
2456 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2457 sp->description, errno);
2459 /* The following code should be moved into vutil.c */
// The volume info file gets a freshly zeroed VolumeDiskData with a
// placeholder name, marked neither in service nor blessed.
2460 if (sp->inodeType == VI_VOLINFO) {
2462 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2463 header.volumeInfo.stamp = sp->stamp;
2464 header.volumeInfo.id = isp->volumeId;
2465 header.volumeInfo.parentId = isp->RWvolumeId;
2466 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2467 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2468 isp->volumeId, isp->volumeId);
2469 header.volumeInfo.inService = 0;
2470 header.volumeInfo.blessed = 0;
2471 /* The + 1000 is a hack in case there are any files out in venus caches */
2472 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2473 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2474 header.volumeInfo.needsCallback = 0;
2475 gettimeofday(&tp, 0);
2476 header.volumeInfo.creationDate = tp.tv_sec;
2478 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2479 sizeof(header.volumeInfo), 0);
2480 if (nBytes != sizeof(header.volumeInfo)) {
2483 ("Unable to write volume header file (%s) (errno = %d)\n",
2484 sp->description, errno);
2485 Abort("Unable to write entire volume header file (%s)\n",
2489 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2490 if (nBytes != sizeof(sp->stamp)) {
2493 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2494 sp->description, errno);
2496 ("Unable to write entire version stamp in volume header file (%s)\n",
2501 FDH_REALLYCLOSE(fdP);
// Keep the volume info for the rest of the salvage and log when the volume
// was last updated, or created if it has no update date.
2503 if (sp->inodeType == VI_VOLINFO) {
2504 salvinfo->VolInfo = header.volumeInfo;
2508 if (salvinfo->VolInfo.updateDate) {
2509 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2511 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2512 salvinfo->VolInfo.id,
2513 (Testing ? "it would have been " : ""), update);
2515 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2517 Log("%s (%u) not updated (created %s)\n",
2518 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
2528 SalvageVnodes(struct SalvInfo *salvinfo,
2529 struct InodeSummary *rwIsp,
2530 struct InodeSummary *thisIsp,
2531 struct ViceInodeInfo *inodes, int check)
2533 int ilarge, ismall, ioffset, RW, nInodes;
2534 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2537 RW = (rwIsp == thisIsp);
2538 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2540 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2541 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2542 if (check && ismall == -1)
2545 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2546 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2547 return (ilarge == 0 && ismall == 0 ? 0 : -1);
// Salvage one vnode index file of a volume by reconciling every allocated
// vnode in it with the inode records passed in.
//
// salvinfo   - salvage job state; fileSysDevice is used to open the index
//              and VolumeChanged is set when a vnode is rewritten.
// ino        - inode number of the vnode index file.
// class      - vnode class; selects the VnodeClassInfo entry (disk size, magic).
// RW         - NOTE(review): appears to select the read-write matching
//              strategy described in the comments below -- confirm.
// ip/nInodes - inode records and their count; they are consumed in step
//              with increasing vnode numbers.
// volSummary - supplies header.parent, used to open the index.
// check      - nonzero for report-only mode; the function asserts that no
//              vnode is modified in that mode.
2551 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2552 struct ViceInodeInfo *ip, int nInodes,
2553 struct VolumeSummary *volSummary, int check)
2555 char buf[SIZEOF_LARGEDISKVNODE];
2556 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2558 StreamHandle_t *file;
2559 struct VnodeClassInfo *vcp;
2561 afs_sfsize_t nVnodes;
2562 afs_fsize_t vnodeLength;
2564 afs_ino_str_t stmp1, stmp2;
2568 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2569 fdP = IH_OPEN(handle);
2570 osi_Assert(fdP != NULL);
2571 file = FDH_FDOPEN(fdP, "r+");
2572 osi_Assert(file != NULL);
2573 vcp = &VnodeClassInfo[class];
2574 size = OS_SIZE(fdP->fd_fd);
2575 osi_Assert(size != -1);
// The first diskSize-sized slot of the index is skipped (see the seek below),
// so it is not counted as a vnode.
2576 nVnodes = (size / vcp->diskSize) - 1;
2578 osi_Assert((nVnodes + 1) * vcp->diskSize == size);
2579 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
2583 for (vnodeIndex = 0;
2584 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2585 nVnodes--, vnodeIndex++) {
2586 if (vnode->type != vNull) {
2587 int vnodeChanged = 0;
2588 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2589 if (VNDISK_GET_INO(vnode) == 0) {
2591 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2592 memset(vnode, 0, vcp->diskSize);
2596 if (vcp->magic != vnode->vnodeMagic) {
2597 /* bad magic #, probably partially created vnode */
2599 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2600 vnodeNumber, afs_printable_uint32_lu(vnode->vnodeMagic),
2601 afs_printable_uint32_lu(vcp->magic));
2602 memset(vnode, 0, vcp->diskSize);
2606 Log("Partially allocated vnode %d deleted.\n",
2608 memset(vnode, 0, vcp->diskSize);
2612 /* ****** Should do a bit more salvage here: e.g. make sure
2613 * vnode type matches what it should be given the index */
// Walk past inode records whose vnode number is below the current vnode.
2614 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2615 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2616 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2617 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2624 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2625 /* The following doesn't work, because the version number
2626 * is not maintained correctly by the file server */
2627 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2628 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2630 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2636 /* For RW volume, look for vnode with matching inode number;
2637 * if no such match, take the first determined by our sort
2639 struct ViceInodeInfo *lip = ip;
2640 int lnInodes = nInodes;
2642 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2643 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2652 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2653 /* "Matching" inode */
// vu/vd are the uniquifier and data version stored in the vnode; iu/id are
// the values recorded with the inode.
2657 vu = vnode->uniquifier;
2658 iu = ip->u.vnode.vnodeUniquifier;
2659 vd = vnode->dataVersion;
2660 id = ip->u.vnode.inodeDataVersion;
2662 * Because of the possibility of the uniquifier overflows (> 4M)
2663 * we compare them modulo the low 22-bits; we shouldn't worry
2664 * about mismatching since they shouldn't to many old
2665 * uniquifiers of the same vnode...
2667 if (IUnique(vu) != IUnique(iu)) {
2669 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2672 vnode->uniquifier = iu;
2673 #ifdef AFS_3DISPARES
2674 vnode->dataVersion = (id >= vd ?
2677 1887437 ? vd : id) :
2680 1887437 ? id : vd));
2682 #if defined(AFS_SGI_EXMAG)
2683 vnode->dataVersion = (id >= vd ?
2686 15099494 ? vd : id) :
2689 15099494 ? id : vd));
2691 vnode->dataVersion = (id > vd ? id : vd);
2692 #endif /* AFS_SGI_EXMAG */
2693 #endif /* AFS_3DISPARES */
2696 /* don't bother checking for vd > id any more, since
2697 * partial file transfers always result in this state,
2698 * and you can't do much else anyway (you've already
2699 * found the best data you can) */
2700 #ifdef AFS_3DISPARES
2701 if (!vnodeIsDirectory(vnodeNumber)
2702 && ((vd < id && (id - vd) < 1887437)
2703 || ((vd > id && (vd - id) > 1887437)))) {
2705 #if defined(AFS_SGI_EXMAG)
2706 if (!vnodeIsDirectory(vnodeNumber)
2707 && ((vd < id && (id - vd) < 15099494)
2708 || ((vd > id && (vd - id) > 15099494)))) {
2710 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2711 #endif /* AFS_SGI_EXMAG */
2714 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2715 vnode->dataVersion = id;
// The vnode must reference the inode record selected above.
2720 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2723 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2725 VNDISK_SET_INO(vnode, ip->inodeNumber);
2730 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2732 VNDISK_SET_INO(vnode, ip->inodeNumber);
// The length recorded in the vnode must equal the byte count of the inode.
2735 VNDISK_GET_LEN(vnodeLength, vnode);
2736 if (ip->byteCount != vnodeLength) {
2739 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2744 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2745 VNDISK_SET_LEN(vnode, ip->byteCount);
2749 ip->linkCount--; /* Keep the inode around */
2752 } else { /* no matching inode */
2754 if (VNDISK_GET_INO(vnode) != 0
2755 || vnode->type == vDirectory) {
2756 /* No matching inode--get rid of the vnode */
2758 if (VNDISK_GET_INO(vnode)) {
2760 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2764 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2769 if (VNDISK_GET_INO(vnode)) {
2771 time_t serverModifyTime = vnode->serverModifyTime;
2772 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2776 time_t serverModifyTime = vnode->serverModifyTime;
2777 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2780 memset(vnode, 0, vcp->diskSize);
2783 /* Should not reach here because we checked for
2784 * (inodeNumber == 0) above. And where we zero the vnode,
2785 * we also goto vnodeDone.
2789 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2793 } /* VNDISK_GET_INO(vnode) != 0 */
// Check mode must never modify a vnode; changed vnodes are written back to
// the index unless Testing is set.
2795 osi_Assert(!(vnodeChanged && check));
2796 if (vnodeChanged && !Testing) {
2797 osi_Assert(IH_IWRITE
2798 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2799 (char *)vnode, vcp->diskSize)
2801 salvinfo->VolumeChanged = 1; /* For break call back */
2812 struct VnodeEssence *
2813 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2816 struct VnodeInfo *vip;
2819 class = vnodeIdToClass(vnodeNumber);
2820 vip = &salvinfo->vnodeInfo[class];
2821 offset = vnodeIdToBitNumber(vnodeNumber);
2822 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
// Give the directory described by dir its own copy of the directory data
// before it is modified: the directory vnode is read from the large vnode
// index, a new inode is created, the old inode is copied into it with
// CopyInode, and the vnode is written back pointing at the new inode.
// Nothing is done when dir->copied is already set or Testing is on.
//
// salvinfo - salvage job state (vnodeInfo[vLarge].handle, fileSysDevice,
//            fileSysPath and VolumeChanged are used).
// dir      - summary of the directory being changed; its dirHandle is
//            re-pointed at the new inode.
2826 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2828 /* Copy the directory unconditionally if we are going to change it:
2829 * not just if was cloned.
2831 struct VnodeDiskObject vnode;
2832 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2833 Inode oldinode, newinode;
2836 if (dir->copied || Testing)
2838 DFlush(); /* Well justified paranoia... */
2841 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2842 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2844 osi_Assert(code == sizeof(vnode));
2845 oldinode = VNDISK_GET_INO(&vnode);
2846 /* Increment the version number by a whole lot to avoid problems with
2847 * clients that were promised new version numbers--but the file server
2848 * crashed before the versions were written to disk.
2851 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2852 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2854 osi_Assert(VALID_INO(newinode));
2855 osi_Assert(CopyInode(salvinfo->fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2857 VNDISK_SET_INO(&vnode, newinode);
2859 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2860 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2862 osi_Assert(code == sizeof(vnode));
// From here on the directory handle refers to the new inode.
2864 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2865 salvinfo->fileSysDevice, newinode,
2866 &salvinfo->VolumeChanged);
2867 /* Don't delete the original inode right away, because the directory is
2868 * still being scanned.
2874 * This function should either successfully create a new dir, or give up
2875 * and leave things the way they were. In particular, if it fails to write
2876 * the new dir properly, it should return w/o changing the reference to the
// Rebuild the directory described by dir into a newly created inode with
// DirSalvage, verify the result with DirOK, then record the new inode and
// length in the directory vnode, drop the old inode and make dir->dirHandle
// refer to the rebuilt directory.
//
// salvinfo - salvage job state (vnodeInfo[vLarge].handle, fileSysDevice,
//            fileSysPath and VolumeChanged are used).
// dir      - summary of the directory to rebuild; dirHandle is replaced on
//            success.
2880 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2882 struct VnodeDiskObject vnode;
2883 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2884 Inode oldinode, newinode;
2889 afs_int32 parentUnique = 1;
2890 struct VnodeEssence *vnodeEssence;
2895 Log("Salvaging directory %u...\n", dir->vnodeNumber);
// Read the directory vnode from the large vnode index.
2897 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2898 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2900 osi_Assert(lcode == sizeof(vnode));
2901 oldinode = VNDISK_GET_INO(&vnode);
2902 /* Increment the version number by a whole lot to avoid problems with
2903 * clients that were promised new version numbers--but the file server
2904 * crashed before the versions were written to disk.
2907 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2908 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2910 osi_Assert(VALID_INO(newinode));
2911 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2912 &salvinfo->VolumeChanged);
2914 /* Assign . and .. vnode numbers from dir and vnode.parent.
2915 * The uniquifier for . is in the vnode.
2916 * The uniquifier for .. might be set to a bogus value of 1 and
2917 * the salvager will later clean it up.
2919 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2920 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2923 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2925 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2930 /* didn't really build the new directory properly, let's just give up. */
2931 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2932 Log("Directory salvage returned code %d, continuing.\n", code);
// NOTE(review): the message below has no trailing newline, unlike the other
// Log calls in this function.
2934 Log("also failed to decrement link count on new inode");
2938 Log("Checking the results of the directory salvage...\n");
2939 if (!DirOK(&newdir)) {
2940 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2941 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2942 osi_Assert(code == 0);
// The rebuilt directory passed DirOK: record its inode and length in the
// vnode and write the vnode back to the index.
2946 VNDISK_SET_INO(&vnode, newinode);
2947 length = Length(&newdir);
2948 VNDISK_SET_LEN(&vnode, length);
2950 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2951 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2953 osi_Assert(lcode == sizeof(vnode));
2956 nt_sync(salvinfo->fileSysDevice);
2958 sync(); /* this is slow, but hopefully rarely called. We don't have
2959 * an open FD on the file itself to fsync.
2963 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
2965 /* make sure old directory file is really closed */
2966 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2967 FDH_REALLYCLOSE(fdP);
// Drop the reference on the old directory inode and adopt the new handle.
2969 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2970 osi_Assert(code == 0);
2971 dir->dirHandle = newdir;
/**
 * Bundle of arguments handed to JudgeEntry through its opaque rock pointer.
 */
struct judgeEntry_params {
    struct DirSummary *dir;     /**< the directory whose entries are being judged */
    struct SalvInfo *salvinfo;  /**< state of the salvage job in progress */
};
// Judge a single directory entry during enumeration of a directory (this
// function is handed to EnumerateDir by SalvageDir). Invalid entries are
// logged and, unless Testing, removed or repaired; valid entries let the
// directory claim the vnode they name.
//
// arock       - struct judgeEntry_params carrying the directory summary and
//               the salvage job state.
// name        - name of the entry being judged.
// vnodeNumber - vnode number stored in the entry.
// unique      - uniquifier stored in the entry.
//
// Returns 0 on the return path visible here.
// NOTE(review): other return statements are not visible in this excerpt.
2983 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
2986 struct judgeEntry_params *params = arock;
2987 struct DirSummary *dir = params->dir;
2988 struct SalvInfo *salvinfo = params->salvinfo;
2989 struct VnodeEssence *vnodeEssence;
2990 afs_int32 dirOrphaned, todelete;
2992 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
// An entry naming a vnode number outside the known vnode tables is invalid.
2994 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
2995 if (vnodeEssence == NULL) {
2997 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
3000 CopyOnWrite(salvinfo, dir);
3001 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3006 #ifndef AFS_NAMEI_ENV
3007 /* On AIX machines, don't allow entries to point to inode 0. That is a special
3008 * mount inode for the partition. If this inode were deleted, it would crash
3011 if (vnodeEssence->InodeNumber == 0) {
3012 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
3014 CopyOnWrite(salvinfo, dir);
3015 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3022 if (!(vnodeNumber & 1) && !Showmode
3023 && !(vnodeEssence->count || vnodeEssence->unique
3024 || vnodeEssence->modeBits)) {
3025 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
3026 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3027 vnodeNumber, unique,
3028 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3032 CopyOnWrite(salvinfo, dir);
3033 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3039 /* Check if the Uniquifiers match. If not, change the directory entry
3040 * so its unique matches the vnode unique. Delete if the unique is zero
3041 * or if the directory is orphaned.
3043 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3044 if (!vnodeEssence->unique
3045 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3046 /* This is an orphaned directory. Don't delete the . or ..
3047 * entry. Otherwise, it will get created in the next
3048 * salvage and deleted again here. So Just skip it.
3053 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3056 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3060 fid.Vnode = vnodeNumber;
3061 fid.Unique = vnodeEssence->unique;
3062 CopyOnWrite(salvinfo, dir);
3063 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3065 osi_Assert(Create(&dir->dirHandle, name, &fid) == 0);
3068 return 0; /* no need to continue */
// The dot entry must name this directory itself (same vnode and unique).
3071 if (strcmp(name, ".") == 0) {
3072 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3075 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3077 CopyOnWrite(salvinfo, dir);
3078 osi_Assert(Delete(&dir->dirHandle, ".") == 0);
3079 fid.Vnode = dir->vnodeNumber;
3080 fid.Unique = dir->unique;
3081 osi_Assert(Create(&dir->dirHandle, ".", &fid) == 0);
3084 vnodeNumber = fid.Vnode; /* Get the new Essence */
3085 unique = fid.Unique;
3086 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
// The dot-dot entry must name the expected parent, held in pa.
3089 } else if (strcmp(name, "..") == 0) {
3092 struct VnodeEssence *dotdot;
3093 pa.Vnode = dir->parent;
3094 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3095 osi_Assert(dotdot != NULL); /* XXX Should not be assert */
3096 pa.Unique = dotdot->unique;
3098 pa.Vnode = dir->vnodeNumber;
3099 pa.Unique = dir->unique;
3101 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3103 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3105 CopyOnWrite(salvinfo, dir);
3106 osi_Assert(Delete(&dir->dirHandle, "..") == 0);
3107 osi_Assert(Create(&dir->dirHandle, "..", &pa) == 0);
3110 vnodeNumber = pa.Vnode; /* Get the new Essence */
3112 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3114 dir->haveDotDot = 1;
3115 } else if (strncmp(name, ".__afs", 6) == 0) {
3117 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3120 CopyOnWrite(salvinfo, dir);
3121 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3123 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3124 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3127 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3128 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
// A symlink without any execute bit is treated as a mount point: its
// contents are read and validated, and an invalid one gets the execute bits
// set so that it becomes an ordinary symbolic link.
3129 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3130 && !(vnodeEssence->modeBits & 0111)) {
3131 afs_sfsize_t nBytes;
3137 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3138 vnodeEssence->InodeNumber);
3141 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3145 size = FDH_SIZE(fdP);
3147 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3148 FDH_REALLYCLOSE(fdP);
3155 nBytes = FDH_PREAD(fdP, buf, size, 0);
3156 if (nBytes == size) {
3158 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3159 Log("Volume %u (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3160 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3161 Testing ? "would convert" : "converted");
3162 vnodeEssence->modeBits |= 0111;
3163 vnodeEssence->changed = 1;
3164 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3165 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3166 dir->name ? dir->name : "??", name, buf);
// NOTE(review): the message below misspells could as cound.
3168 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3169 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3171 FDH_REALLYCLOSE(fdP);
3174 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3175 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
// Remember the entry name for a large-class vnode the first time an entry
// for it is seen.
3176 if (vnodeIdToClass(vnodeNumber) == vLarge
3177 && vnodeEssence->name == NULL) {
3179 if ((n = (char *)malloc(strlen(name) + 1)))
3181 vnodeEssence->name = n;
3184 /* The directory entry points to the vnode. Check to see if the
3185 * vnode points back to the directory. If not, then let the
3186 * directory claim it (else it might end up orphaned). Vnodes
3187 * already claimed by another directory are deleted from this
3188 * directory: hardlinks to the same vnode are not allowed
3189 * from different directories.
3191 if (vnodeEssence->parent != dir->vnodeNumber) {
3192 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3193 /* Vnode does not point back to this directory.
3194 * Orphaned dirs cannot claim a file (it may belong to
3195 * another non-orphaned dir).
3198 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3200 vnodeEssence->parent = dir->vnodeNumber;
3201 vnodeEssence->changed = 1;
3203 /* Vnode was claimed by another directory */
3206 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3207 } else if (vnodeNumber == 1) {
3208 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3210 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3214 CopyOnWrite(salvinfo, dir);
3215 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3220 /* This directory claims the vnode */
3221 vnodeEssence->claimed = 1;
3223 vnodeEssence->count--;
3228 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3229 VnodeClass class, Inode ino, Unique * maxu)
3231 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3232 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3233 char buf[SIZEOF_LARGEDISKVNODE];
3234 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3236 StreamHandle_t *file;
3241 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3242 fdP = IH_OPEN(vip->handle);
3243 osi_Assert(fdP != NULL);
3244 file = FDH_FDOPEN(fdP, "r+");
3245 osi_Assert(file != NULL);
3246 size = OS_SIZE(fdP->fd_fd);
3247 osi_Assert(size != -1);
3248 vip->nVnodes = (size / vcp->diskSize) - 1;
3249 if (vip->nVnodes > 0) {
3250 osi_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3251 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
3252 osi_Assert((vip->vnodes = (struct VnodeEssence *)
3253 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3254 if (class == vLarge) {
3255 osi_Assert((vip->inodes = (Inode *)
3256 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3265 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3266 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3267 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3268 nVnodes--, vnodeIndex++) {
3269 if (vnode->type != vNull) {
3270 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3271 afs_fsize_t vnodeLength;
3272 vip->nAllocatedVnodes++;
3273 vep->count = vnode->linkCount;
3274 VNDISK_GET_LEN(vnodeLength, vnode);
3275 vep->blockCount = nBlocks(vnodeLength);
3276 vip->volumeBlockCount += vep->blockCount;
3277 vep->parent = vnode->parent;
3278 vep->unique = vnode->uniquifier;
3279 if (*maxu < vnode->uniquifier)
3280 *maxu = vnode->uniquifier;
3281 vep->modeBits = vnode->modeBits;
3282 vep->InodeNumber = VNDISK_GET_INO(vnode);
3283 vep->type = vnode->type;
3284 vep->author = vnode->author;
3285 vep->owner = vnode->owner;
3286 vep->group = vnode->group;
3287 if (vnode->type == vDirectory) {
3288 if (class != vLarge) {
3289 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3290 vip->nAllocatedVnodes--;
3291 memset(vnode, 0, sizeof(vnode));
3292 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3293 vnodeIndexOffset(vcp, vnodeNumber),
3294 (char *)&vnode, sizeof(vnode));
3295 salvinfo->VolumeChanged = 1;
3297 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
3306 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3309 struct VnodeEssence *parentvp;
3315 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3316 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3317 strcat(path, OS_DIRSEP);
3318 strcat(path, vp->name);
3324 /* To determine if a vnode is orhpaned or not, the vnode and all its parent
3325 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
3328 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3330 struct VnodeEssence *vep;
3333 return (1); /* Vnode zero does not exist */
3335 return (0); /* The root dir vnode is always claimed */
3336 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3337 if (!vep || !vep->claimed)
3338 return (1); /* Vnode is not claimed - it is orphaned */
3340 return (IsVnodeOrphaned(salvinfo, vep->parent));
// Salvage the directory held in slot i of dirVnodeInfo. Each directory is
// handled once (tracked with the salvaged flag). The parent directory is
// salvaged first, then the directory is checked with DirOK, rebuilt with
// CopyAndSalvage when it is bad, and finally every entry is judged by
// JudgeEntry through EnumerateDir.
//
// salvinfo     - salvage job state.
// name         - passed unchanged to the recursive call for the parent.
// rwVid        - volume id used when releasing the old directory inode.
// dirVnodeInfo - vnode table holding the directory vnodes and their inodes.
// alinkH       - link handle stored in the directory summary (ds_linkH).
// i            - bit number of the directory within dirVnodeInfo.
// rootdir      - receives a copy of the directory summary when the
//                directory is vnode 1 with unique 1.
// rootdirfound - NOTE(review): presumably set together with rootdir; the
//                assignment is not visible here -- confirm.
3344 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3345 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3346 struct DirSummary *rootdir, int *rootdirfound)
3348 static struct DirSummary dir;
3349 static struct DirHandle dirHandle;
3350 struct VnodeEssence *parent;
3351 static char path[MAXPATHLEN];
3354 if (dirVnodeInfo->vnodes[i].salvaged)
3355 return; /* already salvaged */
3358 dirVnodeInfo->vnodes[i].salvaged = 1;
3360 if (dirVnodeInfo->inodes[i] == 0)
3361 return; /* Not allocated to a directory */
// Vnode 1 must not record a parent; any other directory has its parent
// salvaged before itself.
3363 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3364 if (dirVnodeInfo->vnodes[i].parent) {
3365 Log("Bad parent, vnode 1; %s...\n",
3366 (Testing ? "skipping" : "salvaging"));
3367 dirVnodeInfo->vnodes[i].parent = 0;
3368 dirVnodeInfo->vnodes[i].changed = 1;
3371 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3372 if (parent && parent->salvaged == 0)
3373 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3374 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3375 rootdir, rootdirfound);
3378 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3379 dir.unique = dirVnodeInfo->vnodes[i].unique;
3382 dir.parent = dirVnodeInfo->vnodes[i].parent;
3383 dir.haveDot = dir.haveDotDot = 0;
3384 dir.ds_linkH = alinkH;
3385 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3386 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
// With RebuildDirs set (and not Testing) the directory is treated as bad
// without being checked.
3388 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3391 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3392 (Testing ? "skipping" : "salvaging"));
3395 CopyAndSalvage(salvinfo, &dir);
3397 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3400 dirHandle = dir.dirHandle;
3403 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3404 &dirVnodeInfo->vnodes[i], path);
3407 /* If enumeration failed for random reasons, we will probably delete
3408 * too much stuff, so we guard against this instead.
3410 struct judgeEntry_params judge_params;
3411 judge_params.salvinfo = salvinfo;
3412 judge_params.dir = &dir;
3414 osi_Assert(EnumerateDir(&dirHandle, JudgeEntry, &judge_params) == 0);
3417 /* Delete the old directory if it was copied in order to salvage.
3418 * CopyOnWrite has written the new inode # to the disk, but we still
3419 * have the old one in our local structure here. Thus, we idec the
3423 if (dir.copied && !Testing) {
3424 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3425 osi_Assert(code == 0);
3426 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3429 /* Remember rootdir DirSummary _after_ it has been judged */
3430 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3431 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3439 * Get a new FID that can be used to create a new file.
3441 * @param[in] volHeader vol header for the volume
3442 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3443 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3444 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3445 * updated to the new max unique if we create a new
// GetNewFID: choose a vnode number and a uniquifier for a vnode that is about
// to be created, storing them in afid->Vnode and afid->Unique.
3449 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3450 VnodeClass class, AFSFid *afid, Unique *maxunique)
// Scan the in-memory vnode table of this class for a slot whose type is vNull.
// Presumably the loop stops at the first such slot; that line is not visible
// in this excerpt.
3453 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3454 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
// i equal to nVnodes means no slot was selected by the scan above.
3458 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3459 /* no free vnodes; make a new one */
3460 salvinfo->vnodeInfo[class].nVnodes++;
// NOTE(review): the realloc result overwrites the old pointer and is indexed
// right afterwards; no NULL check is visible here. Only the type field of the
// new entry is set in the visible lines.
3461 salvinfo->vnodeInfo[class].vnodes =
3462 realloc(salvinfo->vnodeInfo[class].vnodes,
3463 sizeof(struct VnodeEssence) * (i+1));
3465 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
// The vnode number is derived from the chosen table index and the class.
3468 afid->Vnode = bitNumberToVnodeNumber(i, class);
// Pick the uniquifier: if the header value is not above *maxunique, use a
// value 2000 past *maxunique + 1; otherwise take the header value and
// post-increment the header.
3470 if (volHeader->uniquifier < (*maxunique + 1)) {
3471 /* header uniq is bad; it will get bumped by 2000 later */
3472 afid->Unique = *maxunique + 1 + 2000;
3475 /* header uniq seems okay; just use that */
3476 afid->Unique = *maxunique = volHeader->uniquifier++;
3481 * Create a vnode for a README file explaining not to use a recreated-root vol.
3483 * @param[in] volHeader vol header for the volume
3484 * @param[in] alinkH ihandle for i/o for the volume
3485 * @param[in] vid volume id
3486 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3487 * updated to the new max unique if we create a new
3489 * @param[out] afid FID for the new readme vnode
3490 * @param[out] ainode the inode for the new readme file
3492 * @return operation status
// CreateReadme: obtain a small-vnode FID, create an inode holding the fixed
// explanatory text below, write a matching vnode into the small vnode index,
// and record the result in the in-memory VnodeEssence entry. The new inode
// number is stored through ainode.
3497 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3498 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3502 struct VnodeDiskObject *rvnode = NULL;
3504 IHandle_t *readmeH = NULL;
3505 struct VnodeEssence *vep;
3507 time_t now = time(NULL);
3509 /* Try to make the note brief, but informative. Only administrators should
3510 * be able to read this file at first, so we can hopefully assume they
3511 * know what AFS is, what a volume is, etc. */
3513 "This volume has been salvaged, but has lost its original root directory.\n"
3514 "The root directory that exists now has been recreated from orphan files\n"
3515 "from the rest of the volume. This recreated root directory may interfere\n"
3516 "with old cached data on clients, and there is no way the salvager can\n"
3517 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3518 "use this volume, but only copy the salvaged data to a new volume.\n"
3519 "Continuing to use this volume as it exists now may cause some clients to\n"
3520 "behave oddly when accessing this volume.\n"
3521 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3522 /* ^ the person reading this probably just lost some data, so they could
3523 * use some cheering up. */
3525 /* -1 for the trailing NUL */
3526 length = sizeof(readme) - 1;
// Reserve a vnode number and uniquifier in the small vnode class, then point
// vep at the in-memory entry for that vnode number.
3528 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
3530 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3532 /* create the inode and write the contents */
3533 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3534 salvinfo->fileSysPath, 0, vid,
3535 afid->Vnode, afid->Unique, 1);
3536 if (!VALID_INO(readmeinode)) {
3537 Log("CreateReadme: readme IH_CREATE failed\n");
// Write the text through a temporary inode handle that is released right
// after the write.
3541 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3542 bytes = IH_IWRITE(readmeH, 0, readme, length);
3543 IH_RELEASE(readmeH);
// NOTE(review): the check compares against length, which is
// sizeof(readme) - 1, but the message prints sizeof(readme).
3545 if (bytes != length) {
3546 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3547 (int)sizeof(readme));
3551 /* create the vnode and write it out */
3552 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
// NOTE(review): this message is prefixed CreateRootDir although it is logged
// from CreateReadme.
3554 Log("CreateRootDir: error alloc'ing memory\n");
// Fill in the on-disk vnode: a regular file with one link, data version 1,
// the length of the text, and the inode created above.
3558 rvnode->type = vFile;
3560 rvnode->modeBits = 0777;
3561 rvnode->linkCount = 1;
3562 VNDISK_SET_LEN(rvnode, length);
3563 rvnode->uniquifier = afid->Unique;
3564 rvnode->dataVersion = 1;
3565 VNDISK_SET_INO(rvnode, readmeinode);
3566 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3571 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
// Store the vnode at its slot in the small vnode index file.
3573 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3574 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3575 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3577 if (bytes != SIZEOF_SMALLDISKVNODE) {
3578 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3579 (int)SIZEOF_SMALLDISKVNODE);
3583 /* update VnodeEssence for new readme vnode */
3584 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
// Mirror the on-disk vnode fields into the in-memory entry and add its blocks
// to the per-class block total.
3586 vep->blockCount = nBlocks(length);
3587 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3588 vep->parent = rvnode->parent;
3589 vep->unique = rvnode->uniquifier;
3590 vep->modeBits = rvnode->modeBits;
3591 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3592 vep->type = rvnode->type;
3593 vep->author = rvnode->author;
3594 vep->owner = rvnode->owner;
3595 vep->group = rvnode->group;
3605 *ainode = readmeinode;
// Recovery: drop the link on the readme inode created above. Presumably only
// reached on an error path; the label and its guard are not visible here.
3610 if (IH_DEC(alinkH, readmeinode, vid)) {
3611 Log("CreateReadme (recovery): IH_DEC failed\n");
3623 * create a root dir for a volume that lacks one.
3625 * @param[in] volHeader vol header for the volume
3626 * @param[in] alinkH ihandle for disk access for this volume group
3627 * @param[in] vid volume id we're dealing with
3628 * @param[out] rootdir populated with info about the new root dir
3629 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3630 * updated to the new max unique if we create a new
3633 * @return operation status
// CreateRootDir: build a replacement root directory for a volume that has
// none. The visible code creates a README file vnode, creates a directory
// inode containing a README.ROOTDIR entry, writes large vnode 1 with
// uniquifier 1 for it, and fills in both the in-memory VnodeEssence entry
// and the caller-supplied DirSummary.
3638 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3639 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
3643 int decroot = 0, decreadme = 0;
3644 AFSFid did, readmeid;
3647 struct VnodeDiskObject *rootvnode = NULL;
3648 struct acl_accessList *ACL;
3651 struct VnodeEssence *vep;
3653 time_t now = time(NULL);
// Refuse to proceed when neither vnode class has an in-memory table.
3655 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3656 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3660 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3661 /* We don't have any large vnodes in the volume; allocate room
3662 * for one so we can recreate the root dir */
3663 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3664 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3665 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3667 osi_Assert(salvinfo->vnodeInfo[vLarge].vnodes);
3668 osi_Assert(salvinfo->vnodeInfo[vLarge].inodes);
// vep and ip address the table entries for vnode number 1; a non-null type
// there means the slot is already in use and nothing is created.
3671 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3672 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3673 if (vep->type != vNull) {
3674 Log("Not creating new root dir; existing vnode 1 is non-null\n");
// Create the README first; its FID is later entered into the new directory.
3678 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3679 &readmeinode) != 0) {
3684 /* set the DV to a very high number, so it is unlikely that we collide
3685 * with a cached DV */
3688 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3690 if (!VALID_INO(rootinode)) {
3691 Log("CreateRootDir: IH_CREATE failed\n");
3696 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3697 rootinode, &salvinfo->VolumeChanged);
// did is passed as both FID arguments to MakeDir; its initialization is not
// visible in this excerpt.
3701 if (MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3702 Log("CreateRootDir: MakeDir failed\n");
3705 if (Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3706 Log("CreateRootDir: Create failed\n");
// Capture the directory length for the vnode, then call DZap on the handle.
3710 length = Length(&rootdir->dirHandle);
3711 DZap((void *)&rootdir->dirHandle);
3713 /* create the new root dir vnode */
3714 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
3716 Log("CreateRootDir: malloc failed\n");
3720 /* only give 'rl' permissions to 'system:administrators'. We do this to
3721 * try to catch the attention of an administrator, that they should not
3722 * be writing to this directory or continue to use it. */
3723 ACL = VVnodeDiskACL(rootvnode);
3724 ACL->size = sizeof(struct acl_accessList);
3725 ACL->version = ACL_ACLVERSION;
3729 ACL->entries[0].id = -204; /* system:administrators */
3730 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
// Fill in the on-disk directory vnode; dv is assigned on a line not visible
// here.
3732 rootvnode->type = vDirectory;
3733 rootvnode->cloned = 0;
3734 rootvnode->modeBits = 0777;
3735 rootvnode->linkCount = 2;
3736 VNDISK_SET_LEN(rootvnode, length);
3737 rootvnode->uniquifier = 1;
3738 rootvnode->dataVersion = dv;
3739 VNDISK_SET_INO(rootvnode, rootinode);
3740 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3741 rootvnode->author = 0;
3742 rootvnode->owner = 0;
3743 rootvnode->parent = 0;
3744 rootvnode->group = 0;
3745 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3747 /* write it out to disk */
3748 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3749 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3750 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3752 if (bytes != SIZEOF_LARGEDISKVNODE) {
3753 /* just cast to int and don't worry about printing real 64-bit ints;
3754 * a large disk vnode isn't anywhere near the 32-bit limit */
3755 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3756 (int)SIZEOF_LARGEDISKVNODE);
3760 /* update VnodeEssence for the new root vnode */
3761 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3763 vep->blockCount = nBlocks(length);
3764 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3765 vep->parent = rootvnode->parent;
3766 vep->unique = rootvnode->uniquifier;
3767 vep->modeBits = rootvnode->modeBits;
3768 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3769 vep->type = rootvnode->type;
3770 vep->author = rootvnode->author;
3771 vep->owner = rootvnode->owner;
3772 vep->group = rootvnode->group;
3782 /* update DirSummary for the new root vnode */
3783 rootdir->vnodeNumber = 1;
3784 rootdir->unique = 1;
3785 rootdir->haveDot = 1;
3786 rootdir->haveDotDot = 1;
3787 rootdir->rwVid = vid;
3788 rootdir->copied = 0;
3789 rootdir->parent = 0;
// NOTE(review): the strdup result is stored without a NULL check in the
// visible lines; vname aliases the name field of the volume header.
3790 rootdir->name = strdup(".");
3791 rootdir->vname = volHeader->name;
3792 rootdir->ds_linkH = alinkH;
// Recovery: each inode is released only when its flag is set; the flags are
// set on lines not visible in this excerpt.
3799 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3800 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3802 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3803 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3813 * salvage a volume group.
3815 * @param[in] salvinfo information for the current salvage job
3816 * @param[in] rwIsp inode summary for rw volume
3817 * @param[in] alinkH link table inode handle
3819 * @return operation status
// SalvageVolume: salvage one read-write volume. In order, the visible code
// reads and sanity-checks the volume header, distills both vnode indexes,
// salvages every directory vnode, optionally recreates a missing root
// directory, attaches or removes orphans, rewrites changed vnodes, and
// finally writes the corrected volume header back.
3823 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3825 /* This routine, for now, will only be called for read-write volumes */
3827 int BlocksInVolume = 0, FilesInVolume = 0;
3829 struct DirSummary rootdir, oldrootdir;
3830 struct VnodeInfo *dirVnodeInfo;
3831 struct VnodeDiskObject vnode;
3832 VolumeDiskData volHeader;
3834 int orphaned, rootdirfound = 0;
3835 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3836 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3837 struct VnodeEssence *vep;
3840 afs_sfsize_t nBytes;
3842 VnodeId LFVnode, ThisVnode;
3843 Unique LFUnique, ThisUnique;
// Read the volume header through handle h. A short read, a bad magic number
// or a set DESTROY_ME flag each trip an assertion.
3847 vid = rwIsp->volSummary->header.id;
3848 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3849 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3850 osi_Assert(nBytes == sizeof(volHeader));
3851 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3852 osi_Assert(volHeader.destroyMe != DESTROY_ME);
3853 /* (should not have gotten this far with DESTROY_ME flag still set!) */
// Distill the large and small vnode indexes; both calls receive the address
// of maxunique.
3855 DistilVnodeEssence(salvinfo, vid, vLarge,
3856 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
3857 DistilVnodeEssence(salvinfo, vid, vSmall,
3858 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
// Salvage every large (directory) vnode; SalvageDir also receives rootdir and
// rootdirfound by address.
3860 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
3861 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3862 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
3863 &rootdir, &rootdirfound);
3866 nt_sync(salvinfo->fileSysDevice);
3868 sync(); /* This used to be done lower level, for every dir */
// With no root directory found, orphans set to ORPH_ATTACH and Testing off,
// try to create a fresh root directory.
3875 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
3877 Log("Cannot find root directory for volume %lu; attempting to create "
3878 "a new one\n", afs_printable_uint32_lu(vid));
3880 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
3885 salvinfo->VolumeChanged = 1;
3889 /* Parse each vnode looking for orphaned vnodes and
3890 * connect them to the tree as orphaned (if requested).
// NOTE(review): rootdir is copied here even when rootdirfound is still 0;
// whether it has been filled in on that path cannot be told from the visible
// lines.
3892 oldrootdir = rootdir;
3893 for (class = 0; class < nVNODECLASSES; class++) {
3894 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
3895 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
3896 ThisVnode = bitNumberToVnodeNumber(v, class);
3897 ThisUnique = vep->unique;
3899 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3900 continue; /* Ignore unused, claimed, and root vnodes */
3902 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3903 * entry in this vnode had incremented the parent link count (In
3904 * JudgeEntry()). We need to go to the parent and decrement that
3905 * link count. But if the parent's unique is zero, then the parent
3906 * link count was not incremented in JudgeEntry().
3908 if (class == vLarge) { /* directory vnode */
3909 pv = vnodeIdToBitNumber(vep->parent);
3910 if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
3911 if (vep->parent == 1 && newrootdir) {
3912 /* this vnode's parent was the volume root, and
3913 * we just created the volume root. So, the parent
3914 * dir didn't exist during JudgeEntry, so the link
3915 * count was not inc'd there, so don't dec it here.
// count is subtracted from linkCount when vnodes are written out further
// down, so raising count here lowers the final link count of the parent.
3921 salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
3927 continue; /* If no rootdir, can't attach orphaned files */
3929 /* Here we attach orphaned files and directories into the
3930 * root directory, LFVnode, making sure link counts stay correct.
3932 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3933 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3934 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3936 /* Update this orphaned vnode's info. Its parent info and
3937 * link count (do for orphaned directories and files).
3939 vep->parent = LFVnode; /* Parent is the root dir */
3940 vep->unique = LFUnique;
// Lowering count raises the link count computed at write-out, because the
// write-out step subtracts count from linkCount.
3943 vep->count--; /* Inc link count (root dir will pt to it) */
3945 /* If this orphaned vnode is a directory, change '..'.
3946 * The name of the orphaned dir/file is unknown, so we
3947 * build a unique name. No need to CopyOnWrite the directory
3948 * since it is not connected to tree in BK or RO volume and
3949 * won't be visible there.
3951 if (class == vLarge) {
3955 /* Remove and recreate the ".." entry in this orphaned directory */
3956 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
3957 salvinfo->vnodeInfo[class].inodes[v],
3958 &salvinfo->VolumeChanged);
3960 pa.Unique = LFUnique;
3961 osi_Assert(Delete(&dh, "..") == 0);
3962 osi_Assert(Create(&dh, "..", &pa) == 0);
3964 /* The original parent's link count was decremented above.
3965 * Here we increment the new parent's link count.
3967 pv = vnodeIdToBitNumber(LFVnode);
3968 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
3972 /* Go to the root dir and add this entry. The link count of the
3973 * root dir was incremented when ".." was created. Try 10 times.
// Each attempt builds a name from the vnode number and ThisUnique; after a
// failed attempt ThisUnique is advanced by 50 so the next name differs.
3975 for (j = 0; j < 10; j++) {
3976 pa.Vnode = ThisVnode;
3977 pa.Unique = ThisUnique;
3979 snprintf(npath, sizeof npath, "%s.%u.%u",
3980 ((class == vLarge) ? "__ORPHANDIR__"
3981 : "__ORPHANFILE__"),
3982 ThisVnode, ThisUnique);
3984 CopyOnWrite(salvinfo, &rootdir);
3985 code = Create(&rootdir.dirHandle, npath, &pa);
3989 ThisUnique += 50; /* Try creating a different file */
3991 osi_Assert(code == 0);
3992 Log("Attaching orphaned %s to volume's root dir as %s\n",
3993 ((class == vLarge) ? "directory" : "file"), npath);
3995 } /* for each vnode in the class */
3996 } /* for each class of vnode */
3998 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
4000 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
4002 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
4004 osi_Assert(code == 0);
4005 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
4008 DFlush(); /* Flush the changes */
// Without a root directory, attaching is impossible; fall back to
// ORPH_IGNORE for the rest of this run.
4009 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
4010 Log("Cannot attach orphaned files and directories: Root directory not found\n");
4011 orphans = ORPH_IGNORE;
4014 /* Write out all changed vnodes. Orphaned files and directories
4015 * will get removed here also (if requested).
4017 for (class = 0; class < nVNODECLASSES; class++) {
4018 afs_sfsize_t nVnodes = salvinfo->vnodeInfo[class].nVnodes;
4019 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
4020 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
4021 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
4022 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
4023 for (i = 0; i < nVnodes; i++) {
4024 struct VnodeEssence *vnp = &vnodes[i];
4025 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
4027 /* If the vnode is good but is unclaimed (not listed in
4028 * any directory entries), then it is orphaned.
4031 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
4032 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
// Only vnodes flagged changed, or with a nonzero count, are read from the
// index, patched and written back.
4036 if (vnp->changed || vnp->count) {
4039 IH_IREAD(salvinfo->vnodeInfo[class].handle,
4040 vnodeIndexOffset(vcp, vnodeNumber),
4041 (char *)&vnode, sizeof(vnode));
4042 osi_Assert(nBytes == sizeof(vnode));
4044 vnode.parent = vnp->parent;
4045 oldCount = vnode.linkCount;
4046 vnode.linkCount = vnode.linkCount - vnp->count;
4049 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4051 if (!vnp->todelete) {
4052 /* Orphans should have already been attached (if requested) */
4053 osi_Assert(orphans != ORPH_ATTACH);
4054 oblocks += vnp->blockCount;
4057 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4059 BlocksInVolume -= vnp->blockCount;
4061 if (VNDISK_GET_INO(&vnode)) {
4063 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4064 osi_Assert(code == 0);
// A removed vnode is written back as all zero bytes.
4066 memset(&vnode, 0, sizeof(vnode));
4068 } else if (vnp->count) {
4070 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4073 vnode.modeBits = vnp->modeBits;
4076 vnode.dataVersion++;
4079 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4080 vnodeIndexOffset(vcp, vnodeNumber),
4081 (char *)&vnode, sizeof(vnode));
4082 osi_Assert(nBytes == sizeof(vnode));
4084 salvinfo->VolumeChanged = 1;
4088 if (!Showmode && ofiles) {
4089 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4091 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
// Release the per-vnode name strings held in the in-memory tables.
4095 for (class = 0; class < nVNODECLASSES; class++) {
4096 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4097 for (i = 0; i < vip->nVnodes; i++)
4098 if (vip->vnodes[i].name)
4099 free(vip->vnodes[i].name);
4106 /* Set correct resource utilization statistics */
4107 volHeader.filecount = FilesInVolume;
4108 volHeader.diskused = BlocksInVolume;
4110 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
4111 if (volHeader.uniquifier < (maxunique + 1)) {
4113 Log("Volume uniquifier is too low; fixed\n");
4114 /* Plus 2,000 in case there are workstations out there with
4115 * cached vnodes that have since been deleted
4117 volHeader.uniquifier = (maxunique + 1 + 2000);
4121 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4122 "Only use this salvaged volume to copy data to another volume; "
4123 "do not continue to use this volume (%lu) as-is.\n",
4124 afs_printable_uint32_lu(vid));
// When built with the FSSYNC client, ask the fileserver to break callbacks
// for a changed volume. VolumeChanged is cleared below; the branch that
// guards that assignment is not visible in this excerpt.
4127 #ifdef FSSYNC_BUILD_CLIENT
4128 if (!Testing && salvinfo->VolumeChanged && salvinfo->useFSYNC) {
4129 afs_int32 fsync_code;
4131 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4133 Log("Error trying to tell the fileserver to break callbacks for "
4134 "changed volume %lu; error code %ld\n",
4135 afs_printable_uint32_lu(vid),
4136 afs_printable_int32_ld(fsync_code));
4138 salvinfo->VolumeChanged = 0;
4141 #endif /* FSSYNC_BUILD_CLIENT */
4143 /* Turn off the inUse bit; the volume's been salvaged! */
4144 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4145 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4146 volHeader.inService = 1; /* allow service again */
// needsCallback records whether VolumeChanged is still set at this point.
4147 volHeader.needsCallback = (salvinfo->VolumeChanged != 0);
4148 volHeader.dontSalvage = DONT_SALVAGE;
4149 salvinfo->VolumeChanged = 0;
4151 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4152 osi_Assert(nBytes == sizeof(volHeader));
4155 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
4156 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
4157 FilesInVolume, BlocksInVolume);
// Drop the handles on both vnode index files.
4160 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4161 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
// ClearROInUseBit: read the volume header through the summary's
// volumeInfoHandle, clear inUse and needsSalvaged, set inService and
// dontSalvage, and write the header back. A short read or write, or a bad
// magic number, trips an assertion.
4167 ClearROInUseBit(struct VolumeSummary *summary)
4169 IHandle_t *h = summary->volumeInfoHandle;
4170 afs_sfsize_t nBytes;
4172 VolumeDiskData volHeader;
4174 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4175 osi_Assert(nBytes == sizeof(volHeader));
4176 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
4177 volHeader.inUse = 0;
4178 volHeader.needsSalvaged = 0;
4179 volHeader.inService = 1;
4180 volHeader.dontSalvage = DONT_SALVAGE;
4182 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4183 osi_Assert(nBytes == sizeof(volHeader));
4188 * Possibly delete the volume.
4190 * deleteMe - Always do so, only a partial volume.
// MaybeZapVolume: for a read-only volume, or when deleteMe is set, log why
// the volume is being removed, destroy its disk header, unlink its header
// file and mark the summary deleted. For any other volume, when check is 0,
// log the failure and abort the salvage.
4193 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4194 char *message, int deleteMe, int check)
4196 if (readOnly(isp) || deleteMe) {
4197 if (isp->volSummary && isp->volSummary->fileName) {
4200 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
4202 Log("It will be deleted on this server (you may find it elsewhere)\n");
4205 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
4207 Log("it will be deleted instead. It should be recloned.\n");
// NOTE(review): sprintf writes into path with no length limit; the
// declaration of path is not visible here, so confirm its size covers
// fileSysPath plus fileName.
4212 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, isp->volSummary->fileName);
4214 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4216 Log("Error %ld destroying volume disk header for volume %lu\n",
4217 afs_printable_int32_ld(code),
4218 afs_printable_uint32_lu(isp->volumeId));
4221 /* make sure we actually delete the fileName file; ENOENT
4222 * is fine, since VDestroyVolumeDiskHeader probably already
4224 if (unlink(path) && errno != ENOENT) {
4225 Log("Unable to unlink %s (errno = %d)\n", path, errno);
// When FSYNC is in use, AskDelete is called for the volume as well.
4227 if (salvinfo->useFSYNC) {
4228 AskDelete(salvinfo, isp->volumeId);
4230 isp->volSummary->deleted = 1;
4233 } else if (!check) {
4234 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
4236 Abort("Salvage of volume %u aborted\n", isp->volumeId);
4240 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4242 * Locks a volume on disk for salvaging.
4244 * @param[in] volumeId volume ID to lock
4246 * @return operation status
4248 * @retval -1 volume lock raced with a fileserver restart; all volumes must
4249 * be checked out and locked again
// LockVolume: take a non-blocking on-disk lock on the volume, verify with the
// fileserver that the volume is still checked out for salvage, then stamp the
// volume header with inUse = programType.
4254 LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
4259 /* should always be WRITE_LOCK, but keep the lock-type logic all
4260 * in one place, in VVolLockType. Params will be ignored, but
4261 * try to provide what we're logically doing. */
4262 locktype = VVolLockType(V_VOLUPD, 1);
4264 code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
// EBUSY gets its own abort message; the second Abort reports any other lock
// error code (its guard is not visible in this excerpt).
4266 if (code == EBUSY) {
4267 Abort("Someone else appears to be using volume %lu; Aborted\n",
4268 afs_printable_uint32_lu(volumeId));
4270 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4271 afs_printable_int32_ld(code),
4272 afs_printable_uint32_lu(volumeId));
4275 code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPathName, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4276 if (code == SYNC_DENIED) {
4277 /* need to retry checking out volumes */
4280 if (code != SYNC_OK) {
4281 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4282 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4285 /* set inUse = programType in the volume header to ensure that nobody
4286 * tries to use this volume again without salvaging, if we somehow crash
4287 * or otherwise exit before finishing the salvage.
4291 struct VolumeHeader header;
4292 struct VolumeDiskHeader diskHeader;
4293 struct VolumeDiskData volHeader;
4295 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
4300 DiskToVolumeHeader(&header, &diskHeader);
// The handle is opened on the parent volume id and the volumeInfo inode taken
// from the converted header.
4302 IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
4303 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4304 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4310 volHeader.inUse = programType;
4312 /* If we can't re-write the header, bail out and error. We don't
4313 * assert when reading the header, since it's possible the
4314 * header isn't really there (when there's no data associated
4315 * with the volume; we just delete the vol header file in that
4316 * case). But if it's there enough that we can read it, but
4317 * somehow we cannot write to it to signify we're salvaging it,
4318 * we've got a big problem and we cannot continue. */
4319 osi_Assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
4326 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
// AskOffline: ask the fileserver, with up to three FSYNC_VOL_OFF attempts, to
// take the volume offline for salvage. A denial or a protocol mismatch is
// logged and aborts the salvage; any result other than SYNC_OK after the
// loop also aborts.
4329 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
4334 memset(&res, 0, sizeof(res));
4336 for (i = 0; i < 3; i++) {
4337 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4338 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4340 if (code == SYNC_OK) {
4342 } else if (code == SYNC_DENIED) {
// Two wordings of the denial message exist; the condition that selects
// between them is not visible in this excerpt.
4344 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4346 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4347 Abort("Salvage aborted\n");
4348 } else if (code == SYNC_BAD_COMMAND) {
4349 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
// The advice logged depends on whether this binary was built with demand
// attach support.
4352 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4353 Log("AskOffline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4355 Log("AskOffline: fileserver is DAFS but we are not.\n");
4358 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4359 Log("AskOffline: fileserver is not DAFS but we are.\n");
4361 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4364 Abort("Salvage aborted\n");
// Any other result: log it and call FSYNC_clientFinis before the next
// attempt.
4367 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4368 FSYNC_clientFinis();
4372 if (code != SYNC_OK) {
4373 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4374 Abort("Salvage aborted\n");
// NOTE(review): the signature line of this function is not visible in this
// excerpt; the name AskDAFS is taken from the prefix of its log message.
// The visible code sends an FSYNC_VOL_QUERY_VOP request for volume id 1, up
// to three times, and branches on the sync result code. The file-scope isDAFS
// below starts at -1; how the result is stored in it is not visible here.
4378 /* don't want to pass around state; remember it here */
4379 static int isDAFS = -1;
4383 afs_int32 code, i, ret = 0;
4386 /* we don't care if we race. the answer shouldn't change */
4390 memset(&res, 0, sizeof(res));
4392 for (i = 0; i < 3; i++) {
4393 code = FSYNC_VolOp(1, NULL,
4394 FSYNC_VOL_QUERY_VOP, FSYNC_SALVAGE, &res);
// The action taken for each result code is on lines not visible here.
4396 if (code == SYNC_OK) {
4399 } else if (code == SYNC_DENIED) {
4402 } else if (code == SYNC_BAD_COMMAND) {
4405 } else if (code == SYNC_FAILED) {
4406 if (res.hdr.reason == FSYNC_UNKNOWN_VOLID)
// Any other outcome is logged and FSYNC_clientFinis is called before the
// next attempt.
4413 Log("AskDAFS: request to query fileserver failed; trying again...\n");
4414 FSYNC_clientFinis();
// MaybeAskOnline: read the disk header of the volume first and then call
// AskOnline. Per the comment below, a failed header read is taken to mean
// the volume is gone; the test on code itself is not visible in this excerpt.
4424 MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4426 struct VolumeDiskHeader diskHdr;
4428 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHdr);
4430 /* volume probably does not exist; no need to bring back online */
4433 AskOnline(salvinfo, volumeId);
// AskOnline: ask the fileserver, with up to three FSYNC_VOL_ON attempts, to
// bring the volume back online. Unlike AskOffline, none of the visible
// branches calls Abort.
4437 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4441 for (i = 0; i < 3; i++) {
4442 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4443 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4445 if (code == SYNC_OK) {
4447 } else if (code == SYNC_DENIED) {
4448 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4449 } else if (code == SYNC_BAD_COMMAND) {
4450 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4452 Log("AskOnline: please make sure file server binaries are same version.\n");
// Any other result: log it and call FSYNC_clientFinis before the next
// attempt.
4456 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
4457 FSYNC_clientFinis();
// AskDelete: send the fileserver an FSYNC_VOL_DONE request for the volume, up
// to three times. A SYNC_FAILED result whose reason is FSYNC_UNKNOWN_VOLID or
// FSYNC_WRONG_PART is treated as already deleted.
// NOTE(review): every log message in this function is prefixed AskOnline
// although it is emitted from AskDelete.
4464 AskDelete(struct SalvInfo *salvinfo, VolumeId volumeId)
4469 for (i = 0; i < 3; i++) {
// res is cleared before every attempt so the reason code read below belongs
// to the current request.
4470 memset(&res, 0, sizeof(res));
4471 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4472 FSYNC_VOL_DONE, FSYNC_SALVAGE, &res);
4474 if (code == SYNC_OK) {
4476 } else if (code == SYNC_DENIED) {
4477 Log("AskOnline: file server denied DONE request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4478 } else if (code == SYNC_BAD_COMMAND) {
4479 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4482 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4483 Log("AskOnline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4485 Log("AskOnline: fileserver is DAFS but we are not.\n");
4488 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4489 Log("AskOnline: fileserver is not DAFS but we are.\n");
4491 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4495 } else if (code == SYNC_FAILED &&
4496 (res.hdr.reason == FSYNC_UNKNOWN_VOLID ||
4497 res.hdr.reason == FSYNC_WRONG_PART)) {
4498 /* volume is already effectively 'deleted' */
4502 Log("AskOnline: request for fileserver to delete volume failed; trying again...\n");
4503 FSYNC_clientFinis();
// CopyInode: copy the contents of inode1 to inode2 on the given device,
// reading and writing buf-sized pieces at matching offsets. Any failed write,
// or a read that ends with a value other than 0, trips an assertion.
4510 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4512 /* Volume parameter is passed in case iopen is upgraded in future to
4513 * require a volume Id to be passed
4516 IHandle_t *srcH, *destH;
4517 FdHandle_t *srcFdP, *destFdP;
4519 afs_foff_t size = 0;
4521 IH_INIT(srcH, device, rwvolume, inode1);
4522 srcFdP = IH_OPEN(srcH);
4523 osi_Assert(srcFdP != NULL);
4524 IH_INIT(destH, device, rwvolume, inode2);
// NOTE(review): unlike srcFdP, destFdP is used by the loop below without a
// NULL assertion.
4525 destFdP = IH_OPEN(destH);
// size doubles as the file offset; the line that advances it is not visible
// in this excerpt.
4526 while ((nBytes = FDH_PREAD(srcFdP, buf, sizeof(buf), size)) > 0) {
4527 osi_Assert(FDH_PWRITE(destFdP, buf, nBytes, size) == nBytes);
4530 osi_Assert(nBytes == 0);
4531 FDH_REALLYCLOSE(srcFdP);
4532 FDH_REALLYCLOSE(destFdP);
// PrintInodeList: read the whole file behind salvinfo->inodeFd into a heap
// buffer and log one line per ViceInodeInfo record found in it.
4539 PrintInodeList(struct SalvInfo *salvinfo)
4541 struct ViceInodeInfo *ip;
4542 struct ViceInodeInfo *buf;
4545 afs_sfsize_t st_size;
4547 st_size = OS_SIZE(salvinfo->inodeFd);
4548 osi_Assert(st_size >= 0);
4549 buf = (struct ViceInodeInfo *)malloc(st_size);
4550 osi_Assert(buf != NULL);
// The record count is the file size divided by the record size; a short read
// trips the assertion.
4551 nInodes = st_size / sizeof(struct ViceInodeInfo);
4552 osi_Assert(OS_READ(salvinfo->inodeFd, buf, st_size) == st_size);
4553 for (ip = buf; nInodes--; ip++) {
4554 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
4555 PrintInode(stmp, ip->inodeNumber), ip->linkCount,
4556 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
4557 ip->u.param[2], ip->u.param[3]);
// PrintInodeSummary: log one line for each of the nVolumesInInodeFile entries
// of salvinfo->inodeSummary.
4563 PrintInodeSummary(struct SalvInfo *salvinfo)
4566 struct InodeSummary *isp;
4568 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4569 isp = &salvinfo->inodeSummary[i];
// The trailing volSummary in the format string is literal text; no argument
// is passed for it.
4570 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
4575 PrintVolumeSummary(struct SalvInfo *salvinfo)
4578 struct VolumeSummary *vsp;
4580 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; vsp++, i++) {
4581 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
/* NOTE(review): fragment -- the enclosing function's header and several
 * interior lines (including where f is assigned) are not visible here. */
4591 osi_Assert(0); /* Fork is never executed in the NT code path */
4595 #ifdef AFS_DEMAND_ATTACH_FS
/* Only the f == 0 side of a salvageServer process takes this branch. */
4596 if ((f == 0) && (programType == salvageServer)) {
4597 /* we are a salvageserver child */
4598 #ifdef FSSYNC_BUILD_CLIENT
/* Only compiled in when the FSSYNC client is part of the build. */
4599 VChildProcReconnectFS_r();
4601 #ifdef SALVSYNC_BUILD_CLIENT
4605 #endif /* AFS_DEMAND_ATTACH_FS */
4606 #endif /* !AFS_NT40_ENV */
/* NOTE(review): fragment -- the enclosing function's header and the bodies
 * of the conditional sections below are not visible in this view. */
4616 #ifdef AFS_DEMAND_ATTACH_FS
4617 if (programType == salvageServer) {
4618 #ifdef SALVSYNC_BUILD_CLIENT
4621 #ifdef FSSYNC_BUILD_CLIENT
4625 #endif /* AFS_DEMAND_ATTACH_FS */
/* On any thread other than main_thread, end just that thread and pass
 * `code` back as its exit value. */
4628 if (main_thread != pthread_self())
/* NOTE(review): `code` is cast straight to a pointer; if it is an int,
 * going through intptr_t would avoid a size-mismatch warning -- confirm. */
4629 pthread_exit((void *)code);
/* NOTE(review): fragment -- the enclosing function's header and the
 * statement controlled by the last `if` are not visible in this view. */
/* Block until some child changes state; a failed wait() is asserted. */
4642 pid = wait(&status);
4643 osi_Assert(pid != -1);
/* Report a core dump, naming the program via `prog`. */
4644 if (WCOREDUMP(status))
4645 Log("\"%s\" core dumped!\n", prog);
/* True when the child was killed by a signal or exited non-zero. */
4646 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp: format `clock` as local time.
 *
 * precision != 0 gives "MM/DD/YYYY HH:MM:SS"; precision == 0 gives
 * "MM/DD/YYYY HH:MM".
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call, so the result must be consumed or copied before calling again.
 * If the time cannot be converted or does not fit the buffer, the text
 * "(invalid time)" is returned instead of indeterminate contents.
 */
char *
TimeStamp(time_t clock, int precision)
{
    static char timestamp[20];	/* 19 characters plus the terminating NUL */
    const char *fmt = precision ? "%m/%d/%Y %H:%M:%S" : "%m/%d/%Y %H:%M";
    struct tm *lt = localtime(&clock);

    /* localtime() may return NULL, and strftime() returns 0 (leaving the
     * buffer contents unspecified) when the result does not fit. */
    if (lt == NULL || strftime(timestamp, sizeof timestamp, fmt, lt) == 0) {
        snprintf(timestamp, sizeof timestamp, "%s", "(invalid time)");
    }
    return timestamp;
}
/*
 * CheckLogFile: build "<log_path>.old", hand both names to renamefile(),
 * and open log_path in append mode as logFile.
 *
 * NOTE(review): several lines of this function are not visible in this
 * view, including the conditions that guard the steps below and the body
 * of the stdout fallback.
 */
4665 CheckLogFile(char * log_path)
4667 char oldSlvgLog[AFSDIR_PATH_MAX];
4669 #ifndef AFS_NT40_ENV
/* NOTE(review): strcpy/strcat are unbounded; a log_path longer than
 * AFSDIR_PATH_MAX - 5 overflows oldSlvgLog.  A checked snprintf of
 * "%s.old" would be safer. */
4676 strcpy(oldSlvgLog, log_path);
4677 strcat(oldSlvgLog, ".old");
/* The result of renamefile() is not checked on this line. */
4679 renamefile(log_path, oldSlvgLog);
4680 logFile = afs_fopen(log_path, "a");
4682 if (!logFile) { /* still nothing, use stdout */
4686 #ifndef AFS_NAMEI_ENV
4687 AFS_DEBUG_IOPS_LOG(logFile);
4692 #ifndef AFS_NT40_ENV
/*
 * TimeStampLogFile: create a hard link to log_path named
 * "<log_path>.YYYY-MM-DD.HH:MM:SS", using the local time derived from
 * `now`.  Failure to create the link is deliberately ignored.
 *
 * NOTE(review): the declarations and the assignment of `now` are on lines
 * not visible in this view.
 */
4694 TimeStampLogFile(char * log_path)
4696 char stampSlvgLog[AFSDIR_PATH_MAX];
/* NOTE(review): localtime() can return NULL; lt is dereferenced below
 * without a check. */
4701 lt = localtime(&now);
/* snprintf bounds the name to the buffer; a truncated name is not
 * detected here. */
4702 snprintf(stampSlvgLog, sizeof stampSlvgLog,
4703 "%s.%04d-%02d-%02d.%02d:%02d:%02d", log_path,
4704 lt->tm_year + 1900, lt->tm_mon + 1, lt->tm_mday, lt->tm_hour,
4705 lt->tm_min, lt->tm_sec);
4707 /* try to link the logfile to a timestamped filename */
4708 /* if it fails, oh well, nothing we can do */
4709 link(log_path, stampSlvgLog);
/* NOTE(review): fragment -- the enclosing function's header, the
 * conditions around the two messages, and the loop body are not visible
 * in this view. */
4718 #ifndef AFS_NT40_ENV
4720 printf("Can't show log since using syslog.\n");
/* Open the salvage log for reading, reusing the logFile variable. */
4731 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
4734 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
/* Consume the log one line at a time, at most sizeof(line) - 1 characters
 * per fgets() call. */
4737 while (fgets(line, sizeof(line), logFile))
/*
 * Log: printf-style logging.
 *
 * The message is formatted into `tmp` with vsnprintf, so anything longer
 * than that buffer is truncated.  It is then either sent to syslog at
 * LOG_INFO (non-NT builds only) or written to logFile prefixed with
 * TimeStamp(now.tv_sec, 1).
 *
 * NOTE(review): the lines that choose between the two destinations are
 * not visible in this view.
 */
4744 Log(const char *format, ...)
4750 va_start(args, format);
4751 vsnprintf(tmp, sizeof tmp, format, args);
4753 #ifndef AFS_NT40_ENV
/* Passed through "%s" so the message is never interpreted as a format. */
4755 syslog(LOG_INFO, "%s", tmp);
4759 gettimeofday(&now, 0);
4760 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort: printf-style message output.
 *
 * The message is formatted into `tmp` with vsnprintf (truncating anything
 * longer than that buffer) and is then either sent to syslog at LOG_INFO
 * (non-NT builds only) or written to logFile without a timestamp prefix.
 *
 * NOTE(review): the lines that choose the destination, and whatever this
 * function does after emitting the message, are not visible in this view.
 */
4766 Abort(const char *format, ...)
4771 va_start(args, format);
4772 vsnprintf(tmp, sizeof tmp, format, args);
4774 #ifndef AFS_NT40_ENV
/* Passed through "%s" so the message is never interpreted as a format. */
4776 syslog(LOG_INFO, "%s", tmp);
4780 fprintf(logFile, "%s", tmp);
/*
 * ToString: allocate a buffer large enough to hold `s` plus its
 * terminating NUL; allocation failure is checked with osi_Assert().
 *
 * NOTE(review): what is stored in the buffer and what is returned are on
 * lines not visible in this view.  s must not be NULL, since strlen(s)
 * is called unconditionally.
 */
4792 ToString(const char *s)
4795 p = (char *)malloc(strlen(s) + 1);
4796 osi_Assert(p != NULL);
4801 /* Remove the FORCESALVAGE file */
4803 RemoveTheForce(char *path)
4806 struct afs_stat_st force; /* so we can use afs_stat to find it */
4807 strcpy(target,path);
4808 strcat(target,"/FORCESALVAGE");
4809 if (!Testing && ForceSalvage) {
4810 if (afs_stat(target,&force) == 0) unlink(target);
4814 #ifndef AFS_AIX32_ENV
4816 * UseTheForceLuke - see if we can use the force
4819 UseTheForceLuke(char *path)
4821 struct afs_stat_st force;
4823 strcpy(target,path);
4824 strcat(target,"/FORCESALVAGE");
4826 return (afs_stat(target, &force) == 0);
4830 * UseTheForceLuke - see if we can use the force
4833 * The VRMIX fsck will not muck with the filesystem it is supposedly
4834 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4835 * muck directly with the root inode, which is within the normal
4837 * ListViceInodes() has a side effect of setting ForceSalvage if
4838 * it detects a need, based on root inode examination.
4841 UseTheForceLuke(char *path)
4844 return 0; /* sorry OB1 */
4849 /* NT support routines */
4851 static char execpathname[MAX_PATH];
4853 nt_SalvagePartition(char *partName, int jobn)
4858 if (!*execpathname) {
4859 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
4860 if (!n || n == 1023)
4863 job.cj_magic = SALVAGER_MAGIC;
4864 job.cj_number = jobn;
4865 (void)strcpy(job.cj_part, partName);
4866 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage: check the buffer at `datap` (its length must
 * be sizeof(childJob_t) and its cj_magic must be SALVAGER_MAGIC), then
 * open a log file named "<AFSDIR_SERVER_SLVGLOG_FILEPATH>.<number>" for
 * writing as logFile.
 *
 * NOTE(review): the statements taken when a check fails, the second
 * sprintf argument, and the return value are on lines not visible in
 * this view.
 */
4871 nt_SetupPartitionSalvage(void *datap, int len)
4873 childJob_t *jobp = (childJob_t *) datap;
4874 char logname[AFSDIR_PATH_MAX];
/* Reject a data block of the wrong size before reading any field. */
4876 if (len != sizeof(childJob_t))
4878 if (jobp->cj_magic != SALVAGER_MAGIC)
/* NOTE(review): sprintf is unbounded; snprintf(logname, sizeof logname,
 * ...) would rule out overflowing logname. */
4883 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
/* "w" truncates any existing file of the same name. */
4885 logFile = afs_fopen(logname, "w");
4893 #endif /* AFS_NT40_ENV */