2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
89 #include <afs/procmgmt.h>
93 #include <sys/param.h>
97 #endif /* ITIMER_REAL */
103 #include <sys/stat.h>
108 #include <WINNT/afsevent.h>
111 #define WCOREDUMP(x) ((x) & 0200)
114 #include <afs/afsint.h>
115 #include <afs/afs_assert.h>
116 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
117 #if defined(AFS_VFSINCL_ENV)
118 #include <sys/vnode.h>
120 #include <sys/fs/ufs_inode.h>
122 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
123 #include <ufs/ufs/dinode.h>
124 #include <ufs/ffs/fs.h>
126 #include <ufs/inode.h>
129 #else /* AFS_VFSINCL_ENV */
131 #include <ufs/inode.h>
132 #else /* AFS_OSF_ENV */
133 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_ARM_DARWIN_ENV)
134 #include <sys/inode.h>
137 #endif /* AFS_VFSINCL_ENV */
138 #endif /* AFS_SGI_ENV */
141 #include <sys/lockf.h>
145 #include <checklist.h>
147 #if defined(AFS_SGI_ENV)
152 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
155 #include <sys/mnttab.h>
156 #include <sys/mntent.h>
161 #endif /* AFS_SGI_ENV */
162 #endif /* AFS_HPUX_ENV */
167 #include <afs/osi_inode.h>
171 #include <afs/afsutil.h>
172 #include <afs/fileutil.h>
173 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
181 #include <afs/afssyscalls.h>
185 #include "partition.h"
186 #include "daemon_com.h"
188 #include "volume_inline.h"
189 #include "salvsync.h"
190 #include "viceinode.h"
192 #include "volinodes.h" /* header magic number, etc. stuff */
193 #include "vol-salvage.h"
195 #include "vol_internal.h"
197 #include <afs/prs_fs.h>
199 #ifdef FSSYNC_BUILD_CLIENT
200 #include "vg_cache.h"
208 extern void *calloc();
210 static char *TimeStamp(time_t clock, int precision);
/* Salvager-wide settings. As the trailing comments note, most of these mirror
 * a command-line switch. Only Parallel, PartsPerDisk, forceR, ShowLog,
 * ShowSuid, ShowMounts, orphans, useSyslog and useSyslogFacility carry an
 * explicit initial value here; Testing is defined in another file. */
213 int debug; /* -d flag */
214 extern int Testing; /* -n flag */
215 int ListInodeOption; /* -i flag */
216 int ShowRootFiles; /* -r flag */
217 int RebuildDirs; /* -sal flag */
218 int Parallel = 4; /* -para X flag */
219 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
220 int forceR = 0; /* -b flag */
221 int ShowLog = 0; /* -showlog flag */
222 int ShowSuid = 0; /* -showsuid flag */
223 int ShowMounts = 0; /* -showmounts flag */
224 int orphans = ORPH_IGNORE; /* -orphans option */
229 int useSyslog = 0; /* -syslog flag */
230 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
239 #define MAXPARALLEL 32
241 int OKToZap; /* -o flag */
242 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
243 * in the volume header */
245 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
247 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
250 * information that is 'global' to a particular salvage job.
253 Device fileSysDevice; /**< The device number of the current partition
255 char fileSysPath[9]; /**< The path of the mounted partition currently
256 * being salvaged, i.e. the directory containing
257 * the volume headers. Sized for an 8-character
 * path such as "/vicepaa" plus the terminating
 * NUL; with only 8 bytes the bounded copy of the
 * partition path into this field would silently
 * drop the last character of a two-letter
 * partition name. */
258 char *fileSysPathName; /**< NT needs this to make name pretty log. */
259 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
260 int VGLinkH_cnt; /**< # of references to lnk handle. */
261 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
264 char *fileSysDeviceName; /**< The block device where the file system being
265 * salvaged was mounted */
266 char *filesysfulldev;
268 int VolumeChanged; /**< Set by any routine which would change the
269 * volume in a way which would require callbacks
270 * to be broken if the volume was put back on
271 * on line by an active file server */
273 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
274 * header dealt with */
276 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
277 FD_t inodeFd; /**< File descriptor for inode file */
279 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
280 int nVolumes; /**< Number of volumes (read-write and read-only)
281 * in volume summary */
282 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
285 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
286 * vnodes in the volume that
287 * we are currently looking
289 int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
290 * to contact the fileserver over FSYNC */
297 /* Forward declarations */
/* Prototypes for static helpers that are referenced before their definitions.
 * LockVolume is declared only when AFS_DEMAND_ATTACH_FS or
 * AFS_DEMAND_ATTACH_UTIL is defined. */
298 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
299 static int AskVolumeSummary(struct SalvInfo *salvinfo,
300 VolumeId singleVolumeNumber);
301 static void MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId);
303 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
304 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
305 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
307 /* Uniquifier stored in the Inode */
312 return (u & 0x3fffff);
314 #if defined(AFS_SGI_EXMAG)
315 return (u & SGI_UNIQMASK);
318 #endif /* AFS_SGI_EXMAG */
325 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
327 return 0; /* otherwise may be transient, e.g. EMFILE */
332 char *save_args[MAX_ARGS];
334 extern pthread_t main_thread;
335 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
339 * Get the salvage lock if not already held. Hold until process exits.
341 * @param[in] locktype READ_LOCK or WRITE_LOCK
/* Common implementation behind ObtainSalvageLock() and
 * ObtainSharedSalvageLock(). Initialises a VLockFile on
 * AFSDIR_SERVER_SLVGLOCK_FILEPATH and requests it through VLockFileLock()
 * with the caller's locktype. The two message fragments below are the
 * diagnostics for "another salvager is running" and for any other locking
 * error respectively. */
344 _ObtainSalvageLock(int locktype)
346 struct VLockFile salvageLock;
351 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
353 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
356 "salvager: There appears to be another salvager running! "
361 "salvager: Error %d trying to acquire salvage lock! "
/* Take the salvage lock in WRITE_LOCK mode via _ObtainSalvageLock(). */
367 ObtainSalvageLock(void)
369 _ObtainSalvageLock(WRITE_LOCK);
/* Take the salvage lock in READ_LOCK mode via _ObtainSalvageLock(). */
372 ObtainSharedSalvageLock(void)
374 _ObtainSalvageLock(READ_LOCK);
378 #ifdef AFS_SGI_XFS_IOPS_ENV
379 /* Check if the given partition is mounted. For XFS, the root inode is not a
380 * constant. So we check the hard way.
/* Scan the mounted-filesystem table (MOUNTED) for an entry whose mnt_dir is
 * exactly `part`.
 *
 * part - mount point to look for.
 *
 * Returns 1 when a matching entry was found and 0 when the table was
 * exhausted without a match. The previous expression, "mntent ? 1 : 1",
 * reported every partition as mounted regardless of the scan result. */
383 IsPartitionMounted(char *part)
386 struct mntent *mntent;
388 osi_Assert(mntfp = setmntent(MOUNTED, "r"));
389 while (mntent = getmntent(mntfp)) {
390 if (!strcmp(part, mntent->mnt_dir))
/* NOTE(review): relies on the loop leaving mntent non-NULL only when the
 * strcmp above matched (i.e. the match breaks out of the loop) - confirm. */
395 return mntent ? 1 : 0;
398 /* Check if the given inode is the root of the filesystem. */
399 #ifndef AFS_SGI_XFS_IOPS_ENV
/* Report whether `status` describes the filesystem root: the result is the
 * comparison of status->st_ino against ROOTINODE. Only built when
 * AFS_SGI_XFS_IOPS_ENV is not defined (see the #ifndef above). */
401 IsRootInode(struct afs_stat_st *status)
404 * The root inode is not a fixed value in XFS partitions. So we need to
405 * see if the partition is in the list of mounted partitions. This only
406 * affects the SalvageFileSys path, so we check there.
408 return (status->st_ino == ROOTINODE);
413 #ifndef AFS_NAMEI_ENV
414 /* We don't want to salvage big files filesystems, since we can't put volumes on
/* Inspect the superblock of the device backing `mountPoint`.
 *
 * The device path is assembled in `name`: devName is used as-is when it
 * already begins with "/dev/", otherwise "/dev/" is prepended. The superblock
 * is then read with ReadSuper(); a read failure and a positive
 * IsBigFilesFileSystem() result are each reported through Log() as a reason
 * for not salvaging. Return values are not visible in this excerpt. */
418 CheckIfBigFilesFS(char *mountPoint, char *devName)
420 struct superblock fs;
423 if (strncmp(devName, "/dev/", 5)) {
424 (void)sprintf(name, "/dev/%s", devName);
426 (void)strcpy(name, devName);
429 if (ReadSuper(&fs, name) < 0) {
430 Log("Unable to read superblock. Not salvaging partition %s.\n",
434 if (IsBigFilesFileSystem(&fs)) {
435 Log("Partition %s is a big files filesystem, not salvaging.\n",
/* SameDisk: decide whether two partitions share a physical disk.
 *
 * Function form: resolves each partition's devName with QueryDosDevice() and
 * compares the two results case-insensitively (_strnicmp). A result that does
 * not start with HDSTR triggers a warning via Log(); `dowarn` is a static
 * flag, presumably so the warning is emitted only once - confirm.
 *
 * Macro form (last line): two partitions are on the same disk when their
 * device numbers divided by PartsPerDisk are equal. */
445 #define HDSTR "\\Device\\Harddisk"
446 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
448 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
454 static int dowarn = 1;
456 if (!QueryDosDevice(p1->devName, res1, RES_LEN - 1))
458 if (strncmp(res1, HDSTR, HDLEN)) {
461 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
462 res1, HDSTR, p1->devName);
465 if (!QueryDosDevice(p2->devName, res2, RES_LEN - 1))
467 if (strncmp(res2, HDSTR, HDLEN)) {
470 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
471 res2, HDSTR, p2->devName);
475 return (0 == _strnicmp(res1, res2, RES_LEN - 1));
478 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
481 /* This assumes that two partitions with the same device number divided by
482 * PartsPerDisk are on the same disk.
/* Schedule the salvage of one partition, letting partitions on different
 * disks be salvaged concurrently.
 *
 * partP - partition to queue. A null partP is treated as a request to wait
 *         for every outstanding job (see the "!partP && numjobs > 0" loop
 *         condition) and afterwards to gather the per-job log files.
 *
 * State kept between calls: jobs[] (MAXPARALLEL slots, one per disk being
 * salvaged), numjobs and jobcount are all static. A partition that SameDisk()
 * places on a disk already being salvaged is chained through nextjob instead
 * of being started. When debug is set or Parallel == 1 the partition is
 * handed directly to SalvageFileSys(). */
485 SalvageFileSysParallel(struct DiskPartition64 *partP)
488 struct DiskPartition64 *partP;
489 int pid; /* Pid for this job */
490 int jobnumb; /* Log file job number */
491 struct job *nextjob; /* Next partition on disk to salvage */
493 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
494 struct job *thisjob = 0;
495 static int numjobs = 0;
496 static int jobcount = 0;
502 char logFileName[256];
506 /* We have a partition to salvage. Copy it into thisjob */
507 thisjob = (struct job *)malloc(sizeof(struct job));
509 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
512 memset(thisjob, 0, sizeof(struct job));
513 thisjob->partP = partP;
514 thisjob->jobnumb = jobcount;
516 } else if (jobcount == 0) {
517 /* We are asking to wait for all jobs (partp == 0), yet we never
520 Log("No file system partitions named %s* found; not salvaged\n",
521 VICE_PARTITION_PREFIX);
525 if (debug || Parallel == 1) {
527 SalvageFileSys(thisjob->partP, 0);
534 /* Check to see if thisjob is for a disk that we are already
535 * salvaging. If it is, link it in as the next job to do. The
536 * jobs array has 1 entry per disk being salvaged. numjobs is
537 * the total number of disks currently being salvaged. In
538 * order to keep the jobs array compact, when a disk is
539 * completed, the highest element in the jobs array is moved
540 * down to now open slot.
542 for (j = 0; j < numjobs; j++) {
543 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
544 /* On same disk, add it to this list and return */
545 thisjob->nextjob = jobs[j]->nextjob;
546 jobs[j]->nextjob = thisjob;
553 /* Loop until we start thisjob or until all existing jobs are finished */
554 while (thisjob || (!partP && (numjobs > 0))) {
555 startjob = -1; /* No new job to start */
557 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
558 /* Either the max jobs are running or we have to wait for all
559 * the jobs to finish. In either case, we wait for at least one
560 * job to finish. When it's done, clean up after it.
562 pid = wait(&wstatus);
563 osi_Assert(pid != -1);
564 for (j = 0; j < numjobs; j++) { /* Find which job it is */
565 if (pid == jobs[j]->pid)
568 osi_Assert(j < numjobs);
569 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
570 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
573 numjobs--; /* job no longer running */
574 oldjob = jobs[j]; /* remember */
575 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
576 free(oldjob); /* free the old job */
578 /* If there is another partition on the disk to salvage, then
579 * say we will start it (startjob). If not, then put thisjob there
580 * and say we will start it.
582 if (jobs[j]) { /* Another partitions to salvage */
583 startjob = j; /* Will start it */
584 } else { /* There is not another partition to salvage */
586 jobs[j] = thisjob; /* Add thisjob */
588 startjob = j; /* Will start it */
590 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
591 startjob = -1; /* Don't start it - already running */
595 /* We don't have to wait for a job to complete */
597 jobs[numjobs] = thisjob; /* Add this job */
599 startjob = numjobs; /* Will start it */
603 /* Start up a new salvage job on a partition in job slot "startjob" */
604 if (startjob != -1) {
606 Log("Starting salvage of file system partition %s\n",
607 jobs[startjob]->partP->name);
609 /* For NT, we not only fork, but re-exec the salvager. Pass in the
610 * commands and pass the child job number via the data path.
613 nt_SalvagePartition(jobs[startjob]->partP->name,
614 jobs[startjob]->jobnumb);
615 jobs[startjob]->pid = pid;
620 jobs[startjob]->pid = pid;
626 for (fd = 0; fd < 16; fd++)
633 openlog("salvager", LOG_PID, useSyslogFacility);
637 (void)afs_snprintf(logFileName, sizeof logFileName,
639 AFSDIR_SERVER_SLVGLOG_FILEPATH,
640 jobs[startjob]->jobnumb);
641 logFile = afs_fopen(logFileName, "w");
646 SalvageFileSys1(jobs[startjob]->partP, 0);
651 } /* while ( thisjob || (!partP && numjobs > 0) ) */
653 /* If waited for all jobs to complete, now collect log files and return */
655 if (!useSyslog) /* if syslogging - no need to collect */
658 for (i = 0; i < jobcount; i++) {
659 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
660 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
661 if ((passLog = afs_fopen(logFileName, "r"))) {
662 while (fgets(buf, sizeof(buf), passLog)) {
667 (void)unlink(logFileName);
/* Salvage one partition, or a single volume on it when singleVolumeNumber is
 * non-zero, by delegating to SalvageFileSys1().
 *
 * The work runs in the current process when canfork is clear or debug is set;
 * otherwise it runs in the process for which Fork() returns 0. The
 * Wait("SalvageFileSys") call is presumably the parent side of that fork -
 * the branch structure is not fully visible here. */
676 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
678 if (!canfork || debug || Fork() == 0) {
679 SalvageFileSys1(partP, singleVolumeNumber);
680 if (canfork && !debug) {
685 Wait("SalvageFileSys");
/* Split a device path at its last OS_DIRSEPC.
 *
 * pbuffer - in/out: holds the full device path on entry; the component after
 *           the last separator is moved to the front of this buffer.
 * wpath   - out buffer supplied by the caller; presumably receives the
 *           directory part taken from the local copy `pbuf` (the lines that
 *           write it are not visible in this excerpt).
 *
 * The final move uses memmove() rather than strcpy(): source (ptr + 1) and
 * destination (pbuffer) are overlapping regions of the same array, for which
 * strcpy() has undefined behaviour. */
689 get_DevName(char *pbuffer, char *wpath)
691 char pbuf[128], *ptr;
692 strcpy(pbuf, pbuffer);
693 ptr = (char *)strrchr(pbuf, OS_DIRSEPC);
699 ptr = (char *)strrchr(pbuffer, OS_DIRSEPC);
701 memmove(pbuffer, ptr + 1, strlen(ptr + 1) + 1);
/* Salvage a whole partition, or the volume group of singleVolumeNumber when
 * that argument is non-zero. All per-run state lives in a local SalvInfo
 * (l_salvinfo), zeroed at the start.
 *
 * Visible sequence:
 *  - record partition, device and path information in salvinfo;
 *  - single-volume mode: connect to the fileserver unless running as the
 *    salvageserver, set useFSYNC, AskOffline the volume and (demand-attach
 *    builds) LockVolume it; otherwise clear useFSYNC and VLockPartition;
 *  - remove leftover "salvage.inodes." / "salvage.temp." files from the
 *    partition directory;
 *  - create the temporary inode list file, unlink it straight away, and fill
 *    it through GetInodeSummary();
 *  - GetVolumeSummary(), then walk inodeSummary[] grouped by RWvolumeId,
 *    deleting header files that have no inodes and calling
 *    SalvageVolumeGroup() once per group;
 *  - in single-volume, non-Testing runs, bring volumes back online with
 *    AskOnline()/AskDelete(), starting with singleVolumeNumber;
 *  - close the inode file.
 *
 * The retry logic guarded by VOL_MAX_CHECKOUT_RETRIES aborts after too many
 * races with fileserver restarts. */
708 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
711 char inodeListPath[256];
712 FD_t inodeFile = INVALID_FD;
713 static char tmpDevName[100];
714 static char wpath[100];
715 struct VolumeSummary *vsp, *esp;
719 struct SalvInfo l_salvinfo;
720 struct SalvInfo *salvinfo = &l_salvinfo;
723 memset(salvinfo, 0, sizeof(*salvinfo));
726 if (inodeFile != INVALID_FD) {
728 inodeFile = INVALID_FD;
730 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
731 Abort("Raced too many times with fileserver restarts while trying to "
732 "checkout/lock volumes; Aborted\n");
734 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
736 /* unlock all previous volume locks, since we're about to lock them
738 VLockFileReinit(&partP->volLockFile);
740 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
742 salvinfo->fileSysPartition = partP;
743 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
744 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
747 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
748 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
749 name = partP->devName;
751 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
752 strcpy(tmpDevName, partP->devName);
753 name = get_DevName(tmpDevName, wpath);
754 salvinfo->fileSysDeviceName = name;
755 salvinfo->filesysfulldev = wpath;
758 if (singleVolumeNumber) {
759 #if !(defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL))
760 /* only non-DAFS locks the partition when salvaging a single volume;
761 * DAFS will lock the individual volumes in the VG */
762 VLockPartition(partP->name);
763 #endif /* !(AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL) */
767 /* salvageserver already setup fssync conn for us */
768 if ((programType != salvageServer) && !VConnectFS()) {
769 Abort("Couldn't connect to file server\n");
772 salvinfo->useFSYNC = 1;
773 AskOffline(salvinfo, singleVolumeNumber);
774 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
775 if (LockVolume(salvinfo, singleVolumeNumber)) {
778 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
781 salvinfo->useFSYNC = 0;
782 VLockPartition(partP->name);
786 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
789 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
790 partP->name, name, (Testing ? "(READONLY mode)" : ""));
792 Log("***Forced salvage of all volumes on this partition***\n");
797 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
804 osi_Assert((dirp = opendir(salvinfo->fileSysPath)) != NULL);
805 while ((dp = readdir(dirp))) {
806 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
807 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
809 Log("Removing old salvager temp files %s\n", dp->d_name);
810 strcpy(npath, salvinfo->fileSysPath);
811 strcat(npath, OS_DIRSEP);
812 strcat(npath, dp->d_name);
818 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
820 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
821 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
823 snprintf(inodeListPath, 255, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
827 inodeFile = OS_OPEN(inodeListPath, O_RDWR|O_TRUNC|O_CREAT, 0666);
828 if (inodeFile == INVALID_FD) {
829 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
832 /* Using nt_unlink here since we're really using the delete on close
833 * semantics of unlink. In most places in the salvager, we really do
834 * mean to unlink the file at that point. Those places have been
835 * modified to actually do that so that the NT crt can be used there.
837 * jaltman - On NT delete on close cannot be applied to a file while the
838 * process has an open file handle that does not have DELETE file
839 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
840 * delete privileges. As a result the nt_unlink() call will always
843 code = nt_unlink(inodeListPath);
845 code = unlink(inodeListPath);
848 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
851 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
853 if (singleVolumeNumber) {
854 /* the volume group -- let alone the volume -- does not exist,
855 * but we checked it out, so give it back to the fileserver */
856 AskDelete(salvinfo, singleVolumeNumber);
860 salvinfo->inodeFd = inodeFile;
861 if (salvinfo->inodeFd == INVALID_FD)
862 Abort("Temporary file %s is missing...\n", inodeListPath);
863 OS_SEEK(salvinfo->inodeFd, 0L, SEEK_SET);
864 if (ListInodeOption) {
865 PrintInodeList(salvinfo);
866 if (singleVolumeNumber) {
867 /* We've checked out the volume from the fileserver, and we need
868 * to give it back. We don't know if the volume exists or not,
869 * so we don't know whether to AskOnline or not. Try to determine
870 * if the volume exists by trying to read the volume header, and
871 * AskOnline if it is readable. */
872 MaybeAskOnline(salvinfo, singleVolumeNumber);
876 /* enumerate volumes in the partition.
877 * figure out sets of read-only + rw volumes.
878 * salvage each set, read-only volumes first, then read-write.
879 * Fix up inodes on last volume in set (whether it is read-write
882 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
886 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
887 i < salvinfo->nVolumesInInodeFile; i = j) {
888 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
890 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
892 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
893 struct VolumeSummary *tsp;
894 /* Scan volume list (from partition root directory) looking for the
895 * current rw volume number in the volume list from the inode scan.
896 * If there is one here that is not in the inode volume list,
898 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
900 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
902 /* Now match up the volume summary info from the root directory with the
903 * entry in the volume list obtained from scanning inodes */
904 salvinfo->inodeSummary[j].volSummary = NULL;
905 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
906 if (tsp->header.id == vid) {
907 salvinfo->inodeSummary[j].volSummary = tsp;
913 /* Salvage the group of volumes (several read-only + 1 read/write)
914 * starting with the current read-only volume we're looking at.
916 SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
919 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
920 for (; vsp < esp; vsp++) {
922 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
925 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
926 RemoveTheForce(salvinfo->fileSysPath);
928 if (!Testing && singleVolumeNumber) {
930 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
931 /* unlock vol headers so the fs can attach them when we AskOnline */
932 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
933 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
935 /* Step through the volumeSummary list and set all volumes on-line.
936 * Most volumes were taken off-line in GetVolumeSummary.
937 * If a volume was deleted, don't tell the fileserver anything, since
938 * we already told the fileserver the volume was deleted back when we
939 * destroyed the volume header.
940 * Also, make sure we bring the singleVolumeNumber back online first.
943 for (j = 0; j < salvinfo->nVolumes; j++) {
944 if (salvinfo->volumeSummaryp[j].header.id == singleVolumeNumber) {
946 if (!salvinfo->volumeSummaryp[j].deleted) {
947 AskOnline(salvinfo, singleVolumeNumber);
953 /* If singleVolumeNumber is not in our volumeSummary, it means that
954 * at least one other volume in the VG is on the partition, but the
955 * RW volume is not. We've already AskOffline'd it by now, though,
956 * so make sure we don't still have the volume checked out. */
957 AskDelete(salvinfo, singleVolumeNumber);
960 for (j = 0; j < salvinfo->nVolumes; j++) {
961 if (salvinfo->volumeSummaryp[j].header.id != singleVolumeNumber) {
962 if (!salvinfo->volumeSummaryp[j].deleted) {
963 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
969 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
970 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
973 OS_CLOSE(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
/* Dispose of a volume header file that has no inode data behind it.
 *
 * salvinfo - supplies the partition path, the partition handle and useFSYNC.
 * vsp      - summary entry naming the header file (fileName) and carrying the
 *            volume id and parent id.
 *
 * Logs the finding (worded as "would have been deleted" under Testing), asks
 * VDestroyVolumeDiskHeader() to remove the header, then unlinks the path
 * itself; ENOENT from that unlink is tolerated. With useFSYNC set the volume
 * is finally reported through AskDelete(). */
977 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
980 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, vsp->fileName);
983 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
986 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
988 Log("Error %ld destroying volume disk header for volume %lu\n",
989 afs_printable_int32_ld(code),
990 afs_printable_uint32_lu(vsp->header.id));
993 /* make sure we actually delete the fileName file; ENOENT
994 * is fine, since VDestroyVolumeDiskHeader probably already
996 if (unlink(path) && errno != ENOENT) {
997 Log("Unable to unlink %s (errno = %d)\n", path, errno);
999 if (salvinfo->useFSYNC) {
1000 AskDelete(salvinfo, vsp->header.id);
/* Ordering function over struct ViceInodeInfo with a qsort-style signature:
 * both arguments are const void pointers to the two entries being compared.
 *
 * When either entry is a special inode (vnodeNumber == INODESPECIAL) the
 * entries are first ordered by read-write volume id: parentId for a special
 * inode, volumeId for an ordinary one. Two special inodes with the same
 * volumeId are then ordered by special.type.
 *
 * Otherwise ordering is by volumeId, then vnodeNumber, then
 * vnodeUniquifier and inodeDataVersion. Per the comment below, those last two
 * comparisons are reversed so the most desirable of several similar inodes
 * sorts first. The AFS_3DISPARES / AFS_SGI_EXMAG sections special-case a
 * value above 90% of the counter range paired with one below 10% -
 * presumably to cope with counter wrap-around; the branch bodies are not
 * visible here. */
1008 CompareInodes(const void *_p1, const void *_p2)
1010 const struct ViceInodeInfo *p1 = _p1;
1011 const struct ViceInodeInfo *p2 = _p2;
1012 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1013 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
1014 VolumeId p1rwid, p2rwid;
1016 (p1->u.vnode.vnodeNumber ==
1017 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
1019 (p2->u.vnode.vnodeNumber ==
1020 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1021 if (p1rwid < p2rwid)
1023 if (p1rwid > p2rwid)
1025 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1026 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
1027 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1028 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1029 if (p1->u.vnode.volumeId == p1rwid)
1031 if (p2->u.vnode.volumeId == p2rwid)
1033 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1035 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1036 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1037 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1039 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1041 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1043 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1045 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1047 /* The following tests are reversed, so that the most desirable
1048 * of several similar inodes comes first */
1049 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1050 #ifdef AFS_3DISPARES
1051 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1052 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1055 #ifdef AFS_SGI_EXMAG
1056 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1057 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1062 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1063 #ifdef AFS_3DISPARES
1064 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1065 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1068 #ifdef AFS_SGI_EXMAG
1069 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1070 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1075 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1076 #ifdef AFS_3DISPARES
1077 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1078 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1081 #ifdef AFS_SGI_EXMAG
1082 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1083 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1088 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1089 #ifdef AFS_3DISPARES
1090 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1091 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1094 #ifdef AFS_SGI_EXMAG
1095 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1096 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/* Summarise the leading run of entries in `ip` that share ip[0]'s volumeId.
 *
 * ip        - first entry of the run.
 * maxInodes - upper bound on how many entries may be examined.
 * summary   - out: volumeId, RWvolumeId, nInodes, nSpecialInodes and
 *             maxUniquifier are filled in.
 *
 * RWvolumeId starts as the volume's own id and is replaced by parentId when a
 * special inode (vnodeNumber == INODESPECIAL) is seen. maxUniquifier tracks
 * the largest vnodeUniquifier compared in the loop. */
1105 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1106 struct InodeSummary *summary)
1108 VolumeId volume = ip->u.vnode.volumeId;
1109 VolumeId rwvolume = volume;
1114 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1116 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1118 rwvolume = ip->u.special.parentId;
1119 /* This isn't quite right, as there could (in error) be different
1120 * parent inodes in different special vnodes */
1122 if (maxunique < ip->u.vnode.vnodeUniquifier)
1123 maxunique = ip->u.vnode.vnodeUniquifier;
1127 summary->volumeId = volume;
1128 summary->RWvolumeId = rwvolume;
1129 summary->nInodes = n;
1130 summary->nSpecialInodes = nSpecial;
1131 summary->maxUniquifier = maxunique;
/* Inode filter for single-volume salvages: non-zero when `inodeinfo` belongs
 * to singleVolumeNumber. A special inode (vnodeNumber == INODESPECIAL) is
 * matched on its parentId, any other inode on its volumeId. `rock` is not
 * used in the visible body. */
1135 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1137 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1138 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1139 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1144 * Collect list of inodes in file named by path. If a truly fatal error,
1145 unlink the file and abort. For lesser errors, return -1. The file will
1146 * be unlinked by the caller.
1149 GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolumeNumber)
1153 struct ViceInodeInfo *ip, *ip_save;
1154 struct InodeSummary summary;
1155 char summaryFileName[50];
1156 FD_t summaryFile = INVALID_FD;
1158 char *dev = salvinfo->fileSysPath;
1159 char *wpath = salvinfo->fileSysPath;
1161 char *dev = salvinfo->fileSysDeviceName;
1162 char *wpath = salvinfo->filesysfulldev;
1164 char *part = salvinfo->fileSysPath;
1167 afs_sfsize_t st_size;
1169 /* This file used to come from vfsck; cobble it up ourselves now... */
1171 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1172 singleVolumeNumber ? OnlyOneVolume : 0,
1173 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1175 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1178 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1180 if (forceSal && !ForceSalvage) {
1181 Log("***Forced salvage of all volumes on this partition***\n");
1184 OS_SEEK(inodeFile, 0L, SEEK_SET);
1185 salvinfo->inodeFd = inodeFile;
1186 if (salvinfo->inodeFd == INVALID_FD ||
1187 (st_size = OS_SIZE(salvinfo->inodeFd)) == -1) {
1188 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1190 tdir = (tmpdir ? tmpdir : part);
1192 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1193 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp."));
1195 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1196 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1198 summaryFile = OS_OPEN(summaryFileName, O_RDWR|O_APPEND|O_CREAT, 0666);
1199 if (summaryFile == INVALID_FD) {
1200 Abort("Unable to create inode summary file\n");
1204 /* Using nt_unlink here since we're really using the delete on close
1205 * semantics of unlink. In most places in the salvager, we really do
1206 * mean to unlink the file at that point. Those places have been
1207 * modified to actually do that so that the NT crt can be used there.
1209 * jaltman - As commented elsewhere, this cannot work because fopen()
1210 * does not open files with DELETE and FILE_SHARE_DELETE.
1212 code = nt_unlink(summaryFileName);
1214 code = unlink(summaryFileName);
1217 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1220 if (!canfork || debug || Fork() == 0) {
1221 int nInodes = st_size / sizeof(struct ViceInodeInfo);
1223 OS_CLOSE(summaryFile);
1224 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1225 RemoveTheForce(salvinfo->fileSysPath);
1227 struct VolumeSummary *vsp;
1230 GetVolumeSummary(salvinfo, singleVolumeNumber);
1232 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1234 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1237 Log("%s vice inodes on %s; not salvaged\n",
1238 singleVolumeNumber ? "No applicable" : "No", dev);
1241 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1243 OS_CLOSE(summaryFile);
1245 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1248 if (OS_READ(salvinfo->inodeFd, ip, st_size) != st_size) {
1249 OS_CLOSE(summaryFile);
1250 Abort("Unable to read inode table; %s not salvaged\n", dev);
1252 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1253 if (OS_SEEK(salvinfo->inodeFd, 0, SEEK_SET) == -1
1254 || OS_WRITE(salvinfo->inodeFd, ip, st_size) != st_size) {
1255 OS_CLOSE(summaryFile);
1256 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1261 CountVolumeInodes(ip, nInodes, &summary);
1262 if (OS_WRITE(summaryFile, &summary, sizeof(summary)) != sizeof(summary)) {
1263 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1264 OS_CLOSE(summaryFile);
1267 summary.index += (summary.nInodes);
1268 nInodes -= summary.nInodes;
1269 ip += summary.nInodes;
1272 ip = ip_save = NULL;
1273 /* Following fflush is not fclose, because if it was debug mode would not work */
1274 if (OS_SYNC(summaryFile) == -1) {
1275 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1276 OS_CLOSE(summaryFile);
1279 if (canfork && !debug) {
1284 if (Wait("Inode summary") == -1) {
1285 OS_CLOSE(summaryFile);
1286 Exit(1); /* salvage of this partition aborted */
1290 st_size = OS_SIZE(summaryFile);
1291 osi_Assert(st_size >= 0);
1294 salvinfo->inodeSummary = (struct InodeSummary *)malloc(st_size);
1295 osi_Assert(salvinfo->inodeSummary != NULL);
1296 /* For GNU we need to do lseek to get the file pointer moved. */
1297 osi_Assert(OS_SEEK(summaryFile, 0, SEEK_SET) == 0);
1298 ret = OS_READ(summaryFile, salvinfo->inodeSummary, st_size);
1299 osi_Assert(ret == st_size);
1301 salvinfo->nVolumesInInodeFile = st_size / sizeof(struct InodeSummary);
1302 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1303 salvinfo->inodeSummary[i].volSummary = NULL;
1305 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)st_size);
1306 OS_CLOSE(summaryFile);
/*
 * Ordering function over struct VolumeSummary entries, with the
 * (const void *, const void *) signature that qsort() expects.
 * Entries are compared by header.parent first (ascending).  For entries
 * sharing a parent, the two tests below single out the read-write volume
 * (the entry whose header.id equals header.parent); two read-only entries
 * are ordered by ascending header.id.  No visible return yields 0.
 * NOTE(review): the statements controlled by the two rw-volume tests are
 * not present in this listing; presumably they return -1 and 1 so that
 * the rw volume sorts first -- confirm against the complete file.
 */
1310 /* Comparison routine for volume sort.
1311 This is setup so that a read-write volume comes immediately before
1312 any read-only clones of that volume */
1314 CompareVolumes(const void *_p1, const void *_p2)
1316 const struct VolumeSummary *p1 = _p1;
1317 const struct VolumeSummary *p2 = _p2;
1318 if (p1->header.parent != p2->header.parent)
1319 return p1->header.parent < p2->header.parent ? -1 : 1;
1320 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1322 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1324 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
/*
 * Overview of the visible logic.  The fileserver is only consulted when
 * the build defines both FSSYNC_BUILD_CLIENT and AFS_DEMAND_ATTACH_FS,
 * programType is salvageServer and singleVolumeNumber is non-zero.
 *  - FSYNC_VGCQuery() is issued for the volume.  While it fails with
 *    reason FSYNC_PART_SCANNING the query is repeated after a sleep that
 *    grows by one second per attempt and is capped at 10 seconds.
 *  - A failure with reason FSYNC_UNKNOWN_VOLID is treated as an empty
 *    volume group whose rw id is singleVolumeNumber.
 *  - If the reported rw id differs from singleVolumeNumber, a salvage of
 *    the whole group is requested through SALVSYNC_LinkVolume() (when
 *    SALVSYNC_BUILD_CLIENT is defined) and the process exits with
 *    SALSRV_EXIT_VOLGROUP_LINK.
 *  - Otherwise VOL_VG_MAX_VOLS summaries are allocated.  For each child
 *    id (zero entries take a separate branch that is not shown here) the
 *    volume is taken offline and locked unless it is singleVolumeNumber,
 *    its disk header is read and converted into the current summary slot,
 *    and salvinfo->nVolumes is incremented.  The filled slots are then
 *    sorted with qsort().
 * NOTE(review): braces, return statements and some error branches of this
 * function are missing from this listing; confirm the control flow against
 * the complete file before changing it.
 */
1328 * Gleans volumeSummary information by asking the fileserver
1330 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1331 * salvaging a whole partition
1333 * @return whether we obtained the volume summary information or not
1334 * @retval 0 success; we obtained the volume summary information
1335 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1337 * @retval 1 we did not get the volume summary information; either the
1338 * fileserver responded with an error, or we are not supposed to
1339 * ask the fileserver for the information (e.g. we are salvaging
1340 * the entire partition or we are not the salvageserver)
1342 * @note for non-DAFS, always returns 1
1345 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1348 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1349 if (programType == salvageServer) {
1350 if (singleVolumeNumber) {
1351 FSSYNC_VGQry_response_t q_res;
1353 struct VolumeSummary *vsp;
1355 struct VolumeDiskHeader diskHdr;
1357 memset(&res, 0, sizeof(res));
1359 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1362 * We must wait for the partition to finish scanning before
1363 * can continue, since we will not know if we got the entire
1364 * VG membership unless the partition is fully scanned.
1365 * We could, in theory, just scan the partition ourselves if
1366 * the VG cache is not ready, but we would be doing the exact
1367 * same scan the fileserver is doing; it will almost always
1368 * be faster to wait for the fileserver. The only exceptions
1369 * are if the partition does not take very long to scan, and
1370 * in that case it's fast either way, so who cares?
1372 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1373 Log("waiting for fileserver to finish scanning partition %s...\n",
1374 salvinfo->fileSysPartition->name);
1376 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1377 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1378 * just so small partitions don't need to wait over 10
1379 * seconds every time, and large partitions are generally
1380 * polled only once every ten seconds. */
1381 sleep((i > 10) ? (i = 10) : i);
1383 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1387 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1388 /* This can happen if there's no header for the volume
1389 * we're salvaging, or no headers exist for the VG (if
1390 * we're salvaging an RW). Act as if we got a response
1391 * with no VG members. The headers may be created during
1392 * salvaging, if there are inodes in this VG. */
1394 memset(&q_res, 0, sizeof(q_res));
1395 q_res.rw = singleVolumeNumber;
1399 Log("fileserver refused VGCQuery request for volume %lu on "
1400 "partition %s, code %ld reason %ld\n",
1401 afs_printable_uint32_lu(singleVolumeNumber),
1402 salvinfo->fileSysPartition->name,
1403 afs_printable_int32_ld(code),
1404 afs_printable_int32_ld(res.hdr.reason));
1408 if (q_res.rw != singleVolumeNumber) {
1409 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1410 afs_printable_uint32_lu(singleVolumeNumber),
1411 afs_printable_uint32_lu(q_res.rw));
1412 #ifdef SALVSYNC_BUILD_CLIENT
1413 if (SALVSYNC_LinkVolume(q_res.rw,
1415 salvinfo->fileSysPartition->name,
1417 Log("schedule request failed\n");
1419 #endif /* SALVSYNC_BUILD_CLIENT */
1420 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1423 salvinfo->volumeSummaryp = calloc(VOL_VG_MAX_VOLS, sizeof(struct VolumeSummary));
1424 osi_Assert(salvinfo->volumeSummaryp != NULL);
1426 salvinfo->nVolumes = 0;
1427 vsp = salvinfo->volumeSummaryp;
1429 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1430 char name[VMAXPATHLEN];
1432 if (!q_res.children[i]) {
1436 /* AskOffline for singleVolumeNumber was called much earlier */
1437 if (q_res.children[i] != singleVolumeNumber) {
1438 AskOffline(salvinfo, q_res.children[i]);
1439 if (LockVolume(salvinfo, q_res.children[i])) {
1445 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1447 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1448 afs_printable_uint32_lu(q_res.children[i]));
1453 DiskToVolumeHeader(&vsp->header, &diskHdr);
1454 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1455 vsp->fileName = ToString(name);
1456 salvinfo->nVolumes++;
1460 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1465 Log("Cannot get volume summary from fileserver; falling back to scanning "
1466 "entire partition\n");
1469 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
/*
 * Header-walk callback used for counting.  The only state it touches in
 * the lines shown is the caller-supplied rock, which is reinterpreted as
 * a pointer to an afs_int32 running count; dp, name, hdr and last are
 * documented below as unused.
 * NOTE(review): the statement that updates *nvols and the return value
 * are not present in this listing -- confirm against the complete file.
 */
1474 * count how many volume headers are found by VWalkVolumeHeaders.
1476 * @param[in] dp the disk partition (unused)
1477 * @param[in] name full path to the .vol header (unused)
1478 * @param[in] hdr the header data (unused)
1479 * @param[in] last whether this is the last try or not (unused)
1480 * @param[in] rock actually an afs_int32*; the running count of how many
1481 * volumes we have found
1486 CountHeader(struct DiskPartition64 *dp, const char *name,
1487 struct VolumeDiskHeader *hdr, int last, void *rock)
1489 afs_int32 *nvols = (afs_int32 *)rock;
/*
 * Context handed (as the opaque rock pointer) to the header-walk
 * callbacks RecordHeader and UnlinkHeader.  GetVolumeSummary fills in
 * singleVolumeNumber, vsp, nVolumes, totalVolumes and salvinfo before the
 * walk and reads nVolumes back afterwards; RecordHeader compares nVolumes
 * against totalVolumes and copies each accepted summary through vsp.
 * NOTE(review): the code that sets and tests the retry member is not
 * visible in this listing.
 */
1495 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
1498 struct SalvageScanParams {
1499 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1500 * vol id of the VG we're salvaging */
1501 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1502 * we're filling in */
1503 afs_int32 nVolumes; /**< # of vols we've encountered */
1504 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1505 * # of vols we've alloc'd memory for) */
1506 int retry; /**< do we need to retry vol lock/checkout? */
1507 struct SalvInfo *salvinfo; /**< salvage job info */
/*
 * Overview of the visible logic:
 *  - The disk header is converted with DiskToVolumeHeader() into a local
 *    struct VolumeSummary.
 *  - If a single volume was requested and this header has that id but a
 *    different parent, a salvageServer run built with
 *    SALVSYNC_BUILD_CLIENT schedules the parent through
 *    SALVSYNC_LinkVolume() and exits with SALSRV_EXIT_VOLGROUP_LINK; the
 *    other visible branch logs that the volume is read-only and not
 *    salvaged.
 *  - For headers that belong to this salvage (partition salvage, or id or
 *    parent equal to singleVolumeNumber) the base name of the file is
 *    compared with the VFORMAT name derived from header.id; a mismatch is
 *    treated as a badly named header.
 *  - When the name is good, or this is the last attempt, group members
 *    other than singleVolumeNumber are taken offline with AskOffline();
 *    on demand-attach builds LockVolume() is also called.
 *  - On the last attempt a badly named header is logged (unless Showmode
 *    is set) as deleted, or as it would have been when Testing is set.
 *  - Before the summary is copied into params->vsp, params->nVolumes is
 *    checked against params->totalVolumes and Abort() is called if it is
 *    larger.
 * NOTE(review): braces, return statements, the declaration of badname and
 * the update of params->nVolumes are missing from this listing; confirm
 * the control flow against the complete file before changing it.
 */
1511 * records volume summary info found from VWalkVolumeHeaders.
1513 * Found volumes are also taken offline if they are in the specific volume
1514 * group we are looking for.
1516 * @param[in] dp the disk partition
1517 * @param[in] name full path to the .vol header
1518 * @param[in] hdr the header data
1519 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1520 * @param[in] rock actually a struct SalvageScanParams*, containing the
1521 * information needed to record the volume summary data
1523 * @return operation status
1525 * @retval -1 volume locking raced with fileserver restart; checking out
1526 * and locking volumes needs to be retried
1527 * @retval 1 volume header is mis-named and should be deleted
1530 RecordHeader(struct DiskPartition64 *dp, const char *name,
1531 struct VolumeDiskHeader *hdr, int last, void *rock)
1533 char nameShouldBe[64];
1534 struct SalvageScanParams *params;
1535 struct VolumeSummary summary;
1536 VolumeId singleVolumeNumber;
1537 struct SalvInfo *salvinfo;
1539 params = (struct SalvageScanParams *)rock;
1541 singleVolumeNumber = params->singleVolumeNumber;
1542 salvinfo = params->salvinfo;
1544 DiskToVolumeHeader(&summary.header, hdr);
1546 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1547 && summary.header.parent != singleVolumeNumber) {
1549 if (programType == salvageServer) {
1550 #ifdef SALVSYNC_BUILD_CLIENT
1551 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1552 summary.header.id, summary.header.parent);
1553 if (SALVSYNC_LinkVolume(summary.header.parent,
1557 Log("schedule request failed\n");
1560 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1563 Log("%u is a read-only volume; not salvaged\n",
1564 singleVolumeNumber);
1569 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1570 || summary.header.parent == singleVolumeNumber) {
1572 /* check if the header file is incorrectly named */
1574 const char *base = strrchr(name, OS_DIRSEPC);
1581 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1582 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1585 if (strcmp(nameShouldBe, base)) {
1586 /* .vol file has wrong name; retry/delete */
1590 if (!badname || last) {
1591 /* only offline the volume if the header is good, or if this is
1592 * the last try looking at it; avoid AskOffline'ing the same vol
1595 if (singleVolumeNumber
1596 && summary.header.id != singleVolumeNumber) {
1597 /* don't offline singleVolumeNumber; we already did that
1600 AskOffline(salvinfo, summary.header.id);
1602 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
1604 /* don't lock the volume if the header is bad, since we're
1605 * about to delete it anyway. */
1606 if (LockVolume(salvinfo, summary.header.id)) {
1611 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
1615 if (last && !Showmode) {
1616 Log("Volume header file %s is incorrectly named (should be %s "
1617 "not %s); %sdeleted (it will be recreated later, if "
1618 "necessary)\n", name, nameShouldBe, base,
1619 (Testing ? "it would have been " : ""));
1624 summary.fileName = ToString(base);
1627 if (params->nVolumes > params->totalVolumes) {
1628 /* We found more volumes than we found on the first partition walk;
1629 * apparently something created a volume while we were
1630 * partition-salvaging, or we found more than 20 vols when salvaging a
1631 * particular volume. Abort if we detect this, since other programs
1632 * supposed to not touch the partition while it is partition-salvaging,
1633 * and we shouldn't find more than 20 vols in a VG.
1635 Abort("Found %ld vol headers, but should have found at most %ld! "
1636 "Make sure the volserver/fileserver are not running at the "
1637 "same time as a partition salvage\n",
1638 afs_printable_int32_ld(params->nVolumes),
1639 afs_printable_int32_ld(params->totalVolumes));
1642 memcpy(params->vsp, &summary, sizeof(summary));
/*
 * Header-walk callback that decides whether a rejected .vol file is
 * removed.  Two cases are visible: a header that could not be read at all
 * (logged as not a legitimate volume header file), and a header that was
 * read but rejected while params->singleVolumeNumber is zero, i.e. during
 * a partition salvage.  The file is removed with unlink(name) only when
 * dounlink is set, and a failing unlink() is logged with errno.
 * NOTE(review): the declaration of dounlink and the statements that set
 * it (including how Testing affects it) are missing from this listing --
 * confirm against the complete file.
 */
1650 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1652 * If the header could not be read in at all, the header is always unlinked.
1653 * If instead RecordHeader said the header was bad (that is, the header file
1654 * is mis-named), we only unlink if we are doing a partition salvage, as
1655 * opposed to salvaging a specific volume group.
1657 * @param[in] dp the disk partition
1658 * @param[in] name full path to the .vol header
1659 * @param[in] hdr header data, or NULL if the header could not be read
1660 * @param[in] rock actually a struct SalvageScanParams*, with some information
1664 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1665 struct VolumeDiskHeader *hdr, void *rock)
1667 struct SalvageScanParams *params;
1670 params = (struct SalvageScanParams *)rock;
1673 /* no header; header is too bogus to read in at all */
1675 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1681 } else if (!params->singleVolumeNumber) {
1682 /* We were able to read in a header, but RecordHeader said something
1683 * was wrong with it. We only unlink those if we are doing a partition
1690 if (dounlink && unlink(name)) {
1691 Log("Error %d while trying to unlink %s\n", errno, name);
/*
 * Builds salvinfo->volumeSummaryp and salvinfo->nVolumes.
 *
 * AskVolumeSummary() is tried first; its result decides whether the
 * partition has to be scanned at all or whether volume checkout must be
 * retried.  For a scan, the summary array is sized either by a first
 * VWalkVolumeHeaders() pass with CountHeader (partition salvage) or to
 * VOL_VG_MAX_VOLS (single volume group).  A second VWalkVolumeHeaders()
 * pass then uses RecordHeader/UnlinkHeader with a struct SalvageScanParams
 * as its rock, and the recorded summaries are sorted with qsort().
 *
 * The rock passed to the second walk must be the address of the local
 * params structure initialised just above the call; the listing carried a
 * mis-encoded token in that position, which is restored to &params here.
 *
 * NOTE(review): braces, return statements and the initialisation of
 * params.retry are missing from this listing; confirm the control flow
 * against the complete file before changing it.
 */
1696 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1697 * the fileserver for VG information, or by scanning the /vicepX partition.
1699 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1700 * are salvaging, or 0 if this is a partition
1703 * @return operation status
1705 * @retval -1 we raced with a fileserver restart; checking out and locking
1706 * volumes must be retried
1709 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1711 afs_int32 nvols = 0;
1712 struct SalvageScanParams params;
1715 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1717 /* we successfully got the vol information from the fileserver; no
1718 * need to scan the partition */
1722 /* we need to retry volume checkout */
1726 if (!singleVolumeNumber) {
1727 /* Count how many volumes we have in /vicepX */
1728 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1731 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1736 nvols = VOL_VG_MAX_VOLS;
1739 salvinfo->volumeSummaryp = calloc(nvols, sizeof(struct VolumeSummary));
1740 osi_Assert(salvinfo->volumeSummaryp != NULL);
1742 params.singleVolumeNumber = singleVolumeNumber;
1743 params.vsp = salvinfo->volumeSummaryp;
1744 params.nVolumes = 0;
1745 params.totalVolumes = nvols;
1747 params.salvinfo = salvinfo;
1749 /* walk the partition directory of volume headers and record the info
1750 * about them; unlinking invalid headers */
1751 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1752 UnlinkHeader, &params);
1754 /* we apparently need to retry checking-out/locking volumes */
1758 Abort("Failed to get volume header summary\n");
1760 salvinfo->nVolumes = params.nVolumes;
1762 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
/*
 * Scans the special inodes of each of the nVols inode summaries, in
 * order, and returns the inodeNumber of the first one whose
 * u.special.type is VI_LINKTABLE.  Each volume starts at
 * allInodes + isp[i].index and its first nSpecialInodes entries are
 * examined.
 * NOTE(review): the value returned when no link table inode is found is
 * not present in this listing -- confirm against the complete file.
 */
1768 /* Find the link table. This should be associated with the RW volume or, if
1769 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
1772 FindLinkHandle(struct InodeSummary *isp, int nVols,
1773 struct ViceInodeInfo *allInodes)
1776 struct ViceInodeInfo *ip;
1778 for (i = 0; i < nVols; i++) {
1779 ip = allInodes + isp[i].index;
1780 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1781 if (ip[j].u.special.type == VI_LINKTABLE)
1782 return ip[j].inodeNumber;
/*
 * Creates and initialises the link table for the volume group of isp.
 *
 * When ino is not a valid inode a new special inode of type VI_LINKTABLE
 * is created with IH_CREATE(); failure to obtain one aborts.  The handle
 * salvinfo->VGLinkH is then initialised for that inode and opened, the
 * file is truncated to sizeof(struct versionStamp) + sizeof(short), and a
 * version stamp carrying LINKTABLEMAGIC / LINKTABLEVERSION is written at
 * offset 0.  If the volume already has a summary (a V*.vol header), its
 * header.linkTable is pointed at the inode as well.
 *
 * Every failure path calls Abort().  The message for a failed stamp write
 * names the write rather than repeating the truncate message, so the two
 * failures can be told apart in the log.
 *
 * NOTE(review): a few lines (braces, the declaration of fdP, parts of
 * conditions) are missing from this listing; confirm against the complete
 * file before changing the control flow.
 */
1789 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1791 struct versionStamp version;
1794 if (!VALID_INO(ino))
1796 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
1797 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1798 if (!VALID_INO(ino))
1800 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1801 isp->RWvolumeId, errno);
1802 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1803 fdP = IH_OPEN(salvinfo->VGLinkH);
1805 Abort("Can't open link table for volume %u (error = %d)\n",
1806 isp->RWvolumeId, errno);
1808 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1809 Abort("Can't truncate link table for volume %u (error = %d)\n",
1810 isp->RWvolumeId, errno);
1812 version.magic = LINKTABLEMAGIC;
1813 version.version = LINKTABLEVERSION;
1815 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1817 Abort("Can't write link table for volume %u (error = %d)\n",
1818 isp->RWvolumeId, errno);
1820 FDH_REALLYCLOSE(fdP);
1822 /* If the volume summary exits (i.e., the V*.vol header file exists),
1823 * then set this inode there as well.
1825 if (isp->volSummary)
1826 isp->volSummary->header.linkTable = ino;
/*
 * Body of the thread start routine handed to pthread_create() by
 * SalvageVolumeGroup (there named nt_SVG): unpacks the SVGParms_t passed
 * as arg and forwards its three members to DoSalvageVolumeGroup().
 * NOTE(review): the signature, braces and return statement of this
 * routine are missing from this listing.
 */
1835 SVGParms_t *parms = (SVGParms_t *) arg;
1836 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * Thread-based wrapper around DoSalvageVolumeGroup().  The per-volume
 * fields of salvinfo (VolumeChanged, VGLinkH, VGLinkH_cnt, VolInfo) are
 * reset, the arguments are packed into parms, and a joinable thread is
 * created with nt_SVG as its start routine.  The caller then blocks in
 * pthread_join() until that thread finishes.  Each pthread setup failure
 * is logged with the rw volume id.
 * NOTE(review): the trailing endif comment suggests this variant is the
 * AFS_NT40_ENV build, but the opening conditional, the declarations of
 * code, tid and parms, and the returns after the failure logs are missing
 * from this listing -- confirm against the complete file.
 */
1841 SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1844 pthread_attr_t tattr;
1848 /* Initialize per volume global variables, even if later code does so */
1849 salvinfo->VolumeChanged = 0;
1850 salvinfo->VGLinkH = NULL;
1851 salvinfo->VGLinkH_cnt = 0;
1852 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1854 parms.svgp_inodeSummaryp = isp;
1855 parms.svgp_count = nVols;
1856 parms.svgp_salvinfo = salvinfo;
1857 code = pthread_attr_init(&tattr);
1859 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1863 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1865 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1868 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1870 Log("Failed to create thread to salvage volume group %u\n",
1874 (void)pthread_join(tid, NULL);
1876 #endif /* AFS_NT40_ENV */
/*
 * Salvages one volume group described by the nVols InodeSummary entries
 * starting at isp.  Visible steps:
 *  - QuickCheck() is consulted unless ForceSalvage is set; this happens
 *    whenever ShowMounts is off, and with ShowMounts on only when no rw
 *    volume with special inodes is present.
 *  - When canfork is set and debug is not, the work runs in a Fork()ed
 *    child and the parent only Wait()s for it.
 *  - The inode records of all nVols volumes are read from
 *    salvinfo->inodeFd into one malloc()ed array.  allInodes is that
 *    array rebased by isp->index, so isp[i].index can be applied to it
 *    directly.
 *  - On AFS_NAMEI_ENV builds the link table inode is looked up with
 *    FindLinkHandle() and opened.  If it is missing or cannot be opened
 *    it is recreated with CreateLinkTable(), after which the inodes from
 *    index nSpecialInodes onward of every volume are given a link count
 *    of 1 through namei_SetLinkCount().
 *  - Volumes are processed from index nVols - 1 down to salvageTo, each
 *    with a pass where check is 1 followed by a pass where check is 0,
 *    through SalvageVolumeHeaderFile() and SalvageVnodes();
 *    MaybeZapVolume() is called when either reports a failure.
 *  - For every inode record IH_DEC() is called while linkCount is positive
 *    and IH_INC() while it is negative; the link table inode itself is
 *    set aside and adjusted last.
 *  - SalvageVolume() performs the directory checks on the rw volume (its
 *    guarding condition is not visible) and salvinfo->VGLinkH is released.
 * NOTE(review): the result of malloc() for the inode array is not tested
 * in the lines shown before OS_READ() stores into it -- confirm.
 * NOTE(review): braces, several conditions, the linkCount updates inside
 * the IH_DEC/IH_INC loops and the exit paths are missing from this
 * listing; confirm against the complete file before changing anything.
 */
1879 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1881 struct ViceInodeInfo *inodes, *allInodes, *ip;
1882 int i, totalInodes, size, salvageTo;
1886 int dec_VGLinkH = 0;
1888 FdHandle_t *fdP = NULL;
1890 salvinfo->VGLinkH_cnt = 0;
1891 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1892 && isp->nSpecialInodes > 0);
1893 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1894 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1897 if (ShowMounts && !haveRWvolume)
1899 if (canfork && !debug && Fork() != 0) {
1900 (void)Wait("Salvage volume group");
1903 for (i = 0, totalInodes = 0; i < nVols; i++)
1904 totalInodes += isp[i].nInodes;
1905 size = totalInodes * sizeof(struct ViceInodeInfo);
1906 inodes = (struct ViceInodeInfo *)malloc(size);
1907 allInodes = inodes - isp->index; /* this would the base of all the inodes
1908 * for the partition, if all the inodes
1909 * had been read into memory */
1911 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1913 osi_Assert(OS_READ(salvinfo->inodeFd, inodes, size) == size);
1915 /* Don't try to salvage a read write volume if there isn't one on this
1917 salvageTo = haveRWvolume ? 0 : 1;
1919 #ifdef AFS_NAMEI_ENV
1920 ino = FindLinkHandle(isp, nVols, allInodes);
1921 if (VALID_INO(ino)) {
1922 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1923 fdP = IH_OPEN(salvinfo->VGLinkH);
1925 if (!VALID_INO(ino) || fdP == NULL) {
1926 Log("%s link table for volume %u.\n",
1927 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1929 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1932 struct ViceInodeInfo *ip;
1933 CreateLinkTable(salvinfo, isp, ino);
1934 fdP = IH_OPEN(salvinfo->VGLinkH);
1935 /* Sync fake 1 link counts to the link table, now that it exists */
1937 for (i = 0; i < nVols; i++) {
1938 ip = allInodes + isp[i].index;
1939 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1940 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1947 FDH_REALLYCLOSE(fdP);
1949 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1952 /* Salvage in reverse order--read/write volume last; this way any
1953 * Inodes not referenced by the time we salvage the read/write volume
1954 * can be picked up by the read/write volume */
1955 /* ACTUALLY, that's not done right now--the inodes just vanish */
1956 for (i = nVols - 1; i >= salvageTo; i--) {
1958 struct InodeSummary *lisp = &isp[i];
1959 #ifdef AFS_NAMEI_ENV
1960 /* If only the RO is present on this partition, the link table
1961 * shows up as a RW volume special file. Need to make sure the
1962 * salvager doesn't try to salvage the non-existent RW.
1964 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1965 /* If this only special inode is the link table, continue */
1966 if (inodes->u.special.type == VI_LINKTABLE) {
1973 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1974 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1975 /* Check inodes twice. The second time do things seriously. This
1976 * way the whole RO volume can be deleted, below, if anything goes wrong */
1977 for (check = 1; check >= 0; check--) {
1979 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
1981 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
1982 if (rw && deleteMe) {
1983 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1984 * volume won't be called */
1990 if (rw && check == 1)
1992 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
1993 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
1999 /* Fix actual inode counts */
2002 Log("totalInodes %d\n",totalInodes);
2003 for (ip = inodes; totalInodes; ip++, totalInodes--) {
2004 static int TraceBadLinkCounts = 0;
2005 #ifdef AFS_NAMEI_ENV
2006 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
2007 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
2008 VGLinkH_p1 = ip->u.param[0];
2009 continue; /* Deal with this last. */
2012 if (ip->linkCount != 0 && TraceBadLinkCounts) {
2013 TraceBadLinkCounts--; /* Limit reports, per volume */
2014 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
2016 while (ip->linkCount > 0) {
2017 /* below used to assert, not break */
2019 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2020 Log("idec failed. inode %s errno %d\n",
2021 PrintInode(stmp, ip->inodeNumber), errno);
2027 while (ip->linkCount < 0) {
2028 /* these used to be asserts */
2030 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2031 Log("iinc failed. inode %s errno %d\n",
2032 PrintInode(stmp, ip->inodeNumber), errno);
2039 #ifdef AFS_NAMEI_ENV
2040 while (dec_VGLinkH > 0) {
2041 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2042 Log("idec failed on link table, errno = %d\n", errno);
2046 while (dec_VGLinkH < 0) {
2047 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2048 Log("iinc failed on link table, errno = %d\n", errno);
2055 /* Directory consistency checks on the rw volume */
2057 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2058 IH_RELEASE(salvinfo->VGLinkH);
2060 if (canfork && !debug) {
/*
 * Inspects the volume info header of each of the nVols volumes of a group
 * (the call site in DoSalvageVolumeGroup skips its own work when this
 * returns non-zero and ForceSalvage is off).  For every volume a handle is
 * initialised on vs->header.volumeInfo and a VolumeDiskData is read from
 * offset 0.  The header is accepted when the read returns the full size,
 * stamp.magic is VOLUMEINFOMAGIC, dontSalvage is DONT_SALVAGE, and both
 * needsSalvaged and destroyMe are 0.  An accepted header that still has
 * inUse set is rewritten with inUse cleared and inService set to 1.
 * The first entry is special-cased when it has no special inodes and the
 * group holds more than one volume (a phantom rw volume, per the comment
 * below).
 * NOTE(review): the return statements, the handling of a missing
 * volSummary and the release of the handle are not present in this
 * listing -- confirm against the complete file.
 */
2067 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2069 /* Check headers BEFORE forking */
2073 for (i = 0; i < nVols; i++) {
2074 struct VolumeSummary *vs = isp[i].volSummary;
2075 VolumeDiskData volHeader;
2077 /* Don't salvage just because phantom rw volume is there... */
2078 /* (If a read-only volume exists, read/write inodes must also exist) */
2079 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2083 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2084 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2085 == sizeof(volHeader)
2086 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2087 && volHeader.dontSalvage == DONT_SALVAGE
2088 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2089 if (volHeader.inUse != 0) {
2090 volHeader.inUse = 0;
2091 volHeader.inService = 1;
2093 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2094 != sizeof(volHeader)) {
/*
 * Rebuilds the in-memory volume header (tempHeader) for the volume
 * described by isp from its special inodes and, when it differs from the
 * header on record, writes a new V*.vol header file.  Visible steps:
 *  - Special inodes already referenced by the existing header are noted in
 *    goodspecial[], so that duplicates of the same type can be resolved in
 *    favour of the referenced one; skip[] marks the duplicates to ignore.
 *    If skip[] cannot be allocated the salvage continues without duplicate
 *    recovery.
 *  - Special inodes whose type is outside 1..MAXINODETYPE are reported as
 *    rubbish; inodes of non-obsolete types are recorded in tempHeader
 *    through the stuff[] table.
 *  - SalvageHeader() is called for each of the MAXINODETYPE slots; the
 *    link table slot is first pointed at salvinfo->VGLinkH when that
 *    handle holds a valid inode.
 *  - If the volume has no summary one is allocated and the header is
 *    created with VCreateVolumeDiskHeader; if the existing header differs
 *    from tempHeader it is rewritten with VWriteVolumeDiskHeader.
 *
 * The freshly allocated summary is asserted non-NULL before it is used,
 * matching the other summary allocations in this file, and the existing
 * header file name is copied into the fixed-size headerName buffer with a
 * bounded afs_snprintf() instead of strcpy().
 *
 * NOTE(review): braces, return statements, several declarations and some
 * conditions are missing from this listing; confirm the control flow
 * against the complete file before changing it.
 */
2110 /* SalvageVolumeHeaderFile
2112 * Salvage the top level V*.vol header file. Make sure the special files
2113 * exist and that there are no duplicates.
2115 * Calls SalvageHeader for each possible type of volume special file.
2119 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2120 struct ViceInodeInfo *inodes, int RW,
2121 int check, int *deleteMe)
2124 struct ViceInodeInfo *ip;
2125 int allinodesobsolete = 1;
2126 struct VolumeDiskHeader diskHeader;
2127 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2129 struct VolumeHeader tempHeader;
2130 struct afs_inode_info stuff[MAXINODETYPE];
2132 /* keeps track of special inodes that are probably 'good'; they are
2133 * referenced in the vol header, and are included in the given inodes
2138 } goodspecial[MAXINODETYPE];
2143 memset(goodspecial, 0, sizeof(goodspecial));
2145 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2147 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2149 Log("cannot allocate memory for inode skip array when salvaging "
2150 "volume %lu; not performing duplicate special inode recovery\n",
2151 afs_printable_uint32_lu(isp->volumeId));
2152 /* still try to perform the salvage; the skip array only does anything
2153 * if we detect duplicate special inodes */
2156 init_inode_info(&tempHeader, stuff);
2159 * First, look at the special inodes and see if any are referenced by
2160 * the existing volume header. If we find duplicate special inodes, we
2161 * can use this information to use the referenced inode (it's more
2162 * likely to be the 'good' one), and throw away the duplicates.
2164 if (isp->volSummary && skip) {
2165 /* use tempHeader, so we can use the stuff[] array to easily index
2166 * into the isp->volSummary special inodes */
2167 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2169 for (i = 0; i < isp->nSpecialInodes; i++) {
2170 ip = &inodes[isp->index + i];
2171 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2172 /* will get taken care of in a later loop */
2175 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2176 goodspecial[ip->u.special.type-1].valid = 1;
2177 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2182 memset(&tempHeader, 0, sizeof(tempHeader));
2183 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2184 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2185 tempHeader.id = isp->volumeId;
2186 tempHeader.parent = isp->RWvolumeId;
2188 /* Check for duplicates (inodes are sorted by type field) */
2189 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2190 ip = &inodes[isp->index + i];
2191 if (ip->u.special.type == (ip + 1)->u.special.type) {
2192 afs_ino_str_t stmp1, stmp2;
2194 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2195 /* Will be caught in the loop below */
2199 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2200 ip->u.special.type, isp->volumeId,
2201 PrintInode(stmp1, ip->inodeNumber),
2202 PrintInode(stmp2, (ip+1)->inodeNumber));
2204 if (skip && goodspecial[ip->u.special.type-1].valid) {
2205 Inode gi = goodspecial[ip->u.special.type-1].inode;
2208 Log("using special inode referenced by vol header (%s)\n",
2209 PrintInode(stmp1, gi));
2212 /* the volume header references some special inode of
2213 * this type in the inodes array; are we it? */
2214 if (ip->inodeNumber != gi) {
2216 } else if ((ip+1)->inodeNumber != gi) {
2217 /* in case this is the last iteration; we need to
2218 * make sure we check ip+1, too */
2223 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2231 for (i = 0; i < isp->nSpecialInodes; i++) {
2233 ip = &inodes[isp->index + i];
2234 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2236 Log("Rubbish header inode %s of type %d\n",
2237 PrintInode(stmp, ip->inodeNumber),
2238 ip->u.special.type);
2244 Log("Rubbish header inode %s of type %d; deleted\n",
2245 PrintInode(stmp, ip->inodeNumber),
2246 ip->u.special.type);
2247 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2248 if (skip && skip[i]) {
2249 if (orphans == ORPH_REMOVE) {
2250 Log("Removing orphan special inode %s of type %d\n",
2251 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2254 Log("Ignoring orphan special inode %s of type %d\n",
2255 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2256 /* fall through to the ip->linkCount--; line below */
2259 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2260 allinodesobsolete = 0;
2262 if (!check && ip->u.special.type != VI_LINKTABLE)
2263 ip->linkCount--; /* Keep the inode around */
2271 if (allinodesobsolete) {
2278 salvinfo->VGLinkH_cnt++; /* one for every header. */
2280 if (!RW && !check && isp->volSummary) {
2281 ClearROInUseBit(isp->volSummary);
2285 for (i = 0; i < MAXINODETYPE; i++) {
2286 if (stuff[i].inodeType == VI_LINKTABLE) {
2287 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2288 * And we may have recreated the link table earlier, so set the
2289 * RW header as well.
2291 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2292 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2296 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2300 if (isp->volSummary == NULL) {
2302 char headerName[64];
2303 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2304 (void)afs_snprintf(path, sizeof path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, headerName);
2306 Log("No header file for volume %u\n", isp->volumeId);
2310 Log("No header file for volume %u; %screating %s\n",
2311 isp->volumeId, (Testing ? "it would have been " : ""),
2313 isp->volSummary = calloc(1, sizeof(struct VolumeSummary));
/* the summary is dereferenced on the next line; fail loudly on
 * allocation failure instead of writing through a NULL pointer */
osi_Assert(isp->volSummary != NULL);
2314 isp->volSummary->fileName = ToString(headerName);
2316 writefunc = VCreateVolumeDiskHeader;
2319 char headerName[64];
2320 /* hack: these two fields are obsolete... */
2321 isp->volSummary->header.volumeAcl = 0;
2322 isp->volSummary->header.volumeMountTable = 0;
2325 (&isp->volSummary->header, &tempHeader,
2326 sizeof(struct VolumeHeader))) {
2327 /* We often remove the name before calling us, so we make a fake one up */
2328 if (isp->volSummary->fileName) {
/* bounded copy: fileName originates from a directory entry and
 * headerName is a fixed 64-byte buffer */
2329 (void)afs_snprintf(headerName, sizeof headerName, "%s", isp->volSummary->fileName);
2331 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2332 isp->volSummary->fileName = ToString(headerName);
2334 (void)afs_snprintf(path, sizeof path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, headerName);
2336 Log("Header file %s is damaged or no longer valid%s\n", path,
2337 (check ? "" : "; repairing"));
2341 writefunc = VWriteVolumeDiskHeader;
2345 memcpy(&isp->volSummary->header, &tempHeader,
2346 sizeof(struct VolumeHeader));
2349 Log("It would have written a new header file for volume %u\n",
2353 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2354 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2356 Log("Error %ld writing volume header file for volume %lu\n",
2357 afs_printable_int32_ld(code),
2358 afs_printable_uint32_lu(diskHeader.id));
2363 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2364 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader -- verify one special file of a volume header and recreate
 * its contents when they are missing or damaged.
 *
 * salvinfo  salvage job state; supplies the device and path used to create
 *           and open the inode, and receives a copy of the volume info
 *           (salvinfo->VolInfo) when sp describes the VI_VOLINFO file.
 * sp        describes the special file: where its inode number is stored,
 *           its type, expected version stamp, size, and the description
 *           used in log messages.
 * isp       inode summary of the volume (volumeId / RWvolumeId are used).
 * check     check-only flag; recreating a header while it is set is
 *           reported as an internal error below.
 * deleteMe  NOTE(review): the assignments to this out-parameter are not
 *           visible in this excerpt -- confirm against the full function.
 *
 * NOTE(review): the return statements are not visible here; the caller in
 * this file compares the result with -1.
 */
2369 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2370 struct InodeSummary *isp, int check, int *deleteMe)
2373 VolumeDiskData volumeInfo;
2374 struct versionStamp fileHeader;
/* Link-table inodes get separate treatment when AFS_NAMEI_ENV is not
 * defined (the action taken is not visible in this excerpt). */
2383 #ifndef AFS_NAMEI_ENV
2384 if (sp->inodeType == VI_LINKTABLE)
/* The volume header records no inode for this special file. */
2387 if (*(sp->inode) == 0) {
2389 Log("Missing inode in volume header (%s)\n", sp->description);
2393 Log("Missing inode in volume header (%s); %s\n", sp->description,
2394 (Testing ? "it would have recreated it" : "recreating"));
2397 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2398 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2399 if (!VALID_INO(*(sp->inode)))
2401 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2402 sp->description, errno);
/* Open the special inode so that its contents can be inspected. */
2407 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2408 fdP = IH_OPEN(specH);
2409 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2410 /* bail out early and destroy the volume */
2412 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2419 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2420 sp->description, errno);
/* The header counts as corrupted when fewer than sp->size bytes can be read
 * from offset 0 or the magic number differs from the expected stamp. */
2423 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2424 || header.fileHeader.magic != sp->stamp.magic)) {
2426 Log("Part of the header (%s) is corrupted\n", sp->description);
2427 FDH_REALLYCLOSE(fdP);
2431 Log("Part of the header (%s) is corrupted; recreating\n",
2434 /* header can be garbage; make sure we don't read garbage data from
2436 memset(&header, 0, sizeof(header));
2438 if (sp->inodeType == VI_VOLINFO
2439 && header.volumeInfo.destroyMe == DESTROY_ME) {
2442 FDH_REALLYCLOSE(fdP);
2446 if (recreate && !Testing) {
2449 ("Internal error: recreating volume header (%s) in check mode\n",
2451 nBytes = FDH_TRUNC(fdP, 0);
2453 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2454 sp->description, errno);
2456 /* The following code should be moved into vutil.c */
/* For the volume-info file, build a placeholder record: named "bogus.<id>",
 * out of service, not blessed, with a uniquifier above the largest one seen
 * and the current time as creation date. */
2457 if (sp->inodeType == VI_VOLINFO) {
2459 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2460 header.volumeInfo.stamp = sp->stamp;
2461 header.volumeInfo.id = isp->volumeId;
2462 header.volumeInfo.parentId = isp->RWvolumeId;
2463 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2464 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2465 isp->volumeId, isp->volumeId);
2466 header.volumeInfo.inService = 0;
2467 header.volumeInfo.blessed = 0;
2468 /* The + 1000 is a hack in case there are any files out in venus caches */
2469 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2470 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2471 header.volumeInfo.needsCallback = 0;
2472 gettimeofday(&tp, 0);
2473 header.volumeInfo.creationDate = tp.tv_sec;
/* Write the rebuilt record at offset 0; a short write is fatal. */
2475 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2476 sizeof(header.volumeInfo), 0);
2477 if (nBytes != sizeof(header.volumeInfo)) {
2480 ("Unable to write volume header file (%s) (errno = %d)\n",
2481 sp->description, errno);
2482 Abort("Unable to write entire volume header file (%s)\n",
/* Every other special file only receives a fresh version stamp. */
2486 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2487 if (nBytes != sizeof(sp->stamp)) {
2490 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2491 sp->description, errno);
2493 ("Unable to write entire version stamp in volume header file (%s)\n",
2498 FDH_REALLYCLOSE(fdP);
/* Keep a copy of the volume info in salvinfo and log when the volume was
 * last updated, or when it was created if it has no update date. */
2500 if (sp->inodeType == VI_VOLINFO) {
2501 salvinfo->VolInfo = header.volumeInfo;
2505 if (salvinfo->VolInfo.updateDate) {
2506 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2508 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2509 salvinfo->VolInfo.id,
2510 (Testing ? "it would have been " : ""), update);
2512 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2514 Log("%s (%u) not updated (created %s)\n",
2515 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
2525 SalvageVnodes(struct SalvInfo *salvinfo,
2526 struct InodeSummary *rwIsp,
2527 struct InodeSummary *thisIsp,
2528 struct ViceInodeInfo *inodes, int check)
2530 int ilarge, ismall, ioffset, RW, nInodes;
2531 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2534 RW = (rwIsp == thisIsp);
2535 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2537 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2538 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2539 if (check && ismall == -1)
2542 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2543 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2544 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex -- reconcile one vnode index file with the volume's inodes.
 *
 * The index inode `ino` is opened and read record by record (each record is
 * vcp->diskSize bytes; the first record is skipped).  Every vnode whose
 * type is not vNull is compared with the entries of ip[] that carry the
 * same vnode number:
 *   - vnodes with inode number 0 or a wrong magic number are zeroed;
 *   - with a matching inode, the vnode's uniquifier, data version, inode
 *     number and length are brought in line with the inode;
 *   - without a matching inode, a vnode that names an inode (or is a
 *     directory) is zeroed.
 * Changed vnodes are written back with IH_IWRITE unless Testing is set, and
 * salvinfo->VolumeChanged is raised.  A change while `check` is set trips
 * an assertion.
 *
 * ip / nInodes  inode list to match against; the comparisons below rely on
 *               it being ordered by vnode number.
 * RW            read/write-volume flag; NOTE(review): its uses are mostly on
 *               lines not visible in this excerpt.
 *
 * NOTE(review): the return statements are not visible here; the caller in
 * this file treats 0 as success and tests for -1.
 */
2548 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2549 struct ViceInodeInfo *ip, int nInodes,
2550 struct VolumeSummary *volSummary, int check)
2552 char buf[SIZEOF_LARGEDISKVNODE];
2553 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2555 StreamHandle_t *file;
2556 struct VnodeClassInfo *vcp;
2558 afs_sfsize_t nVnodes;
2559 afs_fsize_t vnodeLength;
2561 afs_ino_str_t stmp1, stmp2;
/* Open the index file; its size must be an exact multiple of the on-disk
 * vnode size, and the first record is not a vnode. */
2565 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2566 fdP = IH_OPEN(handle);
2567 osi_Assert(fdP != NULL);
2568 file = FDH_FDOPEN(fdP, "r+");
2569 osi_Assert(file != NULL);
2570 vcp = &VnodeClassInfo[class];
2571 size = OS_SIZE(fdP->fd_fd);
2572 osi_Assert(size != -1);
2573 nVnodes = (size / vcp->diskSize) - 1;
2575 osi_Assert((nVnodes + 1) * vcp->diskSize == size);
2576 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
/* One iteration per vnode slot; stops early on a short read. */
2580 for (vnodeIndex = 0;
2581 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2582 nVnodes--, vnodeIndex++) {
2583 if (vnode->type != vNull) {
2584 int vnodeChanged = 0;
2585 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
/* An allocated vnode without an inode number is wiped. */
2586 if (VNDISK_GET_INO(vnode) == 0) {
2588 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2589 memset(vnode, 0, vcp->diskSize);
2593 if (vcp->magic != vnode->vnodeMagic) {
2594 /* bad magic #, probably partially created vnode */
2596 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2597 vnodeNumber, afs_printable_uint32_lu(vnode->vnodeMagic),
2598 afs_printable_uint32_lu(vcp->magic));
2599 memset(vnode, 0, vcp->diskSize);
2603 Log("Partially allocated vnode %d deleted.\n",
2605 memset(vnode, 0, vcp->diskSize);
2609 /* ****** Should do a bit more salvage here: e.g. make sure
2610 * vnode type matches what it should be given the index */
2611 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2612 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2613 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2614 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2621 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2622 /* The following doesn't work, because the version number
2623 * is not maintained correctly by the file server */
2624 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2625 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2627 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2633 /* For RW volume, look for vnode with matching inode number;
2634 * if no such match, take the first determined by our sort
2636 struct ViceInodeInfo *lip = ip;
2637 int lnInodes = nInodes;
2639 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2640 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2649 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2650 /* "Matching" inode */
/* vu/vd are the vnode's uniquifier and data version, iu/id the inode's. */
2654 vu = vnode->uniquifier;
2655 iu = ip->u.vnode.vnodeUniquifier;
2656 vd = vnode->dataVersion;
2657 id = ip->u.vnode.inodeDataVersion;
2659 * Because of the possibility of the uniquifier overflows (> 4M)
2660 * we compare them modulo the low 22-bits; we shouldn't worry
2661 * about mismatching since they shouldn't to many old
2662 * uniquifiers of the same vnode...
2664 if (IUnique(vu) != IUnique(iu)) {
2666 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2669 vnode->uniquifier = iu;
2670 #ifdef AFS_3DISPARES
2671 vnode->dataVersion = (id >= vd ?
2674 1887437 ? vd : id) :
2677 1887437 ? id : vd));
2679 #if defined(AFS_SGI_EXMAG)
2680 vnode->dataVersion = (id >= vd ?
2683 15099494 ? vd : id) :
2686 15099494 ? id : vd));
2688 vnode->dataVersion = (id > vd ? id : vd);
2689 #endif /* AFS_SGI_EXMAG */
2690 #endif /* AFS_3DISPARES */
2693 /* don't bother checking for vd > id any more, since
2694 * partial file transfers always result in this state,
2695 * and you can't do much else anyway (you've already
2696 * found the best data you can) */
2697 #ifdef AFS_3DISPARES
2698 if (!vnodeIsDirectory(vnodeNumber)
2699 && ((vd < id && (id - vd) < 1887437)
2700 || ((vd > id && (vd - id) > 1887437)))) {
2702 #if defined(AFS_SGI_EXMAG)
2703 if (!vnodeIsDirectory(vnodeNumber)
2704 && ((vd < id && (id - vd) < 15099494)
2705 || ((vd > id && (vd - id) > 15099494)))) {
2707 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2708 #endif /* AFS_SGI_EXMAG */
2711 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2712 vnode->dataVersion = id;
/* The vnode must name the inode that was selected for it. */
2717 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2720 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2722 VNDISK_SET_INO(vnode, ip->inodeNumber);
2727 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2729 VNDISK_SET_INO(vnode, ip->inodeNumber);
/* The length recorded in the vnode must equal the inode's byte count. */
2732 VNDISK_GET_LEN(vnodeLength, vnode);
2733 if (ip->byteCount != vnodeLength) {
2736 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2741 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2742 VNDISK_SET_LEN(vnode, ip->byteCount);
2746 ip->linkCount--; /* Keep the inode around */
2749 } else { /* no matching inode */
2751 if (VNDISK_GET_INO(vnode) != 0
2752 || vnode->type == vDirectory) {
2753 /* No matching inode--get rid of the vnode */
2755 if (VNDISK_GET_INO(vnode)) {
2757 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2761 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2766 if (VNDISK_GET_INO(vnode)) {
2768 time_t serverModifyTime = vnode->serverModifyTime;
2769 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2773 time_t serverModifyTime = vnode->serverModifyTime;
2774 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2777 memset(vnode, 0, vcp->diskSize);
2780 /* Should not reach here because we checked for
2781 * (inodeNumber == 0) above. And where we zero the vnode,
2782 * we also goto vnodeDone.
2786 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2790 } /* VNDISK_GET_INO(vnode) != 0 */
/* A check-only pass must never modify a vnode; otherwise write the vnode
 * back to its slot in the index unless Testing is set. */
2792 osi_Assert(!(vnodeChanged && check));
2793 if (vnodeChanged && !Testing) {
2794 osi_Assert(IH_IWRITE
2795 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2796 (char *)vnode, vcp->diskSize)
2798 salvinfo->VolumeChanged = 1; /* For break call back */
2809 struct VnodeEssence *
2810 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2813 struct VnodeInfo *vip;
2816 class = vnodeIdToClass(vnodeNumber);
2817 vip = &salvinfo->vnodeInfo[class];
2818 offset = vnodeIdToBitNumber(vnodeNumber);
2819 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite -- give a directory a private copy of its data before it is
 * modified.
 *
 * Nothing is done when the directory has already been copied (dir->copied)
 * or when Testing is set.  Otherwise the directory's vnode is read from the
 * large vnode index, a new inode is created and filled from the old one
 * with CopyInode, the vnode is rewritten to name the new inode, and
 * dir->dirHandle is re-pointed at the new inode.  The old inode is not
 * released here.
 *
 * Failures of the read, the inode creation, the copy or the write trip
 * osi_Assert.
 */
2823 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2825 /* Copy the directory unconditionally if we are going to change it:
2826 * not just if was cloned.
2828 struct VnodeDiskObject vnode;
2829 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2830 Inode oldinode, newinode;
2833 if (dir->copied || Testing)
2835 DFlush(); /* Well justified paranoia... */
/* Fetch the directory's on-disk vnode to learn its current inode. */
2838 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2839 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2841 osi_Assert(code == sizeof(vnode));
2842 oldinode = VNDISK_GET_INO(&vnode);
2843 /* Increment the version number by a whole lot to avoid problems with
2844 * clients that were promised new version numbers--but the file server
2845 * crashed before the versions were written to disk.
2848 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2849 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2851 osi_Assert(VALID_INO(newinode));
2852 osi_Assert(CopyInode(salvinfo->fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2854 VNDISK_SET_INO(&vnode, newinode);
2856 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2857 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2859 osi_Assert(code == sizeof(vnode));
2861 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2862 salvinfo->fileSysDevice, newinode,
2863 &salvinfo->VolumeChanged);
2864 /* Don't delete the original inode right away, because the directory is
2865 * still being scanned.
/*
 * CopyAndSalvage -- rebuild a damaged directory into a fresh inode.
 *
 * A new inode is created for the directory and DirSalvage copies the
 * entries of the old directory into it.  If the salvage fails, or the
 * result does not pass DirOK, the new inode is released again and the old
 * directory stays in place.  On success the directory's vnode is rewritten
 * with the new inode number and length, the old inode is released and
 * dir->dirHandle is switched to the new directory.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 */
2871 * This function should either successfully create a new dir, or give up
2872 * and leave things the way they were. In particular, if it fails to write
2873 * the new dir properly, it should return w/o changing the reference to the
2877 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2879 struct VnodeDiskObject vnode;
2880 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2881 Inode oldinode, newinode;
2886 afs_int32 parentUnique = 1;
2887 struct VnodeEssence *vnodeEssence;
2892 Log("Salvaging directory %u...\n", dir->vnodeNumber);
/* Fetch the directory's on-disk vnode to learn its current inode. */
2894 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2895 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2897 osi_Assert(lcode == sizeof(vnode));
2898 oldinode = VNDISK_GET_INO(&vnode);
2899 /* Increment the version number by a whole lot to avoid problems with
2900 * clients that were promised new version numbers--but the file server
2901 * crashed before the versions were written to disk.
2904 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2905 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2907 osi_Assert(VALID_INO(newinode));
2908 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2909 &salvinfo->VolumeChanged);
2911 /* Assign . and .. vnode numbers from dir and vnode.parent.
2912 * The uniquifier for . is in the vnode.
2913 * The uniquifier for .. might be set to a bogus value of 1 and
2914 * the salvager will later clean it up.
2916 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2917 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2920 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2922 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2927 /* didn't really build the new directory properly, let's just give up. */
2928 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2929 Log("Directory salvage returned code %d, continuing.\n", code);
2931 Log("also failed to decrement link count on new inode");
/* Verify the rebuilt directory before committing to it. */
2935 Log("Checking the results of the directory salvage...\n");
2936 if (!DirOK(&newdir)) {
2937 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2938 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2939 osi_Assert(code == 0);
/* Commit: point the vnode at the new inode and record the new length. */
2943 VNDISK_SET_INO(&vnode, newinode);
2944 length = Length(&newdir);
2945 VNDISK_SET_LEN(&vnode, length);
2947 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2948 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2950 osi_Assert(lcode == sizeof(vnode));
2953 nt_sync(salvinfo->fileSysDevice);
2955 sync(); /* this is slow, but hopefully rarely called. We don't have
2956 * an open FD on the file itself to fsync.
2960 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
2962 /* make sure old directory file is really closed */
2963 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2964 FDH_REALLYCLOSE(fdP);
/* Release the old inode and adopt the rebuilt directory. */
2966 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2967 osi_Assert(code == 0);
2968 dir->dirHandle = newdir;
/*
 * Bundle of state handed to JudgeEntry through the opaque pointer that
 * EnumerateDir passes to its callback; SalvageDir fills in both members
 * before starting the enumeration.
 */
2972 * arguments for JudgeEntry.
2974 struct judgeEntry_params {
2975 struct DirSummary *dir; /**< directory we're examining entries in */
2976 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/*
 * JudgeEntry -- examine one directory entry; used as the per-entry callback
 * of EnumerateDir.
 *
 * arock        pointer to a struct judgeEntry_params (directory summary and
 *              salvage job state).
 * name         name of the directory entry.
 * vnodeNumber  vnode number the entry refers to.
 *
 * The entry is deleted from the directory when it refers to a vnode that
 * does not exist, has no inode, or is already claimed by another directory.
 * Entries whose uniquifier differs from the vnode's are recreated with the
 * vnode's uniquifier, and bad "." / ".." entries are replaced.  Every
 * modification is preceded by CopyOnWrite.  A vnode that survives all the
 * checks is marked claimed and its remaining link count is decremented.
 *
 * NOTE(review): the remainder of the parameter list and the return
 * statements are not visible in this excerpt.
 */
2980 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
2983 struct judgeEntry_params *params = arock;
2984 struct DirSummary *dir = params->dir;
2985 struct SalvInfo *salvinfo = params->salvinfo;
2986 struct VnodeEssence *vnodeEssence;
2987 afs_int32 dirOrphaned, todelete;
/* Whether the directory itself is orphaned decides, further down, whether
 * it may claim vnodes and whether mismatching entries are deleted. */
2989 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
/* An entry whose vnode number is outside the index is removed. */
2991 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
2992 if (vnodeEssence == NULL) {
2994 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2997 CopyOnWrite(salvinfo, dir);
2998 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3003 #ifndef AFS_NAMEI_ENV
3004 /* On AIX machines, don't allow entries to point to inode 0. That is a special
3005 * mount inode for the partition. If this inode were deleted, it would crash
3008 if (vnodeEssence->InodeNumber == 0) {
3009 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
3011 CopyOnWrite(salvinfo, dir);
3012 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3019 if (!(vnodeNumber & 1) && !Showmode
3020 && !(vnodeEssence->count || vnodeEssence->unique
3021 || vnodeEssence->modeBits)) {
3022 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
3023 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3024 vnodeNumber, unique,
3025 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3029 CopyOnWrite(salvinfo, dir);
3030 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3036 /* Check if the Uniquifiers match. If not, change the directory entry
3037 * so its unique matches the vnode unique. Delete if the unique is zero
3038 * or if the directory is orphaned.
3040 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3041 if (!vnodeEssence->unique
3042 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3043 /* This is an orphaned directory. Don't delete the . or ..
3044 * entry. Otherwise, it will get created in the next
3045 * salvage and deleted again here. So Just skip it.
3050 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3053 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3057 fid.Vnode = vnodeNumber;
3058 fid.Unique = vnodeEssence->unique;
3059 CopyOnWrite(salvinfo, dir);
3060 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3062 osi_Assert(Create(&dir->dirHandle, name, &fid) == 0);
3065 return 0; /* no need to continue */
/* "." must refer to the directory itself (vnode number and uniquifier). */
3068 if (strcmp(name, ".") == 0) {
3069 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3072 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3074 CopyOnWrite(salvinfo, dir);
3075 osi_Assert(Delete(&dir->dirHandle, ".") == 0);
3076 fid.Vnode = dir->vnodeNumber;
3077 fid.Unique = dir->unique;
3078 osi_Assert(Create(&dir->dirHandle, ".", &fid) == 0);
3081 vnodeNumber = fid.Vnode; /* Get the new Essence */
3082 unique = fid.Unique;
3083 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
/* ".." must refer to the parent recorded for the directory; the fallback
 * visible below is the directory itself. */
3086 } else if (strcmp(name, "..") == 0) {
3089 struct VnodeEssence *dotdot;
3090 pa.Vnode = dir->parent;
3091 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3092 osi_Assert(dotdot != NULL); /* XXX Should not be assert */
3093 pa.Unique = dotdot->unique;
3095 pa.Vnode = dir->vnodeNumber;
3096 pa.Unique = dir->unique;
3098 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3100 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3102 CopyOnWrite(salvinfo, dir);
3103 osi_Assert(Delete(&dir->dirHandle, "..") == 0);
3104 osi_Assert(Create(&dir->dirHandle, "..", &pa) == 0);
3107 vnodeNumber = pa.Vnode; /* Get the new Essence */
3109 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3111 dir->haveDotDot = 1;
/* Names starting with ".__afs" are removed and their vnode is scheduled
 * for deletion. */
3112 } else if (strncmp(name, ".__afs", 6) == 0) {
3114 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3117 CopyOnWrite(salvinfo, dir);
3118 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3120 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3121 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
/* Optional reporting of setuid/setgid files. */
3124 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3125 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/* A symlink with none of the execute bits set is treated as a mount point:
 * its target is read and checked for the expected syntax. */
3126 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3127 && !(vnodeEssence->modeBits & 0111)) {
3128 afs_sfsize_t nBytes;
3134 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3135 vnodeEssence->InodeNumber);
3138 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3142 size = FDH_SIZE(fdP);
3144 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3145 FDH_REALLYCLOSE(fdP);
3152 nBytes = FDH_PREAD(fdP, buf, size, 0);
3153 if (nBytes == size) {
/* A target that does not start with '#' or '%' or does not end in '.' is
 * turned into an ordinary symbolic link by setting the execute bits.
 * NOTE(review): if buf can be an empty string here, buf[strlen(buf)-1]
 * reads before the start of the buffer -- confirm that a zero size is
 * rejected on the lines not visible in this excerpt. */
3155 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3156 Log("Volume %u (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3157 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3158 Testing ? "would convert" : "converted");
3159 vnodeEssence->modeBits |= 0111;
3160 vnodeEssence->changed = 1;
3161 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3162 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3163 dir->name ? dir->name : "??", name, buf);
3165 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3166 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3168 FDH_REALLYCLOSE(fdP);
/* Optional reporting of files owned by id 0 (other than vnode 1). */
3171 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3172 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/* Remember the entry name for large (directory-class) vnodes that have no
 * name yet; the copy is heap-allocated and skipped if malloc fails. */
3173 if (vnodeIdToClass(vnodeNumber) == vLarge
3174 && vnodeEssence->name == NULL) {
3176 if ((n = (char *)malloc(strlen(name) + 1)))
3178 vnodeEssence->name = n;
3181 /* The directory entry points to the vnode. Check to see if the
3182 * vnode points back to the directory. If not, then let the
3183 * directory claim it (else it might end up orphaned). Vnodes
3184 * already claimed by another directory are deleted from this
3185 * directory: hardlinks to the same vnode are not allowed
3186 * from different directories.
3188 if (vnodeEssence->parent != dir->vnodeNumber) {
3189 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3190 /* Vnode does not point back to this directory.
3191 * Orphaned dirs cannot claim a file (it may belong to
3192 * another non-orphaned dir).
3195 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3197 vnodeEssence->parent = dir->vnodeNumber;
3198 vnodeEssence->changed = 1;
3200 /* Vnode was claimed by another directory */
3203 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3204 } else if (vnodeNumber == 1) {
3205 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3207 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3211 CopyOnWrite(salvinfo, dir);
3212 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3217 /* This directory claims the vnode */
3218 vnodeEssence->claimed = 1;
/* One directory reference to the vnode has now been accounted for. */
3220 vnodeEssence->count--;
3225 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3226 VnodeClass class, Inode ino, Unique * maxu)
3228 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3229 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3230 char buf[SIZEOF_LARGEDISKVNODE];
3231 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3233 StreamHandle_t *file;
3238 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3239 fdP = IH_OPEN(vip->handle);
3240 osi_Assert(fdP != NULL);
3241 file = FDH_FDOPEN(fdP, "r+");
3242 osi_Assert(file != NULL);
3243 size = OS_SIZE(fdP->fd_fd);
3244 osi_Assert(size != -1);
3245 vip->nVnodes = (size / vcp->diskSize) - 1;
3246 if (vip->nVnodes > 0) {
3247 osi_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3248 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
3249 osi_Assert((vip->vnodes = (struct VnodeEssence *)
3250 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3251 if (class == vLarge) {
3252 osi_Assert((vip->inodes = (Inode *)
3253 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3262 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3263 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3264 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3265 nVnodes--, vnodeIndex++) {
3266 if (vnode->type != vNull) {
3267 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3268 afs_fsize_t vnodeLength;
3269 vip->nAllocatedVnodes++;
3270 vep->count = vnode->linkCount;
3271 VNDISK_GET_LEN(vnodeLength, vnode);
3272 vep->blockCount = nBlocks(vnodeLength);
3273 vip->volumeBlockCount += vep->blockCount;
3274 vep->parent = vnode->parent;
3275 vep->unique = vnode->uniquifier;
3276 if (*maxu < vnode->uniquifier)
3277 *maxu = vnode->uniquifier;
3278 vep->modeBits = vnode->modeBits;
3279 vep->InodeNumber = VNDISK_GET_INO(vnode);
3280 vep->type = vnode->type;
3281 vep->author = vnode->author;
3282 vep->owner = vnode->owner;
3283 vep->group = vnode->group;
3284 if (vnode->type == vDirectory) {
3285 if (class != vLarge) {
3286 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3287 vip->nAllocatedVnodes--;
3288 memset(vnode, 0, sizeof(vnode));
3289 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3290 vnodeIndexOffset(vcp, vnodeNumber),
3291 (char *)&vnode, sizeof(vnode));
3292 salvinfo->VolumeChanged = 1;
3294 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName -- build the path name of a directory vnode in `path`.
 *
 * The visible code recurses to the parent vnode first and, when that
 * succeeds, appends OS_DIRSEP and this vnode's recorded name.  A vnode
 * without a parent, without a recorded name, or whose parent cannot be
 * looked up does not take this branch.
 *
 * NOTE(review): the end of the parameter list, the base case of the
 * recursion and the return statements are not visible in this excerpt.
 * NOTE(review): strcat() is unbounded; the caller in this file passes a
 * MAXPATHLEN-sized buffer -- confirm that deep or long paths cannot
 * overflow it.
 */
3303 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3306 struct VnodeEssence *parentvp;
3312 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3313 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3314 strcat(path, OS_DIRSEP);
3315 strcat(path, vp->name);
3321 /* To determine if a vnode is orhpaned or not, the vnode and all its parent
3322 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
3325 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3327 struct VnodeEssence *vep;
3330 return (1); /* Vnode zero does not exist */
3332 return (0); /* The root dir vnode is always claimed */
3333 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3334 if (!vep || !vep->claimed)
3335 return (1); /* Vnode is not claimed - it is orphaned */
3337 return (IsVnodeOrphaned(salvinfo, vep->parent));
/*
 * SalvageDir -- salvage the directory stored in slot i of the large vnode
 * index.
 *
 * salvinfo      salvage job state.
 * name          NOTE(review): only seen being passed on to the recursive
 *               call in this excerpt.
 * rwVid         id of the read/write volume, used when releasing inodes.
 * dirVnodeInfo  vnode info of the large vnode class (essences and inodes).
 * alinkH        link-table handle stored in the directory summary.
 * i             slot of the directory in dirVnodeInfo.
 * rootdir       receives a copy of the directory summary when the
 *               directory is vnode 1 with uniquifier 1.
 * rootdirfound  NOTE(review): its use is not visible in this excerpt.
 *
 * A slot is processed at most once (the `salvaged` flag), and slots without
 * a directory inode are ignored.  The parent directory is salvaged first
 * unless this is vnode 1.  A directory that fails DirOK (or every directory
 * when RebuildDirs is set and Testing is not) is rebuilt with
 * CopyAndSalvage, and each entry is then examined by JudgeEntry through
 * EnumerateDir.
 *
 * The directory summary, directory handle and path buffer are
 * function-static.
 */
3341 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3342 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3343 struct DirSummary *rootdir, int *rootdirfound)
3345 static struct DirSummary dir;
3346 static struct DirHandle dirHandle;
3347 struct VnodeEssence *parent;
3348 static char path[MAXPATHLEN];
3351 if (dirVnodeInfo->vnodes[i].salvaged)
3352 return; /* already salvaged */
3355 dirVnodeInfo->vnodes[i].salvaged = 1;
3357 if (dirVnodeInfo->inodes[i] == 0)
3358 return; /* Not allocated to a directory */
/* Vnode 1 is the root directory and must not record a parent; any other
 * directory has its parent salvaged before itself. */
3360 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3361 if (dirVnodeInfo->vnodes[i].parent) {
3362 Log("Bad parent, vnode 1; %s...\n",
3363 (Testing ? "skipping" : "salvaging"));
3364 dirVnodeInfo->vnodes[i].parent = 0;
3365 dirVnodeInfo->vnodes[i].changed = 1;
3368 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3369 if (parent && parent->salvaged == 0)
3370 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3371 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3372 rootdir, rootdirfound);
/* Describe the directory in the summary handed to the helpers. */
3375 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3376 dir.unique = dirVnodeInfo->vnodes[i].unique;
3379 dir.parent = dirVnodeInfo->vnodes[i].parent;
3380 dir.haveDot = dir.haveDotDot = 0;
3381 dir.ds_linkH = alinkH;
3382 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3383 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
/* With RebuildDirs set (and Testing not set) the directory is treated as
 * bad without being checked. */
3385 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3388 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3389 (Testing ? "skipping" : "salvaging"));
3392 CopyAndSalvage(salvinfo, &dir);
3394 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
/* Keep a copy of the handle as it is before the entries are judged. */
3397 dirHandle = dir.dirHandle;
3400 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3401 &dirVnodeInfo->vnodes[i], path);
3404 /* If enumeration failed for random reasons, we will probably delete
3405 * too much stuff, so we guard against this instead.
3407 struct judgeEntry_params judge_params;
3408 judge_params.salvinfo = salvinfo;
3409 judge_params.dir = &dir;
3411 osi_Assert(EnumerateDir(&dirHandle, JudgeEntry, &judge_params) == 0);
3414 /* Delete the old directory if it was copied in order to salvage.
3415 * CopyOnWrite has written the new inode # to the disk, but we still
3416 * have the old one in our local structure here. Thus, we idec the
3420 if (dir.copied && !Testing) {
3421 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3422 osi_Assert(code == 0);
3423 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3426 /* Remember rootdir DirSummary _after_ it has been judged */
3427 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3428 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3436 * Get a new FID that can be used to create a new file.
3438 * @param[in] volHeader vol header for the volume
3439 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3440 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3441 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3442 * updated to the new max unique if we create a new
/*
 * GetNewFID -- choose the vnode number and uniquifier for a vnode that the
 * salvager is about to create in this volume.
 *
 * Scans salvinfo->vnodeInfo[class].vnodes for a slot whose type is vNull;
 * when none exists the table is grown by one entry.  The slot index is
 * converted with bitNumberToVnodeNumber() and stored in afid->Vnode.
 * afid->Unique comes from the volume header's uniquifier when that value is
 * greater than *maxunique; otherwise it is derived from *maxunique plus the
 * 2000 headroom mentioned below.
 *
 * Allocation failure is fatal (osi_Assert), matching how the rest of this
 * file treats failed allocations.
 *
 * NOTE(review): this listing omits short source lines (braces, the local
 * declaration of i, and whatever sits between original lines 3469 and
 * 3472), so the full update of *maxunique in the first branch is not
 * visible here.
 */
3446 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3447 VnodeClass class, AFSFid *afid, Unique *maxunique)
3450 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3451 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
3455 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3456 /* no free vnodes; make a new one */
/* Grow through a temporary so that a failed realloc() is caught before
 * the table pointer is overwritten and then dereferenced. */
struct VnodeEssence *grown =
realloc(salvinfo->vnodeInfo[class].vnodes,
sizeof(struct VnodeEssence) * (i+1));
osi_Assert(grown != NULL);
/* realloc() does not clear the added entry; zero it so that no field of
 * the new slot is ever read uninitialized. */
memset(&grown[i], 0, sizeof(struct VnodeEssence));
salvinfo->vnodeInfo[class].vnodes = grown;
3457 salvinfo->vnodeInfo[class].nVnodes++;
3462 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
3465 afid->Vnode = bitNumberToVnodeNumber(i, class);
3467 if (volHeader->uniquifier < (*maxunique + 1)) {
3468 /* header uniq is bad; it will get bumped by 2000 later */
3469 afid->Unique = *maxunique + 1 + 2000;
3472 /* header uniq seems okay; just use that */
3473 afid->Unique = *maxunique = volHeader->uniquifier++;
3478 * Create a vnode for a README file explaining not to use a recreated-root vol.
3480 * @param[in] volHeader vol header for the volume
3481 * @param[in] alinkH ihandle for i/o for the volume
3482 * @param[in] vid volume id
3483 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3484 * updated to the new max unique if we create a new
3486 * @param[out] afid FID for the new readme vnode
3487 * @param[out] ainode the inode for the new readme file
3489 * @return operation status
/*
 * CreateReadme -- create the README file that is placed in a recreated
 * root directory.
 *
 * Steps visible in this listing:
 *   - obtain a small-vnode FID with GetNewFID();
 *   - create an inode with IH_CREATE() and write the fixed readme text
 *     into it with IH_IWRITE();
 *   - build a small disk vnode (type vFile, mode 0777, link count 1,
 *     data version 1) and write it at its slot in the small vnode index;
 *   - update the matching VnodeEssence and the per-class allocation and
 *     block counters;
 *   - hand the new inode back through *ainode.
 * The recovery path releases the created inode with IH_DEC().
 *
 * Diagnostics are all prefixed with this function's name, and the
 * short-write message reports the byte count that was actually requested.
 *
 * NOTE(review): this listing omits short source lines (braces, several
 * local declarations, the error jumps and the return statements), so the
 * exact return values are not visible here.
 */
3494 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3495 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3499 struct VnodeDiskObject *rvnode = NULL;
3501 IHandle_t *readmeH = NULL;
3502 struct VnodeEssence *vep;
3504 time_t now = time(NULL);
3506 /* Try to make the note brief, but informative. Only administrators should
3507 * be able to read this file at first, so we can hopefully assume they
3508 * know what AFS is, what a volume is, etc. */
3510 "This volume has been salvaged, but has lost its original root directory.\n"
3511 "The root directory that exists now has been recreated from orphan files\n"
3512 "from the rest of the volume. This recreated root directory may interfere\n"
3513 "with old cached data on clients, and there is no way the salvager can\n"
3514 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3515 "use this volume, but only copy the salvaged data to a new volume.\n"
3516 "Continuing to use this volume as it exists now may cause some clients to\n"
3517 "behave oddly when accessing this volume.\n"
3518 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3519 /* ^ the person reading this probably just lost some data, so they could
3520 * use some cheering up. */
3522 /* -1 for the trailing NUL */
3523 length = sizeof(readme) - 1;
3525 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
/* Take the slot address only after GetNewFID(), which may have moved the
 * vnode table while growing it. */
3527 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3529 /* create the inode and write the contents */
3530 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3531 salvinfo->fileSysPath, 0, vid,
3532 afid->Vnode, afid->Unique, 1);
3533 if (!VALID_INO(readmeinode)) {
3534 Log("CreateReadme: readme IH_CREATE failed\n");
3538 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3539 bytes = IH_IWRITE(readmeH, 0, readme, length);
3540 IH_RELEASE(readmeH);
3542 if (bytes != length) {
/* Report the count we asked to write (length), not sizeof(readme), which
 * also counts the trailing NUL that is never written. */
3543 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
(int)length);
3548 /* create the vnode and write it out */
3549 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
Log("CreateReadme: error alloc'ing memory\n");
3555 rvnode->type = vFile;
3557 rvnode->modeBits = 0777;
3558 rvnode->linkCount = 1;
3559 VNDISK_SET_LEN(rvnode, length);
3560 rvnode->uniquifier = afid->Unique;
3561 rvnode->dataVersion = 1;
3562 VNDISK_SET_INO(rvnode, readmeinode);
3563 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3568 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
3570 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3571 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3572 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3574 if (bytes != SIZEOF_SMALLDISKVNODE) {
3575 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3576 (int)SIZEOF_SMALLDISKVNODE);
3580 /* update VnodeEssence for new readme vnode */
3581 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
3583 vep->blockCount = nBlocks(length);
3584 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3585 vep->parent = rvnode->parent;
3586 vep->unique = rvnode->uniquifier;
3587 vep->modeBits = rvnode->modeBits;
3588 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3589 vep->type = rvnode->type;
3590 vep->author = rvnode->author;
3591 vep->owner = rvnode->owner;
3592 vep->group = rvnode->group;
3602 *ainode = readmeinode;
3607 if (IH_DEC(alinkH, readmeinode, vid)) {
3608 Log("CreateReadme (recovery): IH_DEC failed\n");
3620 * create a root dir for a volume that lacks one.
3622 * @param[in] volHeader vol header for the volume
3623 * @param[in] alinkH ihandle for disk access for this volume group
3624 * @param[in] vid volume id we're dealing with
3625 * @param[out] rootdir populated with info about the new root dir
3626 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3627 * updated to the new max unique if we create a new
3630 * @return operation status
/*
 * CreateRootDir -- build a replacement root directory (vnode 1) for a
 * volume whose root directory could not be found.
 *
 * Steps visible in this listing:
 *   - give up when the volume has neither a large nor a small vnode table,
 *     or when the slot for vnode 1 is already in use;
 *   - allocate one-entry large-vnode tables when none exist;
 *   - create the README file via CreateReadme();
 *   - create the directory inode, initialise it with MakeDir() and add a
 *     "README.ROOTDIR" entry pointing at the readme FID;
 *   - build and write the large disk vnode for vnode 1, with an ACL that
 *     grants read and lookup to id -204 only;
 *   - update the VnodeEssence for vnode 1 and fill in *rootdir.
 * The recovery path releases the inodes that were created, as selected by
 * the decroot / decreadme flags.
 *
 * NOTE(review): this listing omits short source lines (braces, error
 * jumps, several declarations and the return statements), so the points at
 * which decroot / decreadme are set and the exact return values are not
 * visible here.
 * NOTE(review): the result of strdup(".") is stored without a visible
 * check -- confirm whether a NULL name is acceptable to callers.
 */
3635 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3636 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
3640 int decroot = 0, decreadme = 0;
3641 AFSFid did, readmeid;
3644 struct VnodeDiskObject *rootvnode = NULL;
3645 struct acl_accessList *ACL;
3648 struct VnodeEssence *vep;
3650 time_t now = time(NULL);
3652 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3653 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3657 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3658 /* We don't have any large vnodes in the volume; allocate room
3659 * for one so we can recreate the root dir */
3660 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3661 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3662 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3664 osi_Assert(salvinfo->vnodeInfo[vLarge].vnodes);
3665 osi_Assert(salvinfo->vnodeInfo[vLarge].inodes);
/* vep / ip address the table slots that belong to vnode number 1 */
3668 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3669 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3670 if (vep->type != vNull) {
3671 Log("Not creating new root dir; existing vnode 1 is non-null\n");
3675 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3676 &readmeinode) != 0) {
3681 /* set the DV to a very high number, so it is unlikely that we collide
3682 * with a cached DV */
3685 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3687 if (!VALID_INO(rootinode)) {
3688 Log("CreateRootDir: IH_CREATE failed\n");
3693 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3694 rootinode, &salvinfo->VolumeChanged);
/* the same FID is passed for both MakeDir() FID arguments */
3698 if (MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3699 Log("CreateRootDir: MakeDir failed\n");
3702 if (Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3703 Log("CreateRootDir: Create failed\n");
3707 length = Length(&rootdir->dirHandle);
3708 DZap((void *)&rootdir->dirHandle);
3710 /* create the new root dir vnode */
3711 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
3713 Log("CreateRootDir: malloc failed\n");
3717 /* only give 'rl' permissions to 'system:administrators'. We do this to
3718 * try to catch the attention of an administrator, that they should not
3719 * be writing to this directory or continue to use it. */
3720 ACL = VVnodeDiskACL(rootvnode);
3721 ACL->size = sizeof(struct acl_accessList);
3722 ACL->version = ACL_ACLVERSION;
3726 ACL->entries[0].id = -204; /* system:administrators */
3727 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
3729 rootvnode->type = vDirectory;
3730 rootvnode->cloned = 0;
3731 rootvnode->modeBits = 0777;
3732 rootvnode->linkCount = 2;
3733 VNDISK_SET_LEN(rootvnode, length);
3734 rootvnode->uniquifier = 1;
3735 rootvnode->dataVersion = dv;
3736 VNDISK_SET_INO(rootvnode, rootinode);
3737 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3738 rootvnode->author = 0;
3739 rootvnode->owner = 0;
3740 rootvnode->parent = 0;
3741 rootvnode->group = 0;
3742 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3744 /* write it out to disk */
3745 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3746 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3747 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3749 if (bytes != SIZEOF_LARGEDISKVNODE) {
3750 /* just cast to int and don't worry about printing real 64-bit ints;
3751 * a large disk vnode isn't anywhere near the 32-bit limit */
3752 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3753 (int)SIZEOF_LARGEDISKVNODE);
3757 /* update VnodeEssence for the new root vnode */
3758 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3760 vep->blockCount = nBlocks(length);
3761 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3762 vep->parent = rootvnode->parent;
3763 vep->unique = rootvnode->uniquifier;
3764 vep->modeBits = rootvnode->modeBits;
3765 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3766 vep->type = rootvnode->type;
3767 vep->author = rootvnode->author;
3768 vep->owner = rootvnode->owner;
3769 vep->group = rootvnode->group;
3779 /* update DirSummary for the new root vnode */
3780 rootdir->vnodeNumber = 1;
3781 rootdir->unique = 1;
3782 rootdir->haveDot = 1;
3783 rootdir->haveDotDot = 1;
3784 rootdir->rwVid = vid;
3785 rootdir->copied = 0;
3786 rootdir->parent = 0;
3787 rootdir->name = strdup(".");
3788 rootdir->vname = volHeader->name;
3789 rootdir->ds_linkH = alinkH;
/* recovery: drop only the inodes this call created */
3796 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3797 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3799 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3800 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3810 * salvage a volume group.
3812 * @param[in] salvinfo information for the current salvage job
3813 * @param[in] rwIsp inode summary for rw volume
3814 * @param[in] alinkH link table inode handle
3816 * @return operation status
/*
 * SalvageVolume -- salvage the read-write volume described by rwIsp.
 *
 * Stages, as far as this listing shows:
 *   1. Read the VolumeDiskData header through an ihandle built from
 *      rwIsp->volSummary->header; assert on its size, its magic, and that
 *      destroyMe is not DESTROY_ME.
 *   2. Call DistilVnodeEssence() for the large and small vnode indexes
 *      (collecting maxunique), then SalvageDir() for every large-vnode
 *      slot.
 *   3. If no root directory was found, orphans == ORPH_ATTACH and Testing
 *      is off, try CreateRootDir().
 *   4. Walk every vnode of both classes.  Entries that are in use, not
 *      claimed and not vnode 1 are handled as orphans; with ORPH_ATTACH
 *      they are re-parented to the root directory under a generated
 *      __ORPHANDIR__ / __ORPHANFILE__ name (up to 10 attempts each).
 *   5. Rewrite every vnode whose VnodeEssence is marked changed or has a
 *      non-zero count.  vnp->count is subtracted from the on-disk
 *      linkCount, which is why the attach code decrements count in order
 *      to raise a link count.  Vnodes selected for removal have their
 *      inode released with IH_DEC() and their index entry zeroed.
 *   6. Free cached names, refresh filecount / diskused / uniquifier in
 *      the header, optionally ask the fileserver to break callbacks,
 *      reset the in-use and needs-salvage flags, and write the header
 *      back.
 *
 * NOTE(review): this listing omits short source lines (braces, many local
 * declarations, several conditions such as Testing guards, and the return
 * statements); the summary above covers only what is visible.
 */
3820 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3822 /* This routine, for now, will only be called for read-write volumes */
3824 int BlocksInVolume = 0, FilesInVolume = 0;
3826 struct DirSummary rootdir, oldrootdir;
3827 struct VnodeInfo *dirVnodeInfo;
3828 struct VnodeDiskObject vnode;
3829 VolumeDiskData volHeader;
3831 int orphaned, rootdirfound = 0;
3832 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3833 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3834 struct VnodeEssence *vep;
3837 afs_sfsize_t nBytes;
3839 VnodeId LFVnode, ThisVnode;
3840 Unique LFUnique, ThisUnique;
3844 vid = rwIsp->volSummary->header.id;
3845 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3846 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3847 osi_Assert(nBytes == sizeof(volHeader));
3848 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3849 osi_Assert(volHeader.destroyMe != DESTROY_ME);
3850 /* (should not have gotten this far with DESTROY_ME flag still set!) */
/* Build the in-memory vnode tables for both classes; maxunique is passed
 * by address to both calls. */
3852 DistilVnodeEssence(salvinfo, vid, vLarge,
3853 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
3854 DistilVnodeEssence(salvinfo, vid, vSmall,
3855 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
3857 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
3858 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3859 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
3860 &rootdir, &rootdirfound);
3863 nt_sync(salvinfo->fileSysDevice);
3865 sync(); /* This used to be done lower level, for every dir */
/* A root directory is only recreated when orphans are to be attached and
 * this is not a dry run. */
3872 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
3874 Log("Cannot find root directory for volume %lu; attempting to create "
3875 "a new one\n", afs_printable_uint32_lu(vid));
3877 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
3882 salvinfo->VolumeChanged = 1;
3886 /* Parse each vnode looking for orphaned vnodes and
3887 * connect them to the tree as orphaned (if requested).
3889 oldrootdir = rootdir;
3890 for (class = 0; class < nVNODECLASSES; class++) {
3891 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
3892 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
3893 ThisVnode = bitNumberToVnodeNumber(v, class);
3894 ThisUnique = vep->unique;
3896 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3897 continue; /* Ignore unused, claimed, and root vnodes */
3899 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3900 * entry in this vnode had incremented the parent link count (In
3901 * JudgeEntry()). We need to go to the parent and decrement that
3902 * link count. But if the parent's unique is zero, then the parent
3903 * link count was not incremented in JudgeEntry().
3905 if (class == vLarge) { /* directory vnode */
3906 pv = vnodeIdToBitNumber(vep->parent);
3907 if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
3908 if (vep->parent == 1 && newrootdir) {
3909 /* this vnode's parent was the volume root, and
3910 * we just created the volume root. So, the parent
3911 * dir didn't exist during JudgeEntry, so the link
3912 * count was not inc'd there, so don't dec it here.
3918 salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
3924 continue; /* If no rootdir, can't attach orphaned files */
3926 /* Here we attach orphaned files and directories into the
3927 * root directory, LVVnode, making sure link counts stay correct.
3929 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3930 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3931 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3933 /* Update this orphaned vnode's info. Its parent info and
3934 * link count (do for orphaned directories and files).
3936 vep->parent = LFVnode; /* Parent is the root dir */
3937 vep->unique = LFUnique;
3940 vep->count--; /* Inc link count (root dir will pt to it) */
3942 /* If this orphaned vnode is a directory, change '..'.
3943 * The name of the orphaned dir/file is unknown, so we
3944 * build a unique name. No need to CopyOnWrite the directory
3945 * since it is not connected to tree in BK or RO volume and
3946 * won't be visible there.
3948 if (class == vLarge) {
3952 /* Remove and recreate the ".." entry in this orphaned directory */
3953 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
3954 salvinfo->vnodeInfo[class].inodes[v],
3955 &salvinfo->VolumeChanged);
3957 pa.Unique = LFUnique;
3958 osi_Assert(Delete(&dh, "..") == 0);
3959 osi_Assert(Create(&dh, "..", &pa) == 0);
3961 /* The original parent's link count was decremented above.
3962 * Here we increment the new parent's link count.
3964 pv = vnodeIdToBitNumber(LFVnode);
3965 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
3969 /* Go to the root dir and add this entry. The link count of the
3970 * root dir was incremented when ".." was created. Try 10 times.
3972 for (j = 0; j < 10; j++) {
3973 pa.Vnode = ThisVnode;
3974 pa.Unique = ThisUnique;
3976 (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
3978 vLarge) ? "__ORPHANDIR__" :
3979 "__ORPHANFILE__"), ThisVnode,
3982 CopyOnWrite(salvinfo, &rootdir);
3983 code = Create(&rootdir.dirHandle, npath, &pa);
3987 ThisUnique += 50; /* Try creating a different file */
3989 osi_Assert(code == 0);
3990 Log("Attaching orphaned %s to volume's root dir as %s\n",
3991 ((class == vLarge) ? "directory" : "file"), npath);
3993 } /* for each vnode in the class */
3994 } /* for each class of vnode */
3996 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
3998 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
4000 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
4002 osi_Assert(code == 0);
4003 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
4006 DFlush(); /* Flush the changes */
4007 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
4008 Log("Cannot attach orphaned files and directories: Root directory not found\n");
4009 orphans = ORPH_IGNORE;
4012 /* Write out all changed vnodes. Orphaned files and directories
4013 * will get removed here also (if requested).
4015 for (class = 0; class < nVNODECLASSES; class++) {
4016 afs_sfsize_t nVnodes = salvinfo->vnodeInfo[class].nVnodes;
4017 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
4018 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
4019 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
4020 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
4021 for (i = 0; i < nVnodes; i++) {
4022 struct VnodeEssence *vnp = &vnodes[i];
4023 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
4025 /* If the vnode is good but is unclaimed (not listed in
4026 * any directory entries), then it is orphaned.
4029 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
4030 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
4034 if (vnp->changed || vnp->count) {
4037 IH_IREAD(salvinfo->vnodeInfo[class].handle,
4038 vnodeIndexOffset(vcp, vnodeNumber),
4039 (char *)&vnode, sizeof(vnode));
4040 osi_Assert(nBytes == sizeof(vnode));
4042 vnode.parent = vnp->parent;
4043 oldCount = vnode.linkCount;
4044 vnode.linkCount = vnode.linkCount - vnp->count;
4047 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4049 if (!vnp->todelete) {
4050 /* Orphans should have already been attached (if requested) */
4051 osi_Assert(orphans != ORPH_ATTACH);
4052 oblocks += vnp->blockCount;
4055 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4057 BlocksInVolume -= vnp->blockCount;
4059 if (VNDISK_GET_INO(&vnode)) {
4061 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4062 osi_Assert(code == 0);
4064 memset(&vnode, 0, sizeof(vnode));
4066 } else if (vnp->count) {
4068 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4071 vnode.modeBits = vnp->modeBits;
4074 vnode.dataVersion++;
4077 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4078 vnodeIndexOffset(vcp, vnodeNumber),
4079 (char *)&vnode, sizeof(vnode));
4080 osi_Assert(nBytes == sizeof(vnode));
4082 salvinfo->VolumeChanged = 1;
4086 if (!Showmode && ofiles) {
4087 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4089 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
4093 for (class = 0; class < nVNODECLASSES; class++) {
4094 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4095 for (i = 0; i < vip->nVnodes; i++)
4096 if (vip->vnodes[i].name)
4097 free(vip->vnodes[i].name);
4104 /* Set correct resource utilization statistics */
4105 volHeader.filecount = FilesInVolume;
4106 volHeader.diskused = BlocksInVolume;
4108 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
4109 if (volHeader.uniquifier < (maxunique + 1)) {
4111 Log("Volume uniquifier is too low; fixed\n");
4112 /* Plus 2,000 in case there are workstations out there with
4113 * cached vnodes that have since been deleted
4115 volHeader.uniquifier = (maxunique + 1 + 2000);
4119 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4120 "Only use this salvaged volume to copy data to another volume; "
4121 "do not continue to use this volume (%lu) as-is.\n",
4122 afs_printable_uint32_lu(vid));
4125 #ifdef FSSYNC_BUILD_CLIENT
4126 if (!Testing && salvinfo->VolumeChanged && salvinfo->useFSYNC) {
4127 afs_int32 fsync_code;
4129 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4131 Log("Error trying to tell the fileserver to break callbacks for "
4132 "changed volume %lu; error code %ld\n",
4133 afs_printable_uint32_lu(vid),
4134 afs_printable_int32_ld(fsync_code));
4136 salvinfo->VolumeChanged = 0;
4139 #endif /* FSSYNC_BUILD_CLIENT */
4141 /* Turn off the inUse bit; the volume's been salvaged! */
4142 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4143 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4144 volHeader.inService = 1; /* allow service again */
4145 volHeader.needsCallback = (salvinfo->VolumeChanged != 0);
4146 volHeader.dontSalvage = DONT_SALVAGE;
4147 salvinfo->VolumeChanged = 0;
4149 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4150 osi_Assert(nBytes == sizeof(volHeader));
4153 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
4154 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
4155 FilesInVolume, BlocksInVolume);
4158 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4159 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
/*
 * ClearROInUseBit -- mark the volume described by summary as usable again.
 *
 * Reads the VolumeDiskData header through summary->volumeInfoHandle,
 * asserts on its size and magic, clears inUse and needsSalvaged, sets
 * inService and dontSalvage (DONT_SALVAGE), and writes the header back.
 * Any short read or write is fatal (osi_Assert).
 *
 * NOTE(review): this listing omits short source lines (braces and at least
 * one declaration).
 */
4165 ClearROInUseBit(struct VolumeSummary *summary)
4167 IHandle_t *h = summary->volumeInfoHandle;
4168 afs_sfsize_t nBytes;
4170 VolumeDiskData volHeader;
4172 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4173 osi_Assert(nBytes == sizeof(volHeader));
4174 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
4175 volHeader.inUse = 0;
4176 volHeader.needsSalvaged = 0;
4177 volHeader.inService = 1;
4178 volHeader.dontSalvage = DONT_SALVAGE;
4180 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4181 osi_Assert(nBytes == sizeof(volHeader));
4186 * Possibly delete the volume.
4188 * deleteMe - Always do so, only a partial volume.
/*
 * MaybeZapVolume -- delete a volume that cannot be salvaged, or abort.
 *
 * When the volume is read-only (readOnly(isp)) or deleteMe is set, and a
 * volume header file name is known, the routine logs why the volume is
 * going away, destroys the disk header with VDestroyVolumeDiskHeader(),
 * unlinks the header file (a missing file, ENOENT, is tolerated), asks the
 * fileserver to drop the volume through AskDelete() when useFSYNC is set,
 * and marks the summary as deleted.
 * Otherwise, when check is zero, it logs the failure and calls Abort().
 *
 * NOTE(review): this listing omits short source lines (braces, the
 * declaration of path, and the conditions that select between the two log
 * message pairs).
 * NOTE(review): path is filled with an unbounded sprintf(); its size is
 * not visible here -- confirm it can hold fileSysPath plus fileName.
 */
4191 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4192 char *message, int deleteMe, int check)
4194 if (readOnly(isp) || deleteMe) {
4195 if (isp->volSummary && isp->volSummary->fileName) {
4198 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
4200 Log("It will be deleted on this server (you may find it elsewhere)\n");
4203 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
4205 Log("it will be deleted instead. It should be recloned.\n");
4210 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, isp->volSummary->fileName);
4212 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4214 Log("Error %ld destroying volume disk header for volume %lu\n",
4215 afs_printable_int32_ld(code),
4216 afs_printable_uint32_lu(isp->volumeId));
4219 /* make sure we actually delete the fileName file; ENOENT
4220 * is fine, since VDestroyVolumeDiskHeader probably already
4222 if (unlink(path) && errno != ENOENT) {
4223 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4225 if (salvinfo->useFSYNC) {
4226 AskDelete(salvinfo, isp->volumeId);
4228 isp->volSummary->deleted = 1;
4231 } else if (!check) {
4232 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
4234 Abort("Salvage of volume %u aborted\n", isp->volumeId);
4238 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4240 * Locks a volume on disk for salvaging.
4242 * @param[in] volumeId volume ID to lock
4244 * @return operation status
4246 * @retval -1 volume lock raced with a fileserver restart; all volumes must
4247 * be checked out and locked again
/*
 * LockVolume -- take the on-disk lock for volumeId and flag the volume as
 * being salvaged.
 *
 * Steps visible in this listing:
 *   - obtain the lock type from VVolLockType(V_VOLUPD, 1) and try a
 *     non-blocking lock with VLockVolumeByIdNB(); EBUSY and any other
 *     failure both end in Abort(), with different messages;
 *   - confirm with FSYNC_VerifyCheckout() that the volume is still checked
 *     out to the salvager; SYNC_DENIED means the caller must retry the
 *     checkout, any other non-OK code aborts;
 *   - read the volume disk header and the VolumeDiskData, set inUse to
 *     programType and write it back; a failed write is fatal (osi_Assert).
 *
 * NOTE(review): this listing omits short source lines (braces,
 * declarations, the conditions around the two Abort() calls and the return
 * statements), so the exact return values are not visible here.
 */
4252 LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
4257 /* should always be WRITE_LOCK, but keep the lock-type logic all
4258 * in one place, in VVolLockType. Params will be ignored, but
4259 * try to provide what we're logically doing. */
4260 locktype = VVolLockType(V_VOLUPD, 1);
4262 code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
4264 if (code == EBUSY) {
4265 Abort("Someone else appears to be using volume %lu; Aborted\n",
4266 afs_printable_uint32_lu(volumeId));
4268 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4269 afs_printable_int32_ld(code),
4270 afs_printable_uint32_lu(volumeId));
4273 code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPathName, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4274 if (code == SYNC_DENIED) {
4275 /* need to retry checking out volumes */
4278 if (code != SYNC_OK) {
4279 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4280 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4283 /* set inUse = programType in the volume header to ensure that nobody
4284 * tries to use this volume again without salvaging, if we somehow crash
4285 * or otherwise exit before finishing the salvage.
4289 struct VolumeHeader header;
4290 struct VolumeDiskHeader diskHeader;
4291 struct VolumeDiskData volHeader;
4293 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
4298 DiskToVolumeHeader(&header, &diskHeader);
4300 IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
4301 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4302 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4308 volHeader.inUse = programType;
4310 /* If we can't re-write the header, bail out and error. We don't
4311 * assert when reading the header, since it's possible the
4312 * header isn't really there (when there's no data associated
4313 * with the volume; we just delete the vol header file in that
4314 * case). But if it's there enough that we can read it, but
4315 * somehow we cannot write to it to signify we're salvaging it,
4316 * we've got a big problem and we cannot continue. */
4317 osi_Assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
4324 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
/*
 * AskOffline -- ask the fileserver to take volumeId offline so that it can
 * be salvaged.
 *
 * Sends FSYNC_VOL_OFF through FSYNC_VolOp() up to three times.
 *   - SYNC_OK ends the loop.
 *   - SYNC_DENIED is logged with one of two messages; the "is required"
 *     variant is followed by Abort().
 *   - SYNC_BAD_COMMAND is logged as a protocol mismatch, with a hint that
 *     depends on whether this build is demand-attach, then aborts.
 *   - Any other code is logged, the FSYNC client is shut down with
 *     FSYNC_clientFinis(), and the request is tried again.
 * If the last attempt still did not return SYNC_OK the salvage aborts.
 *
 * NOTE(review): this listing omits short source lines (braces,
 * declarations, the conditions that pick between the alternative messages,
 * and whatever follows FSYNC_clientFinis()).
 */
4327 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
4332 memset(&res, 0, sizeof(res));
4334 for (i = 0; i < 3; i++) {
4335 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4336 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4338 if (code == SYNC_OK) {
4340 } else if (code == SYNC_DENIED) {
4342 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4344 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4345 Abort("Salvage aborted\n");
4346 } else if (code == SYNC_BAD_COMMAND) {
4347 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
4350 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4351 Log("AskOffline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4353 Log("AskOffline: fileserver is DAFS but we are not.\n");
4356 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4357 Log("AskOffline: fileserver is not DAFS but we are.\n");
4359 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4362 Abort("Salvage aborted\n");
4365 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4366 FSYNC_clientFinis();
4370 if (code != SYNC_OK) {
4371 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4372 Abort("Salvage aborted\n");
/*
 * AskDAFS (name taken from the log message below; the function header line
 * is not visible in this listing) -- query the fileserver, using the
 * file-scope variable isDAFS as storage for the answer.
 *
 * Sends FSYNC_VOL_QUERY_VOP for volume id 1 through FSYNC_VolOp() up to
 * three times and branches on SYNC_OK, SYNC_DENIED, SYNC_BAD_COMMAND and
 * SYNC_FAILED (the latter also inspecting res.hdr.reason for
 * FSYNC_UNKNOWN_VOLID).  Any other outcome is logged, the FSYNC client is
 * shut down with FSYNC_clientFinis(), and the query is retried.
 *
 * NOTE(review): the statements inside each branch, the early-out that
 * would use a previously stored isDAFS value, and the return statement are
 * all omitted from this listing; which codes map to which answer cannot be
 * determined from here.
 */
4376 /* don't want to pass around state; remember it here */
4377 static int isDAFS = -1;
4381 afs_int32 code, i, ret = 0;
4384 /* we don't care if we race. the answer shouldn't change */
4388 memset(&res, 0, sizeof(res));
4390 for (i = 0; i < 3; i++) {
4391 code = FSYNC_VolOp(1, NULL,
4392 FSYNC_VOL_QUERY_VOP, FSYNC_SALVAGE, &res);
4394 if (code == SYNC_OK) {
4397 } else if (code == SYNC_DENIED) {
4400 } else if (code == SYNC_BAD_COMMAND) {
4403 } else if (code == SYNC_FAILED) {
4404 if (res.hdr.reason == FSYNC_UNKNOWN_VOLID)
4411 Log("AskDAFS: request to query fileserver failed; trying again...\n");
4412 FSYNC_clientFinis();
/*
 * MaybeAskOnline -- bring volumeId back online only if it still has a disk
 * header.
 *
 * Reads the volume disk header with VReadVolumeDiskHeader(); per the
 * comment below, a failure is taken to mean the volume no longer exists
 * and nothing is requested.  Otherwise AskOnline() is called.
 *
 * NOTE(review): the test on code and the early return are among the short
 * lines omitted from this listing.
 */
4422 MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4424 struct VolumeDiskHeader diskHdr;
4426 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHdr);
4428 /* volume probably does not exist; no need to bring back online */
4431 AskOnline(salvinfo, volumeId);
/*
 * AskOnline -- ask the fileserver to put volumeId back online.
 *
 * Sends FSYNC_VOL_ON through FSYNC_VolOp() up to three times.  SYNC_OK
 * ends the loop; SYNC_DENIED and SYNC_BAD_COMMAND are logged; any other
 * code is logged and the FSYNC client is shut down with
 * FSYNC_clientFinis() before the next attempt.  Unlike AskOffline(), no
 * Abort() call is visible here.
 *
 * NOTE(review): this listing omits short source lines (braces,
 * declarations, loop exits and whatever follows FSYNC_clientFinis()).
 */
4435 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4439 for (i = 0; i < 3; i++) {
4440 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4441 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4443 if (code == SYNC_OK) {
4445 } else if (code == SYNC_DENIED) {
4446 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4447 } else if (code == SYNC_BAD_COMMAND) {
4448 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4450 Log("AskOnline: please make sure file server binaries are same version.\n");
4454 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
4455 FSYNC_clientFinis();
/*
 * AskDelete -- tell the fileserver that volumeId is gone from this
 * partition.
 *
 * Sends FSYNC_VOL_DONE through FSYNC_VolOp() up to three times, clearing
 * the response structure before each attempt.
 *   - SYNC_OK ends the loop.
 *   - SYNC_DENIED and SYNC_BAD_COMMAND are logged.
 *   - SYNC_FAILED with reason FSYNC_UNKNOWN_VOLID or FSYNC_WRONG_PART means
 *     the fileserver already considers the volume gone.
 *   - Any other code is logged and the FSYNC client is shut down with
 *     FSYNC_clientFinis() before the next attempt.
 *
 * All diagnostics carry the "AskDelete:" prefix so that log readers can
 * tell them apart from the messages emitted by AskOnline().
 *
 * NOTE(review): this listing omits short source lines (braces,
 * declarations, loop exits and whatever follows FSYNC_clientFinis()).
 */
4462 AskDelete(struct SalvInfo *salvinfo, VolumeId volumeId)
4467 for (i = 0; i < 3; i++) {
4468 memset(&res, 0, sizeof(res));
4469 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4470 FSYNC_VOL_DONE, FSYNC_SALVAGE, &res);
4472 if (code == SYNC_OK) {
4474 } else if (code == SYNC_DENIED) {
4475 Log("AskDelete: file server denied DONE request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4476 } else if (code == SYNC_BAD_COMMAND) {
4477 Log("AskDelete: fssync protocol mismatch (bad command word '%d')\n",
4480 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4481 Log("AskDelete: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4483 Log("AskDelete: fileserver is DAFS but we are not.\n");
4486 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4487 Log("AskDelete: fileserver is not DAFS but we are.\n");
4489 Log("AskDelete: please make sure fileserver, volserver and salvager binaries are same version.\n");
4493 } else if (code == SYNC_FAILED &&
4494 (res.hdr.reason == FSYNC_UNKNOWN_VOLID ||
4495 res.hdr.reason == FSYNC_WRONG_PART)) {
4496 /* volume is already effectively 'deleted' */
4500 Log("AskDelete: request for fileserver to delete volume failed; trying again...\n");
4501 FSYNC_clientFinis();
4508 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4510 /* Volume parameter is passed in case iopen is upgraded in future to
4511 * require a volume Id to be passed
4514 IHandle_t *srcH, *destH;
4515 FdHandle_t *srcFdP, *destFdP;
4517 afs_foff_t size = 0;
4519 IH_INIT(srcH, device, rwvolume, inode1);
4520 srcFdP = IH_OPEN(srcH);
4521 osi_Assert(srcFdP != NULL);
4522 IH_INIT(destH, device, rwvolume, inode2);
4523 destFdP = IH_OPEN(destH);
4524 while ((nBytes = FDH_PREAD(srcFdP, buf, sizeof(buf), size)) > 0) {
4525 osi_Assert(FDH_PWRITE(destFdP, buf, nBytes, size) == nBytes);
4528 osi_Assert(nBytes == 0);
4529 FDH_REALLYCLOSE(srcFdP);
4530 FDH_REALLYCLOSE(destFdP);
/*
 * PrintInodeList -- log every record of the salvager's inode file.
 *
 * Determines the size of salvinfo->inodeFd with OS_SIZE(), reads the whole
 * file into a heap buffer, and logs one line per struct ViceInodeInfo
 * record: inode number, link count, byte count and the four u.param
 * values.  A negative size, a failed allocation or a short read is fatal
 * (osi_Assert).
 *
 * NOTE(review): this listing omits short source lines (braces and some
 * declarations); whether buf is freed afterwards is not visible here.
 */
4537 PrintInodeList(struct SalvInfo *salvinfo)
4539 struct ViceInodeInfo *ip;
4540 struct ViceInodeInfo *buf;
4543 afs_sfsize_t st_size;
4545 st_size = OS_SIZE(salvinfo->inodeFd);
4546 osi_Assert(st_size >= 0);
4547 buf = (struct ViceInodeInfo *)malloc(st_size);
4548 osi_Assert(buf != NULL);
4549 nInodes = st_size / sizeof(struct ViceInodeInfo);
4550 osi_Assert(OS_READ(salvinfo->inodeFd, buf, st_size) == st_size);
/* nInodes counts down to zero, so each complete record is visited once */
4551 for (ip = buf; nInodes--; ip++) {
4552 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
4553 PrintInode(stmp, ip->inodeNumber), ip->linkCount,
4554 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
4555 ip->u.param[2], ip->u.param[3]);
/*
 * PrintInodeSummary -- log one line for each entry of
 * salvinfo->inodeSummary (nVolumesInInodeFile entries), showing the volume
 * id, RW volume id, index, inode counts and maxUniquifier.
 *
 * NOTE(review): this listing omits short source lines (braces and the
 * declaration of i).
 */
4561 PrintInodeSummary(struct SalvInfo *salvinfo)
4564 struct InodeSummary *isp;
4566 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4567 isp = &salvinfo->inodeSummary[i];
4568 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/*
 * PrintVolumeSummary: walk the salvinfo->nVolumes entries starting at
 * salvinfo->volumeSummaryp and log the fileName of each one.
 *
 * NOTE(review): "header" and "wouldNeedCallback" in the format string are
 * literal text with no conversion specifier; only fileName is printed.
 */
4573 PrintVolumeSummary(struct SalvInfo *salvinfo)
4576 struct VolumeSummary *vsp;
4578 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; vsp++, i++) {
4579 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
/*
 * NOTE(review): the header and the first statements of this routine are not
 * visible in this excerpt; the comments below describe only the lines shown.
 */
4589 osi_Assert(0); /* Fork is never executed in the NT code path */
/*
 * Demand-attach builds only: when f is 0 and this program is the
 * salvageserver, run the child-side reconnect steps guarded by the
 * FSSYNC/SALVSYNC client build flags.  The SALVSYNC step itself is not
 * visible here.
 */
4593 #ifdef AFS_DEMAND_ATTACH_FS
4594 if ((f == 0) && (programType == salvageServer)) {
4595 /* we are a salvageserver child */
4596 #ifdef FSSYNC_BUILD_CLIENT
4597 VChildProcReconnectFS_r();
4599 #ifdef SALVSYNC_BUILD_CLIENT
4603 #endif /* AFS_DEMAND_ATTACH_FS */
4604 #endif /* !AFS_NT40_ENV */
/*
 * NOTE(review): the header of this routine is not visible in this excerpt.
 * Demand-attach builds only: a salvageserver process performs the steps
 * guarded by SALVSYNC_BUILD_CLIENT and FSSYNC_BUILD_CLIENT here; the calls
 * themselves are not shown -- confirm against the full source.
 */
4614 #ifdef AFS_DEMAND_ATTACH_FS
4615 if (programType == salvageServer) {
4616 #ifdef SALVSYNC_BUILD_CLIENT
4619 #ifdef FSSYNC_BUILD_CLIENT
4623 #endif /* AFS_DEMAND_ATTACH_FS */
/* a thread other than main_thread ends itself, passing code as its result */
4626 if (main_thread != pthread_self())
4627 pthread_exit((void *)code);
/*
 * NOTE(review): the header of this routine is not visible in this excerpt.
 * Blocks in wait() for a child and asserts that the call succeeded.
 */
4640 pid = wait(&status);
4641 osi_Assert(pid != -1);
/* report a core dump, naming the program passed in as prog */
4642 if (WCOREDUMP(status))
4643 Log("\"%s\" core dumped!\n", prog);
/* child was signalled or exited non-zero; the action taken is not shown here */
4644 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp: format `clock` as local time into a function-static 20-byte
 * buffer.
 *
 * Two layouts are visible: "MM/DD/YYYY HH:MM:SS" (19 characters plus the
 * terminator, exactly filling the buffer) and "MM/DD/YYYY HH:MM".  The test
 * on `precision` that chooses between them is not visible in this excerpt.
 *
 * Because the buffer is static, each call overwrites the previous result.
 * NOTE(review): localtime() also returns shared static storage, so this is
 * presumably not safe to call from several threads at once -- confirm.
 */
4650 TimeStamp(time_t clock, int precision)
4653 static char timestamp[20];
4654 lt = localtime(&clock);
4656 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M:%S", lt);
4658 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M", lt);
/*
 * CheckLogFile: rotate the salvager log and open a fresh one.
 *
 * The existing file at log_path is renamed to "<log_path>.old" and log_path
 * is then reopened in append mode into the global logFile.  If the open
 * fails, the existing comment indicates that stdout is used instead.
 *
 * NOTE(review): oldSlvgLog is filled with strcpy/strcat and no length check
 * is visible; a log_path longer than AFSDIR_PATH_MAX - 5 would overflow it.
 * The statements inside the AFS_NT40_ENV guard are not visible in this
 * excerpt.
 */
4663 CheckLogFile(char * log_path)
4665 char oldSlvgLog[AFSDIR_PATH_MAX];
4667 #ifndef AFS_NT40_ENV
4674 strcpy(oldSlvgLog, log_path);
4675 strcat(oldSlvgLog, ".old");
4677 renamefile(log_path, oldSlvgLog);
4678 logFile = afs_fopen(log_path, "a");
4680 if (!logFile) { /* still nothing, use stdout */
4684 #ifndef AFS_NAMEI_ENV
4685 AFS_DEBUG_IOPS_LOG(logFile);
4690 #ifndef AFS_NT40_ENV
/*
 * TimeStampLogFile: give the current log a second, timestamped name.
 *
 * Builds "<log_path>.YYYY-MM-DD.HH:MM:SS" from the local time of `now` and
 * hard-links log_path to that name.  The result of link() is deliberately
 * ignored.
 *
 * NOTE(review): the format has six numeric fields but only five numeric
 * arguments are visible; the seconds argument is presumably on a line not
 * shown in this excerpt.
 */
4692 TimeStampLogFile(char * log_path)
4694 char stampSlvgLog[AFSDIR_PATH_MAX];
4699 lt = localtime(&now);
4700 (void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
4701 "%s.%04d-%02d-%02d.%02d:%02d:%02d",
4702 log_path, lt->tm_year + 1900,
4703 lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
4706 /* try to link the logfile to a timestamped filename */
4707 /* if it fails, oh well, nothing we can do */
4708 link(log_path, stampSlvgLog);
/*
 * NOTE(review): the header of this routine is not visible in this excerpt.
 * On non-NT builds a message explains that the log cannot be shown when
 * syslog is in use; the condition guarding it is not shown here.
 */
4717 #ifndef AFS_NT40_ENV
4719 printf("Can't show log since using syslog.\n");
/* reopen the salvager log read-only and walk it line by line */
4730 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
4733 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
4736 while (fgets(line, sizeof(line), logFile))
/*
 * Log: printf-style logging entry point.
 *
 * The message is first formatted into the local buffer tmp, bounded by
 * sizeof tmp.  On non-NT builds it can be sent to syslog at LOG_INFO;
 * otherwise it is written to logFile, prefixed with the current time from
 * TimeStamp(now.tv_sec, 1).
 *
 * NOTE(review): the condition that selects syslog over logFile is not
 * visible in this excerpt.
 */
4743 Log(const char *format, ...)
4749 va_start(args, format);
4750 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4752 #ifndef AFS_NT40_ENV
/* tmp is passed as an argument, never as the format string */
4754 syslog(LOG_INFO, "%s", tmp);
4758 gettimeofday(&now, 0);
4759 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort: printf-style reporting of a fatal condition.
 *
 * The message is formatted into tmp (bounded by sizeof tmp) and then sent
 * either to syslog at LOG_INFO (non-NT builds) or to logFile.  Unlike Log,
 * the logFile line visible here carries no timestamp prefix.
 *
 * NOTE(review): the steps that follow the message output, and the test that
 * selects syslog, are not visible in this excerpt.
 */
4765 Abort(const char *format, ...)
4770 va_start(args, format);
4771 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4773 #ifndef AFS_NT40_ENV
4775 syslog(LOG_INFO, "%s", tmp);
4779 fprintf(logFile, "%s", tmp);
/*
 * ToString: allocate strlen(s) + 1 bytes for a copy of s and assert that
 * the allocation succeeded.
 *
 * NOTE(review): the copy and the return statement are not visible in this
 * excerpt; presumably the caller receives the new buffer and owns it.
 */
4791 ToString(const char *s)
4794 p = (char *)malloc(strlen(s) + 1);
4795 osi_Assert(p != NULL);
4800 /* Remove the FORCESALVAGE file */
/*
 * Builds "<path>/FORCESALVAGE" in target.  The file is unlinked only when
 * Testing is off, ForceSalvage is set, and afs_stat confirms it exists.
 *
 * NOTE(review): target's declaration is not visible in this excerpt, and
 * strcpy/strcat perform no length check on path.
 */
4802 RemoveTheForce(char *path)
4805 struct afs_stat_st force; /* so we can use afs_stat to find it */
4806 strcpy(target,path);
4807 strcat(target,"/FORCESALVAGE");
4808 if (!Testing && ForceSalvage) {
4809 if (afs_stat(target,&force) == 0) unlink(target);
/*
 * Two build variants of UseTheForceLuke(path) follow.
 *
 * When AFS_AIX32_ENV is not defined, the routine builds
 * "<path>/FORCESALVAGE" in target and returns non-zero exactly when
 * afs_stat finds that file.
 *
 * The second variant, described by the original AIX commentary kept below,
 * has a visible return of 0.
 *
 * NOTE(review): target's declaration, the preprocessor line separating the
 * two variants, and part of the second variant's body are not visible in
 * this excerpt; strcpy/strcat perform no length check on path.
 */
4813 #ifndef AFS_AIX32_ENV
4815 * UseTheForceLuke - see if we can use the force
4818 UseTheForceLuke(char *path)
4820 struct afs_stat_st force;
4822 strcpy(target,path);
4823 strcat(target,"/FORCESALVAGE");
4825 return (afs_stat(target, &force) == 0);
4829 * UseTheForceLuke - see if we can use the force
4832 * The VRMIX fsck will not muck with the filesystem it is supposedly
4833 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4834 * muck directly with the root inode, which is within the normal
4836 * ListViceInodes() has a side effect of setting ForceSalvage if
4837 * it detects a need, based on root inode examination.
4840 UseTheForceLuke(char *path)
4843 return 0; /* sorry OB1 */
4848 /* NT support routines */
/*
 * execpathname caches the path of the running executable so that it is
 * looked up only once (it is filled on the first call, when still empty).
 *
 * nt_SalvagePartition: start a child salvager for partName as job jobn.
 * A job record is filled with SALVAGER_MAGIC, the job number and the
 * partition name, then passed with save_args to spawnprocveb.
 *
 * NOTE(review): the lookup-failure test compares n with 1023 although the
 * size given to GetModuleFileName is MAX_PATH - 1 -- confirm the intended
 * truncation check.  The copy into job.cj_part is an unbounded strcpy.
 * The failure return and the final return are not visible in this excerpt.
 */
4850 static char execpathname[MAX_PATH];
4852 nt_SalvagePartition(char *partName, int jobn)
4857 if (!*execpathname) {
4858 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
4859 if (!n || n == 1023)
4862 job.cj_magic = SALVAGER_MAGIC;
4863 job.cj_number = jobn;
4864 (void)strcpy(job.cj_part, partName);
4865 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage: child-side setup from the job record passed in
 * datap/len.
 *
 * The record is rejected unless len equals sizeof(childJob_t) and cj_magic
 * equals SALVAGER_MAGIC.  A per-job log name of the form
 * "<AFSDIR_SERVER_SLVGLOG_FILEPATH>.<number>" is then built and opened for
 * writing into the global logFile.
 *
 * NOTE(review): the values returned on rejection, the integer argument to
 * sprintf, and the handling of a failed open are not visible in this
 * excerpt.  sprintf into logname is not length-checked.
 */
4870 nt_SetupPartitionSalvage(void *datap, int len)
4872 childJob_t *jobp = (childJob_t *) datap;
4873 char logname[AFSDIR_PATH_MAX];
4875 if (len != sizeof(childJob_t))
4877 if (jobp->cj_magic != SALVAGER_MAGIC)
4882 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
4884 logFile = afs_fopen(logname, "w");
4892 #endif /* AFS_NT40_ENV */