2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
91 #include <sys/param.h>
95 #endif /* ITIMER_REAL */
101 #include <sys/stat.h>
106 #include <WINNT/afsevent.h>
109 #define WCOREDUMP(x) ((x) & 0200)
112 #include <afs/afsint.h>
113 #include <afs/assert.h>
114 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
115 #if defined(AFS_VFSINCL_ENV)
116 #include <sys/vnode.h>
118 #include <sys/fs/ufs_inode.h>
120 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
121 #include <ufs/ufs/dinode.h>
122 #include <ufs/ffs/fs.h>
124 #include <ufs/inode.h>
127 #else /* AFS_VFSINCL_ENV */
129 #include <ufs/inode.h>
130 #else /* AFS_OSF_ENV */
131 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
132 #include <sys/inode.h>
135 #endif /* AFS_VFSINCL_ENV */
136 #endif /* AFS_SGI_ENV */
139 #include <sys/lockf.h>
143 #include <checklist.h>
145 #if defined(AFS_SGI_ENV)
150 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
153 #include <sys/mnttab.h>
154 #include <sys/mntent.h>
159 #endif /* AFS_SGI_ENV */
160 #endif /* AFS_HPUX_ENV */
165 #include <afs/osi_inode.h>
169 #include <afs/afsutil.h>
170 #include <afs/fileutil.h>
171 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
179 #include <afs/afssyscalls.h>
183 #include "partition.h"
184 #include "daemon_com.h"
186 #include "volume_inline.h"
187 #include "salvsync.h"
188 #include "viceinode.h"
190 #include "volinodes.h" /* header magic number, etc. stuff */
191 #include "vol-salvage.h"
192 #include "vol_internal.h"
194 #include <afs/prs_fs.h>
196 #ifdef FSSYNC_BUILD_CLIENT
197 #include "vg_cache.h"
204 /*@+fcnmacros +macrofcndecl@*/
207 extern off64_t afs_lseek(int FD, off64_t O, int F);
208 #endif /*S_SPLINT_S */
209 #define afs_lseek(FD, O, F) lseek64(FD, (off64_t) (O), F)
210 #define afs_stat stat64
211 #define afs_fstat fstat64
212 #define afs_open open64
213 #define afs_fopen fopen64
214 #else /* !O_LARGEFILE */
216 extern off_t afs_lseek(int FD, off_t O, int F);
217 #endif /*S_SPLINT_S */
218 #define afs_lseek(FD, O, F) lseek(FD, (off_t) (O), F)
219 #define afs_stat stat
220 #define afs_fstat fstat
221 #define afs_open open
222 #define afs_fopen fopen
223 #endif /* !O_LARGEFILE */
224 /*@=fcnmacros =macrofcndecl@*/
227 extern void *calloc();
229 static char *TimeStamp(time_t clock, int precision);
232 int debug; /* -d flag */
233 extern int Testing; /* -n flag */
234 int ListInodeOption; /* -i flag */
235 int ShowRootFiles; /* -r flag */
236 int RebuildDirs; /* -sal flag */
237 int Parallel = 4; /* -para X flag */
238 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
239 int forceR = 0; /* -b flag */
240 int ShowLog = 0; /* -showlog flag */
241 int ShowSuid = 0; /* -showsuid flag */
242 int ShowMounts = 0; /* -showmounts flag */
243 int orphans = ORPH_IGNORE; /* -orphans option */
248 int useSyslog = 0; /* -syslog flag */
249 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
258 #define MAXPARALLEL 32
260 int OKToZap; /* -o flag */
261 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
262 * in the volume header */
264 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
266 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
268 Device fileSysDevice; /* The device number of the current
269 * partition being salvaged */
273 char *fileSysPath; /* The path of the mounted partition currently
274 * being salvaged, i.e. the directory
275 * containing the volume headers */
277 char *fileSysPathName; /* NT needs this to make name pretty in log. */
278 IHandle_t *VGLinkH; /* Link handle for current volume group. */
279 int VGLinkH_cnt; /* # of references to lnk handle. */
280 struct DiskPartition64 *fileSysPartition; /* Partition being salvaged */
282 char *fileSysDeviceName; /* The block device where the file system
283 * being salvaged was mounted */
284 char *filesysfulldev;
286 int VolumeChanged; /* Set by any routine which would change the volume in
287 * a way which would require callbacks to be broken if the
288 * volume was put back on line by an active file server */
290 VolumeDiskData VolInfo; /* A copy of the last good or salvaged volume header dealt with */
292 int nVolumesInInodeFile; /* Number of read-write volumes summarized */
293 int inodeFd; /* File descriptor for inode file */
296 struct VnodeInfo vnodeInfo[nVNODECLASSES];
299 struct VolumeSummary *volumeSummaryp = NULL; /* Holds all the volumes in a part */
300 int nVolumes; /* Number of volumes (read-write and read-only)
301 * in volume summary */
307 /* Forward declarations */
308 /*@printflike@*/ void Log(const char *format, ...);
309 /*@printflike@*/ void Abort(const char *format, ...);
310 static int IsVnodeOrphaned(VnodeId vnode);
311 static int AskVolumeSummary(VolumeId singleVolumeNumber);
313 #ifdef AFS_DEMAND_ATTACH_FS
314 static int LockVolume(VolumeId volumeId);
315 #endif /* AFS_DEMAND_ATTACH_FS */
317 /* Uniquifier stored in the Inode */
322 return (u & 0x3fffff);
324 #if defined(AFS_SGI_EXMAG)
325 return (u & SGI_UNIQMASK);
328 #endif /* AFS_SGI_EXMAG */
333 BadError(register int aerror)
335 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
337 return 0; /* otherwise may be transient, e.g. EMFILE */
342 char *save_args[MAX_ARGS];
344 extern pthread_t main_thread;
345 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
349 * Get the salvage lock if not already held. Hold until process exits.
351 * @param[in] locktype READ_LOCK or WRITE_LOCK
354 _ObtainSalvageLock(int locktype)
356 struct VLockFile salvageLock;
361 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
363 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
366 "salvager: There appears to be another salvager running! "
371 "salvager: Error %d trying to acquire salvage lock! "
377 ObtainSalvageLock(void)
379 _ObtainSalvageLock(WRITE_LOCK);
382 ObtainSharedSalvageLock(void)
384 _ObtainSalvageLock(READ_LOCK);
388 #ifdef AFS_SGI_XFS_IOPS_ENV
389 /* Check if the given partition is mounted. For XFS, the root inode is not a
390 * constant. So we check the hard way.
393 IsPartitionMounted(char *part)
396 struct mntent *mntent;
398 assert(mntfp = setmntent(MOUNTED, "r"));
399 while (mntent = getmntent(mntfp)) {
400 if (!strcmp(part, mntent->mnt_dir))
405 return mntent ? 1 : 1;
408 /* Check if the given inode is the root of the filesystem. */
409 #ifndef AFS_SGI_XFS_IOPS_ENV
411 IsRootInode(struct afs_stat *status)
414 * The root inode is not a fixed value in XFS partitions. So we need to
415 * see if the partition is in the list of mounted partitions. This only
416 * affects the SalvageFileSys path, so we check there.
418 return (status->st_ino == ROOTINODE);
423 #ifndef AFS_NAMEI_ENV
424 /* We don't want to salvage big files filesystems, since we can't put volumes on
428 CheckIfBigFilesFS(char *mountPoint, char *devName)
430 struct superblock fs;
433 if (strncmp(devName, "/dev/", 5)) {
434 (void)sprintf(name, "/dev/%s", devName);
436 (void)strcpy(name, devName);
439 if (ReadSuper(&fs, name) < 0) {
440 Log("Unable to read superblock. Not salvaging partition %s.\n",
444 if (IsBigFilesFileSystem(&fs)) {
445 Log("Partition %s is a big files filesystem, not salvaging.\n",
455 #define HDSTR "\\Device\\Harddisk"
456 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
458 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
463 static int dowarn = 1;
465 if (!QueryDosDevice(p1->devName, res, RES_LEN - 1))
467 if (strncmp(res, HDSTR, HDLEN)) {
470 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
471 res, HDSTR, p1->devName);
475 d1 = atoi(&res[HDLEN]);
477 if (!QueryDosDevice(p2->devName, res, RES_LEN - 1))
479 if (strncmp(res, HDSTR, HDLEN)) {
482 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
483 res, HDSTR, p2->devName);
487 d2 = atoi(&res[HDLEN]);
492 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
495 /* This assumes that two partitions with the same device number divided by
496 * PartsPerDisk are on the same disk.
499 SalvageFileSysParallel(struct DiskPartition64 *partP)
502 struct DiskPartition64 *partP;
503 int pid; /* Pid for this job */
504 int jobnumb; /* Log file job number */
505 struct job *nextjob; /* Next partition on disk to salvage */
507 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
508 struct job *thisjob = 0;
509 static int numjobs = 0;
510 static int jobcount = 0;
516 char logFileName[256];
520 /* We have a partition to salvage. Copy it into thisjob */
521 thisjob = (struct job *)malloc(sizeof(struct job));
523 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
526 memset(thisjob, 0, sizeof(struct job));
527 thisjob->partP = partP;
528 thisjob->jobnumb = jobcount;
530 } else if (jobcount == 0) {
531 /* We are asking to wait for all jobs (partp == 0), yet we never
534 Log("No file system partitions named %s* found; not salvaged\n",
535 VICE_PARTITION_PREFIX);
539 if (debug || Parallel == 1) {
541 SalvageFileSys(thisjob->partP, 0);
548 /* Check to see if thisjob is for a disk that we are already
549 * salvaging. If it is, link it in as the next job to do. The
550 * jobs array has 1 entry per disk being salvaged. numjobs is
551 * the total number of disks currently being salvaged. In
552 * order to keep the jobs array compact, when a disk is
553 * completed, the highest element in the jobs array is moved
554 * down to the now-open slot.
556 for (j = 0; j < numjobs; j++) {
557 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
558 /* On same disk, add it to this list and return */
559 thisjob->nextjob = jobs[j]->nextjob;
560 jobs[j]->nextjob = thisjob;
567 /* Loop until we start thisjob or until all existing jobs are finished */
568 while (thisjob || (!partP && (numjobs > 0))) {
569 startjob = -1; /* No new job to start */
571 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
572 /* Either the max jobs are running or we have to wait for all
573 * the jobs to finish. In either case, we wait for at least one
574 * job to finish. When it's done, clean up after it.
576 pid = wait(&wstatus);
578 for (j = 0; j < numjobs; j++) { /* Find which job it is */
579 if (pid == jobs[j]->pid)
583 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
584 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
587 numjobs--; /* job no longer running */
588 oldjob = jobs[j]; /* remember */
589 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
590 free(oldjob); /* free the old job */
592 /* If there is another partition on the disk to salvage, then
593 * say we will start it (startjob). If not, then put thisjob there
594 * and say we will start it.
596 if (jobs[j]) { /* Another partitions to salvage */
597 startjob = j; /* Will start it */
598 } else { /* There is not another partition to salvage */
600 jobs[j] = thisjob; /* Add thisjob */
602 startjob = j; /* Will start it */
604 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
605 startjob = -1; /* Don't start it - already running */
609 /* We don't have to wait for a job to complete */
611 jobs[numjobs] = thisjob; /* Add this job */
613 startjob = numjobs; /* Will start it */
617 /* Start up a new salvage job on a partition in job slot "startjob" */
618 if (startjob != -1) {
620 Log("Starting salvage of file system partition %s\n",
621 jobs[startjob]->partP->name);
623 /* For NT, we not only fork, but re-exec the salvager. Pass in the
624 * commands and pass the child job number via the data path.
627 nt_SalvagePartition(jobs[startjob]->partP->name,
628 jobs[startjob]->jobnumb);
629 jobs[startjob]->pid = pid;
634 jobs[startjob]->pid = pid;
640 for (fd = 0; fd < 16; fd++)
647 openlog("salvager", LOG_PID, useSyslogFacility);
651 (void)afs_snprintf(logFileName, sizeof logFileName,
653 AFSDIR_SERVER_SLVGLOG_FILEPATH,
654 jobs[startjob]->jobnumb);
655 logFile = afs_fopen(logFileName, "w");
660 SalvageFileSys1(jobs[startjob]->partP, 0);
665 } /* while ( thisjob || (!partP && numjobs > 0) ) */
667 /* If waited for all jobs to complete, now collect log files and return */
669 if (!useSyslog) /* if syslogging - no need to collect */
672 for (i = 0; i < jobcount; i++) {
673 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
674 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
675 if ((passLog = afs_fopen(logFileName, "r"))) {
676 while (fgets(buf, sizeof(buf), passLog)) {
681 (void)unlink(logFileName);
690 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
692 if (!canfork || debug || Fork() == 0) {
693 SalvageFileSys1(partP, singleVolumeNumber);
694 if (canfork && !debug) {
699 Wait("SalvageFileSys");
703 get_DevName(char *pbuffer, char *wpath)
705 char pbuf[128], *ptr;
706 strcpy(pbuf, pbuffer);
707 ptr = (char *)strrchr(pbuf, '/');
713 ptr = (char *)strrchr(pbuffer, '/');
715 strcpy(pbuffer, ptr + 1);
722 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
725 char inodeListPath[256];
726 FILE *inodeFile = NULL;
727 static char tmpDevName[100];
728 static char wpath[100];
729 struct VolumeSummary *vsp, *esp;
740 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
741 Abort("Raced too many times with fileserver restarts while trying to "
742 "checkout/lock volumes; Aborted\n");
744 #ifdef AFS_DEMAND_ATTACH_FS
746 /* unlock all previous volume locks, since we're about to lock them
748 VLockFileReinit(&partP->volLockFile);
750 #endif /* AFS_DEMAND_ATTACH_FS */
752 fileSysPartition = partP;
753 fileSysDevice = fileSysPartition->device;
754 fileSysPathName = VPartitionPath(fileSysPartition);
757 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
758 (void)sprintf(fileSysPath, "%s\\", fileSysPathName);
759 name = partP->devName;
761 fileSysPath = fileSysPathName;
762 strcpy(tmpDevName, partP->devName);
763 name = get_DevName(tmpDevName, wpath);
764 fileSysDeviceName = name;
765 filesysfulldev = wpath;
768 if (singleVolumeNumber) {
769 #ifndef AFS_DEMAND_ATTACH_FS
770 /* only non-DAFS locks the partition when salvaging a single volume;
771 * DAFS will lock the individual volumes in the VG */
772 VLockPartition(partP->name);
773 #endif /* !AFS_DEMAND_ATTACH_FS */
777 /* salvageserver already setup fssync conn for us */
778 if ((programType != salvageServer) && !VConnectFS()) {
779 Abort("Couldn't connect to file server\n");
782 AskOffline(singleVolumeNumber, partP->name);
783 #ifdef AFS_DEMAND_ATTACH_FS
784 if (LockVolume(singleVolumeNumber)) {
787 #endif /* AFS_DEMAND_ATTACH_FS */
790 VLockPartition(partP->name);
794 ForceSalvage = UseTheForceLuke(fileSysPath);
797 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
798 partP->name, name, (Testing ? "(READONLY mode)" : ""));
800 Log("***Forced salvage of all volumes on this partition***\n");
805 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
812 assert((dirp = opendir(fileSysPath)) != NULL);
813 while ((dp = readdir(dirp))) {
814 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
815 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
817 Log("Removing old salvager temp files %s\n", dp->d_name);
818 strcpy(npath, fileSysPath);
820 strcat(npath, dp->d_name);
826 tdir = (tmpdir ? tmpdir : fileSysPath);
828 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
829 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
831 snprintf(inodeListPath, 255, "%s/salvage.inodes.%s.%d", tdir, name,
835 inodeFile = fopen(inodeListPath, "w+b");
837 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
840 /* Using nt_unlink here since we're really using the delete on close
841 * semantics of unlink. In most places in the salvager, we really do
842 * mean to unlink the file at that point. Those places have been
843 * modified to actually do that so that the NT crt can be used there.
845 code = nt_unlink(inodeListPath);
847 code = unlink(inodeListPath);
850 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
853 if (GetInodeSummary(inodeFile, singleVolumeNumber) < 0) {
857 inodeFd = fileno(inodeFile);
859 Abort("Temporary file %s is missing...\n", inodeListPath);
860 afs_lseek(inodeFd, 0L, SEEK_SET);
861 if (ListInodeOption) {
865 /* enumerate volumes in the partition.
866 * figure out sets of read-only + rw volumes.
867 * salvage each set, read-only volumes first, then read-write.
868 * Fix up inodes on last volume in set (whether it is read-write
871 if (GetVolumeSummary(singleVolumeNumber)) {
875 for (i = j = 0, vsp = volumeSummaryp, esp = vsp + nVolumes;
876 i < nVolumesInInodeFile; i = j) {
877 VolumeId rwvid = inodeSummary[i].RWvolumeId;
879 j < nVolumesInInodeFile && inodeSummary[j].RWvolumeId == rwvid;
881 VolumeId vid = inodeSummary[j].volumeId;
882 struct VolumeSummary *tsp;
883 /* Scan volume list (from partition root directory) looking for the
884 * current rw volume number in the volume list from the inode scan.
885 * If there is one here that is not in the inode volume list,
887 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
889 DeleteExtraVolumeHeaderFile(vsp);
891 /* Now match up the volume summary info from the root directory with the
892 * entry in the volume list obtained from scanning inodes */
893 inodeSummary[j].volSummary = NULL;
894 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
895 if (tsp->header.id == vid) {
896 inodeSummary[j].volSummary = tsp;
902 /* Salvage the group of volumes (several read-only + 1 read/write)
903 * starting with the current read-only volume we're looking at.
905 SalvageVolumeGroup(&inodeSummary[i], j - i);
908 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
909 for (; vsp < esp; vsp++) {
911 DeleteExtraVolumeHeaderFile(vsp);
914 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
915 RemoveTheForce(fileSysPath);
917 if (!Testing && singleVolumeNumber) {
918 #ifdef AFS_DEMAND_ATTACH_FS
919 /* unlock vol headers so the fs can attach them when we AskOnline */
920 VLockFileReinit(&fileSysPartition->volLockFile);
921 #endif /* AFS_DEMAND_ATTACH_FS */
923 AskOnline(singleVolumeNumber, fileSysPartition->name);
925 /* Step through the volumeSummary list and set all volumes on-line.
926 * The volumes were taken off-line in GetVolumeSummary.
928 for (j = 0; j < nVolumes; j++) {
929 AskOnline(volumeSummaryp[j].header.id, fileSysPartition->name);
933 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
934 fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
937 fclose(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
941 DeleteExtraVolumeHeaderFile(register struct VolumeSummary *vsp)
944 sprintf(path, "%s/%s", fileSysPath, vsp->fileName);
947 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
950 code = VDestroyVolumeDiskHeader(fileSysPartition, vsp->header.id, vsp->header.parent);
952 Log("Error %ld destroying volume disk header for volume %lu\n",
953 afs_printable_int32_ld(code),
954 afs_printable_uint32_lu(vsp->header.id));
957 /* make sure we actually delete the fileName file; ENOENT
958 * is fine, since VDestroyVolumeDiskHeader probably already
960 if (unlink(path) && errno != ENOENT) {
961 Log("Unable to unlink %s (errno = %d)\n", path, errno);
968 CompareInodes(const void *_p1, const void *_p2)
970 register const struct ViceInodeInfo *p1 = _p1;
971 register const struct ViceInodeInfo *p2 = _p2;
972 if (p1->u.vnode.vnodeNumber == INODESPECIAL
973 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
974 VolumeId p1rwid, p2rwid;
976 (p1->u.vnode.vnodeNumber ==
977 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
979 (p2->u.vnode.vnodeNumber ==
980 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
985 if (p1->u.vnode.vnodeNumber == INODESPECIAL
986 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
987 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
988 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
989 if (p1->u.vnode.volumeId == p1rwid)
991 if (p2->u.vnode.volumeId == p2rwid)
993 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
995 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
996 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
997 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
999 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1001 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1003 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1005 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1007 /* The following tests are reversed, so that the most desirable
1008 * of several similar inodes comes first */
1009 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1010 #ifdef AFS_3DISPARES
1011 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1012 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1015 #ifdef AFS_SGI_EXMAG
1016 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1017 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1022 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1023 #ifdef AFS_3DISPARES
1024 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1025 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1028 #ifdef AFS_SGI_EXMAG
1029 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1030 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1035 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1036 #ifdef AFS_3DISPARES
1037 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1038 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1041 #ifdef AFS_SGI_EXMAG
1042 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1043 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1048 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1049 #ifdef AFS_3DISPARES
1050 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1051 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1054 #ifdef AFS_SGI_EXMAG
1055 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1056 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1065 CountVolumeInodes(register struct ViceInodeInfo *ip, int maxInodes,
1066 register struct InodeSummary *summary)
1068 VolumeId volume = ip->u.vnode.volumeId;
1069 VolumeId rwvolume = volume;
1070 register int n, nSpecial;
1071 register Unique maxunique;
1074 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1076 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1078 rwvolume = ip->u.special.parentId;
1079 /* This isn't quite right, as there could (in error) be different
1080 * parent inodes in different special vnodes */
1082 if (maxunique < ip->u.vnode.vnodeUniquifier)
1083 maxunique = ip->u.vnode.vnodeUniquifier;
1087 summary->volumeId = volume;
1088 summary->RWvolumeId = rwvolume;
1089 summary->nInodes = n;
1090 summary->nSpecialInodes = nSpecial;
1091 summary->maxUniquifier = maxunique;
1095 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1097 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1098 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1099 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1104 * Collect list of inodes in file named by path. If a truly fatal error,
1105 * unlink the file and abort. For lesser errors, return -1. The file will
1106 * be unlinked by the caller.
1109 GetInodeSummary(FILE *inodeFile, VolumeId singleVolumeNumber)
1111 struct afs_stat status;
1114 struct ViceInodeInfo *ip;
1115 struct InodeSummary summary;
1116 char summaryFileName[50];
1119 char *dev = fileSysPath;
1120 char *wpath = fileSysPath;
1122 char *dev = fileSysDeviceName;
1123 char *wpath = filesysfulldev;
1125 char *part = fileSysPath;
1128 /* This file used to come from vfsck; cobble it up ourselves now... */
1130 ListViceInodes(dev, fileSysPath, inodeFile,
1131 singleVolumeNumber ? OnlyOneVolume : 0,
1132 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1134 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1137 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1139 if (forceSal && !ForceSalvage) {
1140 Log("***Forced salvage of all volumes on this partition***\n");
1143 fseek(inodeFile, 0L, SEEK_SET);
1144 inodeFd = fileno(inodeFile);
1145 if (inodeFd == -1 || afs_fstat(inodeFd, &status) == -1) {
1146 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1148 tdir = (tmpdir ? tmpdir : part);
1150 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1151 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp"));
1153 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1154 "%s/salvage.temp.%d", tdir, getpid());
1156 summaryFile = afs_fopen(summaryFileName, "a+");
1157 if (summaryFile == NULL) {
1158 Abort("Unable to create inode summary file\n");
1162 /* Using nt_unlink here since we're really using the delete on close
1163 * semantics of unlink. In most places in the salvager, we really do
1164 * mean to unlink the file at that point. Those places have been
1165 * modified to actually do that so that the NT crt can be used there.
1167 code = nt_unlink(summaryFileName);
1169 code = unlink(summaryFileName);
1172 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1175 if (!canfork || debug || Fork() == 0) {
1177 unsigned long st_size=(unsigned long) status.st_size;
1178 nInodes = st_size / sizeof(struct ViceInodeInfo);
1180 fclose(summaryFile);
1181 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1182 RemoveTheForce(fileSysPath);
1184 struct VolumeSummary *vsp;
1187 GetVolumeSummary(singleVolumeNumber);
1189 for (i = 0, vsp = volumeSummaryp; i < nVolumes; i++) {
1191 DeleteExtraVolumeHeaderFile(vsp);
1194 Log("%s vice inodes on %s; not salvaged\n",
1195 singleVolumeNumber ? "No applicable" : "No", dev);
1198 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1200 fclose(summaryFile);
1202 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1205 if (read(inodeFd, ip, st_size) != st_size) {
1206 fclose(summaryFile);
1207 Abort("Unable to read inode table; %s not salvaged\n", dev);
1209 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1210 if (afs_lseek(inodeFd, 0, SEEK_SET) == -1
1211 || write(inodeFd, ip, st_size) != st_size) {
1212 fclose(summaryFile);
1213 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1217 CountVolumeInodes(ip, nInodes, &summary);
1218 if (fwrite(&summary, sizeof(summary), 1, summaryFile) != 1) {
1219 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1220 fclose(summaryFile);
1223 summary.index += (summary.nInodes);
1224 nInodes -= summary.nInodes;
1225 ip += summary.nInodes;
1227 /* Following fflush is not fclose, because if it was debug mode would not work */
1228 if (fflush(summaryFile) == EOF || fsync(fileno(summaryFile)) == -1) {
1229 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1230 fclose(summaryFile);
1233 if (canfork && !debug) {
1238 if (Wait("Inode summary") == -1) {
1239 fclose(summaryFile);
1240 Exit(1); /* salvage of this partition aborted */
1243 assert(afs_fstat(fileno(summaryFile), &status) != -1);
1244 if (status.st_size != 0) {
1246 unsigned long st_status=(unsigned long)status.st_size;
1247 inodeSummary = (struct InodeSummary *)malloc(st_status);
1248 assert(inodeSummary != NULL);
1249 /* For GNU we need to do lseek to get the file pointer moved. */
1250 assert(afs_lseek(fileno(summaryFile), 0, SEEK_SET) == 0);
1251 ret = read(fileno(summaryFile), inodeSummary, st_status);
1252 assert(ret == st_status);
1254 nVolumesInInodeFile =(unsigned long)(status.st_size) / sizeof(struct InodeSummary);
1255 Log("%d nVolumesInInodeFile %d \n",nVolumesInInodeFile,(unsigned long)(status.st_size));
1256 fclose(summaryFile);
1260 /* Comparison routine for volume sort.
1261 This is setup so that a read-write volume comes immediately before
1262 any read-only clones of that volume */
1264 CompareVolumes(const void *_p1, const void *_p2)
1266 register const struct VolumeSummary *p1 = _p1;
1267 register const struct VolumeSummary *p2 = _p2;
1268 if (p1->header.parent != p2->header.parent)
1269 return p1->header.parent < p2->header.parent ? -1 : 1;
1270 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1272 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1274 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1278 * Gleans volumeSummary information by asking the fileserver
1280 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1281 * salvaging a whole partition
1283 * @return whether we obtained the volume summary information or not
1284 * @retval 0 success; we obtained the volume summary information
1285 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1287 * @retval 1 we did not get the volume summary information; either the
1288 * fileserver responded with an error, or we are not supposed to
1289 * ask the fileserver for the information (e.g. we are salvaging
1290 * the entire partition or we are not the salvageserver)
1292 * @note for non-DAFS, always returns 1
1295 AskVolumeSummary(VolumeId singleVolumeNumber)
1298 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1299 if (programType == salvageServer) {
1300 if (singleVolumeNumber) {
1301 FSSYNC_VGQry_response_t q_res;
1303 struct VolumeSummary *vsp;
1305 struct VolumeDiskHeader diskHdr;
1307 memset(&res, 0, sizeof(res));
1309 code = FSYNC_VGCQuery(fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1312 * We must wait for the partition to finish scanning before
1313 * we can continue, since we will not know if we got the entire
1314 * VG membership unless the partition is fully scanned.
1315 * We could, in theory, just scan the partition ourselves if
1316 * the VG cache is not ready, but we would be doing the exact
1317 * same scan the fileserver is doing; it will almost always
1318 * be faster to wait for the fileserver. The only exceptions
1319 * are if the partition does not take very long to scan, and
1320 * in that case it's fast either way, so who cares?
1322 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1323 Log("waiting for fileserver to finish scanning partition %s...\n",
1324 fileSysPartition->name);
1326 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1327 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1328 * just so small partitions don't need to wait over 10
1329 * seconds every time, and large partitions are generally
1330 * polled only once every ten seconds. */
1331 sleep((i > 10) ? (i = 10) : i);
1333 code = FSYNC_VGCQuery(fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1337 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1338 /* This can happen if there's no header for the volume
1339 * we're salvaging, or no headers exist for the VG (if
1340 * we're salvaging an RW). Act as if we got a response
1341 * with no VG members. The headers may be created during
1342 * salvaging, if there are inodes in this VG. */
1344 memset(&q_res, 0, sizeof(q_res));
1345 q_res.rw = singleVolumeNumber;
1349 Log("fileserver refused VGCQuery request for volume %lu on "
1350 "partition %s, code %ld reason %ld\n",
1351 afs_printable_uint32_lu(singleVolumeNumber),
1352 fileSysPartition->name,
1353 afs_printable_int32_ld(code),
1354 afs_printable_int32_ld(res.hdr.reason));
1358 if (q_res.rw != singleVolumeNumber) {
1359 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1360 afs_printable_uint32_lu(singleVolumeNumber),
1361 afs_printable_uint32_lu(q_res.rw));
1362 #ifdef SALVSYNC_BUILD_CLIENT
1363 if (SALVSYNC_LinkVolume(q_res.rw,
1365 fileSysPartition->name,
1367 Log("schedule request failed\n");
1369 #endif /* SALVSYNC_BUILD_CLIENT */
1370 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1373 volumeSummaryp = malloc(VOL_VG_MAX_VOLS * sizeof(struct VolumeSummary));
1374 assert(volumeSummaryp != NULL);
1377 vsp = volumeSummaryp;
1379 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1380 char name[VMAXPATHLEN];
1382 if (!q_res.children[i]) {
1386 /* AskOffline for singleVolumeNumber was called much earlier */
1387 if (q_res.children[i] != singleVolumeNumber) {
1388 AskOffline(q_res.children[i], fileSysPartition->name);
1389 if (LockVolume(q_res.children[i])) {
1395 code = VReadVolumeDiskHeader(q_res.children[i], fileSysPartition, &diskHdr);
1397 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1398 afs_printable_uint32_lu(q_res.children[i]));
1403 DiskToVolumeHeader(&vsp->header, &diskHdr);
1404 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1405 vsp->fileName = ToString(name);
1410 qsort(volumeSummaryp, nVolumes, sizeof(struct VolumeSummary),
1415 Log("Cannot get volume summary from fileserver; falling back to scanning "
1416 "entire partition\n");
1419 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
1424 * count how many volume headers are found by VWalkVolumeHeaders.
1426 * @param[in] dp the disk partition (unused)
1427 * @param[in] name full path to the .vol header (unused)
1428 * @param[in] hdr the header data (unused)
1429 * @param[in] last whether this is the last try or not (unused)
1430 * @param[in] rock actually an afs_int32*; the running count of how many
1431 * volumes we have found
1436 CountHeader(struct DiskPartition64 *dp, const char *name,
1437 struct VolumeDiskHeader *hdr, int last, void *rock)
1439 afs_int32 *nvols = (afs_int32 *)rock;
1445 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
1448 struct SalvageScanParams {
1449 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1450 * vol id of the VG we're salvaging */
1451 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1452 * we're filling in */
1453 afs_int32 nVolumes; /**< # of vols we've encountered */
1454 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1455 * # of vols we've alloc'd memory for) */
1456 int retry; /**< do we need to retry vol lock/checkout? */
1460 * records volume summary info found from VWalkVolumeHeaders.
1462 * Found volumes are also taken offline if they are in the specific volume
1463 * group we are looking for.
1465 * @param[in] dp the disk partition
1466 * @param[in] name full path to the .vol header
1467 * @param[in] hdr the header data
1468 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1469 * @param[in] rock actually a struct SalvageScanParams*, containing the
1470 * information needed to record the volume summary data
1472 * @return operation status
1474 * @retval -1 volume locking raced with fileserver restart; checking out
1475 * and locking volumes needs to be retried
1476 * @retval 1 volume header is mis-named and should be deleted
1479 RecordHeader(struct DiskPartition64 *dp, const char *name,
1480 struct VolumeDiskHeader *hdr, int last, void *rock)
1482 char nameShouldBe[64];
1483 struct SalvageScanParams *params;
1484 struct VolumeSummary summary;
1485 VolumeId singleVolumeNumber;
1487 params = (struct SalvageScanParams *)rock;
1489 singleVolumeNumber = params->singleVolumeNumber;
1491 DiskToVolumeHeader(&summary.header, hdr);
1493 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1494 && summary.header.parent != singleVolumeNumber) {
1496 if (programType == salvageServer) {
1497 #ifdef SALVSYNC_BUILD_CLIENT
1498 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1499 summary.header.id, summary.header.parent);
1500 if (SALVSYNC_LinkVolume(summary.header.parent,
1504 Log("schedule request failed\n");
1507 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1510 Log("%u is a read-only volume; not salvaged\n",
1511 singleVolumeNumber);
1516 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1517 || summary.header.parent == singleVolumeNumber) {
1519 /* check if the header file is incorrectly named */
1521 const char *base = strrchr(name, '/');
1528 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1529 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1532 if (strcmp(nameShouldBe, base)) {
1533 /* .vol file has wrong name; retry/delete */
1537 if (!badname || last) {
1538 /* only offline the volume if the header is good, or if this is
1539 * the last try looking at it; avoid AskOffline'ing the same vol
1542 if (singleVolumeNumber
1543 && summary.header.id != singleVolumeNumber) {
1544 /* don't offline singleVolumeNumber; we already did that
1547 AskOffline(summary.header.id, fileSysPartition->name);
1549 #ifdef AFS_DEMAND_ATTACH_FS
1551 /* don't lock the volume if the header is bad, since we're
1552 * about to delete it anyway. */
1553 if (LockVolume(summary.header.id)) {
1558 #endif /* AFS_DEMAND_ATTACH_FS */
1562 if (last && !Showmode) {
1563 Log("Volume header file %s is incorrectly named (should be %s "
1564 "not %s); %sdeleted (it will be recreated later, if "
1565 "necessary)\n", name, nameShouldBe, base,
1566 (Testing ? "it would have been " : ""));
1571 summary.fileName = ToString(base);
1574 if (params->nVolumes > params->totalVolumes) {
1575 /* We found more volumes than we found on the first partition walk;
1576 * apparently something created a volume while we were
1577 * partition-salvaging, or we found more than 20 vols when salvaging a
1578 * particular volume. Abort if we detect this, since other programs
1579 * are not supposed to touch the partition while it is partition-salvaging,
1580 * and we shouldn't find more than 20 vols in a VG.
1582 Abort("Found %ld vol headers, but should have found at most %ld! "
1583 "Make sure the volserver/fileserver are not running at the "
1584 "same time as a partition salvage\n",
1585 afs_printable_int32_ld(params->nVolumes),
1586 afs_printable_int32_ld(params->totalVolumes));
1589 memcpy(params->vsp, &summary, sizeof(summary));
1597 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1599 * If the header could not be read in at all, the header is always unlinked.
1600 * If instead RecordHeader said the header was bad (that is, the header file
1601 * is mis-named), we only unlink if we are doing a partition salvage, as
1602 * opposed to salvaging a specific volume group.
1604 * @param[in] dp the disk partition
1605 * @param[in] name full path to the .vol header
1606 * @param[in] hdr header data, or NULL if the header could not be read
1607 * @param[in] rock actually a struct SalvageScanParams*, with some information
1611 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1612 struct VolumeDiskHeader *hdr, void *rock)
1614 struct SalvageScanParams *params;
1617 params = (struct SalvageScanParams *)rock;
1620 /* no header; header is too bogus to read in at all */
1622 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1628 } else if (!params->singleVolumeNumber) {
1629 /* We were able to read in a header, but RecordHeader said something
1630 * was wrong with it. We only unlink those if we are doing a partition
1637 if (dounlink && unlink(name)) {
1638 Log("Error %d while trying to unlink %s\n", errno, name);
1643 * Populates volumeSummaryp with volume summary information, either by asking
1644 * the fileserver for VG information, or by scanning the /vicepX partition.
1646 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1647 * are salvaging, or 0 if this is a partition
1650 * @return operation status
1652 * @retval -1 we raced with a fileserver restart; checking out and locking
1653 * volumes must be retried
1656 GetVolumeSummary(VolumeId singleVolumeNumber)
1658 afs_int32 nvols = 0;
1659 struct SalvageScanParams params;
1662 code = AskVolumeSummary(singleVolumeNumber);
1664 /* we successfully got the vol information from the fileserver; no
1665 * need to scan the partition */
1669 /* we need to retry volume checkout */
1673 if (!singleVolumeNumber) {
1674 /* Count how many volumes we have in /vicepX */
1675 code = VWalkVolumeHeaders(fileSysPartition, fileSysPath, CountHeader,
1678 Abort("Can't read directory %s; not salvaged\n", fileSysPath);
1683 nvols = VOL_VG_MAX_VOLS;
1686 volumeSummaryp = malloc(nvols * sizeof(struct VolumeSummary));
1687 assert(volumeSummaryp != NULL);
1689 params.singleVolumeNumber = singleVolumeNumber;
1690 params.vsp = volumeSummaryp;
1691 params.nVolumes = 0;
1692 params.totalVolumes = nvols;
1695 /* walk the partition directory of volume headers and record the info
1696 * about them; unlinking invalid headers */
1697 code = VWalkVolumeHeaders(fileSysPartition, fileSysPath, RecordHeader,
1698 UnlinkHeader, ¶ms);
1700 /* we apparently need to retry checking-out/locking volumes */
1704 Abort("Failed to get volume header summary\n");
1706 nVolumes = params.nVolumes;
1708 qsort(volumeSummaryp, nVolumes, sizeof(struct VolumeSummary),
1714 /* Find the link table. This should be associated with the RW volume or, if
1715 * this is an RO-only site, with the RO volume. For now, be cautious and hunt carefully.
1718 FindLinkHandle(register struct InodeSummary *isp, int nVols,
1719 struct ViceInodeInfo *allInodes)
1722 struct ViceInodeInfo *ip;
1724 for (i = 0; i < nVols; i++) {
1725 ip = allInodes + isp[i].index;
1726 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1727 if (ip[j].u.special.type == VI_LINKTABLE)
1728 return ip[j].inodeNumber;
1735 CreateLinkTable(register struct InodeSummary *isp, Inode ino)
1737 struct versionStamp version;
1740 if (!VALID_INO(ino))
1742 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
1743 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1744 if (!VALID_INO(ino))
1746 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1747 isp->RWvolumeId, errno);
1748 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1749 fdP = IH_OPEN(VGLinkH);
1751 Abort("Can't open link table for volume %u (error = %d)\n",
1752 isp->RWvolumeId, errno);
1754 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1755 Abort("Can't truncate link table for volume %u (error = %d)\n",
1756 isp->RWvolumeId, errno);
1758 version.magic = LINKTABLEMAGIC;
1759 version.version = LINKTABLEVERSION;
1761 if (FDH_WRITE(fdP, (char *)&version, sizeof(version))
1763 Abort("Can't truncate link table for volume %u (error = %d)\n",
1764 isp->RWvolumeId, errno);
1766 FDH_REALLYCLOSE(fdP);
1768 /* If the volume summary exists (i.e., the V*.vol header file exists),
1769 * then set this inode there as well.
1771 if (isp->volSummary)
1772 isp->volSummary->header.linkTable = ino;
1781 SVGParms_t *parms = (SVGParms_t *) arg;
1782 DoSalvageVolumeGroup(parms->svgp_inodeSummaryp, parms->svgp_count);
1787 SalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1790 pthread_attr_t tattr;
1794 /* Initialize per volume global variables, even if later code does so */
1798 memset(&VolInfo, 0, sizeof(VolInfo));
1800 parms.svgp_inodeSummaryp = isp;
1801 parms.svgp_count = nVols;
1802 code = pthread_attr_init(&tattr);
1804 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1808 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1810 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1813 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1815 Log("Failed to create thread to salvage volume group %u\n",
1819 (void)pthread_join(tid, NULL);
1821 #endif /* AFS_NT40_ENV */
1824 DoSalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1826 struct ViceInodeInfo *inodes, *allInodes, *ip;
1827 int i, totalInodes, size, salvageTo;
1831 int dec_VGLinkH = 0;
1833 FdHandle_t *fdP = NULL;
1836 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1837 && isp->nSpecialInodes > 0);
1838 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1839 if (!ForceSalvage && QuickCheck(isp, nVols))
1842 if (ShowMounts && !haveRWvolume)
1844 if (canfork && !debug && Fork() != 0) {
1845 (void)Wait("Salvage volume group");
1848 for (i = 0, totalInodes = 0; i < nVols; i++)
1849 totalInodes += isp[i].nInodes;
1850 size = totalInodes * sizeof(struct ViceInodeInfo);
1851 inodes = (struct ViceInodeInfo *)malloc(size);
1852 allInodes = inodes - isp->index; /* this would the base of all the inodes
1853 * for the partition, if all the inodes
1854 * had been read into memory */
1856 (inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1858 assert(read(inodeFd, inodes, size) == size);
1860 /* Don't try to salvage a read write volume if there isn't one on this
1862 salvageTo = haveRWvolume ? 0 : 1;
1864 #ifdef AFS_NAMEI_ENV
1865 ino = FindLinkHandle(isp, nVols, allInodes);
1866 if (VALID_INO(ino)) {
1867 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1868 fdP = IH_OPEN(VGLinkH);
1870 if (!VALID_INO(ino) || fdP == NULL) {
1871 Log("%s link table for volume %u.\n",
1872 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1874 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1877 struct ViceInodeInfo *ip;
1878 CreateLinkTable(isp, ino);
1879 fdP = IH_OPEN(VGLinkH);
1880 /* Sync fake 1 link counts to the link table, now that it exists */
1882 for (i = 0; i < nVols; i++) {
1883 ip = allInodes + isp[i].index;
1884 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1886 nt_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1888 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1896 FDH_REALLYCLOSE(fdP);
1898 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1901 /* Salvage in reverse order--read/write volume last; this way any
1902 * Inodes not referenced by the time we salvage the read/write volume
1903 * can be picked up by the read/write volume */
1904 /* ACTUALLY, that's not done right now--the inodes just vanish */
1905 for (i = nVols - 1; i >= salvageTo; i--) {
1907 struct InodeSummary *lisp = &isp[i];
1908 #ifdef AFS_NAMEI_ENV
1909 /* If only the RO is present on this partition, the link table
1910 * shows up as a RW volume special file. Need to make sure the
1911 * salvager doesn't try to salvage the non-existent RW.
1913 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1914 /* If this only special inode is the link table, continue */
1915 if (inodes->u.special.type == VI_LINKTABLE) {
1922 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1923 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1924 /* Check inodes twice. The second time do things seriously. This
1925 * way the whole RO volume can be deleted, below, if anything goes wrong */
1926 for (check = 1; check >= 0; check--) {
1928 if (SalvageVolumeHeaderFile(lisp, allInodes, rw, check, &deleteMe)
1930 MaybeZapVolume(lisp, "Volume header", deleteMe, check);
1931 if (rw && deleteMe) {
1932 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1933 * volume won't be called */
1939 if (rw && check == 1)
1941 if (SalvageVnodes(isp, lisp, allInodes, check) == -1) {
1942 MaybeZapVolume(lisp, "Vnode index", 0, check);
1948 /* Fix actual inode counts */
1950 Log("totalInodes %d\n",totalInodes);
1951 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1952 static int TraceBadLinkCounts = 0;
1953 #ifdef AFS_NAMEI_ENV
1954 if (VGLinkH->ih_ino == ip->inodeNumber) {
1955 dec_VGLinkH = ip->linkCount - VGLinkH_cnt;
1956 VGLinkH_p1 = ip->u.param[0];
1957 continue; /* Deal with this last. */
1960 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1961 TraceBadLinkCounts--; /* Limit reports, per volume */
1962 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(NULL, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
1964 while (ip->linkCount > 0) {
1965 /* below used to assert, not break */
1967 if (IH_DEC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1968 Log("idec failed. inode %s errno %d\n",
1969 PrintInode(NULL, ip->inodeNumber), errno);
1975 while (ip->linkCount < 0) {
1976 /* these used to be asserts */
1978 if (IH_INC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1979 Log("iinc failed. inode %s errno %d\n",
1980 PrintInode(NULL, ip->inodeNumber), errno);
1987 #ifdef AFS_NAMEI_ENV
1988 while (dec_VGLinkH > 0) {
1989 if (IH_DEC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1990 Log("idec failed on link table, errno = %d\n", errno);
1994 while (dec_VGLinkH < 0) {
1995 if (IH_INC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1996 Log("iinc failed on link table, errno = %d\n", errno);
2003 /* Directory consistency checks on the rw volume */
2005 SalvageVolume(isp, VGLinkH);
2006 IH_RELEASE(VGLinkH);
2008 if (canfork && !debug) {
2015 QuickCheck(register struct InodeSummary *isp, int nVols)
2017 /* Check headers BEFORE forking */
2021 for (i = 0; i < nVols; i++) {
2022 struct VolumeSummary *vs = isp[i].volSummary;
2023 VolumeDiskData volHeader;
2025 /* Don't salvage just because phantom rw volume is there... */
2026 /* (If a read-only volume exists, read/write inodes must also exist) */
2027 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2031 IH_INIT(h, fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2032 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2033 == sizeof(volHeader)
2034 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2035 && volHeader.dontSalvage == DONT_SALVAGE
2036 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2037 if (volHeader.inUse != 0) {
2038 volHeader.inUse = 0;
2039 volHeader.inService = 1;
2041 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2042 != sizeof(volHeader)) {
2058 /* SalvageVolumeHeaderFile
2060 * Salvage the top level V*.vol header file. Make sure the special files
2061 * exist and that there are no duplicates.
2063 * Calls SalvageHeader for each possible type of volume special file.
2067 SalvageVolumeHeaderFile(register struct InodeSummary *isp,
2068 register struct ViceInodeInfo *inodes, int RW,
2069 int check, int *deleteMe)
2072 register struct ViceInodeInfo *ip;
2073 int allinodesobsolete = 1;
2074 struct VolumeDiskHeader diskHeader;
2075 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2078 /* keeps track of special inodes that are probably 'good'; they are
2079 * referenced in the vol header, and are included in the given inodes
2084 } goodspecial[MAXINODETYPE];
2089 memset(goodspecial, 0, sizeof(goodspecial));
2091 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2093 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2095 Log("cannot allocate memory for inode skip array when salvaging "
2096 "volume %lu; not performing duplicate special inode recovery\n",
2097 afs_printable_uint32_lu(isp->volumeId));
2098 /* still try to perform the salvage; the skip array only does anything
2099 * if we detect duplicate special inodes */
2103 * First, look at the special inodes and see if any are referenced by
2104 * the existing volume header. If we find duplicate special inodes, we
2105 * can use this information to use the referenced inode (it's more
2106 * likely to be the 'good' one), and throw away the duplicates.
2108 if (isp->volSummary && skip) {
2109 /* use tempHeader, so we can use the stuff[] array to easily index
2110 * into the isp->volSummary special inodes */
2111 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2113 for (i = 0; i < isp->nSpecialInodes; i++) {
2114 ip = &inodes[isp->index + i];
2115 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2116 /* will get taken care of in a later loop */
2119 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2120 goodspecial[ip->u.special.type-1].valid = 1;
2121 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2126 memset(&tempHeader, 0, sizeof(tempHeader));
2127 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2128 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2129 tempHeader.id = isp->volumeId;
2130 tempHeader.parent = isp->RWvolumeId;
2132 /* Check for duplicates (inodes are sorted by type field) */
2133 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2134 ip = &inodes[isp->index + i];
2135 if (ip->u.special.type == (ip + 1)->u.special.type) {
2136 afs_ino_str_t stmp1, stmp2;
2138 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2139 /* Will be caught in the loop below */
2143 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2144 ip->u.special.type, isp->volumeId,
2145 PrintInode(stmp1, ip->inodeNumber),
2146 PrintInode(stmp2, (ip+1)->inodeNumber));
2148 if (skip && goodspecial[ip->u.special.type-1].valid) {
2149 Inode gi = goodspecial[ip->u.special.type-1].inode;
2152 Log("using special inode referenced by vol header (%s)\n",
2153 PrintInode(stmp1, gi));
2156 /* the volume header references some special inode of
2157 * this type in the inodes array; are we it? */
2158 if (ip->inodeNumber != gi) {
2160 } else if ((ip+1)->inodeNumber != gi) {
2161 /* in case this is the last iteration; we need to
2162 * make sure we check ip+1, too */
2167 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2175 for (i = 0; i < isp->nSpecialInodes; i++) {
2176 ip = &inodes[isp->index + i];
2177 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2179 Log("Rubbish header inode %s of type %d\n",
2180 PrintInode(NULL, ip->inodeNumber),
2181 ip->u.special.type);
2187 Log("Rubbish header inode %s of type %d; deleted\n",
2188 PrintInode(NULL, ip->inodeNumber),
2189 ip->u.special.type);
2190 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2191 if (skip && skip[i]) {
2192 if (orphans == ORPH_REMOVE) {
2193 Log("Removing orphan special inode %s of type %d\n",
2194 PrintInode(NULL, ip->inodeNumber), ip->u.special.type);
2197 Log("Ignoring orphan special inode %s of type %d\n",
2198 PrintInode(NULL, ip->inodeNumber), ip->u.special.type);
2199 /* fall through to the ip->linkCount--; line below */
2202 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2203 allinodesobsolete = 0;
2205 if (!check && ip->u.special.type != VI_LINKTABLE)
2206 ip->linkCount--; /* Keep the inode around */
2214 if (allinodesobsolete) {
2221 VGLinkH_cnt++; /* one for every header. */
2223 if (!RW && !check && isp->volSummary) {
2224 ClearROInUseBit(isp->volSummary);
2228 for (i = 0; i < MAXINODETYPE; i++) {
2229 if (stuff[i].inodeType == VI_LINKTABLE) {
2230 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2231 * And we may have recreated the link table earlier, so set the
2232 * RW header as well.
2234 if (VALID_INO(VGLinkH->ih_ino)) {
2235 *stuff[i].inode = VGLinkH->ih_ino;
2239 if (SalvageHeader(&stuff[i], isp, check, deleteMe) == -1 && check)
2243 if (isp->volSummary == NULL) {
2245 char headerName[64];
2246 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2247 (void)afs_snprintf(path, sizeof path, "%s/%s", fileSysPath, headerName);
2249 Log("No header file for volume %u\n", isp->volumeId);
2253 Log("No header file for volume %u; %screating %s\n",
2254 isp->volumeId, (Testing ? "it would have been " : ""),
2256 isp->volSummary = (struct VolumeSummary *)
2257 malloc(sizeof(struct VolumeSummary));
2258 isp->volSummary->fileName = ToString(headerName);
2260 writefunc = VCreateVolumeDiskHeader;
2263 char headerName[64];
2264 /* hack: these two fields are obsolete... */
2265 isp->volSummary->header.volumeAcl = 0;
2266 isp->volSummary->header.volumeMountTable = 0;
2269 (&isp->volSummary->header, &tempHeader,
2270 sizeof(struct VolumeHeader))) {
2271 /* We often remove the name before calling us, so we make a fake one up */
2272 if (isp->volSummary->fileName) {
2273 strcpy(headerName, isp->volSummary->fileName);
2275 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2276 isp->volSummary->fileName = ToString(headerName);
2278 (void)afs_snprintf(path, sizeof path, "%s/%s", fileSysPath, headerName);
2280 Log("Header file %s is damaged or no longer valid%s\n", path,
2281 (check ? "" : "; repairing"));
2285 writefunc = VWriteVolumeDiskHeader;
2289 memcpy(&isp->volSummary->header, &tempHeader,
2290 sizeof(struct VolumeHeader));
2293 Log("It would have written a new header file for volume %u\n",
2297 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2298 code = (*writefunc)(&diskHeader, fileSysPartition);
2300 Log("Error %ld writing volume header file for volume %lu\n",
2301 afs_printable_int32_ld(code),
2302 afs_printable_uint32_lu(diskHeader.id));
2307 IH_INIT(isp->volSummary->volumeInfoHandle, fileSysDevice, isp->RWvolumeId,
2308 isp->volSummary->header.volumeInfo);
2313 SalvageHeader(register struct stuff *sp, struct InodeSummary *isp, int check,
2317 VolumeDiskData volumeInfo;
2318 struct versionStamp fileHeader;
2327 #ifndef AFS_NAMEI_ENV
2328 if (sp->inodeType == VI_LINKTABLE)
2331 if (*(sp->inode) == 0) {
2333 Log("Missing inode in volume header (%s)\n", sp->description);
2337 Log("Missing inode in volume header (%s); %s\n", sp->description,
2338 (Testing ? "it would have recreated it" : "recreating"));
2341 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
2342 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2343 if (!VALID_INO(*(sp->inode)))
2345 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2346 sp->description, errno);
2351 IH_INIT(specH, fileSysDevice, isp->RWvolumeId, *(sp->inode));
2352 fdP = IH_OPEN(specH);
2353 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2354 /* bail out early and destroy the volume */
2356 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2363 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2364 sp->description, errno);
2367 && (FDH_READ(fdP, (char *)&header, sp->size) != sp->size
2368 || header.fileHeader.magic != sp->stamp.magic)) {
2370 Log("Part of the header (%s) is corrupted\n", sp->description);
2371 FDH_REALLYCLOSE(fdP);
2375 Log("Part of the header (%s) is corrupted; recreating\n",
2379 if (sp->inodeType == VI_VOLINFO
2380 && header.volumeInfo.destroyMe == DESTROY_ME) {
2383 FDH_REALLYCLOSE(fdP);
2387 if (recreate && !Testing) {
2390 ("Internal error: recreating volume header (%s) in check mode\n",
2392 code = FDH_TRUNC(fdP, 0);
2394 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2395 sp->description, errno);
2397 /* The following code should be moved into vutil.c */
2398 if (sp->inodeType == VI_VOLINFO) {
2400 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2401 header.volumeInfo.stamp = sp->stamp;
2402 header.volumeInfo.id = isp->volumeId;
2403 header.volumeInfo.parentId = isp->RWvolumeId;
2404 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2405 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2406 isp->volumeId, isp->volumeId);
2407 header.volumeInfo.inService = 0;
2408 header.volumeInfo.blessed = 0;
2409 /* The + 1000 is a hack in case there are any files out in venus caches */
2410 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2411 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2412 header.volumeInfo.needsCallback = 0;
2413 gettimeofday(&tp, 0);
2414 header.volumeInfo.creationDate = tp.tv_sec;
2415 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
2417 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
2418 sp->description, errno);
2421 FDH_WRITE(fdP, (char *)&header.volumeInfo,
2422 sizeof(header.volumeInfo));
2423 if (code != sizeof(header.volumeInfo)) {
2426 ("Unable to write volume header file (%s) (errno = %d)\n",
2427 sp->description, errno);
2428 Abort("Unable to write entire volume header file (%s)\n",
2432 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
2434 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
2435 sp->description, errno);
2437 code = FDH_WRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp));
2438 if (code != sizeof(sp->stamp)) {
2441 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2442 sp->description, errno);
2444 ("Unable to write entire version stamp in volume header file (%s)\n",
2449 FDH_REALLYCLOSE(fdP);
2451 if (sp->inodeType == VI_VOLINFO) {
2452 VolInfo = header.volumeInfo;
2455 if (VolInfo.updateDate) {
2456 strcpy(update, TimeStamp(VolInfo.updateDate, 0));
2458 Log("%s (%u) %supdated %s\n", VolInfo.name, VolInfo.id,
2459 (Testing ? "it would have been " : ""), update);
2461 strcpy(update, TimeStamp(VolInfo.creationDate, 0));
2463 Log("%s (%u) not updated (created %s)\n", VolInfo.name,
2464 VolInfo.id, update);
/*
 * SalvageVnodes: run SalvageIndex over the small and then the large vnode
 * index of the volume described by thisIsp, matching vnodes against the
 * regular (non-special) inodes of the volume described by rwIsp.
 *
 *   rwIsp    supplies the inode range: it starts at
 *            rwIsp->index + rwIsp->nSpecialInodes and holds
 *            rwIsp->nInodes - rwIsp->nSpecialInodes entries.
 *   thisIsp  volume whose smallVnodeIndex / largeVnodeIndex are salvaged;
 *            RW is true only when thisIsp is rwIsp itself.
 *   inodes   array the inode range above is taken from.
 *   check    passed unchanged to SalvageIndex.
 *
 * Returns 0 only when both ismall and ilarge are 0, otherwise -1.
 *
 * NOTE(review): the lines that assign ismall / ilarge and the statement
 * guarded by "check && ismall == -1" are not visible in this listing;
 * presumably the two SalvageIndex results are stored there and the check
 * case returns early -- confirm against the full file.
 */
2474 SalvageVnodes(register struct InodeSummary *rwIsp,
2475 register struct InodeSummary *thisIsp,
2476 register struct ViceInodeInfo *inodes, int check)
2478 int ilarge, ismall, ioffset, RW, nInodes;
2479 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2482 RW = (rwIsp == thisIsp);
2483 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2485 SalvageIndex(thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2486 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2487 if (check && ismall == -1)
2490 SalvageIndex(thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2491 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2492 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex: walk one vnode index file and reconcile every allocated
 * vnode in it with the inode list ip.
 *
 *   ino         inode of the vnode index file; opened through IH_INIT /
 *               IH_OPEN and wrapped in a stream with mode "r+".
 *   class       vnode class; selects VnodeClassInfo[class], which supplies
 *               diskSize and magic.
 *   RW          flag supplied by the caller (SalvageVnodes passes
 *               rwIsp == thisIsp).
 *   ip, nInodes candidate inodes; the loops below step through them while
 *               their vnode number is lower than or equal to the current
 *               vnode number.
 *   volSummary  supplies header.id (volumeNumber) and header.parent.
 *   check       the function asserts !(vnodeChanged && check), i.e. with
 *               check set no vnode is expected to need a change.
 *
 * The index is read starting at offset vcp->diskSize, and
 * size / diskSize - 1 records are processed.  For each record whose type
 * is not vNull the visible code does the following:
 *   - a vnode whose inode number is 0 is zeroed;
 *   - a vnode whose vnodeMagic differs from vcp->magic is logged as
 *     partially allocated and zeroed;
 *   - inodes belonging to lower vnode numbers are stepped over, then an
 *     inode with the same vnode number is chosen (per the in-code comment,
 *     one whose inode number equals the vnode's is preferred, otherwise
 *     the first one in sort order);
 *   - with a matching inode, the vnode's uniquifier (compared through
 *     IUnique), dataVersion, inode number and length are corrected from
 *     the inode, and ip->linkCount is decremented to keep the inode;
 *   - with no matching inode, a vnode that names an inode or is a
 *     directory is logged and zeroed.
 * When vnodeChanged is set and Testing is not, the vnode is written back
 * at vnodeIndexOffset(vcp, vnodeNumber); VolumeChanged is set to 1 next
 * to that write.
 *
 * NOTE(review): this listing omits many lines (braces, else arms, the
 * return statement), so exact nesting and the return value cannot be
 * confirmed from here.
 * NOTE(review): STREAM_SEEK is evaluated inside assert(); if assert can be
 * compiled out in this build, the seek past the first record disappears
 * with it -- confirm.
 */
2496 SalvageIndex(Inode ino, VnodeClass class, int RW,
2497 register struct ViceInodeInfo *ip, int nInodes,
2498 struct VolumeSummary *volSummary, int check)
2500 VolumeId volumeNumber;
2501 char buf[SIZEOF_LARGEDISKVNODE];
2502 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2504 StreamHandle_t *file;
2505 struct VnodeClassInfo *vcp;
2507 afs_fsize_t vnodeLength;
2508 int vnodeIndex, nVnodes;
2509 afs_ino_str_t stmp1, stmp2;
2513 volumeNumber = volSummary->header.id;
2514 IH_INIT(handle, fileSysDevice, volSummary->header.parent, ino);
2515 fdP = IH_OPEN(handle);
2516 assert(fdP != NULL);
2517 file = FDH_FDOPEN(fdP, "r+");
2518 assert(file != NULL);
2519 vcp = &VnodeClassInfo[class];
2520 size = OS_SIZE(fdP->fd_fd);
2522 nVnodes = (size / vcp->diskSize) - 1;
2524 assert((nVnodes + 1) * vcp->diskSize == size);
2525 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
2529 for (vnodeIndex = 0;
2530 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2531 nVnodes--, vnodeIndex++) {
2532 if (vnode->type != vNull) {
2533 int vnodeChanged = 0;
2534 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2535 /* Log programs that belong to root (potentially suid root);
2536 * don't bother for read-only or backup volumes */
2537 #ifdef notdef /* This is done elsewhere */
2538 if (ShowRootFiles && RW && vnode->owner == 0 && vnodeNumber != 1)
2539 Log("OWNER IS ROOT %s %u dir %u vnode %u author %u owner %u mode %o\n", VolInfo.name, volumeNumber, vnode->parent, vnodeNumber, vnode->author, vnode->owner, vnode->modeBits);
2541 if (VNDISK_GET_INO(vnode) == 0) {
2543 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2544 memset(vnode, 0, vcp->diskSize);
2548 if (vcp->magic != vnode->vnodeMagic) {
2549 /* bad magic #, probably partially created vnode */
2550 Log("Partially allocated vnode %d deleted.\n",
2552 memset(vnode, 0, vcp->diskSize);
2556 /* ****** Should do a bit more salvage here: e.g. make sure
2557 * vnode type matches what it should be given the index */
2558 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2559 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2560 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2561 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2568 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2569 /* The following doesn't work, because the version number
2570 * is not maintained correctly by the file server */
2571 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2572 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2574 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2580 /* For RW volume, look for vnode with matching inode number;
2581 * if no such match, take the first determined by our sort
2583 register struct ViceInodeInfo *lip = ip;
2584 register int lnInodes = nInodes;
2586 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2587 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2596 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2597 /* "Matching" inode */
2601 vu = vnode->uniquifier;
2602 iu = ip->u.vnode.vnodeUniquifier;
2603 vd = vnode->dataVersion;
2604 id = ip->u.vnode.inodeDataVersion;
2606 * Because of the possibility of the uniquifier overflows (> 4M)
2607 * we compare them modulo the low 22-bits; we shouldn't worry
2608 * about mismatching since they shouldn't to many old
2609 * uniquifiers of the same vnode...
2611 if (IUnique(vu) != IUnique(iu)) {
2613 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2616 vnode->uniquifier = iu;
2617 #ifdef AFS_3DISPARES
2618 vnode->dataVersion = (id >= vd ?
2621 1887437 ? vd : id) :
2624 1887437 ? id : vd));
2626 #if defined(AFS_SGI_EXMAG)
2627 vnode->dataVersion = (id >= vd ?
2630 15099494 ? vd : id) :
2633 15099494 ? id : vd));
2635 vnode->dataVersion = (id > vd ? id : vd);
2636 #endif /* AFS_SGI_EXMAG */
2637 #endif /* AFS_3DISPARES */
2640 /* don't bother checking for vd > id any more, since
2641 * partial file transfers always result in this state,
2642 * and you can't do much else anyway (you've already
2643 * found the best data you can) */
2644 #ifdef AFS_3DISPARES
2645 if (!vnodeIsDirectory(vnodeNumber)
2646 && ((vd < id && (id - vd) < 1887437)
2647 || ((vd > id && (vd - id) > 1887437)))) {
2649 #if defined(AFS_SGI_EXMAG)
2650 if (!vnodeIsDirectory(vnodeNumber)
2651 && ((vd < id && (id - vd) < 15099494)
2652 || ((vd > id && (vd - id) > 15099494)))) {
2654 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2655 #endif /* AFS_SGI_EXMAG */
2658 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2659 vnode->dataVersion = id;
2664 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2667 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2669 VNDISK_SET_INO(vnode, ip->inodeNumber);
2674 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2676 VNDISK_SET_INO(vnode, ip->inodeNumber);
2679 VNDISK_GET_LEN(vnodeLength, vnode);
2680 if (ip->byteCount != vnodeLength) {
2683 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2688 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2689 VNDISK_SET_LEN(vnode, ip->byteCount);
2693 ip->linkCount--; /* Keep the inode around */
2696 } else { /* no matching inode */
2697 if (VNDISK_GET_INO(vnode) != 0
2698 || vnode->type == vDirectory) {
2699 /* No matching inode--get rid of the vnode */
2701 if (VNDISK_GET_INO(vnode)) {
2703 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)));
2707 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2712 if (VNDISK_GET_INO(vnode)) {
2714 time_t serverModifyTime = vnode->serverModifyTime;
2715 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2719 time_t serverModifyTime = vnode->serverModifyTime;
2720 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2723 memset(vnode, 0, vcp->diskSize);
2726 /* Should not reach here becuase we checked for
2727 * (inodeNumber == 0) above. And where we zero the vnode,
2728 * we also goto vnodeDone.
2732 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2736 } /* VNDISK_GET_INO(vnode) != 0 */
2738 assert(!(vnodeChanged && check));
2739 if (vnodeChanged && !Testing) {
2741 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2742 (char *)vnode, vcp->diskSize)
2744 VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber: map a vnode number to its in-memory VnodeEssence.
 *
 * The vnode class comes from vnodeIdToClass() and the slot from
 * vnodeIdToBitNumber(); the entry is looked up in vnodeInfo[class].vnodes.
 *
 * Returns NULL when the slot is at or beyond vip->nVnodes, otherwise a
 * pointer to the slot inside the table.
 */
2755 struct VnodeEssence *
2756 CheckVnodeNumber(VnodeId vnodeNumber)
2759 struct VnodeInfo *vip;
2762 class = vnodeIdToClass(vnodeNumber);
2763 vip = &vnodeInfo[class];
2764 offset = vnodeIdToBitNumber(vnodeNumber);
2765 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite: give directory `dir` a fresh inode holding a copy of its
 * current contents before the directory is changed.
 *
 * Visible sequence:
 *   - DFlush() is called first;
 *   - the directory's vnode is read from the large vnode index
 *     (vnodeInfo[vLarge].handle) at vnodeIndexOffset(vcp, dir->vnodeNumber);
 *   - a new inode is created with IH_CREATE, passing a dataVersion that is
 *     increased in place (the increment value is on a line not shown);
 *   - CopyInode() copies the old inode into the new one;
 *   - the vnode is pointed at the new inode and written back;
 *   - dir->dirHandle is re-targeted at the new inode.
 * Per the trailing comment, the old inode is not deleted here because the
 * directory is still being scanned.
 *
 * NOTE(review): the statement guarded by "dir->copied || Testing" is not
 * visible; presumably an early return -- confirm.
 * NOTE(review): CopyInode() is called inside assert(); if assert can be
 * compiled out in this build the copy itself would be skipped -- confirm.
 */
2769 CopyOnWrite(register struct DirSummary *dir)
2771 /* Copy the directory unconditionally if we are going to change it:
2772 * not just if was cloned.
2774 struct VnodeDiskObject vnode;
2775 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2776 Inode oldinode, newinode;
2779 if (dir->copied || Testing)
2781 DFlush(); /* Well justified paranoia... */
2784 IH_IREAD(vnodeInfo[vLarge].handle,
2785 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2787 assert(code == sizeof(vnode));
2788 oldinode = VNDISK_GET_INO(&vnode);
2789 /* Increment the version number by a whole lot to avoid problems with
2790 * clients that were promised new version numbers--but the file server
2791 * crashed before the versions were written to disk.
2794 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2795 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2797 assert(VALID_INO(newinode));
2798 assert(CopyInode(fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2800 VNDISK_SET_INO(&vnode, newinode);
2802 IH_IWRITE(vnodeInfo[vLarge].handle,
2803 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2805 assert(code == sizeof(vnode));
2807 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2808 fileSysDevice, newinode);
2809 /* Don't delete the original inode right away, because the directory is
2810 * still being scanned.
2816 * This function should either successfully create a new dir, or give up
2817 * and leave things the way they were. In particular, if it fails to write
2818 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage: rebuild directory `dir` into a newly created inode and
 * switch the directory's vnode over to it.
 *
 * Visible sequence:
 *   - the directory's vnode is read from the large vnode index and its
 *     current inode remembered as oldinode;
 *   - a new inode is created (with an increased dataVersion) and wrapped
 *     in the local handle newdir;
 *   - parentUnique defaults to 1 and is replaced by the parent's unique
 *     when vnode.parent is non-zero, CheckVnodeNumber() finds it, and that
 *     unique is non-zero;
 *   - DirSalvage() copies from dir->dirHandle into newdir, using
 *     vnode.parent for the parent vnode, or dir->vnodeNumber when
 *     vnode.parent is 0;
 *   - if that fails, or DirOK(&newdir) fails afterwards, the new inode's
 *     link count is dropped with IH_DEC and the old directory is kept;
 *   - otherwise the vnode gets the new inode and Length(&newdir), is
 *     written back, the file system is synced (nt_sync or sync), the old
 *     directory file is opened and really closed, oldinode is released
 *     with IH_DEC, and dir->dirHandle becomes newdir.
 *
 * NOTE(review): `code` is reassigned from IH_DEC immediately before the
 * "Directory salvage returned code %d" message, so that message appears to
 * print the IH_DEC result rather than the DirSalvage result -- confirm.
 * NOTE(review): no NULL check of the IH_OPEN result is visible before
 * FDH_REALLYCLOSE(fdP) near the end -- confirm whether one is needed.
 */
2822 CopyAndSalvage(register struct DirSummary *dir)
2824 struct VnodeDiskObject vnode;
2825 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2826 Inode oldinode, newinode;
2831 afs_int32 parentUnique = 1;
2832 struct VnodeEssence *vnodeEssence;
2837 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2839 IH_IREAD(vnodeInfo[vLarge].handle,
2840 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2842 assert(lcode == sizeof(vnode));
2843 oldinode = VNDISK_GET_INO(&vnode);
2844 /* Increment the version number by a whole lot to avoid problems with
2845 * clients that were promised new version numbers--but the file server
2846 * crashed before the versions were written to disk.
2849 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2850 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2852 assert(VALID_INO(newinode));
2853 SetSalvageDirHandle(&newdir, dir->rwVid, fileSysDevice, newinode);
2855 /* Assign . and .. vnode numbers from dir and vnode.parent.
2856 * The uniquifier for . is in the vnode.
2857 * The uniquifier for .. might be set to a bogus value of 1 and
2858 * the salvager will later clean it up.
2860 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(vnode.parent))) {
2861 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2864 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2866 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2871 /* didn't really build the new directory properly, let's just give up. */
2872 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2873 Log("Directory salvage returned code %d, continuing.\n", code);
2875 Log("also failed to decrement link count on new inode");
2879 Log("Checking the results of the directory salvage...\n");
2880 if (!DirOK(&newdir)) {
2881 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2882 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2887 VNDISK_SET_INO(&vnode, newinode);
2888 length = Length(&newdir);
2889 VNDISK_SET_LEN(&vnode, length);
2891 IH_IWRITE(vnodeInfo[vLarge].handle,
2892 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2894 assert(lcode == sizeof(vnode));
2897 nt_sync(fileSysDevice);
2899 sync(); /* this is slow, but hopefully rarely called. We don't have
2900 * an open FD on the file itself to fsync.
2904 vnodeInfo[vLarge].handle->ih_synced = 1;
2906 /* make sure old directory file is really closed */
2907 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2908 FDH_REALLYCLOSE(fdP);
2910 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2912 dir->dirHandle = newdir;
/*
 * JudgeEntry: examine one directory entry (name -> vnodeNumber.unique) of
 * the directory described by dirVal and repair or delete it if it is
 * inconsistent.  SalvageDir passes this function to EnumerateDir.
 *
 *   dirVal       really a struct DirSummary * for the directory scanned.
 *   name         entry name.
 *   vnodeNumber  vnode the entry points at.
 *   (the remaining parameter line is not visible in this listing; the body
 *   uses a value named `unique` -- presumably the entry's uniquifier.)
 *
 * Checks visible in the body, in order:
 *   1. no VnodeEssence for vnodeNumber: entry logged and deleted;
 *   2. (non-namei builds) essence has inode number 0: logged and deleted;
 *   3. even vnode number, Showmode off, and essence has no count, unique
 *      or modeBits: logged as invalid and deleted;
 *   4. uniquifier mismatch: "." and ".." are skipped when the essence's
 *      unique is 0; otherwise the entry is deleted and, per todelete,
 *      re-created with the vnode's unique; the function then returns 0;
 *   5. ".": must name dir->vnodeNumber / dir->unique, else it is replaced;
 *   6. "..": must name the expected parent (dir->parent with its unique,
 *      or the directory itself), else it is replaced; haveDotDot is set;
 *   7. names starting with ".__afs": deleted; the vnode is marked
 *      unclaimed and todelete;
 *   8. any other entry: optional suid/sgid and root-owner logging; a
 *      symlink without any 0111 mode bit is read as a mount point and, if
 *      its text does not start with '#' or '%' or does not end in '.',
 *      gets 0111 added to its modeBits; large vnodes get their name
 *      recorded; a vnode whose parent is a different directory is either
 *      claimed by this directory or the entry is deleted.
 * Finally the vnode is marked claimed and its count is decremented.
 *
 * NOTE(review): Delete() and Create() are called inside assert(); if
 * assert can be compiled out in this build those directory updates would
 * be skipped -- confirm.
 * NOTE(review): buf[strlen(buf)-1] indexes before the buffer if buf is an
 * empty string; the size check that may rule this out is on lines not
 * visible here -- confirm.
 */
2916 JudgeEntry(void *dirVal, char *name, afs_int32 vnodeNumber,
2919 struct DirSummary *dir = (struct DirSummary *)dirVal;
2920 struct VnodeEssence *vnodeEssence;
2921 afs_int32 dirOrphaned, todelete;
2923 dirOrphaned = IsVnodeOrphaned(dir->vnodeNumber);
2925 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2926 if (vnodeEssence == NULL) {
2928 Log("dir vnode %u: invalid entry deleted: %s/%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2932 assert(Delete(&dir->dirHandle, name) == 0);
2937 #ifndef AFS_NAMEI_ENV
2938 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2939 * mount inode for the partition. If this inode were deleted, it would crash
2942 if (vnodeEssence->InodeNumber == 0) {
2943 Log("dir vnode %d: invalid entry: %s/%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2946 assert(Delete(&dir->dirHandle, name) == 0);
2953 if (!(vnodeNumber & 1) && !Showmode
2954 && !(vnodeEssence->count || vnodeEssence->unique
2955 || vnodeEssence->modeBits)) {
2956 Log("dir vnode %u: invalid entry: %s/%s (vnode %u, unique %u)%s\n",
2957 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
2958 vnodeNumber, unique,
2959 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
2964 assert(Delete(&dir->dirHandle, name) == 0);
2970 /* Check if the Uniquifiers match. If not, change the directory entry
2971 * so its unique matches the vnode unique. Delete if the unique is zero
2972 * or if the directory is orphaned.
2974 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
2975 if (!vnodeEssence->unique
2976 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
2977 /* This is an orphaned directory. Don't delete the . or ..
2978 * entry. Otherwise, it will get created in the next
2979 * salvage and deleted again here. So Just skip it.
2984 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
2987 Log("dir vnode %u: %s/%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
2991 fid.Vnode = vnodeNumber;
2992 fid.Unique = vnodeEssence->unique;
2994 assert(Delete(&dir->dirHandle, name) == 0);
2996 assert(Create(&dir->dirHandle, name, &fid) == 0);
2999 return 0; /* no need to continue */
3002 if (strcmp(name, ".") == 0) {
3003 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3006 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3009 assert(Delete(&dir->dirHandle, ".") == 0);
3010 fid.Vnode = dir->vnodeNumber;
3011 fid.Unique = dir->unique;
3012 assert(Create(&dir->dirHandle, ".", &fid) == 0);
3015 vnodeNumber = fid.Vnode; /* Get the new Essence */
3016 unique = fid.Unique;
3017 vnodeEssence = CheckVnodeNumber(vnodeNumber);
3020 } else if (strcmp(name, "..") == 0) {
3023 struct VnodeEssence *dotdot;
3024 pa.Vnode = dir->parent;
3025 dotdot = CheckVnodeNumber(pa.Vnode);
3026 assert(dotdot != NULL); /* XXX Should not be assert */
3027 pa.Unique = dotdot->unique;
3029 pa.Vnode = dir->vnodeNumber;
3030 pa.Unique = dir->unique;
3032 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3034 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3037 assert(Delete(&dir->dirHandle, "..") == 0);
3038 assert(Create(&dir->dirHandle, "..", &pa) == 0);
3041 vnodeNumber = pa.Vnode; /* Get the new Essence */
3043 vnodeEssence = CheckVnodeNumber(vnodeNumber);
3045 dir->haveDotDot = 1;
3046 } else if (strncmp(name, ".__afs", 6) == 0) {
3048 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3052 assert(Delete(&dir->dirHandle, name) == 0);
3054 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3055 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3058 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3059 Log("FOUND suid/sgid file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3060 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3061 && !(vnodeEssence->modeBits & 0111)) {
3067 IH_INIT(ihP, fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3068 vnodeEssence->InodeNumber);
3071 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3075 size = FDH_SIZE(fdP);
3077 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, size, vnodeNumber);
3078 FDH_REALLYCLOSE(fdP);
3085 code = FDH_READ(fdP, buf, size);
3088 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3089 Log("Volume %u (%s) mount point %s/%s to '%s' invalid, %s to symbolic link\n",
3090 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3091 Testing ? "would convert" : "converted");
3092 vnodeEssence->modeBits |= 0111;
3093 vnodeEssence->changed = 1;
3094 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s/%s to '%s'\n",
3095 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3096 dir->name ? dir->name : "??", name, buf);
3098 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3099 dir->vname, vnodeNumber, size, code);
3101 FDH_REALLYCLOSE(fdP);
3104 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3105 Log("FOUND root file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3106 if (vnodeIdToClass(vnodeNumber) == vLarge
3107 && vnodeEssence->name == NULL) {
3109 if ((n = (char *)malloc(strlen(name) + 1)))
3111 vnodeEssence->name = n;
3114 /* The directory entry points to the vnode. Check to see if the
3115 * vnode points back to the directory. If not, then let the
3116 * directory claim it (else it might end up orphaned). Vnodes
3117 * already claimed by another directory are deleted from this
3118 * directory: hardlinks to the same vnode are not allowed
3119 * from different directories.
3121 if (vnodeEssence->parent != dir->vnodeNumber) {
3122 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3123 /* Vnode does not point back to this directory.
3124 * Orphaned dirs cannot claim a file (it may belong to
3125 * another non-orphaned dir).
3128 Log("dir vnode %u: %s/%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3130 vnodeEssence->parent = dir->vnodeNumber;
3131 vnodeEssence->changed = 1;
3133 /* Vnode was claimed by another directory */
3136 Log("dir vnode %u: %s/%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3137 } else if (vnodeNumber == 1) {
3138 Log("dir vnode %d: %s/%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3140 Log("dir vnode %u: %s/%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3145 assert(Delete(&dir->dirHandle, name) == 0);
3150 /* This directory claims the vnode */
3151 vnodeEssence->claimed = 1;
3153 vnodeEssence->count--;
/*
 * DistilVnodeEssence: read one vnode index file and build the in-memory
 * summary table vnodeInfo[class] from it.
 *
 *   rwVId  volume id used to initialise vip->handle.
 *   class  vnode class; selects vnodeInfo[class] and VnodeClassInfo[class].
 *   ino    inode of the vnode index file.
 *   maxu   in/out: raised to the largest uniquifier seen in this index.
 *
 * vip->nVnodes is size / diskSize - 1.  When that is positive, the stream
 * is positioned at offset vcp->diskSize, vip->vnodes is calloc'ed, and for
 * the large class vip->inodes is calloc'ed as well.  For every record
 * whose type is not vNull the link count, block count, parent, unique,
 * mode bits, inode number, type, author, owner and group are copied into
 * the matching VnodeEssence; nAllocatedVnodes and volumeBlockCount are
 * accumulated.  A directory vnode found outside the large class is
 * un-counted and overwritten in the small index; otherwise a directory's
 * inode number is recorded in vip->inodes.
 *
 * NOTE(review): `vnode` is a pointer into buf, so sizeof(vnode) in the
 * memset and IH_IWRITE calls is the size of a pointer, and (char *)&vnode
 * is the address of the pointer variable rather than of the vnode record.
 * As written this clears only pointer-size bytes of the record and writes
 * the pointer's own bytes to disk; it looks like the record and
 * vcp->diskSize were intended -- confirm and fix in the full file.
 * NOTE(review): STREAM_SEEK and calloc are evaluated inside assert(); if
 * assert can be compiled out in this build they vanish -- confirm.
 */
3158 DistilVnodeEssence(VolumeId rwVId, VnodeClass class, Inode ino, Unique * maxu)
3160 register struct VnodeInfo *vip = &vnodeInfo[class];
3161 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3162 char buf[SIZEOF_LARGEDISKVNODE];
3163 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3165 StreamHandle_t *file;
3170 IH_INIT(vip->handle, fileSysDevice, rwVId, ino);
3171 fdP = IH_OPEN(vip->handle);
3172 assert(fdP != NULL);
3173 file = FDH_FDOPEN(fdP, "r+");
3174 assert(file != NULL);
3175 size = OS_SIZE(fdP->fd_fd);
3177 vip->nVnodes = (size / vcp->diskSize) - 1;
3178 if (vip->nVnodes > 0) {
3179 assert((vip->nVnodes + 1) * vcp->diskSize == size);
3180 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
3181 assert((vip->vnodes = (struct VnodeEssence *)
3182 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3183 if (class == vLarge) {
3184 assert((vip->inodes = (Inode *)
3185 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3194 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3195 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3196 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3197 nVnodes--, vnodeIndex++) {
3198 if (vnode->type != vNull) {
3199 register struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3200 afs_fsize_t vnodeLength;
3201 vip->nAllocatedVnodes++;
3202 vep->count = vnode->linkCount;
3203 VNDISK_GET_LEN(vnodeLength, vnode);
3204 vep->blockCount = nBlocks(vnodeLength);
3205 vip->volumeBlockCount += vep->blockCount;
3206 vep->parent = vnode->parent;
3207 vep->unique = vnode->uniquifier;
3208 if (*maxu < vnode->uniquifier)
3209 *maxu = vnode->uniquifier;
3210 vep->modeBits = vnode->modeBits;
3211 vep->InodeNumber = VNDISK_GET_INO(vnode);
3212 vep->type = vnode->type;
3213 vep->author = vnode->author;
3214 vep->owner = vnode->owner;
3215 vep->group = vnode->group;
3216 if (vnode->type == vDirectory) {
3217 if (class != vLarge) {
3218 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3219 vip->nAllocatedVnodes--;
3220 memset(vnode, 0, sizeof(vnode));
3221 IH_IWRITE(vnodeInfo[vSmall].handle,
3222 vnodeIndexOffset(vcp, vnodeNumber),
3223 (char *)&vnode, sizeof(vnode));
3226 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName: build the path name of directory vnode `vnode` (essence vp)
 * into `path`.
 *
 * When vp has a parent, a recorded name, and a parent essence found by
 * CheckVnodeNumber(), the function first recurses on the parent and, if
 * that call returns non-zero, appends vp->name to path.
 *
 * NOTE(review): the remaining branches and the return values are on lines
 * not visible in this listing.
 * NOTE(review): strcat() is used with no length check visible here; the
 * caller in this file passes a static buffer of MAXPATHLEN bytes --
 * confirm that deep or long paths cannot overflow it.
 */
3235 GetDirName(VnodeId vnode, struct VnodeEssence *vp, char *path)
3237 struct VnodeEssence *parentvp;
3243 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(vp->parent))
3244 && GetDirName(vp->parent, parentvp, path)) {
3246 strcat(path, vp->name);
3252 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3253 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
/*
 * IsVnodeOrphaned: report whether `vnode` is orphaned.
 *
 * Returns 1 for the case commented "Vnode zero does not exist", 0 for the
 * case commented "The root dir vnode is always claimed" (the tests for
 * both cases are on lines not visible in this listing), and 1 when the
 * vnode has no essence or its essence is not claimed.  Otherwise the
 * answer is that of the vnode's parent, obtained by recursion.
 *
 * NOTE(review): the recursion follows vep->parent; whether a cycle of
 * parent pointers is guarded against cannot be seen from here -- confirm.
 */
3256 IsVnodeOrphaned(VnodeId vnode)
3258 struct VnodeEssence *vep;
3261 return (1); /* Vnode zero does not exist */
3263 return (0); /* The root dir vnode is always claimed */
3264 vep = CheckVnodeNumber(vnode); /* Get the vnode essence */
3265 if (!vep || !vep->claimed)
3266 return (1); /* Vnode is not claimed - it is orphaned */
3268 return (IsVnodeOrphaned(vep->parent));
/*
 * SalvageDir: salvage the directory held in slot i of dirVnodeInfo,
 * salvaging its parent directory first when that has not been done yet.
 *
 *   name          passed through to the recursive call.
 *   rwVid         volume id used when releasing the old directory inode.
 *   dirVnodeInfo  large-vnode table (vnodes[] and inodes[]).
 *   alinkH        link handle stored in dir.ds_linkH.
 *   i             slot number of the directory in dirVnodeInfo.
 *   rootdir       receives a copy of the DirSummary when the directory is
 *                 vnode 1 with unique 1.
 *   (the last parameter line is not visible in this listing; the body
 *   passes a value named `rootdirfound` to the recursive call.)
 *
 * Visible sequence: return if the slot is already marked salvaged; mark
 * it; return if the slot has no inode; for vnode 1 clear a non-zero
 * parent, otherwise salvage the parent recursively if needed; fill in the
 * DirSummary and its directory handle; run DirOK (skipped, and treated as
 * bad, when RebuildDirs is set and Testing is not); rebuild a bad
 * directory with CopyAndSalvage; build the path with GetDirName; judge
 * every entry with EnumerateDir + JudgeEntry; if the directory was copied
 * and Testing is off, release the old inode and record the new one in
 * dirVnodeInfo->inodes[i].
 *
 * NOTE(review): dir, dirHandle and path are static locals while the
 * function calls itself.  In the visible order the recursive call happens
 * before these statics are filled in for the current directory, which
 * would make that safe, but the function is not reentrant -- confirm
 * against the full file.
 */
3272 SalvageDir(char *name, VolumeId rwVid, struct VnodeInfo *dirVnodeInfo,
3273 IHandle_t * alinkH, int i, struct DirSummary *rootdir,
3276 static struct DirSummary dir;
3277 static struct DirHandle dirHandle;
3278 struct VnodeEssence *parent;
3279 static char path[MAXPATHLEN];
3282 if (dirVnodeInfo->vnodes[i].salvaged)
3283 return; /* already salvaged */
3286 dirVnodeInfo->vnodes[i].salvaged = 1;
3288 if (dirVnodeInfo->inodes[i] == 0)
3289 return; /* Not allocated to a directory */
3291 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3292 if (dirVnodeInfo->vnodes[i].parent) {
3293 Log("Bad parent, vnode 1; %s...\n",
3294 (Testing ? "skipping" : "salvaging"));
3295 dirVnodeInfo->vnodes[i].parent = 0;
3296 dirVnodeInfo->vnodes[i].changed = 1;
3299 parent = CheckVnodeNumber(dirVnodeInfo->vnodes[i].parent);
3300 if (parent && parent->salvaged == 0)
3301 SalvageDir(name, rwVid, dirVnodeInfo, alinkH,
3302 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3303 rootdir, rootdirfound);
3306 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3307 dir.unique = dirVnodeInfo->vnodes[i].unique;
3310 dir.parent = dirVnodeInfo->vnodes[i].parent;
3311 dir.haveDot = dir.haveDotDot = 0;
3312 dir.ds_linkH = alinkH;
3313 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, fileSysDevice,
3314 dirVnodeInfo->inodes[i]);
3316 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3319 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3320 (Testing ? "skipping" : "salvaging"));
3323 CopyAndSalvage(&dir);
3327 dirHandle = dir.dirHandle;
3330 GetDirName(bitNumberToVnodeNumber(i, vLarge),
3331 &dirVnodeInfo->vnodes[i], path);
3334 /* If enumeration failed for random reasons, we will probably delete
3335 * too much stuff, so we guard against this instead.
3337 assert(EnumerateDir(&dirHandle, JudgeEntry, &dir) == 0);
3340 /* Delete the old directory if it was copied in order to salvage.
3341 * CopyOnWrite has written the new inode # to the disk, but we still
3342 * have the old one in our local structure here. Thus, we idec the
3346 if (dir.copied && !Testing) {
3347 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3349 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3352 /* Remember rootdir DirSummary _after_ it has been judged */
3353 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3354 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3362 * Get a new FID that can be used to create a new file.
3364 * @param[in] volHeader vol header for the volume
3365 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3366 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3367 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3368 * updated to the new max unique if we create a new
/*
 * GetNewFID: pick a vnode number and uniquifier for a vnode that is about
 * to be created in class `class`.
 *
 * The vnodeInfo[class].vnodes table is scanned for a slot whose type is
 * vNull.  If the scan reaches nVnodes, the table is grown by one entry
 * with realloc and the new slot's type is set to vNull.  afid->Vnode is
 * the vnode number of the chosen slot.
 *
 * Uniquifier: when volHeader->uniquifier is below *maxunique + 1 the
 * header value is considered bad and afid->Unique is set to
 * *maxunique + 1 + 2000; otherwise the header's uniquifier is used for
 * both afid->Unique and *maxunique, and the header value is then
 * incremented.
 *
 * NOTE(review): the realloc result is stored straight into
 * vnodeInfo[class].vnodes and dereferenced on the next visible line with
 * no NULL check; on failure the old table would be leaked and a NULL
 * pointer written through -- confirm and handle.
 * NOTE(review): only the vnodes array is grown in the visible lines; if
 * class is vLarge the parallel inodes array may also need growing --
 * confirm against callers.
 */
3372 GetNewFID(VolumeDiskData *volHeader, VnodeClass class, AFSFid *afid,
3376 for (i = 0; i < vnodeInfo[class].nVnodes; i++) {
3377 if (vnodeInfo[class].vnodes[i].type == vNull) {
3381 if (i == vnodeInfo[class].nVnodes) {
3382 /* no free vnodes; make a new one */
3383 vnodeInfo[class].nVnodes++;
3384 vnodeInfo[class].vnodes = realloc(vnodeInfo[class].vnodes,
3385 sizeof(struct VnodeEssence) * (i+1));
3386 vnodeInfo[class].vnodes[i].type = vNull;
3389 afid->Vnode = bitNumberToVnodeNumber(i, class);
3391 if (volHeader->uniquifier < (*maxunique + 1)) {
3392 /* header uniq is bad; it will get bumped by 2000 later */
3393 afid->Unique = *maxunique + 1 + 2000;
3396 /* header uniq seems okay; just use that */
3397 afid->Unique = *maxunique = volHeader->uniquifier++;
3402 * Create a vnode for a README file explaining not to use a recreated-root vol.
3404 * @param[in] volHeader vol header for the volume
3405 * @param[in] alinkH ihandle for i/o for the volume
3406 * @param[in] vid volume id
3407 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3408 * updated to the new max unique if we create a new
3410 * @param[out] afid FID for the new readme vnode
3411 * @param[out] ainode the inode for the new readme file
3413 * @return operation status
/*
 * CreateReadme: create a small-vnode file holding a fixed explanatory
 * text for a volume whose root directory had to be recreated.
 *
 * Visible sequence:
 *   - a FID is obtained from GetNewFID() for class vSmall;
 *   - an inode is created with IH_CREATE (data version argument 1) and
 *     the text, without its trailing NUL, is written at offset 0;
 *   - a SIZEOF_SMALLDISKVNODE buffer is allocated and filled in as a
 *     vFile vnode: mode 0777, link count 1, length of the text, the FID's
 *     uniquifier, data version 1, the new inode, both modify times set to
 *     the current time, and the small-class magic;
 *   - that vnode is written into the small vnode index at the FID's slot;
 *   - vnodeInfo[vSmall] counters and the slot's VnodeEssence are updated
 *     to describe the new vnode;
 *   - *ainode receives the new inode number.
 * On the recovery path the inode's link count is dropped with IH_DEC.
 *
 * NOTE(review): the allocation-failure message is prefixed
 * "CreateRootDir:" although it is logged from this function.
 * NOTE(review): the short-write message prints sizeof(readme) while the
 * comparison is made against length, which is sizeof(readme) - 1.
 * NOTE(review): return statements, the error labels and the release of
 * rvnode are on lines not visible in this listing.
 */
3418 CreateReadme(VolumeDiskData *volHeader, IHandle_t *alinkH,
3419 VolumeId vid, Unique *maxunique, AFSFid *afid, Inode *ainode)
3422 struct VnodeDiskObject *rvnode = NULL;
3424 IHandle_t *readmeH = NULL;
3425 struct VnodeEssence *vep;
3427 time_t now = time(NULL);
3429 /* Try to make the note brief, but informative. Only administrators should
3430 * be able to read this file at first, so we can hopefully assume they
3431 * know what AFS is, what a volume is, etc. */
3433 "This volume has been salvaged, but has lost its original root directory.\n"
3434 "The root directory that exists now has been recreated from orphan files\n"
3435 "from the rest of the volume. This recreated root directory may interfere\n"
3436 "with old cached data on clients, and there is no way the salvager can\n"
3437 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3438 "use this volume, but only copy the salvaged data to a new volume.\n"
3439 "Continuing to use this volume as it exists now may cause some clients to\n"
3440 "behave oddly when accessing this volume.\n"
3441 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3442 /* ^ the person reading this probably just lost some data, so they could
3443 * use some cheering up. */
3445 /* -1 for the trailing NUL */
3446 length = sizeof(readme) - 1;
3448 GetNewFID(volHeader, vSmall, afid, maxunique);
3450 vep = &vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3452 /* create the inode and write the contents */
3453 readmeinode = IH_CREATE(alinkH, fileSysDevice, fileSysPath, 0, vid,
3454 afid->Vnode, afid->Unique, 1);
3455 if (!VALID_INO(readmeinode)) {
3456 Log("CreateReadme: readme IH_CREATE failed\n");
3460 IH_INIT(readmeH, fileSysDevice, vid, readmeinode);
3461 bytes = IH_IWRITE(readmeH, 0, readme, length);
3462 IH_RELEASE(readmeH);
3464 if (bytes != length) {
3465 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3466 (int)sizeof(readme));
3470 /* create the vnode and write it out */
3471 rvnode = malloc(SIZEOF_SMALLDISKVNODE);
3473 Log("CreateRootDir: error alloc'ing memory\n");
3477 rvnode->type = vFile;
3479 rvnode->modeBits = 0777;
3480 rvnode->linkCount = 1;
3481 VNDISK_SET_LEN(rvnode, length);
3482 rvnode->uniquifier = afid->Unique;
3483 rvnode->dataVersion = 1;
3484 VNDISK_SET_INO(rvnode, readmeinode);
3485 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3490 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
3492 bytes = IH_IWRITE(vnodeInfo[vSmall].handle,
3493 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3494 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3496 if (bytes != SIZEOF_SMALLDISKVNODE) {
3497 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3498 (int)SIZEOF_SMALLDISKVNODE);
3502 /* update VnodeEssence for new readme vnode */
3503 vnodeInfo[vSmall].nAllocatedVnodes++;
3505 vep->blockCount = nBlocks(length);
3506 vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3507 vep->parent = rvnode->parent;
3508 vep->unique = rvnode->uniquifier;
3509 vep->modeBits = rvnode->modeBits;
3510 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3511 vep->type = rvnode->type;
3512 vep->author = rvnode->author;
3513 vep->owner = rvnode->owner;
3514 vep->group = rvnode->group;
3524 *ainode = readmeinode;
3529 if (IH_DEC(alinkH, readmeinode, vid)) {
3530 Log("CreateReadme (recovery): IH_DEC failed\n");
3542 * create a root dir for a volume that lacks one.
3544 * @param[in] volHeader vol header for the volume
3545 * @param[in] alinkH ihandle for disk access for this volume group
3546 * @param[in] vid volume id we're dealing with
3547 * @param[out] rootdir populated with info about the new root dir
3548 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3549 * updated to the new max unique if we create a new
3552 * @return operation status
3557 CreateRootDir(VolumeDiskData *volHeader, IHandle_t *alinkH, VolumeId vid,
3558 struct DirSummary *rootdir, Unique *maxunique)
3561 int decroot = 0, decreadme = 0;
3562 AFSFid did, readmeid;
3565 struct VnodeDiskObject *rootvnode = NULL;
3566 struct acl_accessList *ACL;
3569 struct VnodeEssence *vep;
3571 time_t now = time(NULL);
3573 if (!vnodeInfo[vLarge].vnodes && !vnodeInfo[vSmall].vnodes) {
3574 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3578 if (!vnodeInfo[vLarge].vnodes) {
3579 /* We don't have any large vnodes in the volume; allocate room
3580 * for one so we can recreate the root dir */
3581 vnodeInfo[vLarge].nVnodes = 1;
3582 vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3583 vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3585 assert(vnodeInfo[vLarge].vnodes);
3586 assert(vnodeInfo[vLarge].inodes);
3589 vep = &vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3590 ip = &vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3591 if (vep->type != vNull) {
3592 Log("Not creating new root dir; existing vnode 1 is non-null\n");
3596 if (CreateReadme(volHeader, alinkH, vid, maxunique, &readmeid, &readmeinode)) {
3601 /* set the DV to a very high number, so it is unlikely that we collide
3602 * with a cached DV */
3605 rootinode = IH_CREATE(alinkH, fileSysDevice, fileSysPath, 0, vid, 1, 1, dv);
3606 if (!VALID_INO(rootinode)) {
3607 Log("CreateRootDir: IH_CREATE failed\n");
3612 SetSalvageDirHandle(&rootdir->dirHandle, vid, fileSysDevice, rootinode);
3616 if (MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3617 Log("CreateRootDir: MakeDir failed\n");
3620 if (Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3621 Log("CreateRootDir: Create failed\n");
3625 length = Length(&rootdir->dirHandle);
3626 DZap((void *)&rootdir->dirHandle);
3628 /* create the new root dir vnode */
3629 rootvnode = malloc(SIZEOF_LARGEDISKVNODE);
3631 Log("CreateRootDir: malloc failed\n");
3635 /* only give 'rl' permissions to 'system:administrators'. We do this to
3636 * try to catch the attention of an administrator, that they should not
3637 * be writing to this directory or continue to use it. */
3638 ACL = VVnodeDiskACL(rootvnode);
3639 ACL->size = sizeof(struct acl_accessList);
3640 ACL->version = ACL_ACLVERSION;
3644 ACL->entries[0].id = -204; /* system:administrators */
3645 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
3647 rootvnode->type = vDirectory;
3648 rootvnode->cloned = 0;
3649 rootvnode->modeBits = 0777;
3650 rootvnode->linkCount = 2;
3651 VNDISK_SET_LEN(rootvnode, length);
3652 rootvnode->uniquifier = 1;
3653 rootvnode->dataVersion = dv;
3654 VNDISK_SET_INO(rootvnode, rootinode);
3655 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3656 rootvnode->author = 0;
3657 rootvnode->owner = 0;
3658 rootvnode->parent = 0;
3659 rootvnode->group = 0;
3660 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3662 /* write it out to disk */
3663 bytes = IH_IWRITE(vnodeInfo[vLarge].handle,
3664 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3665 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3667 if (bytes != SIZEOF_LARGEDISKVNODE) {
3668 /* just cast to int and don't worry about printing real 64-bit ints;
3669 * a large disk vnode isn't anywhere near the 32-bit limit */
3670 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3671 (int)SIZEOF_LARGEDISKVNODE);
3675 /* update VnodeEssence for the new root vnode */
3676 vnodeInfo[vLarge].nAllocatedVnodes++;
3678 vep->blockCount = nBlocks(length);
3679 vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3680 vep->parent = rootvnode->parent;
3681 vep->unique = rootvnode->uniquifier;
3682 vep->modeBits = rootvnode->modeBits;
3683 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3684 vep->type = rootvnode->type;
3685 vep->author = rootvnode->author;
3686 vep->owner = rootvnode->owner;
3687 vep->group = rootvnode->group;
3697 /* update DirSummary for the new root vnode */
3698 rootdir->vnodeNumber = 1;
3699 rootdir->unique = 1;
3700 rootdir->haveDot = 1;
3701 rootdir->haveDotDot = 1;
3702 rootdir->rwVid = vid;
3703 rootdir->copied = 0;
3704 rootdir->parent = 0;
3705 rootdir->name = strdup(".");
3706 rootdir->vname = volHeader->name;
3707 rootdir->ds_linkH = alinkH;
3714 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3715 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3717 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3718 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3728 SalvageVolume(register struct InodeSummary *rwIsp, IHandle_t * alinkH)
3730 /* This routine, for now, will only be called for read-write volumes */
3732 int BlocksInVolume = 0, FilesInVolume = 0;
3733 register VnodeClass class;
3734 struct DirSummary rootdir, oldrootdir;
3735 struct VnodeInfo *dirVnodeInfo;
3736 struct VnodeDiskObject vnode;
3737 VolumeDiskData volHeader;
3739 int orphaned, rootdirfound = 0;
3740 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3741 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3742 struct VnodeEssence *vep;
3745 afs_sfsize_t nBytes;
3747 VnodeId LFVnode, ThisVnode;
3748 Unique LFUnique, ThisUnique;
3752 vid = rwIsp->volSummary->header.id;
3753 IH_INIT(h, fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3754 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3755 assert(nBytes == sizeof(volHeader));
3756 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3757 assert(volHeader.destroyMe != DESTROY_ME);
3758 /* (should not have gotten this far with DESTROY_ME flag still set!) */
3760 DistilVnodeEssence(vid, vLarge, rwIsp->volSummary->header.largeVnodeIndex,
3762 DistilVnodeEssence(vid, vSmall, rwIsp->volSummary->header.smallVnodeIndex,
3765 dirVnodeInfo = &vnodeInfo[vLarge];
3766 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3767 SalvageDir(volHeader.name, vid, dirVnodeInfo, alinkH, i, &rootdir,
3771 nt_sync(fileSysDevice);
3773 sync(); /* This used to be done lower level, for every dir */
3780 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
3782 Log("Cannot find root directory for volume %lu; attempting to create "
3783 "a new one\n", afs_printable_uint32_lu(vid));
3785 code = CreateRootDir(&volHeader, alinkH, vid, &rootdir, &maxunique);
3792 /* Parse each vnode looking for orphaned vnodes and
3793 * connect them to the tree as orphaned (if requested).
3795 oldrootdir = rootdir;
3796 for (class = 0; class < nVNODECLASSES; class++) {
3797 for (v = 0; v < vnodeInfo[class].nVnodes; v++) {
3798 vep = &(vnodeInfo[class].vnodes[v]);
3799 ThisVnode = bitNumberToVnodeNumber(v, class);
3800 ThisUnique = vep->unique;
3802 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3803 continue; /* Ignore unused, claimed, and root vnodes */
3805 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3806 * entry in this vnode had incremented the parent link count (In
3807 * JudgeEntry()). We need to go to the parent and decrement that
3808 * link count. But if the parent's unique is zero, then the parent
3809 * link count was not incremented in JudgeEntry().
3811 if (class == vLarge) { /* directory vnode */
3812 pv = vnodeIdToBitNumber(vep->parent);
3813 if (vnodeInfo[vLarge].vnodes[pv].unique != 0) {
3814 if (vep->parent == 1 && newrootdir) {
3815 /* this vnode's parent was the volume root, and
3816 * we just created the volume root. So, the parent
3817 * dir didn't exist during JudgeEntry, so the link
3818 * count was not inc'd there, so don't dec it here.
3824 vnodeInfo[vLarge].vnodes[pv].count++;
3830 continue; /* If no rootdir, can't attach orphaned files */
3832 /* Here we attach orphaned files and directories into the
3833 * root directory, LFVnode, making sure link counts stay correct.
3835 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3836 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3837 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3839 /* Update this orphaned vnode's info. Its parent info and
3840 * link count (do for orphaned directories and files).
3842 vep->parent = LFVnode; /* Parent is the root dir */
3843 vep->unique = LFUnique;
3846 vep->count--; /* Inc link count (root dir will pt to it) */
3848 /* If this orphaned vnode is a directory, change '..'.
3849 * The name of the orphaned dir/file is unknown, so we
3850 * build a unique name. No need to CopyOnWrite the directory
3851 * since it is not connected to tree in BK or RO volume and
3852 * won't be visible there.
3854 if (class == vLarge) {
3858 /* Remove and recreate the ".." entry in this orphaned directory */
3859 SetSalvageDirHandle(&dh, vid, fileSysDevice,
3860 vnodeInfo[class].inodes[v]);
3862 pa.Unique = LFUnique;
3863 assert(Delete(&dh, "..") == 0);
3864 assert(Create(&dh, "..", &pa) == 0);
3866 /* The original parent's link count was decremented above.
3867 * Here we increment the new parent's link count.
3869 pv = vnodeIdToBitNumber(LFVnode);
3870 vnodeInfo[vLarge].vnodes[pv].count--;
3874 /* Go to the root dir and add this entry. The link count of the
3875 * root dir was incremented when ".." was created. Try 10 times.
3877 for (j = 0; j < 10; j++) {
3878 pa.Vnode = ThisVnode;
3879 pa.Unique = ThisUnique;
3881 (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
3883 vLarge) ? "__ORPHANDIR__" :
3884 "__ORPHANFILE__"), ThisVnode,
3887 CopyOnWrite(&rootdir);
3888 code = Create(&rootdir.dirHandle, npath, &pa);
3892 ThisUnique += 50; /* Try creating a different file */
3895 Log("Attaching orphaned %s to volume's root dir as %s\n",
3896 ((class == vLarge) ? "directory" : "file"), npath);
3898 } /* for each vnode in the class */
3899 } /* for each class of vnode */
3901 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
3903 if (!oldrootdir.copied && rootdir.copied) {
3905 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
3908 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
3911 DFlush(); /* Flush the changes */
3912 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
3913 Log("Cannot attach orphaned files and directories: Root directory not found\n");
3914 orphans = ORPH_IGNORE;
3917 /* Write out all changed vnodes. Orphaned files and directories
3918 * will get removed here also (if requested).
3920 for (class = 0; class < nVNODECLASSES; class++) {
3921 int nVnodes = vnodeInfo[class].nVnodes;
3922 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3923 struct VnodeEssence *vnodes = vnodeInfo[class].vnodes;
3924 FilesInVolume += vnodeInfo[class].nAllocatedVnodes;
3925 BlocksInVolume += vnodeInfo[class].volumeBlockCount;
3926 for (i = 0; i < nVnodes; i++) {
3927 register struct VnodeEssence *vnp = &vnodes[i];
3928 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
3930 /* If the vnode is good but is unclaimed (not listed in
3931 * any directory entries), then it is orphaned.
3934 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(vnodeNumber))) {
3935 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
3939 if (vnp->changed || vnp->count) {
3943 IH_IREAD(vnodeInfo[class].handle,
3944 vnodeIndexOffset(vcp, vnodeNumber),
3945 (char *)&vnode, sizeof(vnode));
3946 assert(nBytes == sizeof(vnode));
3948 vnode.parent = vnp->parent;
3949 oldCount = vnode.linkCount;
3950 vnode.linkCount = vnode.linkCount - vnp->count;
3953 orphaned = IsVnodeOrphaned(vnodeNumber);
3955 if (!vnp->todelete) {
3956 /* Orphans should have already been attached (if requested) */
3957 assert(orphans != ORPH_ATTACH);
3958 oblocks += vnp->blockCount;
3961 if (((orphans == ORPH_REMOVE) || vnp->todelete)
3963 BlocksInVolume -= vnp->blockCount;
3965 if (VNDISK_GET_INO(&vnode)) {
3967 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
3970 memset(&vnode, 0, sizeof(vnode));
3972 } else if (vnp->count) {
3974 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
3977 vnode.modeBits = vnp->modeBits;
3980 vnode.dataVersion++;
3983 IH_IWRITE(vnodeInfo[class].handle,
3984 vnodeIndexOffset(vcp, vnodeNumber),
3985 (char *)&vnode, sizeof(vnode));
3986 assert(nBytes == sizeof(vnode));
3992 if (!Showmode && ofiles) {
3993 Log("%s %d orphaned files and directories (approx. %u KB)\n",
3995 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
3999 for (class = 0; class < nVNODECLASSES; class++) {
4000 register struct VnodeInfo *vip = &vnodeInfo[class];
4001 for (i = 0; i < vip->nVnodes; i++)
4002 if (vip->vnodes[i].name)
4003 free(vip->vnodes[i].name);
4010 /* Set correct resource utilization statistics */
4011 volHeader.filecount = FilesInVolume;
4012 volHeader.diskused = BlocksInVolume;
4014 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
4015 if (volHeader.uniquifier < (maxunique + 1)) {
4017 Log("Volume uniquifier is too low; fixed\n");
4018 /* Plus 2,000 in case there are workstations out there with
4019 * cached vnodes that have since been deleted
4021 volHeader.uniquifier = (maxunique + 1 + 2000);
4025 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4026 "Only use this salvaged volume to copy data to another volume; "
4027 "do not continue to use this volume (%lu) as-is.\n",
4028 afs_printable_uint32_lu(vid));
4031 /* Turn off the inUse bit; the volume's been salvaged! */
4032 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4033 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4034 volHeader.inService = 1; /* allow service again */
4035 volHeader.needsCallback = (VolumeChanged != 0);
4036 volHeader.dontSalvage = DONT_SALVAGE;
4039 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4040 assert(nBytes == sizeof(volHeader));
4043 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
4044 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
4045 FilesInVolume, BlocksInVolume);
4047 IH_RELEASE(vnodeInfo[vSmall].handle);
4048 IH_RELEASE(vnodeInfo[vLarge].handle);
4054 ClearROInUseBit(struct VolumeSummary *summary)
4056 IHandle_t *h = summary->volumeInfoHandle;
4057 afs_sfsize_t nBytes;
4059 VolumeDiskData volHeader;
4061 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4062 assert(nBytes == sizeof(volHeader));
4063 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
4064 volHeader.inUse = 0;
4065 volHeader.needsSalvaged = 0;
4066 volHeader.inService = 1;
4067 volHeader.dontSalvage = DONT_SALVAGE;
4069 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4070 assert(nBytes == sizeof(volHeader));
4075 * Possibly delete the volume.
4077 * deleteMe - Always do so; the volume is only a partial volume.
4080 MaybeZapVolume(register struct InodeSummary *isp, char *message, int deleteMe,
4083 if (readOnly(isp) || deleteMe) {
4084 if (isp->volSummary && isp->volSummary->fileName) {
4087 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
4089 Log("It will be deleted on this server (you may find it elsewhere)\n");
4092 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
4094 Log("it will be deleted instead. It should be recloned.\n");
4099 sprintf(path, "%s/%s", fileSysPath, isp->volSummary->fileName);
4101 code = VDestroyVolumeDiskHeader(fileSysPartition, isp->volumeId, isp->RWvolumeId);
4103 Log("Error %ld destroying volume disk header for volume %lu\n",
4104 afs_printable_int32_ld(code),
4105 afs_printable_uint32_lu(isp->volumeId));
4108 /* make sure we actually delete the fileName file; ENOENT
4109 * is fine, since VDestroyVolumeDiskHeader probably already
4111 if (unlink(path) && errno != ENOENT) {
4112 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4116 } else if (!check) {
4117 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
4119 Abort("Salvage of volume %u aborted\n", isp->volumeId);
4123 #ifdef AFS_DEMAND_ATTACH_FS
4125 * Locks a volume on disk for salvaging.
4127 * @param[in] volumeId volume ID to lock
4129 * @return operation status
4131 * @retval -1 volume lock raced with a fileserver restart; all volumes must
4132 * be checked out and locked again
4137 LockVolume(VolumeId volumeId)
4142 /* should always be WRITE_LOCK, but keep the lock-type logic all
4143 * in one place, in VVolLockType. Params will be ignored, but
4144 * try to provide what we're logically doing. */
4145 locktype = VVolLockType(V_VOLUPD, 1);
4147 code = VLockVolumeByIdNB(volumeId, fileSysPartition, locktype);
4149 if (code == EBUSY) {
4150 Abort("Someone else appears to be using volume %lu; Aborted\n",
4151 afs_printable_uint32_lu(volumeId));
4153 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4154 afs_printable_int32_ld(code),
4155 afs_printable_uint32_lu(volumeId));
4158 code = FSYNC_VerifyCheckout(volumeId, fileSysPathName, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4159 if (code == SYNC_DENIED) {
4160 /* need to retry checking out volumes */
4163 if (code != SYNC_OK) {
4164 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4165 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4168 /* set inUse = programType in the volume header to ensure that nobody
4169 * tries to use this volume again without salvaging, if we somehow crash
4170 * or otherwise exit before finishing the salvage.
4174 struct VolumeHeader header;
4175 struct VolumeDiskHeader diskHeader;
4176 struct VolumeDiskData volHeader;
4178 code = VReadVolumeDiskHeader(volumeId, fileSysPartition, &diskHeader);
4183 DiskToVolumeHeader(&header, &diskHeader);
4185 IH_INIT(h, fileSysDevice, header.parent, header.volumeInfo);
4186 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4187 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4193 volHeader.inUse = programType;
4195 /* If we can't re-write the header, bail out and error. We don't
4196 * assert when reading the header, since it's possible the
4197 * header isn't really there (when there's no data associated
4198 * with the volume; we just delete the vol header file in that
4199 * case). But if it's there enough that we can read it, but
4200 * somehow we cannot write to it to signify we're salvaging it,
4201 * we've got a big problem and we cannot continue. */
4202 assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
4209 #endif /* AFS_DEMAND_ATTACH_FS */
4212 AskOffline(VolumeId volumeId, char * partition)
4217 memset(&res, 0, sizeof(res));
4219 for (i = 0; i < 3; i++) {
4220 code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4222 if (code == SYNC_OK) {
4224 } else if (code == SYNC_DENIED) {
4225 #ifdef DEMAND_ATTACH_ENABLE
4226 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4228 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4230 Abort("Salvage aborted\n");
4231 } else if (code == SYNC_BAD_COMMAND) {
4232 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
4234 #ifdef DEMAND_ATTACH_ENABLE
4235 Log("AskOffline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
4237 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4239 Abort("Salvage aborted\n");
4242 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4243 FSYNC_clientFinis();
4247 if (code != SYNC_OK) {
4248 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4249 Abort("Salvage aborted\n");
4254 AskOnline(VolumeId volumeId, char *partition)
4258 for (i = 0; i < 3; i++) {
4259 code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4261 if (code == SYNC_OK) {
4263 } else if (code == SYNC_DENIED) {
4264 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, partition);
4265 } else if (code == SYNC_BAD_COMMAND) {
4266 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4268 #ifdef DEMAND_ATTACH_ENABLE
4269 Log("AskOnline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
4271 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4276 Log("AskOnline: request for fileserver to take volume offline failed; trying again...\n");
4277 FSYNC_clientFinis();
4284 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4286 /* Volume parameter is passed in case iopen is upgraded in future to
4287 * require a volume Id to be passed
4290 IHandle_t *srcH, *destH;
4291 FdHandle_t *srcFdP, *destFdP;
4294 IH_INIT(srcH, device, rwvolume, inode1);
4295 srcFdP = IH_OPEN(srcH);
4296 assert(srcFdP != NULL);
4297 IH_INIT(destH, device, rwvolume, inode2);
4298 destFdP = IH_OPEN(destH);
4300 while ((n = FDH_READ(srcFdP, buf, sizeof(buf))) > 0)
4301 assert(FDH_WRITE(destFdP, buf, n) == n);
4303 FDH_REALLYCLOSE(srcFdP);
4304 FDH_REALLYCLOSE(destFdP);
4311 PrintInodeList(void)
4313 register struct ViceInodeInfo *ip;
4314 struct ViceInodeInfo *buf;
4315 struct afs_stat status;
4316 register int nInodes;
4318 assert(afs_fstat(inodeFd, &status) == 0);
4319 buf = (struct ViceInodeInfo *)malloc(status.st_size);
4320 assert(buf != NULL);
4321 nInodes = status.st_size / sizeof(struct ViceInodeInfo);
4322 assert(read(inodeFd, buf, status.st_size) == status.st_size);
4323 for (ip = buf; nInodes--; ip++) {
4324 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
4325 PrintInode(NULL, ip->inodeNumber), ip->linkCount,
4326 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
4327 ip->u.param[2], ip->u.param[3]);
4333 PrintInodeSummary(void)
4336 struct InodeSummary *isp;
4338 for (i = 0; i < nVolumesInInodeFile; i++) {
4339 isp = &inodeSummary[i];
4340 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
4345 PrintVolumeSummary(void)
4348 struct VolumeSummary *vsp;
4350 for (i = 0, vsp = volumeSummaryp; i < nVolumes; vsp++, i++) {
4351 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
4361 assert(0); /* Fork is never executed in the NT code path */
4365 #ifdef AFS_DEMAND_ATTACH_FS
4366 if ((f == 0) && (programType == salvageServer)) {
4367 /* we are a salvageserver child */
4368 #ifdef FSSYNC_BUILD_CLIENT
4369 VChildProcReconnectFS_r();
4371 #ifdef SALVSYNC_BUILD_CLIENT
4375 #endif /* AFS_DEMAND_ATTACH_FS */
4376 #endif /* !AFS_NT40_ENV */
4386 #ifdef AFS_DEMAND_ATTACH_FS
4387 if (programType == salvageServer) {
4388 #ifdef SALVSYNC_BUILD_CLIENT
4391 #ifdef FSSYNC_BUILD_CLIENT
4395 #endif /* AFS_DEMAND_ATTACH_FS */
4398 if (main_thread != pthread_self())
4399 pthread_exit((void *)code);
4412 pid = wait(&status);
4414 if (WCOREDUMP(status))
4415 Log("\"%s\" core dumped!\n", prog);
4416 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
4422 TimeStamp(time_t clock, int precision)
4425 static char timestamp[20];
4426 lt = localtime(&clock);
4428 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M:%S", lt);
4430 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M", lt);
4435 CheckLogFile(char * log_path)
4437 char oldSlvgLog[AFSDIR_PATH_MAX];
4439 #ifndef AFS_NT40_ENV
4446 strcpy(oldSlvgLog, log_path);
4447 strcat(oldSlvgLog, ".old");
4449 renamefile(log_path, oldSlvgLog);
4450 logFile = afs_fopen(log_path, "a");
4452 if (!logFile) { /* still nothing, use stdout */
4456 #ifndef AFS_NAMEI_ENV
4457 AFS_DEBUG_IOPS_LOG(logFile);
4462 #ifndef AFS_NT40_ENV
4464 TimeStampLogFile(char * log_path)
4466 char stampSlvgLog[AFSDIR_PATH_MAX];
4471 lt = localtime(&now);
4472 (void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
4473 "%s.%04d-%02d-%02d.%02d:%02d:%02d",
4474 log_path, lt->tm_year + 1900,
4475 lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
4478 /* try to link the logfile to a timestamped filename */
4479 /* if it fails, oh well, nothing we can do */
4480 link(log_path, stampSlvgLog);
4489 #ifndef AFS_NT40_ENV
4491 printf("Can't show log since using syslog.\n");
4502 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
4505 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
4508 while (fgets(line, sizeof(line), logFile))
4515 Log(const char *format, ...)
4521 va_start(args, format);
4522 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4524 #ifndef AFS_NT40_ENV
4526 syslog(LOG_INFO, "%s", tmp);
4530 gettimeofday(&now, 0);
4531 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
4537 Abort(const char *format, ...)
4542 va_start(args, format);
4543 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4545 #ifndef AFS_NT40_ENV
4547 syslog(LOG_INFO, "%s", tmp);
4551 fprintf(logFile, "%s", tmp);
4563 ToString(const char *s)
4566 p = (char *)malloc(strlen(s) + 1);
4572 /* Remove the FORCESALVAGE file */
4574 RemoveTheForce(char *path)
4577 struct afs_stat force; /* so we can use afs_stat to find it */
4578 strcpy(target,path);
4579 strcat(target,"/FORCESALVAGE");
4580 if (!Testing && ForceSalvage) {
4581 if (afs_stat(target,&force) == 0) unlink(target);
4585 #ifndef AFS_AIX32_ENV
4587 * UseTheForceLuke - see if we can use the force
4590 UseTheForceLuke(char *path)
4592 struct afs_stat force;
4594 strcpy(target,path);
4595 strcat(target,"/FORCESALVAGE");
4597 return (afs_stat(target, &force) == 0);
4601 * UseTheForceLuke - see if we can use the force
4604 * The VRMIX fsck will not muck with the filesystem it is supposedly
4605 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4606 * muck directly with the root inode, which is within the normal
4608 * ListViceInodes() has a side effect of setting ForceSalvage if
4609 * it detects a need, based on root inode examination.
4612 UseTheForceLuke(char *path)
4615 return 0; /* sorry OB1 */
4620 /* NT support routines */
4622 static char execpathname[MAX_PATH];
4624 nt_SalvagePartition(char *partName, int jobn)
4629 if (!*execpathname) {
4630 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
4631 if (!n || n == 1023)
4634 job.cj_magic = SALVAGER_MAGIC;
4635 job.cj_number = jobn;
4636 (void)strcpy(job.cj_part, partName);
4637 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
4642 nt_SetupPartitionSalvage(void *datap, int len)
4644 childJob_t *jobp = (childJob_t *) datap;
4645 char logname[AFSDIR_PATH_MAX];
4647 if (len != sizeof(childJob_t))
4649 if (jobp->cj_magic != SALVAGER_MAGIC)
4654 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
4656 logFile = afs_fopen(logname, "w");
4664 #endif /* AFS_NT40_ENV */