2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
91 #include <sys/param.h>
95 #endif /* ITIMER_REAL */
101 #include <sys/stat.h>
106 #include <WINNT/afsevent.h>
109 #define WCOREDUMP(x) ((x) & 0200)
112 #include <afs/afsint.h>
113 #include <afs/assert.h>
114 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
115 #if defined(AFS_VFSINCL_ENV)
116 #include <sys/vnode.h>
118 #include <sys/fs/ufs_inode.h>
120 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
121 #include <ufs/ufs/dinode.h>
122 #include <ufs/ffs/fs.h>
124 #include <ufs/inode.h>
127 #else /* AFS_VFSINCL_ENV */
129 #include <ufs/inode.h>
130 #else /* AFS_OSF_ENV */
131 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
132 #include <sys/inode.h>
135 #endif /* AFS_VFSINCL_ENV */
136 #endif /* AFS_SGI_ENV */
139 #include <sys/lockf.h>
143 #include <checklist.h>
145 #if defined(AFS_SGI_ENV)
150 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
153 #include <sys/mnttab.h>
154 #include <sys/mntent.h>
159 #endif /* AFS_SGI_ENV */
160 #endif /* AFS_HPUX_ENV */
165 #include <afs/osi_inode.h>
169 #include <afs/afsutil.h>
170 #include <afs/fileutil.h>
171 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
179 #include <afs/afssyscalls.h>
183 #include "partition.h"
184 #include "daemon_com.h"
186 #include "salvsync.h"
187 #include "viceinode.h"
189 #include "volinodes.h" /* header magic number, etc. stuff */
190 #include "vol-salvage.h"
191 #include "vol_internal.h"
193 #ifdef FSSYNC_BUILD_CLIENT
194 #include "vg_cache.h"
/*
 * File-access aliases used by the rest of this file: afs_lseek, afs_stat,
 * afs_fstat, afs_open and afs_fopen.
 *
 * One branch maps them onto the 64-bit entry points (lseek64, stat64,
 * fstat64, open64, fopen64); the branch labelled "!O_LARGEFILE" maps them
 * onto plain lseek, stat, fstat, open and fopen.  In both branches
 * afs_lseek casts its offset argument to the offset type of the call it
 * expands to (off64_t or off_t).  The extern prototypes are paired with the
 * S_SPLINT_S conditionals and declare afs_lseek with the matching type.
 */
201 /*@+fcnmacros +macrofcndecl@*/
204 extern off64_t afs_lseek(int FD, off64_t O, int F);
205 #endif /*S_SPLINT_S */
206 #define afs_lseek(FD, O, F) lseek64(FD, (off64_t) (O), F)
207 #define afs_stat stat64
208 #define afs_fstat fstat64
209 #define afs_open open64
210 #define afs_fopen fopen64
211 #else /* !O_LARGEFILE */
213 extern off_t afs_lseek(int FD, off_t O, int F);
214 #endif /*S_SPLINT_S */
215 #define afs_lseek(FD, O, F) lseek(FD, (off_t) (O), F)
216 #define afs_stat stat
217 #define afs_fstat fstat
218 #define afs_open open
219 #define afs_fopen fopen
220 #endif /* !O_LARGEFILE */
221 /*@=fcnmacros =macrofcndecl@*/
224 extern void *calloc();
226 static char *TimeStamp(time_t clock, int precision);
229 int debug; /* -d flag */
230 extern int Testing; /* -n flag */
231 int ListInodeOption; /* -i flag */
232 int ShowRootFiles; /* -r flag */
233 int RebuildDirs; /* -sal flag */
234 int Parallel = 4; /* -para X flag */
235 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
236 int forceR = 0; /* -b flag */
237 int ShowLog = 0; /* -showlog flag */
238 int ShowSuid = 0; /* -showsuid flag */
239 int ShowMounts = 0; /* -showmounts flag */
240 int orphans = ORPH_IGNORE; /* -orphans option */
245 int useSyslog = 0; /* -syslog flag */
246 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
255 #define MAXPARALLEL 32
257 int OKToZap; /* -o flag */
258 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
259 * in the volume header */
261 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
263 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
265 Device fileSysDevice; /* The device number of the current
266 * partition being salvaged */
270 char *fileSysPath; /* The path of the mounted partition currently
271 * being salvaged, i.e. the directory
272 * containing the volume headers */
274 char *fileSysPathName; /* NT needs this to make name pretty in log. */
275 IHandle_t *VGLinkH; /* Link handle for current volume group. */
276 int VGLinkH_cnt; /* # of references to lnk handle. */
277 struct DiskPartition64 *fileSysPartition; /* Partition being salvaged */
279 char *fileSysDeviceName; /* The block device where the file system
280 * being salvaged was mounted */
281 char *filesysfulldev;
283 int VolumeChanged; /* Set by any routine which would change the volume in
284 * a way which would require callbacks to be broken if the
285 * volume was put back on line by an active file server */
287 VolumeDiskData VolInfo; /* A copy of the last good or salvaged volume header dealt with */
289 int nVolumesInInodeFile; /* Number of read-write volumes summarized */
290 int inodeFd; /* File descriptor for inode file */
293 struct VnodeInfo vnodeInfo[nVNODECLASSES];
296 struct VolumeSummary *volumeSummaryp = NULL; /* Holds all the volumes in a part */
297 int nVolumes; /* Number of volumes (read-write and read-only)
298 * in volume summary */
304 /* Forward declarations */
305 /*@printflike@*/ void Log(const char *format, ...);
306 /*@printflike@*/ void Abort(const char *format, ...);
307 static int IsVnodeOrphaned(VnodeId vnode);
308 static int AskVolumeSummary(VolumeId singleVolumeNumber);
310 /* Uniquifier stored in the Inode */
315 return (u & 0x3fffff);
317 #if defined(AFS_SGI_EXMAG)
318 return (u & SGI_UNIQMASK);
321 #endif /* AFS_SGI_EXMAG */
/*
 * BadError: classify an errno-style code.
 *
 * aerror - the error number to classify.
 *
 * EPERM, ENXIO and ENOENT are singled out by the test below.  Any other
 * value reaches "return 0", which the inline comment describes as possibly
 * transient (for example EMFILE).
 * NOTE(review): presumably the three listed codes produce a nonzero
 * "permanent failure" result; confirm against the statement guarded by
 * the if.
 */
326 BadError(register int aerror)
328 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
330 return 0; /* otherwise may be transient, e.g. EMFILE */
/*
 * Process-level state.
 *  save_args   - table of up to MAX_ARGS argument strings.
 *                NOTE(review): presumably a saved copy of the command line
 *                for re-executing the salvager; confirm where it is filled.
 *  main_thread - defined in another translation unit.
 *  myjob       - this process's job descriptor, initialised with
 *                SALVAGER_MAGIC, NOT_CHILD and an empty string.
 */
335 char *save_args[MAX_ARGS];
337 extern pthread_t main_thread;
338 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
/*
 * ObtainSalvageLock: take the salvager-wide lock on
 * AFSDIR_SERVER_SLVGLOCK_FILEPATH.
 *
 * Two platform variants are visible:
 *  - a CreateFile() call with OPEN_ALWAYS; a resulting INVALID_FD is
 *    reported as "another salvager running";
 *  - afs_open() with O_CREAT | O_RDWR and mode 0666; a negative descriptor
 *    is reported as "can't open salvage lock file".  The descriptor is then
 *    locked with flock(LOCK_EX) under AFS_DARWIN_ENV, or with
 *    lockf(F_LOCK, 0) otherwise; a -1 result is reported as "another
 *    salvager running".
 *
 * NOTE(review): LOCK_EX without LOCK_NB and F_LOCK are blocking requests,
 * so contention makes this call wait rather than fail.  The "another
 * salvager running" text would then only be printed on a genuine error;
 * confirm that this is intended.
 */
341 /* Get the salvage lock if not already held. Hold until process exits. */
343 ObtainSalvageLock(void)
349 (FD_t)CreateFile(AFSDIR_SERVER_SLVGLOCK_FILEPATH, 0, 0, NULL,
350 OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
351 if (salvageLock == INVALID_FD) {
353 "salvager: There appears to be another salvager running! Aborted.\n");
358 afs_open(AFSDIR_SERVER_SLVGLOCK_FILEPATH, O_CREAT | O_RDWR, 0666);
359 if (salvageLock < 0) {
361 "salvager: can't open salvage lock file %s, aborting\n",
362 AFSDIR_SERVER_SLVGLOCK_FILEPATH);
365 #ifdef AFS_DARWIN_ENV
366 if (flock(salvageLock, LOCK_EX) == -1) {
368 if (lockf(salvageLock, F_LOCK, 0) == -1) {
371 "salvager: There appears to be another salvager running! Aborted.\n");
/*
 * IsPartitionMounted (AFS_SGI_XFS_IOPS_ENV builds only).
 *
 * part - mount directory to look for.
 *
 * Opens the mount table named by MOUNTED with setmntent() and walks it with
 * getmntent(), comparing each entry's mnt_dir with part via strcmp().
 *
 * NOTE(review): the final statement is "mntent ? 1 : 1", so the result is 1
 * whether or not a matching entry was found.  This looks like it was meant
 * to be "? 1 : 0"; confirm what callers expect before changing it.
 * NOTE(review): setmntent() is called inside assert(); if assert() is ever
 * compiled out, mntfp is never assigned.
 */
378 #ifdef AFS_SGI_XFS_IOPS_ENV
379 /* Check if the given partition is mounted. For XFS, the root inode is not a
380 * constant. So we check the hard way.
383 IsPartitionMounted(char *part)
386 struct mntent *mntent;
388 assert(mntfp = setmntent(MOUNTED, "r"));
389 while (mntent = getmntent(mntfp)) {
390 if (!strcmp(part, mntent->mnt_dir))
395 return mntent ? 1 : 1;
/*
 * IsRootInode: test a stat result against the fixed root inode number.
 *
 * status - stat information for the directory being examined.
 *
 * Returns nonzero when status->st_ino equals ROOTINODE.  The function sits
 * under "#ifndef AFS_SGI_XFS_IOPS_ENV"; the comment in the body explains
 * that XFS has no fixed root inode, so that configuration checks the list
 * of mounted partitions instead.
 */
398 /* Check if the given inode is the root of the filesystem. */
399 #ifndef AFS_SGI_XFS_IOPS_ENV
401 IsRootInode(struct afs_stat *status)
404 * The root inode is not a fixed value in XFS partitions. So we need to
405 * see if the partition is in the list of mounted partitions. This only
406 * affects the SalvageFileSys path, so we check there.
408 return (status->st_ino == ROOTINODE);
/*
 * CheckIfBigFilesFS (builds without AFS_NAMEI_ENV).
 *
 * mountPoint - partition mount point.
 *              NOTE(review): presumably the value printed in the two log
 *              messages; their argument lines are not shown here.
 * devName    - device name, with or without a leading "/dev/".
 *
 * Builds the device path in name: "/dev/" is prepended when devName does
 * not already start with it, otherwise devName is copied as is.  The
 * superblock is then read with ReadSuper().  A read failure and a positive
 * IsBigFilesFileSystem() result are both logged as reasons not to salvage
 * the partition.
 *
 * NOTE(review): sprintf() and strcpy() write into name with no length
 * check; confirm name is large enough for any devName.
 */
413 #ifndef AFS_NAMEI_ENV
414 /* We don't want to salvage big files filesystems, since we can't put volumes on
418 CheckIfBigFilesFS(char *mountPoint, char *devName)
420 struct superblock fs;
423 if (strncmp(devName, "/dev/", 5)) {
424 (void)sprintf(name, "/dev/%s", devName);
426 (void)strcpy(name, devName);
429 if (ReadSuper(&fs, name) < 0) {
430 Log("Unable to read superblock. Not salvaging partition %s.\n",
434 if (IsBigFilesFileSystem(&fs)) {
435 Log("Partition %s is a big files filesystem, not salvaging.\n",
/*
 * SameDisk: decide whether two partitions are on the same physical disk.
 *
 * Function form: for each of p1 and p2, QueryDosDevice() translates devName
 * into res.  The result is compared with the HDSTR prefix
 * ("\Device\Harddisk"); a mismatch is logged as a WARNING.  The disk number
 * is parsed with atoi() from the text following the prefix, giving d1 and
 * d2.
 * NOTE(review): dowarn is a static flag initialised to 1, presumably to
 * limit how often the warning is logged; confirm.
 *
 * Macro form: two partitions are on the same disk when their device numbers
 * give the same quotient when divided by PartsPerDisk.
 * NOTE(review): a PartsPerDisk of 0 would divide by zero; confirm that the
 * option parser rejects it.
 */
445 #define HDSTR "\\Device\\Harddisk"
446 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
448 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
453 static int dowarn = 1;
455 if (!QueryDosDevice(p1->devName, res, RES_LEN - 1))
457 if (strncmp(res, HDSTR, HDLEN)) {
460 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
461 res, HDSTR, p1->devName);
465 d1 = atoi(&res[HDLEN]);
467 if (!QueryDosDevice(p2->devName, res, RES_LEN - 1))
469 if (strncmp(res, HDSTR, HDLEN)) {
472 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
473 res, HDSTR, p2->devName);
477 d2 = atoi(&res[HDLEN]);
482 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
/*
 * SalvageFileSysParallel: queue one partition for salvage, or wait for all
 * outstanding salvage jobs.
 *
 * partP - partition to salvage, or 0 to wait for every job to finish and
 *         then collect the per-job logs.
 *
 * State kept across calls (function statics):
 *   jobs[]   - one list head per disk currently being salvaged, at most
 *              MAXPARALLEL entries; further partitions on the same disk are
 *              chained through nextjob.
 *   numjobs  - number of disks currently being salvaged.
 *   jobcount - used as the job number of each new job and as the upper
 *              bound when the logs are collected.
 *
 * Behaviour visible below:
 *  - With a partition, a struct job is allocated with malloc(), zeroed and
 *    given partP and the current jobcount.  An allocation failure is
 *    logged.
 *  - With no partition and jobcount == 0, "No file system partitions named
 *    ... found" is logged.
 *  - If debug is set or Parallel == 1, the partition is salvaged
 *    immediately through SalvageFileSys().
 *  - Otherwise, if SameDisk() matches an entry of jobs[], the new job is
 *    linked in behind that entry.
 *  - The main loop runs while there is a job to start or, when draining,
 *    while jobs remain.  If numjobs has reached Parallel, or when draining,
 *    it wait()s for a child, finds the matching jobs[] entry by pid, logs a
 *    core dump if WCOREDUMP() reports one, frees that job and advances the
 *    slot to the next partition on the same disk.  An emptied slot is
 *    refilled with thisjob or with the last entry of jobs[].
 *  - A started job logs "Starting salvage of file system partition".  One
 *    variant calls nt_SalvagePartition().  The other closes descriptors
 *    0..15 in the child, selects syslog via openlog() or a per-job log file
 *    named from AFSDIR_SERVER_SLVGLOG_FILEPATH and the job number, and runs
 *    SalvageFileSys1().
 *  - After a drain, unless useSyslog is set, each "<log path>.<i>" file for
 *    i < jobcount is read line by line and unlinked.
 *
 * NOTE(review): jobs[numjobs] is written with no visible check against
 * MAXPARALLEL; confirm Parallel is clamped to MAXPARALLEL elsewhere.
 * NOTE(review): the handling of a wait() result that matches no entry in
 * jobs[] is not visible here; confirm jobs[j] is not dereferenced with
 * j == numjobs.
 */
485 /* This assumes that two partitions with the same device number divided by
486 * PartsPerDisk are on the same disk.
489 SalvageFileSysParallel(struct DiskPartition64 *partP)
492 struct DiskPartition64 *partP;
493 int pid; /* Pid for this job */
494 int jobnumb; /* Log file job number */
495 struct job *nextjob; /* Next partition on disk to salvage */
497 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
498 struct job *thisjob = 0;
499 static int numjobs = 0;
500 static int jobcount = 0;
506 char logFileName[256];
510 /* We have a partition to salvage. Copy it into thisjob */
511 thisjob = (struct job *)malloc(sizeof(struct job));
513 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
516 memset(thisjob, 0, sizeof(struct job));
517 thisjob->partP = partP;
518 thisjob->jobnumb = jobcount;
520 } else if (jobcount == 0) {
521 /* We are asking to wait for all jobs (partp == 0), yet we never
524 Log("No file system partitions named %s* found; not salvaged\n",
525 VICE_PARTITION_PREFIX);
529 if (debug || Parallel == 1) {
531 SalvageFileSys(thisjob->partP, 0);
538 /* Check to see if thisjob is for a disk that we are already
539 * salvaging. If it is, link it in as the next job to do. The
540 * jobs array has 1 entry per disk being salvages. numjobs is
541 * the total number of disks currently being salvaged. In
542 * order to keep thejobs array compact, when a disk is
543 * completed, the hightest element in the jobs array is moved
544 * down to now open slot.
546 for (j = 0; j < numjobs; j++) {
547 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
548 /* On same disk, add it to this list and return */
549 thisjob->nextjob = jobs[j]->nextjob;
550 jobs[j]->nextjob = thisjob;
557 /* Loop until we start thisjob or until all existing jobs are finished */
558 while (thisjob || (!partP && (numjobs > 0))) {
559 startjob = -1; /* No new job to start */
561 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
562 /* Either the max jobs are running or we have to wait for all
563 * the jobs to finish. In either case, we wait for at least one
564 * job to finish. When it's done, clean up after it.
566 pid = wait(&wstatus);
568 for (j = 0; j < numjobs; j++) { /* Find which job it is */
569 if (pid == jobs[j]->pid)
573 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
574 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
577 numjobs--; /* job no longer running */
578 oldjob = jobs[j]; /* remember */
579 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
580 free(oldjob); /* free the old job */
582 /* If there is another partition on the disk to salvage, then
583 * say we will start it (startjob). If not, then put thisjob there
584 * and say we will start it.
586 if (jobs[j]) { /* Another partitions to salvage */
587 startjob = j; /* Will start it */
588 } else { /* There is not another partition to salvage */
590 jobs[j] = thisjob; /* Add thisjob */
592 startjob = j; /* Will start it */
594 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
595 startjob = -1; /* Don't start it - already running */
599 /* We don't have to wait for a job to complete */
601 jobs[numjobs] = thisjob; /* Add this job */
603 startjob = numjobs; /* Will start it */
607 /* Start up a new salvage job on a partition in job slot "startjob" */
608 if (startjob != -1) {
610 Log("Starting salvage of file system partition %s\n",
611 jobs[startjob]->partP->name);
613 /* For NT, we not only fork, but re-exec the salvager. Pass in the
614 * commands and pass the child job number via the data path.
617 nt_SalvagePartition(jobs[startjob]->partP->name,
618 jobs[startjob]->jobnumb);
619 jobs[startjob]->pid = pid;
624 jobs[startjob]->pid = pid;
630 for (fd = 0; fd < 16; fd++)
637 openlog("salvager", LOG_PID, useSyslogFacility);
641 (void)afs_snprintf(logFileName, sizeof logFileName,
643 AFSDIR_SERVER_SLVGLOG_FILEPATH,
644 jobs[startjob]->jobnumb);
645 logFile = afs_fopen(logFileName, "w");
650 SalvageFileSys1(jobs[startjob]->partP, 0);
655 } /* while ( thisjob || (!partP && numjobs > 0) ) */
657 /* If waited for all jobs to complete, now collect log files and return */
659 if (!useSyslog) /* if syslogging - no need to collect */
662 for (i = 0; i < jobcount; i++) {
663 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
664 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
665 if ((passLog = afs_fopen(logFileName, "r"))) {
666 while (fgets(buf, sizeof(buf), passLog)) {
671 (void)unlink(logFileName);
/*
 * SalvageFileSys: salvage a partition or a single volume, in a child
 * process when forking is allowed.
 *
 * partP              - partition to salvage.
 * singleVolumeNumber - volume to restrict the salvage to; callers in this
 *                      file pass 0 for a whole partition.
 *
 * SalvageFileSys1() runs in the current process when canfork is false or
 * debug is set, and otherwise in the child returned by Fork().  The
 * "canfork && !debug" test marks the code that only the child executes
 * after the salvage.  The process that does not run the salvage calls
 * Wait("SalvageFileSys").
 */
680 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
682 if (!canfork || debug || Fork() == 0) {
683 SalvageFileSys1(partP, singleVolumeNumber);
684 if (canfork && !debug) {
689 Wait("SalvageFileSys");
/*
 * get_DevName: split a device path in place.
 *
 * pbuffer - device path; overwritten with the text after its last '/'.
 * wpath   - output buffer supplied by the caller.
 *           NOTE(review): presumably receives the directory part of the
 *           path; the statements that fill it are not shown here.
 *
 * A copy of pbuffer is made in the local pbuf so that the last '/' can be
 * located in both the copy and the original.
 *
 * NOTE(review): strcpy(pbuf, pbuffer) copies into a 128-byte array with no
 * length check.
 * NOTE(review): strcpy(pbuffer, ptr + 1) copies between overlapping regions
 * of the same buffer, which strcpy() does not define; memmove() would be
 * the safe form.
 */
693 get_DevName(char *pbuffer, char *wpath)
695 char pbuf[128], *ptr;
696 strcpy(pbuf, pbuffer);
697 ptr = (char *)strrchr(pbuf, '/');
703 ptr = (char *)strrchr(pbuffer, '/');
705 strcpy(pbuffer, ptr + 1);
/*
 * SalvageFileSys1: salvage one partition, or one volume group on it, in the
 * current process.
 *
 * partP              - partition to work on; recorded in the globals
 *                      fileSysPartition, fileSysDevice and fileSysPathName.
 * singleVolumeNumber - volume to salvage, or 0 for the whole partition.
 *
 * Sequence visible below:
 *  1. Derive the path and device name.  One variant appends "\\" to the
 *     partition path and uses partP->devName directly.  The other copies
 *     devName into tmpDevName and splits it with get_DevName().
 *  2. VLockPartition(partP->name).  ForceSalvage is decided from
 *     singleVolumeNumber and the existing flag, or from
 *     UseTheForceLuke(fileSysPath).
 *  3. For a single volume: connect with VConnectFS() unless programType is
 *     salvageServer (Abort on failure), then AskOffline() the volume.
 *  4. Scan fileSysPath and handle entries whose names start with
 *     "salvage.inodes." or "salvage.temp." as stale temporary files.
 *  5. Create the inode list file under tmpdir, or under fileSysPath when
 *     tmpdir is unset, and unlink it straight away so it disappears on
 *     close.  GetInodeSummary() fills it; a negative result is treated as
 *     failure.
 *  6. GetVolumeSummary(), then walk inodeSummary[]: consecutive entries
 *     with the same RWvolumeId form one group.  Volume headers whose parent
 *     id sorts before the current group are candidates for
 *     DeleteExtraVolumeHeaderFile().  Each inode summary is paired with the
 *     VolumeSummary whose header.id matches, and the group is passed to
 *     SalvageVolumeGroup().  Headers left over after the walk are handled
 *     the same way.
 *  7. RemoveTheForce() for whole-partition runs.  AskOnline() for the
 *     single volume when not Testing, or for each entry of volumeSummaryp.
 *  8. Log completion and fclose() the inode file.
 *
 * NOTE(review): tmpDevName and wpath are 100-byte static buffers filled by
 * strcpy() / get_DevName() with no visible length check.
 * NOTE(review): strncpy(inodeListPath, _tempnam(...), 255) neither tests
 * the _tempnam() result nor guarantees a terminating NUL.
 * NOTE(review): opendir() is called inside assert(); if assert() is ever
 * compiled out, dirp is never assigned.
 */
712 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
715 char inodeListPath[256];
717 static char tmpDevName[100];
718 static char wpath[100];
719 struct VolumeSummary *vsp, *esp;
723 fileSysPartition = partP;
724 fileSysDevice = fileSysPartition->device;
725 fileSysPathName = VPartitionPath(fileSysPartition);
728 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
729 (void)sprintf(fileSysPath, "%s\\", fileSysPathName);
730 name = partP->devName;
732 fileSysPath = fileSysPathName;
733 strcpy(tmpDevName, partP->devName);
734 name = get_DevName(tmpDevName, wpath);
735 fileSysDeviceName = name;
736 filesysfulldev = wpath;
739 VLockPartition(partP->name);
740 if (singleVolumeNumber || ForceSalvage)
743 ForceSalvage = UseTheForceLuke(fileSysPath);
745 if (singleVolumeNumber) {
746 /* salvageserver already setup fssync conn for us */
747 if ((programType != salvageServer) && !VConnectFS()) {
748 Abort("Couldn't connect to file server\n");
750 AskOffline(singleVolumeNumber, partP->name);
753 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
754 partP->name, name, (Testing ? "(READONLY mode)" : ""));
756 Log("***Forced salvage of all volumes on this partition***\n");
761 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
768 assert((dirp = opendir(fileSysPath)) != NULL);
769 while ((dp = readdir(dirp))) {
770 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
771 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
773 Log("Removing old salvager temp files %s\n", dp->d_name);
774 strcpy(npath, fileSysPath);
776 strcat(npath, dp->d_name);
782 tdir = (tmpdir ? tmpdir : fileSysPath);
784 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
785 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
787 snprintf(inodeListPath, 255, "%s/salvage.inodes.%s.%d", tdir, name,
791 inodeFile = fopen(inodeListPath, "w+b");
793 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
796 /* Using nt_unlink here since we're really using the delete on close
797 * semantics of unlink. In most places in the salvager, we really do
798 * mean to unlink the file at that point. Those places have been
799 * modified to actually do that so that the NT crt can be used there.
801 code = nt_unlink(inodeListPath);
803 code = unlink(inodeListPath);
806 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
809 if (GetInodeSummary(inodeFile, singleVolumeNumber) < 0) {
813 inodeFd = fileno(inodeFile);
815 Abort("Temporary file %s is missing...\n", inodeListPath);
816 afs_lseek(inodeFd, 0L, SEEK_SET);
817 if (ListInodeOption) {
821 /* enumerate volumes in the partition.
822 * figure out sets of read-only + rw volumes.
823 * salvage each set, read-only volumes first, then read-write.
824 * Fix up inodes on last volume in set (whether it is read-write
827 GetVolumeSummary(singleVolumeNumber);
829 for (i = j = 0, vsp = volumeSummaryp, esp = vsp + nVolumes;
830 i < nVolumesInInodeFile; i = j) {
831 VolumeId rwvid = inodeSummary[i].RWvolumeId;
833 j < nVolumesInInodeFile && inodeSummary[j].RWvolumeId == rwvid;
835 VolumeId vid = inodeSummary[j].volumeId;
836 struct VolumeSummary *tsp;
837 /* Scan volume list (from partition root directory) looking for the
838 * current rw volume number in the volume list from the inode scan.
839 * If there is one here that is not in the inode volume list,
841 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
843 DeleteExtraVolumeHeaderFile(vsp);
845 /* Now match up the volume summary info from the root directory with the
846 * entry in the volume list obtained from scanning inodes */
847 inodeSummary[j].volSummary = NULL;
848 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
849 if (tsp->header.id == vid) {
850 inodeSummary[j].volSummary = tsp;
856 /* Salvage the group of volumes (several read-only + 1 read/write)
857 * starting with the current read-only volume we're looking at.
859 SalvageVolumeGroup(&inodeSummary[i], j - i);
862 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
863 for (; vsp < esp; vsp++) {
865 DeleteExtraVolumeHeaderFile(vsp);
868 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
869 RemoveTheForce(fileSysPath);
871 if (!Testing && singleVolumeNumber) {
872 AskOnline(singleVolumeNumber, fileSysPartition->name);
874 /* Step through the volumeSummary list and set all volumes on-line.
875 * The volumes were taken off-line in GetVolumeSummary.
877 for (j = 0; j < nVolumes; j++) {
878 AskOnline(volumeSummaryp[j].header.id, fileSysPartition->name);
882 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
883 fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
886 fclose(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
/*
 * DeleteExtraVolumeHeaderFile: dispose of a volume header that has no data
 * behind it.
 *
 * vsp - summary of the volume; supplies fileName, header.id and
 *       header.parent.
 *
 * The header path is built as "<fileSysPath>/<fileName>".  The log message
 * says the header "is not associated with any actual data", with "would
 * have been deleted" wording when Testing is set.  The header is destroyed
 * with VDestroyVolumeDiskHeader(), and a failure code is logged.  The path
 * is then unlink()ed as well, and ENOENT from that unlink is not treated
 * as an error.
 *
 * NOTE(review): sprintf() writes into path with no length check; confirm
 * path is large enough for fileSysPath plus fileName.
 */
890 DeleteExtraVolumeHeaderFile(register struct VolumeSummary *vsp)
893 sprintf(path, "%s/%s", fileSysPath, vsp->fileName);
896 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
899 code = VDestroyVolumeDiskHeader(fileSysPartition, vsp->header.id, vsp->header.parent);
901 Log("Error %ld destroying volume disk header for volume %lu\n",
902 afs_printable_int32_ld(code),
903 afs_printable_uint32_lu(vsp->header.id));
906 /* make sure we actually delete the fileName file; ENOENT
907 * is fine, since VDestroyVolumeDiskHeader probably already
909 if (unlink(path) && errno != ENOENT) {
910 Log("Unable to unlink %s (errno = %d)\n", path, errno);
/*
 * CompareInodes: qsort() comparison routine for struct ViceInodeInfo
 * records.  GetInodeSummary() passes it to qsort().
 *
 * _p1, _p2 - pointers to the two records being compared.
 *
 * Ordering visible below:
 *  - If either record is a special inode (vnodeNumber == INODESPECIAL), a
 *    read-write id is computed for each side: parentId for a special inode,
 *    volumeId otherwise.
 *      - Both special, same volumeId: ordered by u.special.type.
 *      - Both special, different volumeId: tests of volumeId against each
 *        side's read-write id come first, then ascending volumeId.
 *      - Only one special: the result depends on whether the special
 *        inode's volumeId equals its own read-write id.
 *  - Ordinary inodes sort by ascending volumeId, then ascending
 *    vnodeNumber.
 *  - The remaining tests are "reversed" so that the most desirable of
 *    several similar inodes comes first.  They compare vnodeUniquifier,
 *    then inodeDataVersion.  Each comparison has a wrap-around exception
 *    when one value is above 90% and the other below 10% of a range: 4.2M
 *    or 16M for uniquifiers, 2.1M or 16M for data versions.  One of the
 *    16M variants is under AFS_SGI_EXMAG.
 *
 * NOTE(review): two special inodes with equal volumeId and equal type get a
 * result of 1, not 0, from the "? -1 : 1" expression; confirm equal pairs
 * cannot occur.
 */
917 CompareInodes(const void *_p1, const void *_p2)
919 register const struct ViceInodeInfo *p1 = _p1;
920 register const struct ViceInodeInfo *p2 = _p2;
921 if (p1->u.vnode.vnodeNumber == INODESPECIAL
922 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
923 VolumeId p1rwid, p2rwid;
925 (p1->u.vnode.vnodeNumber ==
926 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
928 (p2->u.vnode.vnodeNumber ==
929 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
934 if (p1->u.vnode.vnodeNumber == INODESPECIAL
935 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
936 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
937 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
938 if (p1->u.vnode.volumeId == p1rwid)
940 if (p2->u.vnode.volumeId == p2rwid)
942 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
944 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
945 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
946 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
948 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
950 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
952 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
954 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
956 /* The following tests are reversed, so that the most desirable
957 * of several similar inodes comes first */
958 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
960 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
961 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
965 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
966 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
971 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
973 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
974 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
978 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
979 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
984 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
986 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
987 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
991 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
992 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
997 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
999 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1000 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1003 #ifdef AFS_SGI_EXMAG
1004 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1005 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/*
 * CountVolumeInodes: summarise the leading run of inode records that belong
 * to one volume.
 *
 * ip        - first record of the run.
 * maxInodes - number of records available starting at ip.
 * summary   - receives volumeId, RWvolumeId, nInodes, nSpecialInodes and
 *             maxUniquifier.
 *
 * The loop runs while records remain and ip's volumeId still equals that of
 * the first record.  For a special inode (vnodeNumber == INODESPECIAL) the
 * read-write volume id is taken from u.special.parentId.  The existing
 * comment notes that differing parent ids among the special inodes are not
 * detected.  maxunique tracks the largest vnodeUniquifier compared against
 * it.  rwvolume defaults to the volume's own id, so a run with no special
 * inode reports RWvolumeId == volumeId.
 */
1014 CountVolumeInodes(register struct ViceInodeInfo *ip, int maxInodes,
1015 register struct InodeSummary *summary)
1017 VolumeId volume = ip->u.vnode.volumeId;
1018 VolumeId rwvolume = volume;
1019 register int n, nSpecial;
1020 register Unique maxunique;
1023 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1025 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1027 rwvolume = ip->u.special.parentId;
1028 /* This isn't quite right, as there could (in error) be different
1029 * parent inodes in different special vnodes */
1031 if (maxunique < ip->u.vnode.vnodeUniquifier)
1032 maxunique = ip->u.vnode.vnodeUniquifier;
1036 summary->volumeId = volume;
1037 summary->RWvolumeId = rwvolume;
1038 summary->nInodes = n;
1039 summary->nSpecialInodes = nSpecial;
1040 summary->maxUniquifier = maxunique;
/*
 * OnlyOneVolume: inode filter used when a single volume is salvaged.
 * GetInodeSummary() passes it to ListViceInodes() when singleVolumeNumber
 * is nonzero.
 *
 * inodeinfo          - inode record under consideration.
 * singleVolumeNumber - id of the volume being salvaged.
 * rock               - not referenced by this function.
 *
 * Returns nonzero when the record belongs to the volume.  A special inode
 * (vnodeNumber == INODESPECIAL) matches on u.special.parentId; any other
 * inode matches on u.vnode.volumeId.
 */
1044 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1046 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1047 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1048 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
/*
 * GetInodeSummary: collect, sort and summarise the inodes of the partition
 * being salvaged.
 *
 * inodeFile          - open stream that ListViceInodes() fills with struct
 *                      ViceInodeInfo records.
 * singleVolumeNumber - nonzero to restrict collection to one volume (via
 *                      the OnlyOneVolume filter), 0 for the whole
 *                      partition.
 *
 * The caller in this file treats a negative result as failure.
 *
 * Sequence visible below:
 *  1. ListViceInodes() writes the records.  A failure is either logged as
 *     an I/O error on the temporary file or ends in Abort().  A forceSal
 *     request from that call is logged when ForceSalvage was not already
 *     set.
 *  2. The stream is rewound.  inodeFd is set from fileno() and the file is
 *     fstat()ed to obtain its size.
 *  3. A summary file is created in tmpdir, or in the partition path when
 *     tmpdir is unset.  One variant names it with _tempnam(); the other
 *     uses "<tdir>/salvage.temp.<pid>".  It is opened in "a+" mode and
 *     unlinked at once.
 *  4. In this process, or in a Fork()ed child when canfork is set and
 *     debug is clear, the inode count is st_size divided by the record
 *     size.  The "no inodes" path closes the summary file, removes the
 *     FORCESALVAGE marker for whole-partition runs, reviews the volume
 *     headers and logs "No vice inodes".  Otherwise the table is
 *     malloc()ed, read in, sorted with CompareInodes and written back from
 *     offset 0.  CountVolumeInodes() is then applied to successive runs,
 *     and each InodeSummary is appended to the summary file.  summary.index,
 *     nInodes and ip advance by nInodes each time.  The file is flushed
 *     and fsync()ed.
 *  5. The parent calls Wait("Inode summary"), and a -1 result ends in
 *     Exit(1).  The summary file is then fstat()ed.  If it is non-empty it
 *     is read whole into a malloc()ed inodeSummary array.
 *     nVolumesInInodeFile is the size divided by sizeof(struct
 *     InodeSummary).
 *
 * NOTE(review): the last Log() call uses "%d" for an argument cast to
 * unsigned long; the conversion should be "%lu".
 * NOTE(review): st_size is narrowed to unsigned long before it is used for
 * malloc(), read() and write(); confirm very large inode files cannot
 * occur on platforms with a 32-bit long.
 * NOTE(review): summaryFileName is 50 bytes and the _tempnam() variant
 * fills it with an unchecked strcpy().
 * NOTE(review): fstat() and lseek() are called inside assert(); if assert()
 * is ever compiled out, those calls are not made.
 */
1053 * Collect list of inodes in file named by path. If a truly fatal error,
1054 * unlink the file and abort. For lessor errors, return -1. The file will
1055 * be unlinked by the caller.
1058 GetInodeSummary(FILE *inodeFile, VolumeId singleVolumeNumber)
1060 struct afs_stat status;
1063 struct ViceInodeInfo *ip;
1064 struct InodeSummary summary;
1065 char summaryFileName[50];
1068 char *dev = fileSysPath;
1069 char *wpath = fileSysPath;
1071 char *dev = fileSysDeviceName;
1072 char *wpath = filesysfulldev;
1074 char *part = fileSysPath;
1077 /* This file used to come from vfsck; cobble it up ourselves now... */
1079 ListViceInodes(dev, fileSysPath, inodeFile,
1080 singleVolumeNumber ? OnlyOneVolume : 0,
1081 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1083 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1086 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1088 if (forceSal && !ForceSalvage) {
1089 Log("***Forced salvage of all volumes on this partition***\n");
1092 fseek(inodeFile, 0L, SEEK_SET);
1093 inodeFd = fileno(inodeFile);
1094 if (inodeFd == -1 || afs_fstat(inodeFd, &status) == -1) {
1095 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1097 tdir = (tmpdir ? tmpdir : part);
1099 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1100 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp"));
1102 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1103 "%s/salvage.temp.%d", tdir, getpid());
1105 summaryFile = afs_fopen(summaryFileName, "a+");
1106 if (summaryFile == NULL) {
1107 Abort("Unable to create inode summary file\n");
1111 /* Using nt_unlink here since we're really using the delete on close
1112 * semantics of unlink. In most places in the salvager, we really do
1113 * mean to unlink the file at that point. Those places have been
1114 * modified to actually do that so that the NT crt can be used there.
1116 code = nt_unlink(summaryFileName);
1118 code = unlink(summaryFileName);
1121 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1124 if (!canfork || debug || Fork() == 0) {
1126 unsigned long st_size=(unsigned long) status.st_size;
1127 nInodes = st_size / sizeof(struct ViceInodeInfo);
1129 fclose(summaryFile);
1130 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1131 RemoveTheForce(fileSysPath);
1133 struct VolumeSummary *vsp;
1136 GetVolumeSummary(singleVolumeNumber);
1138 for (i = 0, vsp = volumeSummaryp; i < nVolumes; i++) {
1140 DeleteExtraVolumeHeaderFile(vsp);
1143 Log("%s vice inodes on %s; not salvaged\n",
1144 singleVolumeNumber ? "No applicable" : "No", dev);
1147 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1149 fclose(summaryFile);
1151 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1154 if (read(inodeFd, ip, st_size) != st_size) {
1155 fclose(summaryFile);
1156 Abort("Unable to read inode table; %s not salvaged\n", dev);
1158 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1159 if (afs_lseek(inodeFd, 0, SEEK_SET) == -1
1160 || write(inodeFd, ip, st_size) != st_size) {
1161 fclose(summaryFile);
1162 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1166 CountVolumeInodes(ip, nInodes, &summary);
1167 if (fwrite(&summary, sizeof(summary), 1, summaryFile) != 1) {
1168 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1169 fclose(summaryFile);
1172 summary.index += (summary.nInodes);
1173 nInodes -= summary.nInodes;
1174 ip += summary.nInodes;
1176 /* Following fflush is not fclose, because if it was debug mode would not work */
1177 if (fflush(summaryFile) == EOF || fsync(fileno(summaryFile)) == -1) {
1178 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1179 fclose(summaryFile);
1182 if (canfork && !debug) {
1187 if (Wait("Inode summary") == -1) {
1188 fclose(summaryFile);
1189 Exit(1); /* salvage of this partition aborted */
1192 assert(afs_fstat(fileno(summaryFile), &status) != -1);
1193 if (status.st_size != 0) {
1195 unsigned long st_status=(unsigned long)status.st_size;
1196 inodeSummary = (struct InodeSummary *)malloc(st_status);
1197 assert(inodeSummary != NULL);
1198 /* For GNU we need to do lseek to get the file pointer moved. */
1199 assert(afs_lseek(fileno(summaryFile), 0, SEEK_SET) == 0);
1200 ret = read(fileno(summaryFile), inodeSummary, st_status);
1201 assert(ret == st_status);
1203 nVolumesInInodeFile =(unsigned long)(status.st_size) / sizeof(struct InodeSummary);
1204 Log("%d nVolumesInInodeFile %d \n",nVolumesInInodeFile,(unsigned long)(status.st_size));
1205 fclose(summaryFile);
/*
 * CompareVolumes: comparison routine over struct VolumeSummary records.
 *
 * _p1, _p2 - pointers to the two records being compared.
 *
 * Records are ordered by ascending header.parent.  Within one parent, a
 * record whose header.id equals its header.parent is the read-write volume
 * and is tested for first on each side; the existing comment says the
 * read-write volume sorts immediately before its read-only clones.  Two
 * read-only volumes of the same parent are ordered by ascending header.id.
 *
 * NOTE(review): the final expression gives 1, not 0, for two read-only
 * records with identical ids; confirm duplicates cannot occur.
 */
1209 /* Comparison routine for volume sort.
1210 This is setup so that a read-write volume comes immediately before
1211 any read-only clones of that volume */
1213 CompareVolumes(const void *_p1, const void *_p2)
1215 register const struct VolumeSummary *p1 = _p1;
1216 register const struct VolumeSummary *p2 = _p2;
1217 if (p1->header.parent != p2->header.parent)
1218 return p1->header.parent < p2->header.parent ? -1 : 1;
1219 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1221 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1223 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
/* AskVolumeSummary: ask the fileserver which volumes belong to the volume
 * group of singleVolumeNumber and build volumeSummaryp from the answer.
 *
 * Flow of the statements below (FSSYNC_BUILD_CLIENT builds only, and only
 * when programType == salvageServer and singleVolumeNumber is nonzero):
 *   1. Issue FSYNC_VGCQuery for the volume on fileSysPartition.
 *   2. While the reply is SYNC_FAILED with reason FSYNC_PART_SCANNING, sleep
 *      and query again.  The sleep starts at one second, grows by one second
 *      per attempt and is capped at ten seconds.
 *   3. A SYNC_FAILED reply with reason FSYNC_UNKNOWN_VOLID is turned into an
 *      empty response whose rw field is singleVolumeNumber.
 *   4. If the reported rw id differs from singleVolumeNumber, the request was
 *      for a clone: the group salvage is scheduled through
 *      SALVSYNC_LinkVolume (SALVSYNC_BUILD_CLIENT builds) and the process
 *      leaves through Exit(SALSRV_EXIT_VOLGROUP_LINK).
 *   5. volumeSummaryp is allocated with room for VOL_VG_MAX_VOLS entries.
 *      For each nonzero q_res.children[] entry, every member other than
 *      singleVolumeNumber is passed to AskOffline, its disk header is read
 *      with VReadVolumeDiskHeader and converted with DiskToVolumeHeader, and
 *      its external name is stored in fileName.
 *   6. The collected summaries are sorted with qsort.
 * When the information cannot be obtained the fallback message about
 * scanning the entire partition is logged.
 * NOTE(review): the branch structure around the "fileserver refused"
 * message is not established by the statements below -- confirm which
 * failures reach it. */
1227 * Gleans volumeSummary information by asking the fileserver
1229 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1230 * salvaging a whole partition
1232 * @return whether we obtained the volume summary information or not
1233 * @retval 0 success; we obtained the volume summary information
1234 * @retval nonzero we did not get the volume summary information; either the
1235 * fileserver responded with an error, or we are not supposed to
1236 * ask the fileserver for the information (e.g. we are salvaging
1237 * the entire partition or we are not the salvageserver)
1239 * @note for non-DAFS, always returns 1
1242 AskVolumeSummary(VolumeId singleVolumeNumber)
1245 #ifdef FSSYNC_BUILD_CLIENT
1246 if (programType == salvageServer) {
1247 if (singleVolumeNumber) {
1248 FSSYNC_VGQry_response_t q_res;
1250 struct VolumeSummary *vsp;
1252 struct VolumeDiskHeader diskHdr;
1254 memset(&res, 0, sizeof(res));
1256 code = FSYNC_VGCQuery(fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1259 * We must wait for the partition to finish scanning before
1260 * can continue, since we will not know if we got the entire
1261 * VG membership unless the partition is fully scanned.
1262 * We could, in theory, just scan the partition ourselves if
1263 * the VG cache is not ready, but we would be doing the exact
1264 * same scan the fileserver is doing; it will almost always
1265 * be faster to wait for the fileserver. The only exceptions
1266 * are if the partition does not take very long to scan, and
1267 * in that case it's fast either way, so who cares?
1269 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1270 Log("waiting for fileserver to finish scanning partition %s...\n",
1271 fileSysPartition->name);
1273 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1274 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1275 * just so small partitions don't need to wait over 10
1276 * seconds every time, and large partitions are generally
1277 * polled only once every ten seconds. */
1278 sleep((i > 10) ? (i = 10) : i);
1280 code = FSYNC_VGCQuery(fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1284 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1285 /* This can happen if there's no header for the volume
1286 * we're salvaging, or no headers exist for the VG (if
1287 * we're salvaging an RW). Act as if we got a response
1288 * with no VG members. The headers may be created during
1289 * salvaging, if there are inodes in this VG. */
1291 memset(&q_res, 0, sizeof(q_res));
1292 q_res.rw = singleVolumeNumber;
1296 Log("fileserver refused VGCQuery request for volume %lu on "
1297 "partition %s, code %ld reason %ld\n",
1298 afs_printable_uint32_lu(singleVolumeNumber),
1299 fileSysPartition->name,
1300 afs_printable_int32_ld(code),
1301 afs_printable_int32_ld(res.hdr.reason));
1305 if (q_res.rw != singleVolumeNumber) {
1306 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1307 afs_printable_uint32_lu(singleVolumeNumber),
1308 afs_printable_uint32_lu(q_res.rw));
1309 #ifdef SALVSYNC_BUILD_CLIENT
1310 if (SALVSYNC_LinkVolume(q_res.rw,
1312 fileSysPartition->name,
1314 Log("schedule request failed\n");
1316 #endif /* SALVSYNC_BUILD_CLIENT */
1317 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1320 volumeSummaryp = malloc(VOL_VG_MAX_VOLS * sizeof(struct VolumeSummary));
1321 assert(volumeSummaryp != NULL);
1324 vsp = volumeSummaryp;
1326 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1327 char name[VMAXPATHLEN];
1329 if (!q_res.children[i]) {
1333 if (q_res.children[i] != singleVolumeNumber) {
1334 AskOffline(q_res.children[i], fileSysPartition->name);
1336 code = VReadVolumeDiskHeader(q_res.children[i], fileSysPartition, &diskHdr);
1338 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1339 afs_printable_uint32_lu(q_res.children[i]));
1344 DiskToVolumeHeader(&vsp->header, &diskHdr);
1345 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1346 vsp->fileName = ToString(name);
1351 qsort(volumeSummaryp, nVolumes, sizeof(struct VolumeSummary),
1356 Log("Cannot get volume summary from fileserver; falling back to scanning "
1357 "entire partition\n");
1360 #endif /* FSSYNC_BUILD_CLIENT */
/* GetVolumeSummary: fill volumeSummaryp with one VolumeSummary per volume
 * header of interest and sort the result.
 *
 * singleVolumeNumber is the volume being salvaged, or 0 when the whole
 * partition is salvaged.  AskVolumeSummary() is tried first; the header
 * directory fileSysPath is scanned only when that call does not return 0.
 *
 * During the scan every directory entry whose name ends in VHDREXT is read
 * as a VolumeDiskHeader.  Entries that cannot be read or that carry the
 * wrong magic are reported as illegitimate header files.  Valid entries are
 * converted with DiskToVolumeHeader; an entry is kept when the whole
 * partition is being salvaged, or when its id or parent matches
 * singleVolumeNumber.  Kept entries whose file name differs from the name
 * VFORMAT produces for their id are reported as incorrectly named;
 * otherwise the file name is recorded in fileName. */
1365 GetVolumeSummary(VolumeId singleVolumeNumber)
1368 afs_int32 nvols = 0;
1369 struct VolumeSummary *vsp, vs;
1370 struct VolumeDiskHeader diskHeader;
1373 if (AskVolumeSummary(singleVolumeNumber) == 0) {
1374 /* we successfully got the vol information from the fileserver; no
1375 * need to scan the partition */
1379 /* Get headers from volume directory */
1380 dirp = opendir(fileSysPath);
1382 Abort("Can't read directory %s; not salvaged\n", fileSysPath);
/* Whole-partition salvage: a first pass over the directory inspects every
 * entry that looks like a valid volume header.
 * NOTE(review): presumably this pass counts headers so the summary array
 * below can be sized (nvols) -- confirm. */
1383 if (!singleVolumeNumber) {
1384 while ((dp = readdir(dirp))) {
1385 char *p = dp->d_name;
1386 p = strrchr(dp->d_name, '.');
1387 if (p != NULL && strcmp(p, VHDREXT) == 0) {
/* NOTE(review): sprintf() is unbounded; the destination must be large
 * enough for fileSysPath plus the longest directory entry name. */
1390 sprintf(name, "%s/%s", fileSysPath, dp->d_name);
1391 if ((fd = afs_open(name, O_RDONLY)) != -1
1392 && read(fd, (char *)&diskHeader, sizeof(diskHeader))
1393 == sizeof(diskHeader)
1394 && diskHeader.stamp.magic == VOLUMEHEADERMAGIC) {
1395 DiskToVolumeHeader(&vs.header, &diskHeader);
/* NOTE(review): the directory is reopened as "." although it was first
 * opened as fileSysPath; presumably the current directory is the partition
 * directory at this point -- confirm. */
1403 dirp = opendir("."); /* No rewinddir for NT */
/* Single-volume salvage: size the array by the volume group limit. */
1410 nvols = VOL_VG_MAX_VOLS;
1413 volumeSummaryp = malloc(nvols * sizeof(struct VolumeSummary));
1414 assert(volumeSummaryp != NULL);
1417 vsp = volumeSummaryp;
/* Main pass: examine each header file and fill in the summaries. */
1418 while ((dp = readdir(dirp))) {
1419 char *p = dp->d_name;
1420 p = strrchr(dp->d_name, '.');
1421 if (p != NULL && strcmp(p, VHDREXT) == 0) {
/* NOTE(review): same unbounded sprintf() as in the first pass. */
1425 sprintf(name, "%s/%s", fileSysPath, dp->d_name);
1426 if ((fd = afs_open(name, O_RDONLY)) == -1
1427 || read(fd, &diskHeader, sizeof(diskHeader))
1428 != sizeof(diskHeader)
1429 || diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
1434 if (!singleVolumeNumber) {
1436 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1439 Log("Unable to unlink %s (errno = %d)\n", name, errno);
1444 char nameShouldBe[64];
1445 DiskToVolumeHeader(&vsp->header, &diskHeader);
/* The requested volume exists but is not its own parent, i.e. it is a
 * clone: the salvageserver reschedules the whole group and exits, other
 * program types just report that it is read-only. */
1446 if (singleVolumeNumber && vsp->header.id == singleVolumeNumber
1447 && vsp->header.parent != singleVolumeNumber) {
1448 if (programType == salvageServer) {
1449 #ifdef SALVSYNC_BUILD_CLIENT
1450 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1451 vsp->header.id, vsp->header.parent);
1452 if (SALVSYNC_LinkVolume(vsp->header.parent,
1454 fileSysPartition->name,
1456 Log("schedule request failed\n");
1459 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1461 Log("%u is a read-only volume; not salvaged\n",
1462 singleVolumeNumber);
/* Keep the entry when salvaging everything, or when it is the requested
 * volume or one of its clones. */
1466 if (!singleVolumeNumber
1467 || (vsp->header.id == singleVolumeNumber
1468 || vsp->header.parent == singleVolumeNumber)) {
1469 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1470 VFORMAT, afs_printable_uint32_lu(vsp->header.id));
/* Other members of the requested group are passed to AskOffline. */
1471 if (singleVolumeNumber
1472 && vsp->header.id != singleVolumeNumber)
1473 AskOffline(vsp->header.id, fileSysPartition->name);
1474 if (strcmp(nameShouldBe, dp->d_name)) {
1476 Log("Volume header file %s is incorrectly named; %sdeleted (it will be recreated later, if necessary)\n", name, (Testing ? "it would have been " : ""));
1479 Log("Unable to unlink %s (errno = %d)\n", name, errno);
1483 vsp->fileName = ToString(dp->d_name);
1493 qsort(volumeSummaryp, nVolumes, sizeof(struct VolumeSummary),
/* FindLinkHandle: search the special inodes of a volume group for its
 * link table inode.
 *
 *   isp       - array of nVols inode summaries describing the group
 *   nVols     - number of entries in isp
 *   allInodes - base pointer to which each isp[i].index is applied
 *
 * For every summary, the first nSpecialInodes records starting at
 * allInodes + isp[i].index are examined; the inodeNumber of the first
 * record whose u.special.type is VI_LINKTABLE is returned.
 * NOTE(review): the value returned when nothing matches is not established
 * by the statements below; DoSalvageVolumeGroup tests the result with
 * VALID_INO(), so presumably it is an invalid inode -- confirm. */
1497 /* Find the link table. This should be associated with the RW volume or, if
1498 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
1501 FindLinkHandle(register struct InodeSummary *isp, int nVols,
1502 struct ViceInodeInfo *allInodes)
1505 struct ViceInodeInfo *ip;
1507 for (i = 0; i < nVols; i++) {
1508 ip = allInodes + isp[i].index;
1509 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1510 if (ip[j].u.special.type == VI_LINKTABLE)
1511 return ip[j].inodeNumber;
/* CreateLinkTable: create (when needed) and initialise the link table of
 * the volume group described by isp.
 *
 *   isp - inode summary of the group; isp->RWvolumeId identifies it
 *   ino - existing link table inode, or a value failing VALID_INO() to have
 *         a new special inode of type VI_LINKTABLE requested via IH_CREATE
 *
 * The shared handle VGLinkH is initialised for the inode and opened, the
 * file is truncated to sizeof(struct versionStamp) + sizeof(short), and a
 * version stamp carrying LINKTABLEMAGIC and LINKTABLEVERSION is written.
 * Failure to open, truncate or write goes through Abort().  Finally, when
 * the summary has a volume header (isp->volSummary is set), the header's
 * linkTable field is updated to the inode as well. */
1518 CreateLinkTable(register struct InodeSummary *isp, Inode ino)
1520 struct versionStamp version;
1523 if (!VALID_INO(ino))
1525 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
1526 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1527 if (!VALID_INO(ino))
1529 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1530 isp->RWvolumeId, errno);
1531 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1532 fdP = IH_OPEN(VGLinkH);
1534 Abort("Can't open link table for volume %u (error = %d)\n",
1535 isp->RWvolumeId, errno);
1537 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1538 Abort("Can't truncate link table for volume %u (error = %d)\n",
1539 isp->RWvolumeId, errno);
1541 version.magic = LINKTABLEMAGIC;
1542 version.version = LINKTABLEVERSION;
/* NOTE(review): a failed write below is reported with the same wording as
 * the truncate failure above, so the log cannot tell the two apart. */
1544 if (FDH_WRITE(fdP, (char *)&version, sizeof(version))
1546 Abort("Can't truncate link table for volume %u (error = %d)\n",
1547 isp->RWvolumeId, errno);
1549 FDH_REALLYCLOSE(fdP);
1551 /* If the volume summary exits (i.e., the V*.vol header file exists),
1552 * then set this inode there as well.
1554 if (isp->volSummary)
1555 isp->volSummary->header.linkTable = ino;
/* Thread entry body: reinterprets arg as an SVGParms_t and forwards its
 * inode summary pointer and volume count to DoSalvageVolumeGroup().
 * NOTE(review): presumably this is nt_SVG, the routine SalvageVolumeGroup
 * passes to pthread_create() -- confirm against the function header. */
1564 SVGParms_t *parms = (SVGParms_t *) arg;
1565 DoSalvageVolumeGroup(parms->svgp_inodeSummaryp, parms->svgp_count);
/* SalvageVolumeGroup: salvage the volume group described by the nVols
 * summaries starting at isp.
 *
 * The per-volume global VolInfo is cleared first.  In the path shown, the
 * arguments are packed into an SVGParms_t and handed to a joinable pthread
 * running nt_SVG, and the caller blocks in pthread_join() until that thread
 * finishes.  A failure of pthread_attr_init, pthread_attr_setdetachstate or
 * pthread_create is logged together with the group's RWvolumeId.
 * NOTE(review): the closing AFS_NT40_ENV conditional suggests the threaded
 * path is specific to NT builds -- confirm against the full conditional. */
1570 SalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1573 pthread_attr_t tattr;
1577 /* Initialize per volume global variables, even if later code does so */
1581 memset(&VolInfo, 0, sizeof(VolInfo));
1583 parms.svgp_inodeSummaryp = isp;
1584 parms.svgp_count = nVols;
1585 code = pthread_attr_init(&tattr);
1587 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1591 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1593 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1596 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1598 Log("Failed to create thread to salvage volume group %u\n",
1602 (void)pthread_join(tid, NULL);
1604 #endif /* AFS_NT40_ENV */
/* DoSalvageVolumeGroup: salvage every volume of one volume group.
 *
 *   isp   - inode summaries of the group (isp[0] first)
 *   nVols - number of summaries
 *
 * Outline of the logic below:
 *   - haveRWvolume is set when the first summary is its own RW volume
 *     (volumeId == RWvolumeId) and owns at least one special inode.
 *   - Unless ForceSalvage is set, QuickCheck() is consulted first, subject
 *     to the ShowMounts conditions.
 *   - When canfork is set and debug is not, Fork() is called and the parent
 *     waits for the child.
 *   - The group's ViceInodeInfo records (totalInodes of them) are read from
 *     inodeFd into a newly allocated array; allInodes is that array biased
 *     by isp->index so that partition-wide indices can be applied to it.
 *   - AFS_NAMEI_ENV builds locate the link table with FindLinkHandle().
 *     When it is missing or cannot be opened it is recreated through
 *     CreateLinkTable(), and a link count of 1 is then set for every
 *     non-special inode of the group.
 *   - Volumes are processed from the last summary down to salvageTo (0 when
 *     a RW volume is present, otherwise 1).  Each volume gets a check pass
 *     (check == 1) followed by a real pass (check == 0) through
 *     SalvageVolumeHeaderFile() and SalvageVnodes(); failures are handed to
 *     MaybeZapVolume().
 *   - Afterwards IH_DEC is applied to inodes whose linkCount is positive and
 *     IH_INC to those whose linkCount is negative; in AFS_NAMEI_ENV builds
 *     the link table inode itself is adjusted last.
 *   - SalvageVolume() then performs the directory checks on the RW volume
 *     and VGLinkH is released. */
1607 DoSalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1609 struct ViceInodeInfo *inodes, *allInodes, *ip;
1610 int i, totalInodes, size, salvageTo;
1614 int dec_VGLinkH = 0;
1616 FdHandle_t *fdP = NULL;
1619 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1620 && isp->nSpecialInodes > 0);
1621 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1622 if (!ForceSalvage && QuickCheck(isp, nVols))
1625 if (ShowMounts && !haveRWvolume)
1627 if (canfork && !debug && Fork() != 0) {
1628 (void)Wait("Salvage volume group");
/* Load this group's slice of the inode file into memory. */
1631 for (i = 0, totalInodes = 0; i < nVols; i++)
1632 totalInodes += isp[i].nInodes;
1633 size = totalInodes * sizeof(struct ViceInodeInfo);
1634 inodes = (struct ViceInodeInfo *)malloc(size);
1635 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1636 * for the partition, if all the inodes
1637 * had been read into memory */
1639 (inodeFd, isp->index * sizeof(struct ViceInodeInfo),
/* NOTE(review): the read() sits inside assert(), so it would not run at all
 * if assertions were compiled out (NDEBUG); the malloc() result above is
 * also used without a visible NULL check. */
1641 assert(read(inodeFd, inodes, size) == size);
1643 /* Don't try to salvage a read write volume if there isn't one on this
1645 salvageTo = haveRWvolume ? 0 : 1;
1647 #ifdef AFS_NAMEI_ENV
1648 ino = FindLinkHandle(isp, nVols, allInodes);
1649 if (VALID_INO(ino)) {
1650 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1651 fdP = IH_OPEN(VGLinkH);
1653 if (!VALID_INO(ino) || fdP == NULL) {
1654 Log("%s link table for volume %u.\n",
1655 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1657 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1660 struct ViceInodeInfo *ip;
1661 CreateLinkTable(isp, ino);
1662 fdP = IH_OPEN(VGLinkH);
1663 /* Sync fake 1 link counts to the link table, now that it exists */
1665 for (i = 0; i < nVols; i++) {
1666 ip = allInodes + isp[i].index;
1667 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1669 nt_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1671 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1679 FDH_REALLYCLOSE(fdP);
1681 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1684 /* Salvage in reverse order--read/write volume last; this way any
1685 * Inodes not referenced by the time we salvage the read/write volume
1686 * can be picked up by the read/write volume */
1687 /* ACTUALLY, that's not done right now--the inodes just vanish */
1688 for (i = nVols - 1; i >= salvageTo; i--) {
1690 struct InodeSummary *lisp = &isp[i];
1691 #ifdef AFS_NAMEI_ENV
1692 /* If only the RO is present on this partition, the link table
1693 * shows up as a RW volume special file. Need to make sure the
1694 * salvager doesn't try to salvage the non-existent RW.
1696 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1697 /* If this only special inode is the link table, continue */
1698 if (inodes->u.special.type == VI_LINKTABLE) {
1705 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1706 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1707 /* Check inodes twice. The second time do things seriously. This
1708 * way the whole RO volume can be deleted, below, if anything goes wrong */
1709 for (check = 1; check >= 0; check--) {
1711 if (SalvageVolumeHeaderFile(lisp, allInodes, rw, check, &deleteMe)
1713 MaybeZapVolume(lisp, "Volume header", deleteMe, check);
1714 if (rw && deleteMe) {
1715 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1716 * volume won't be called */
1722 if (rw && check == 1)
1724 if (SalvageVnodes(isp, lisp, allInodes, check) == -1) {
1725 MaybeZapVolume(lisp, "Vnode index", 0, check);
1731 /* Fix actual inode counts */
1733 Log("totalInodes %d\n",totalInodes);
1734 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1735 static int TraceBadLinkCounts = 0;
1736 #ifdef AFS_NAMEI_ENV
1737 if (VGLinkH->ih_ino == ip->inodeNumber) {
1738 dec_VGLinkH = ip->linkCount - VGLinkH_cnt;
1739 VGLinkH_p1 = ip->u.param[0];
1740 continue; /* Deal with this last. */
1743 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1744 TraceBadLinkCounts--; /* Limit reports, per volume */
1745 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(NULL, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
1747 while (ip->linkCount > 0) {
1748 /* below used to assert, not break */
1750 if (IH_DEC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1751 Log("idec failed. inode %s errno %d\n",
1752 PrintInode(NULL, ip->inodeNumber), errno);
1758 while (ip->linkCount < 0) {
1759 /* these used to be asserts */
1761 if (IH_INC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1762 Log("iinc failed. inode %s errno %d\n",
1763 PrintInode(NULL, ip->inodeNumber), errno);
1770 #ifdef AFS_NAMEI_ENV
1771 while (dec_VGLinkH > 0) {
1772 if (IH_DEC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1773 Log("idec failed on link table, errno = %d\n", errno);
1777 while (dec_VGLinkH < 0) {
1778 if (IH_INC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1779 Log("iinc failed on link table, errno = %d\n", errno);
1786 /* Directory consistency checks on the rw volume */
1788 SalvageVolume(isp, VGLinkH);
1789 IH_RELEASE(VGLinkH);
1791 if (canfork && !debug) {
/* QuickCheck: inspect the volume info headers of a volume group to see
 * whether each one is marked as not needing a salvage.
 *
 *   isp   - inode summaries of the group
 *   nVols - number of summaries
 *
 * For each summary the VolumeDiskData stored in the volumeInfo inode named
 * by its volume header is read and tested: the magic must be
 * VOLUMEINFOMAGIC, dontSalvage must equal DONT_SALVAGE, and needsSalvaged
 * and destroyMe must both be 0.  A header that passes but still has inUse
 * set gets inUse cleared and inService set to 1, and is written back with
 * IH_IWRITE.
 * NOTE(review): the return values are not established by the statements
 * below; DoSalvageVolumeGroup only calls this when ForceSalvage is off and
 * tests the result for nonzero -- confirm the exact contract. */
1798 QuickCheck(register struct InodeSummary *isp, int nVols)
1800 /* Check headers BEFORE forking */
1804 for (i = 0; i < nVols; i++) {
1805 struct VolumeSummary *vs = isp[i].volSummary;
1806 VolumeDiskData volHeader;
1808 /* Don't salvage just because phantom rw volume is there... */
1809 /* (If a read-only volume exists, read/write inodes must also exist) */
1810 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
/* Read the on-disk volume info through a handle on its special inode. */
1814 IH_INIT(h, fileSysDevice, vs->header.parent, vs->header.volumeInfo);
1815 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
1816 == sizeof(volHeader)
1817 && volHeader.stamp.magic == VOLUMEINFOMAGIC
1818 && volHeader.dontSalvage == DONT_SALVAGE
1819 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
/* Header is acceptable; clear a stale in-use mark and persist the change. */
1820 if (volHeader.inUse != 0) {
1821 volHeader.inUse = 0;
1822 volHeader.inService = 1;
1824 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
1825 != sizeof(volHeader)) {
/* SalvageVolumeHeaderFile -- reader's guide to the logic below.
 *
 *   isp      - inode summary of the volume whose header is being salvaged
 *   inodes   - inode array indexed with isp->index
 *   RW       - nonzero for the read-write volume
 *   check    - nonzero for the check-only pass
 *   deleteMe - passed through to SalvageHeader()
 *
 * 1. A skip array with one slot per special inode is allocated.  If that
 *    fails a message is logged and the salvage continues without duplicate
 *    special inode recovery.
 * 2. When a volume header exists (isp->volSummary) and the skip array is
 *    available, the header is copied into tempHeader and every special
 *    inode whose number matches the one the header references for its type
 *    is remembered in goodspecial[].
 * 3. tempHeader is reset and given the header magic, version, volume id and
 *    parent id.
 * 4. Adjacent special inodes of the same type are reported as duplicates.
 *    When the header referenced one of that type, that one is used;
 *    otherwise the salvage of the volume is reported as aborted.
 * 5. Each special inode is then examined: a type outside 1..MAXINODETYPE is
 *    reported as a rubbish header inode; for non-obsolete types, skipped
 *    inodes are reported as orphans (removed when orphans == ORPH_REMOVE,
 *    otherwise ignored), and the remaining ones are recorded in tempHeader
 *    through the stuff[] table.  Outside check mode, linkCount is
 *    decremented for everything except the link table so that the inode is
 *    kept.
 * 6. VGLinkH_cnt is incremented once per header; for a read-only volume
 *    outside check mode ClearROInUseBit() is applied to the summary.
 * 7. SalvageHeader() runs for every stuff[] entry; the link table entry is
 *    first pointed at VGLinkH's inode when that inode is valid.
 * 8. If no header file existed, a VolumeSummary is allocated and
 *    VCreateVolumeDiskHeader is selected; if one existed but differs from
 *    tempHeader, VWriteVolumeDiskHeader is selected.  The header is
 *    converted with VolumeHeaderToDisk and written through the selected
 *    function, and a write error is logged.
 * 9. The summary's volumeInfoHandle is initialised for the volumeInfo inode.
 *
 * NOTE(review): the strcpy() of fileName into the 64-byte headerName is
 * unbounded, and the VolumeSummary malloc() result is dereferenced without
 * a visible NULL check. */
1841 /* SalvageVolumeHeaderFile
1843 * Salvage the top level V*.vol header file. Make sure the special files
1844 * exist and that there are no duplicates.
1846 * Calls SalvageHeader for each possible type of volume special file.
1850 SalvageVolumeHeaderFile(register struct InodeSummary *isp,
1851 register struct ViceInodeInfo *inodes, int RW,
1852 int check, int *deleteMe)
1855 register struct ViceInodeInfo *ip;
1856 int allinodesobsolete = 1;
1857 struct VolumeDiskHeader diskHeader;
1858 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
1861 /* keeps track of special inodes that are probably 'good'; they are
1862 * referenced in the vol header, and are included in the given inodes
1867 } goodspecial[MAXINODETYPE];
1872 memset(goodspecial, 0, sizeof(goodspecial));
1874 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
1876 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
1878 Log("cannot allocate memory for inode skip array when salvaging "
1879 "volume %lu; not performing duplicate special inode recovery\n",
1880 afs_printable_uint32_lu(isp->volumeId));
1881 /* still try to perform the salvage; the skip array only does anything
1882 * if we detect duplicate special inodes */
1886 * First, look at the special inodes and see if any are referenced by
1887 * the existing volume header. If we find duplicate special inodes, we
1888 * can use this information to use the referenced inode (it's more
1889 * likely to be the 'good' one), and throw away the duplicates.
1891 if (isp->volSummary && skip) {
1892 /* use tempHeader, so we can use the stuff[] array to easily index
1893 * into the isp->volSummary special inodes */
1894 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
1896 for (i = 0; i < isp->nSpecialInodes; i++) {
1897 ip = &inodes[isp->index + i];
1898 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
1899 /* will get taken care of in a later loop */
1902 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
1903 goodspecial[ip->u.special.type-1].valid = 1;
1904 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
1909 memset(&tempHeader, 0, sizeof(tempHeader));
1910 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
1911 tempHeader.stamp.version = VOLUMEHEADERVERSION;
1912 tempHeader.id = isp->volumeId;
1913 tempHeader.parent = isp->RWvolumeId;
1915 /* Check for duplicates (inodes are sorted by type field) */
1916 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
1917 ip = &inodes[isp->index + i];
1918 if (ip->u.special.type == (ip + 1)->u.special.type) {
1919 afs_ino_str_t stmp1, stmp2;
1921 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
1922 /* Will be caught in the loop below */
1926 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
1927 ip->u.special.type, isp->volumeId,
1928 PrintInode(stmp1, ip->inodeNumber),
1929 PrintInode(stmp2, (ip+1)->inodeNumber));
1931 if (skip && goodspecial[ip->u.special.type-1].valid) {
1932 Inode gi = goodspecial[ip->u.special.type-1].inode;
1935 Log("using special inode referenced by vol header (%s)\n",
1936 PrintInode(stmp1, gi));
1939 /* the volume header references some special inode of
1940 * this type in the inodes array; are we it? */
1941 if (ip->inodeNumber != gi) {
1943 } else if ((ip+1)->inodeNumber != gi) {
1944 /* in case this is the last iteration; we need to
1945 * make sure we check ip+1, too */
1950 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
1958 for (i = 0; i < isp->nSpecialInodes; i++) {
1959 ip = &inodes[isp->index + i];
1960 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
1962 Log("Rubbish header inode %s of type %d\n",
1963 PrintInode(NULL, ip->inodeNumber),
1964 ip->u.special.type);
1970 Log("Rubbish header inode %s of type %d; deleted\n",
1971 PrintInode(NULL, ip->inodeNumber),
1972 ip->u.special.type);
1973 } else if (!stuff[ip->u.special.type - 1].obsolete) {
1974 if (skip && skip[i]) {
1975 if (orphans == ORPH_REMOVE) {
1976 Log("Removing orphan special inode %s of type %d\n",
1977 PrintInode(NULL, ip->inodeNumber), ip->u.special.type);
1980 Log("Ignoring orphan special inode %s of type %d\n",
1981 PrintInode(NULL, ip->inodeNumber), ip->u.special.type);
1982 /* fall through to the ip->linkCount--; line below */
1985 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
1986 allinodesobsolete = 0;
1988 if (!check && ip->u.special.type != VI_LINKTABLE)
1989 ip->linkCount--; /* Keep the inode around */
1997 if (allinodesobsolete) {
2004 VGLinkH_cnt++; /* one for every header. */
2006 if (!RW && !check && isp->volSummary) {
2007 ClearROInUseBit(isp->volSummary);
2011 for (i = 0; i < MAXINODETYPE; i++) {
2012 if (stuff[i].inodeType == VI_LINKTABLE) {
2013 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2014 * And we may have recreated the link table earlier, so set the
2015 * RW header as well.
2017 if (VALID_INO(VGLinkH->ih_ino)) {
2018 *stuff[i].inode = VGLinkH->ih_ino;
2022 if (SalvageHeader(&stuff[i], isp, check, deleteMe) == -1 && check)
2026 if (isp->volSummary == NULL) {
2028 char headerName[64];
2029 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2030 (void)afs_snprintf(path, sizeof path, "%s/%s", fileSysPath, headerName);
2032 Log("No header file for volume %u\n", isp->volumeId);
2036 Log("No header file for volume %u; %screating %s\n",
2037 isp->volumeId, (Testing ? "it would have been " : ""),
2039 isp->volSummary = (struct VolumeSummary *)
2040 malloc(sizeof(struct VolumeSummary));
2041 isp->volSummary->fileName = ToString(headerName);
2043 writefunc = VCreateVolumeDiskHeader;
2046 char headerName[64];
2047 /* hack: these two fields are obsolete... */
2048 isp->volSummary->header.volumeAcl = 0;
2049 isp->volSummary->header.volumeMountTable = 0;
2052 (&isp->volSummary->header, &tempHeader,
2053 sizeof(struct VolumeHeader))) {
2054 /* We often remove the name before calling us, so we make a fake one up */
2055 if (isp->volSummary->fileName) {
2056 strcpy(headerName, isp->volSummary->fileName);
2058 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2059 isp->volSummary->fileName = ToString(headerName);
2061 (void)afs_snprintf(path, sizeof path, "%s/%s", fileSysPath, headerName);
2063 Log("Header file %s is damaged or no longer valid%s\n", path,
2064 (check ? "" : "; repairing"));
2068 writefunc = VWriteVolumeDiskHeader;
2072 memcpy(&isp->volSummary->header, &tempHeader,
2073 sizeof(struct VolumeHeader));
2076 Log("It would have written a new header file for volume %u\n",
2080 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2081 code = (*writefunc)(&diskHeader, fileSysPartition);
2083 Log("Error %ld writing volume header file for volume %lu\n",
2084 afs_printable_int32_ld(code),
2085 afs_printable_uint32_lu(diskHeader.id));
2090 IH_INIT(isp->volSummary->volumeInfoHandle, fileSysDevice, isp->RWvolumeId,
2091 isp->volSummary->header.volumeInfo);
/* SalvageHeader: verify, and when necessary recreate, one special file of
 * a volume header.
 *
 *   sp       - descriptor of the special file: inodeType, description,
 *              expected stamp, size, and a pointer to the header field that
 *              holds its inode number
 *   isp      - inode summary of the volume
 *   check    - nonzero for the check-only pass
 *
 * Logic below:
 *   - Builds without AFS_NAMEI_ENV special-case the VI_LINKTABLE type.
 *   - If the header field holds inode 0 the file is reported missing and a
 *     new special inode is requested with IH_CREATE.
 *   - The inode is opened.  If that fails with an error BadError() accepts
 *     and OKToZap is set, the volume is reported as being destroyed;
 *     otherwise the failure goes through Abort().
 *   - sp->size bytes are read and the magic is compared with
 *     sp->stamp.magic; a mismatch is reported as a corrupted header part.
 *   - A VI_VOLINFO file whose destroyMe field equals DESTROY_ME is closed
 *     early.
 *   - When recreating outside Testing mode the file is truncated to 0.  For
 *     VI_VOLINFO a fresh VolumeDiskData is written (name "bogus.<id>", not
 *     in service, not blessed, uniquifier maxUniquifier + 1 + 1000, type
 *     chosen by comparing volumeId with RWvolumeId, creation date now); for
 *     other types only the version stamp is written.
 *   - For VI_VOLINFO the header is copied into the global VolInfo and the
 *     update or creation time is logged.
 * NOTE(review): the early-return values of the branches above are not
 * established by the statements below -- confirm before relying on them. */
2096 SalvageHeader(register struct stuff *sp, struct InodeSummary *isp, int check,
2100 VolumeDiskData volumeInfo;
2101 struct versionStamp fileHeader;
2110 #ifndef AFS_NAMEI_ENV
2111 if (sp->inodeType == VI_LINKTABLE)
2114 if (*(sp->inode) == 0) {
2116 Log("Missing inode in volume header (%s)\n", sp->description);
2120 Log("Missing inode in volume header (%s); %s\n", sp->description,
2121 (Testing ? "it would have recreated it" : "recreating"));
2124 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
2125 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2126 if (!VALID_INO(*(sp->inode)))
2128 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2129 sp->description, errno);
2134 IH_INIT(specH, fileSysDevice, isp->RWvolumeId, *(sp->inode));
2135 fdP = IH_OPEN(specH);
2136 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2137 /* bail out early and destroy the volume */
2139 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2146 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2147 sp->description, errno);
2150 && (FDH_READ(fdP, (char *)&header, sp->size) != sp->size
2151 || header.fileHeader.magic != sp->stamp.magic)) {
2153 Log("Part of the header (%s) is corrupted\n", sp->description);
2154 FDH_REALLYCLOSE(fdP);
2158 Log("Part of the header (%s) is corrupted; recreating\n",
2162 if (sp->inodeType == VI_VOLINFO
2163 && header.volumeInfo.destroyMe == DESTROY_ME) {
2166 FDH_REALLYCLOSE(fdP);
2170 if (recreate && !Testing) {
2173 ("Internal error: recreating volume header (%s) in check mode\n",
2175 code = FDH_TRUNC(fdP, 0);
2177 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2178 sp->description, errno);
2180 /* The following code should be moved into vutil.c */
2181 if (sp->inodeType == VI_VOLINFO) {
2183 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2184 header.volumeInfo.stamp = sp->stamp;
2185 header.volumeInfo.id = isp->volumeId;
2186 header.volumeInfo.parentId = isp->RWvolumeId;
2187 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2188 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2189 isp->volumeId, isp->volumeId);
2190 header.volumeInfo.inService = 0;
2191 header.volumeInfo.blessed = 0;
2192 /* The + 1000 is a hack in case there are any files out in venus caches */
2193 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2194 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2195 header.volumeInfo.needsCallback = 0;
2196 gettimeofday(&tp, 0);
2197 header.volumeInfo.creationDate = tp.tv_sec;
2198 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
2200 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
2201 sp->description, errno);
2204 FDH_WRITE(fdP, (char *)&header.volumeInfo,
2205 sizeof(header.volumeInfo));
2206 if (code != sizeof(header.volumeInfo)) {
2209 ("Unable to write volume header file (%s) (errno = %d)\n",
2210 sp->description, errno);
2211 Abort("Unable to write entire volume header file (%s)\n",
2215 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
2217 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
2218 sp->description, errno);
2220 code = FDH_WRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp));
2221 if (code != sizeof(sp->stamp)) {
2224 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2225 sp->description, errno);
2227 ("Unable to write entire version stamp in volume header file (%s)\n",
2232 FDH_REALLYCLOSE(fdP);
2234 if (sp->inodeType == VI_VOLINFO) {
2235 VolInfo = header.volumeInfo;
2238 if (VolInfo.updateDate) {
2239 strcpy(update, TimeStamp(VolInfo.updateDate, 0));
2241 Log("%s (%u) %supdated %s\n", VolInfo.name, VolInfo.id,
2242 (Testing ? "it would have been " : ""), update);
2244 strcpy(update, TimeStamp(VolInfo.creationDate, 0));
2246 Log("%s (%u) not updated (created %s)\n", VolInfo.name,
2247 VolInfo.id, update);
/* SalvageVnodes: salvage the small and the large vnode index of one volume.
 *
 *   rwIsp   - inode summary supplying the non-special inodes that are
 *             matched against the vnodes
 *   thisIsp - inode summary of the volume being salvaged; it is treated as
 *             the read-write volume when it is the same object as rwIsp
 *   inodes  - inode array indexed with rwIsp->index
 *   check   - nonzero for the check-only pass
 *
 * Returns 0 when both SalvageIndex() results are 0, and -1 otherwise. */
2257 SalvageVnodes(register struct InodeSummary *rwIsp,
2258 register struct InodeSummary *thisIsp,
2259 register struct ViceInodeInfo *inodes, int check)
2261 int ilarge, ismall, ioffset, RW, nInodes;
2262 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2265 RW = (rwIsp == thisIsp);
/* Only the inodes after the special ones take part in the vnode matching. */
2266 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
/* Small vnode index first. */
2268 SalvageIndex(thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2269 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2270 if (check && ismall == -1)
/* Then the large vnode index, against the same inode range. */
2273 SalvageIndex(thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2274 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2275 return (ilarge == 0 && ismall == 0 ? 0 : -1);
2279 SalvageIndex(Inode ino, VnodeClass class, int RW,
2280 register struct ViceInodeInfo *ip, int nInodes,
2281 struct VolumeSummary *volSummary, int check)
2283 VolumeId volumeNumber;
2284 char buf[SIZEOF_LARGEDISKVNODE];
2285 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2287 StreamHandle_t *file;
2288 struct VnodeClassInfo *vcp;
2290 afs_fsize_t vnodeLength;
2291 int vnodeIndex, nVnodes;
2292 afs_ino_str_t stmp1, stmp2;
2296 volumeNumber = volSummary->header.id;
2297 IH_INIT(handle, fileSysDevice, volSummary->header.parent, ino);
2298 fdP = IH_OPEN(handle);
2299 assert(fdP != NULL);
2300 file = FDH_FDOPEN(fdP, "r+");
2301 assert(file != NULL);
2302 vcp = &VnodeClassInfo[class];
2303 size = OS_SIZE(fdP->fd_fd);
2305 nVnodes = (size / vcp->diskSize) - 1;
2307 assert((nVnodes + 1) * vcp->diskSize == size);
2308 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
2312 for (vnodeIndex = 0;
2313 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2314 nVnodes--, vnodeIndex++) {
2315 if (vnode->type != vNull) {
2316 int vnodeChanged = 0;
2317 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2318 /* Log programs that belong to root (potentially suid root);
2319 * don't bother for read-only or backup volumes */
2320 #ifdef notdef /* This is done elsewhere */
2321 if (ShowRootFiles && RW && vnode->owner == 0 && vnodeNumber != 1)
2322 Log("OWNER IS ROOT %s %u dir %u vnode %u author %u owner %u mode %o\n", VolInfo.name, volumeNumber, vnode->parent, vnodeNumber, vnode->author, vnode->owner, vnode->modeBits);
2324 if (VNDISK_GET_INO(vnode) == 0) {
2326 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2327 memset(vnode, 0, vcp->diskSize);
2331 if (vcp->magic != vnode->vnodeMagic) {
2332 /* bad magic #, probably partially created vnode */
2333 Log("Partially allocated vnode %d deleted.\n",
2335 memset(vnode, 0, vcp->diskSize);
2339 /* ****** Should do a bit more salvage here: e.g. make sure
2340 * vnode type matches what it should be given the index */
2341 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2342 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2343 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2344 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2351 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2352 /* The following doesn't work, because the version number
2353 * is not maintained correctly by the file server */
2354 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2355 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2357 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2363 /* For RW volume, look for vnode with matching inode number;
2364 * if no such match, take the first determined by our sort
2366 register struct ViceInodeInfo *lip = ip;
2367 register int lnInodes = nInodes;
2369 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2370 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2379 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2380 /* "Matching" inode */
2384 vu = vnode->uniquifier;
2385 iu = ip->u.vnode.vnodeUniquifier;
2386 vd = vnode->dataVersion;
2387 id = ip->u.vnode.inodeDataVersion;
2389 * Because of the possibility of the uniquifier overflows (> 4M)
2390 * we compare them modulo the low 22-bits; we shouldn't worry
2391 * about mismatching since they shouldn't to many old
2392 * uniquifiers of the same vnode...
2394 if (IUnique(vu) != IUnique(iu)) {
2396 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2399 vnode->uniquifier = iu;
2400 #ifdef AFS_3DISPARES
2401 vnode->dataVersion = (id >= vd ?
2404 1887437 ? vd : id) :
2407 1887437 ? id : vd));
2409 #if defined(AFS_SGI_EXMAG)
2410 vnode->dataVersion = (id >= vd ?
2413 15099494 ? vd : id) :
2416 15099494 ? id : vd));
2418 vnode->dataVersion = (id > vd ? id : vd);
2419 #endif /* AFS_SGI_EXMAG */
2420 #endif /* AFS_3DISPARES */
2423 /* don't bother checking for vd > id any more, since
2424 * partial file transfers always result in this state,
2425 * and you can't do much else anyway (you've already
2426 * found the best data you can) */
2427 #ifdef AFS_3DISPARES
2428 if (!vnodeIsDirectory(vnodeNumber)
2429 && ((vd < id && (id - vd) < 1887437)
2430 || ((vd > id && (vd - id) > 1887437)))) {
2432 #if defined(AFS_SGI_EXMAG)
2433 if (!vnodeIsDirectory(vnodeNumber)
2434 && ((vd < id && (id - vd) < 15099494)
2435 || ((vd > id && (vd - id) > 15099494)))) {
2437 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2438 #endif /* AFS_SGI_EXMAG */
2441 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2442 vnode->dataVersion = id;
2447 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2450 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2452 VNDISK_SET_INO(vnode, ip->inodeNumber);
2457 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2459 VNDISK_SET_INO(vnode, ip->inodeNumber);
2462 VNDISK_GET_LEN(vnodeLength, vnode);
2463 if (ip->byteCount != vnodeLength) {
2466 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2471 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2472 VNDISK_SET_LEN(vnode, ip->byteCount);
2476 ip->linkCount--; /* Keep the inode around */
2479 } else { /* no matching inode */
2480 if (VNDISK_GET_INO(vnode) != 0
2481 || vnode->type == vDirectory) {
2482 /* No matching inode--get rid of the vnode */
2484 if (VNDISK_GET_INO(vnode)) {
2486 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)));
2490 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2495 if (VNDISK_GET_INO(vnode)) {
2497 time_t serverModifyTime = vnode->serverModifyTime;
2498 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2502 time_t serverModifyTime = vnode->serverModifyTime;
2503 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2506 memset(vnode, 0, vcp->diskSize);
2509 /* Should not reach here because we checked for
2510 * (inodeNumber == 0) above. And where we zero the vnode,
2511 * we also goto vnodeDone.
2515 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2519 } /* VNDISK_GET_INO(vnode) != 0 */
2521 assert(!(vnodeChanged && check));
2522 if (vnodeChanged && !Testing) {
2524 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2525 (char *)vnode, vcp->diskSize)
2527 VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber: look up the in-memory VnodeEssence for a vnode number.
 *
 * The vnode number is mapped to its class with vnodeIdToClass() and to an
 * index within that class with vnodeIdToBitNumber().  The result is a
 * pointer into vnodeInfo[class].vnodes[], or NULL when the index is not
 * below that class's nVnodes.  Callers in this file treat NULL as "no
 * such vnode".
 */
2538 struct VnodeEssence *
2539 CheckVnodeNumber(VnodeId vnodeNumber)
2542 struct VnodeInfo *vip;
2545 class = vnodeIdToClass(vnodeNumber);
2546 vip = &vnodeInfo[class];
2547 offset = vnodeIdToBitNumber(vnodeNumber);
2548 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite: give the directory described by `dir` a fresh inode before
 * it is modified.
 *
 * Visible steps: call DFlush(), read the directory's entry from the
 * large-vnode index, create a new inode with IH_CREATE (vnode.dataVersion
 * is bumped in the same call), copy the old inode into it with
 * CopyInode(), store the new inode number in the vnode, write the vnode
 * back to the index, and repoint dir->dirHandle at the new inode.
 *
 * After the index read and after the index write, `code` is asserted to
 * equal sizeof(vnode); the new inode's validity and a zero CopyInode()
 * result are asserted as well, so any of these failures aborts the run.
 *
 * The old inode is deliberately not released here (see the note at the
 * end of the body).
 *
 * NOTE(review): the statement guarded by `dir->copied || Testing` is
 * presumably an early return -- confirm.
 * NOTE(review): CopyInode() is invoked inside assert(); if this is the
 * standard <assert.h> macro, an NDEBUG build would drop the copy.
 */
2552 CopyOnWrite(register struct DirSummary *dir)
2554 /* Copy the directory unconditionally if we are going to change it:
2555 * not just if was cloned.
2557 struct VnodeDiskObject vnode;
2558 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2559 Inode oldinode, newinode;
2562 if (dir->copied || Testing)
2564 DFlush(); /* Well justified paranoia... */
2567 IH_IREAD(vnodeInfo[vLarge].handle,
2568 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2570 assert(code == sizeof(vnode));
2571 oldinode = VNDISK_GET_INO(&vnode);
2572 /* Increment the version number by a whole lot to avoid problems with
2573 * clients that were promised new version numbers--but the file server
2574 * crashed before the versions were written to disk.
2577 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2578 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2580 assert(VALID_INO(newinode));
2581 assert(CopyInode(fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2583 VNDISK_SET_INO(&vnode, newinode);
2585 IH_IWRITE(vnodeInfo[vLarge].handle,
2586 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2588 assert(code == sizeof(vnode));
2590 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2591 fileSysDevice, newinode);
2592 /* Don't delete the original inode right away, because the directory is
2593 * still being scanned.
2599 * This function should either successfully create a new dir, or give up
2600 * and leave things the way they were. In particular, if it fails to write
2601 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage: rebuild the directory described by `dir` into a newly
 * created inode, and switch the vnode over to it only if the rebuilt copy
 * checks out.
 *
 * Visible flow:
 *  - read the directory's vnode from the large-vnode index and remember
 *    its current inode (oldinode);
 *  - IH_CREATE a new inode (vnode.dataVersion is bumped in the same call)
 *    and wrap it in the local handle `newdir`;
 *  - choose the uniquifier for "..": the parent's VnodeEssence unique
 *    when vnode.parent resolves through CheckVnodeNumber() and is
 *    non-zero, otherwise the default of 1;
 *  - DirSalvage() from dir->dirHandle into newdir, with ".." pointing at
 *    vnode.parent, or at the directory itself when vnode.parent is 0;
 *  - when the salvage reports an error, or the following DirOK(&newdir)
 *    check fails, the new inode is released with IH_DEC and the old
 *    directory is left in place;
 *  - otherwise the new inode number and Length(&newdir) are stored in the
 *    vnode, the vnode is written back, the disk is synced (nt_sync or
 *    sync), the index handle is marked synced, the old directory file is
 *    really closed, the old inode is IH_DEC'ed and dir->dirHandle is
 *    replaced by newdir.
 *
 * NOTE(review): the exact failure conditions and early returns are not
 * all visible in this listing -- confirm against the full function.
 */
2605 CopyAndSalvage(register struct DirSummary *dir)
2607 struct VnodeDiskObject vnode;
2608 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2609 Inode oldinode, newinode;
2614 afs_int32 parentUnique = 1;
2615 struct VnodeEssence *vnodeEssence;
2620 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2622 IH_IREAD(vnodeInfo[vLarge].handle,
2623 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2625 assert(lcode == sizeof(vnode));
2626 oldinode = VNDISK_GET_INO(&vnode);
2627 /* Increment the version number by a whole lot to avoid problems with
2628 * clients that were promised new version numbers--but the file server
2629 * crashed before the versions were written to disk.
2632 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2633 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2635 assert(VALID_INO(newinode));
2636 SetSalvageDirHandle(&newdir, dir->rwVid, fileSysDevice, newinode);
2638 /* Assign . and .. vnode numbers from dir and vnode.parent.
2639 * The uniquifier for . is in the vnode.
2640 * The uniquifier for .. might be set to a bogus value of 1 and
2641 * the salvager will later clean it up.
2643 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(vnode.parent))) {
2644 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2647 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2649 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2654 /* didn't really build the new directory properly, let's just give up. */
2655 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2656 Log("Directory salvage returned code %d, continuing.\n", code);
2658 Log("also failed to decrement link count on new inode");
2662 Log("Checking the results of the directory salvage...\n");
2663 if (!DirOK(&newdir)) {
2664 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2665 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2670 VNDISK_SET_INO(&vnode, newinode);
2671 length = Length(&newdir);
2672 VNDISK_SET_LEN(&vnode, length);
2674 IH_IWRITE(vnodeInfo[vLarge].handle,
2675 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2677 assert(lcode == sizeof(vnode));
2680 nt_sync(fileSysDevice);
2682 sync(); /* this is slow, but hopefully rarely called. We don't have
2683 * an open FD on the file itself to fsync.
2687 vnodeInfo[vLarge].handle->ih_synced = 1;
2689 /* make sure old directory file is really closed */
2690 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2691 FDH_REALLYCLOSE(fdP);
2693 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2695 dir->dirHandle = newdir;
/*
 * JudgeEntry: per-entry callback that SalvageDir() hands to EnumerateDir().
 * dirVal is the DirSummary of the directory being scanned; name and
 * vnodeNumber identify the entry, and the body refers to the entry's
 * uniquifier as `unique`.
 *
 * Checks applied, in order:
 *  1. The vnode number must resolve through CheckVnodeNumber(); otherwise
 *     the entry is logged as invalid and removed with Delete().
 *  2. Without AFS_NAMEI_ENV, an entry whose vnode has InodeNumber 0 is
 *     logged and removed.
 *  3. With Showmode off, an even vnode number (the !(vnodeNumber & 1)
 *     test) whose essence has zero count, unique and modeBits is reported
 *     as an invalid entry.
 *  4. Uniquifier mismatch with the vnode: "." and ".." entries that point
 *     at a vnode with unique 0 are left alone.  Otherwise `todelete` is
 *     set when the vnode's unique is 0 or the directory is orphaned, the
 *     entry is removed, and Create() re-adds it carrying the vnode's
 *     unique (presumably only when todelete is clear).  Handling of the
 *     entry ends with return 0.
 *  5. ".": rewritten when it does not name dir->vnodeNumber / dir->unique.
 *  6. "..": rewritten when it does not match the expected parent fid;
 *     dir->haveDotDot is set.
 *  7. Names beginning with ".__afs": the entry is deleted and the vnode is
 *     marked unclaimed and todelete.
 *  8. Any other name:
 *     - optional suid/sgid and root-owned reporting;
 *     - a symlink with none of the 0111 mode bits is read as a mount
 *       point, and if its text does not start with '#' or '%' or does not
 *       end in '.', 0111 is or-ed into modeBits and the vnode is marked
 *       changed;
 *     - a buffer sized for the name is stored in vnodeEssence->name for
 *       large-class vnodes that have none yet;
 *     - if the vnode's parent is not this directory, the directory adopts
 *       it when the vnode is unclaimed, the directory is not orphaned and
 *       the vnode is not vnode 1; otherwise the entry is deleted.
 * An entry that survives marks the vnode claimed and decrements
 * vnodeEssence->count.
 *
 * NOTE(review): most Delete()/Create() calls sit inside assert(); if this
 * is the standard <assert.h> macro, an NDEBUG build would silently drop
 * them.  The Testing guards around those calls are presumed -- confirm.
 */
2699 JudgeEntry(void *dirVal, char *name, afs_int32 vnodeNumber,
2702 struct DirSummary *dir = (struct DirSummary *)dirVal;
2703 struct VnodeEssence *vnodeEssence;
2704 afs_int32 dirOrphaned, todelete;
2706 dirOrphaned = IsVnodeOrphaned(dir->vnodeNumber);
2708 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2709 if (vnodeEssence == NULL) {
2711 Log("dir vnode %u: invalid entry deleted: %s/%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2715 assert(Delete(&dir->dirHandle, name) == 0);
2720 #ifndef AFS_NAMEI_ENV
2721 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2722 * mount inode for the partition. If this inode were deleted, it would crash
2725 if (vnodeEssence->InodeNumber == 0) {
2726 Log("dir vnode %d: invalid entry: %s/%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2729 assert(Delete(&dir->dirHandle, name) == 0);
2736 if (!(vnodeNumber & 1) && !Showmode
2737 && !(vnodeEssence->count || vnodeEssence->unique
2738 || vnodeEssence->modeBits)) {
2739 Log("dir vnode %u: invalid entry: %s/%s (vnode %u, unique %u)%s\n",
2740 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
2741 vnodeNumber, unique,
2742 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
2747 assert(Delete(&dir->dirHandle, name) == 0);
2753 /* Check if the Uniquifiers match. If not, change the directory entry
2754 * so its unique matches the vnode unique. Delete if the unique is zero
2755 * or if the directory is orphaned.
2757 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
2758 if (!vnodeEssence->unique
2759 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
2760 /* This is an orphaned directory. Don't delete the . or ..
2761 * entry. Otherwise, it will get created in the next
2762 * salvage and deleted again here. So Just skip it.
2767 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
2770 Log("dir vnode %u: %s/%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
2774 fid.Vnode = vnodeNumber;
2775 fid.Unique = vnodeEssence->unique;
2777 assert(Delete(&dir->dirHandle, name) == 0);
2779 assert(Create(&dir->dirHandle, name, &fid) == 0);
2782 return 0; /* no need to continue */
2785 if (strcmp(name, ".") == 0) {
2786 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
2789 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
2792 assert(Delete(&dir->dirHandle, ".") == 0);
2793 fid.Vnode = dir->vnodeNumber;
2794 fid.Unique = dir->unique;
2795 assert(Create(&dir->dirHandle, ".", &fid) == 0);
2798 vnodeNumber = fid.Vnode; /* Get the new Essence */
2799 unique = fid.Unique;
2800 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2803 } else if (strcmp(name, "..") == 0) {
2806 struct VnodeEssence *dotdot;
2807 pa.Vnode = dir->parent;
2808 dotdot = CheckVnodeNumber(pa.Vnode);
2809 assert(dotdot != NULL); /* XXX Should not be assert */
2810 pa.Unique = dotdot->unique;
2812 pa.Vnode = dir->vnodeNumber;
2813 pa.Unique = dir->unique;
2815 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
2817 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
2820 assert(Delete(&dir->dirHandle, "..") == 0);
2821 assert(Create(&dir->dirHandle, "..", &pa) == 0);
2824 vnodeNumber = pa.Vnode; /* Get the new Essence */
2826 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2828 dir->haveDotDot = 1;
2829 } else if (strncmp(name, ".__afs", 6) == 0) {
2831 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
2835 assert(Delete(&dir->dirHandle, name) == 0);
2837 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
2838 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
2841 if (ShowSuid && (vnodeEssence->modeBits & 06000))
2842 Log("FOUND suid/sgid file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
2843 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
2844 && !(vnodeEssence->modeBits & 0111)) {
2850 IH_INIT(ihP, fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
2851 vnodeEssence->InodeNumber);
2854 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
2858 size = FDH_SIZE(fdP);
2860 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, size, vnodeNumber);
2861 FDH_REALLYCLOSE(fdP);
2868 code = FDH_READ(fdP, buf, size);
2871 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
2872 Log("Volume %u (%s) mount point %s/%s to '%s' invalid, %s to symbolic link\n",
2873 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
2874 Testing ? "would convert" : "converted");
2875 vnodeEssence->modeBits |= 0111;
2876 vnodeEssence->changed = 1;
2877 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s/%s to '%s'\n",
2878 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
2879 dir->name ? dir->name : "??", name, buf);
2881 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
2882 dir->vname, vnodeNumber, size, code);
2884 FDH_REALLYCLOSE(fdP);
2887 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
2888 Log("FOUND root file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
2889 if (vnodeIdToClass(vnodeNumber) == vLarge
2890 && vnodeEssence->name == NULL) {
2892 if ((n = (char *)malloc(strlen(name) + 1)))
2894 vnodeEssence->name = n;
2897 /* The directory entry points to the vnode. Check to see if the
2898 * vnode points back to the directory. If not, then let the
2899 * directory claim it (else it might end up orphaned). Vnodes
2900 * already claimed by another directory are deleted from this
2901 * directory: hardlinks to the same vnode are not allowed
2902 * from different directories.
2904 if (vnodeEssence->parent != dir->vnodeNumber) {
2905 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
2906 /* Vnode does not point back to this directory.
2907 * Orphaned dirs cannot claim a file (it may belong to
2908 * another non-orphaned dir).
2911 Log("dir vnode %u: %s/%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
2913 vnodeEssence->parent = dir->vnodeNumber;
2914 vnodeEssence->changed = 1;
2916 /* Vnode was claimed by another directory */
2919 Log("dir vnode %u: %s/%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
2920 } else if (vnodeNumber == 1) {
2921 Log("dir vnode %d: %s/%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
2923 Log("dir vnode %u: %s/%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
2928 assert(Delete(&dir->dirHandle, name) == 0);
2933 /* This directory claims the vnode */
2934 vnodeEssence->claimed = 1;
2936 vnodeEssence->count--;
/*
 * DistilVnodeEssence: load one vnode index file into memory.
 *
 * Opens the index inode `ino` of volume rwVId for the given vnode class
 * and derives the number of vnode slots from the file size (one diskSize
 * slot at the front of the file is skipped).  It allocates
 * vnodeInfo[class].vnodes, plus .inodes for the large class, and fills one
 * VnodeEssence per allocated (type != vNull) on-disk vnode: link count,
 * block count, parent, uniquifier, mode bits, inode number, type, author,
 * owner and group.
 *
 * *maxu is raised to the largest uniquifier seen.  volumeBlockCount and
 * nAllocatedVnodes are accumulated on the class's VnodeInfo.
 *
 * A directory vnode found in a non-large index is not counted as
 * allocated, and its on-disk slot is zeroed.
 *
 * Setup failures (open, stream open, size mismatch, seek, calloc) are
 * handled with assert().
 */
2941 DistilVnodeEssence(VolumeId rwVId, VnodeClass class, Inode ino, Unique * maxu)
2943 register struct VnodeInfo *vip = &vnodeInfo[class];
2944 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
2945 char buf[SIZEOF_LARGEDISKVNODE];
2946 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2948 StreamHandle_t *file;
2953 IH_INIT(vip->handle, fileSysDevice, rwVId, ino);
2954 fdP = IH_OPEN(vip->handle);
2955 assert(fdP != NULL);
2956 file = FDH_FDOPEN(fdP, "r+");
2957 assert(file != NULL);
2958 size = OS_SIZE(fdP->fd_fd);
2960 vip->nVnodes = (size / vcp->diskSize) - 1;
2961 if (vip->nVnodes > 0) {
2962 assert((vip->nVnodes + 1) * vcp->diskSize == size);
2963 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
2964 assert((vip->vnodes = (struct VnodeEssence *)
2965 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
2966 if (class == vLarge) {
2967 assert((vip->inodes = (Inode *)
2968 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
2977 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
2978 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
2979 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2980 nVnodes--, vnodeIndex++) {
2981 if (vnode->type != vNull) {
2982 register struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
2983 afs_fsize_t vnodeLength;
2984 vip->nAllocatedVnodes++;
2985 vep->count = vnode->linkCount;
2986 VNDISK_GET_LEN(vnodeLength, vnode);
2987 vep->blockCount = nBlocks(vnodeLength);
2988 vip->volumeBlockCount += vep->blockCount;
2989 vep->parent = vnode->parent;
2990 vep->unique = vnode->uniquifier;
2991 if (*maxu < vnode->uniquifier)
2992 *maxu = vnode->uniquifier;
2993 vep->modeBits = vnode->modeBits;
2994 vep->InodeNumber = VNDISK_GET_INO(vnode);
2995 vep->type = vnode->type;
2996 vep->author = vnode->author;
2997 vep->owner = vnode->owner;
2998 vep->group = vnode->group;
2999 if (vnode->type == vDirectory) {
3000 if (class != vLarge) {
3001 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3002 vip->nAllocatedVnodes--;
/* `vnode` is a pointer into buf, so sizeof(vnode) is only the size of
 * the pointer.  Clear and rewrite the full on-disk record
 * (vcp->diskSize bytes), and pass the record itself rather than the
 * address of the pointer variable. */
3003 memset(vnode, 0, vcp->diskSize);
3004 IH_IWRITE(vnodeInfo[vSmall].handle,
3005 vnodeIndexOffset(vcp, vnodeNumber),
3006 (char *)vnode, vcp->diskSize);
3009 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName: build a path for a directory vnode into `path`.
 *
 * When vp has a parent and a recorded name, and the parent resolves
 * through CheckVnodeNumber(), the parent's path is produced first by a
 * recursive call.  If that call returns non-zero, vp->name is appended to
 * `path` with strcat().
 *
 * NOTE(review): strcat() is unbounded; the caller in SalvageDir passes a
 * static MAXPATHLEN buffer.  Any length checks, the base cases and the
 * return values are not visible in this listing -- confirm.
 */
3018 GetDirName(VnodeId vnode, struct VnodeEssence *vp, char *path)
3020 struct VnodeEssence *parentvp;
3026 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(vp->parent))
3027 && GetDirName(vp->parent, parentvp, path)) {
3029 strcat(path, vp->name);
3035 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3036 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
/*
 * IsVnodeOrphaned: return 1 when `vnode` is orphaned, 0 when it is not.
 *
 * Vnode zero is reported as orphaned and the root directory vnode as not
 * orphaned (per the inline comments on the two early returns).  Any other
 * vnode is orphaned if it has no VnodeEssence or its `claimed` flag is
 * clear; otherwise the answer is whatever the same test gives for its
 * parent, found by recursion.
 *
 * NOTE(review): no cycle guard is visible; a parent loop among claimed
 * vnodes would recurse without bound -- confirm that cannot happen.
 */
3039 IsVnodeOrphaned(VnodeId vnode)
3041 struct VnodeEssence *vep;
3044 return (1); /* Vnode zero does not exist */
3046 return (0); /* The root dir vnode is always claimed */
3047 vep = CheckVnodeNumber(vnode); /* Get the vnode essence */
3048 if (!vep || !vep->claimed)
3049 return (1); /* Vnode is not claimed - it is orphaned */
3051 return (IsVnodeOrphaned(vep->parent));
/*
 * SalvageDir: check, and if necessary rebuild, the directory held in slot
 * `i` of the large-vnode table dirVnodeInfo, then judge each of its
 * entries.
 *
 * Visible flow:
 *  - return at once if the slot is already marked salvaged; otherwise
 *    mark it, and return if the slot has no directory inode;
 *  - for vnode 1, a non-zero parent is logged and cleared; the other path
 *    salvages the parent directory first, by recursion, when the parent
 *    resolves and is not yet salvaged;
 *  - fill in the DirSummary `dir` and point its handle at the slot's
 *    inode;
 *  - dirok is forced to 0 when RebuildDirs is set and Testing is off,
 *    otherwise it is the result of DirOK(); a bad directory is logged and
 *    passed to CopyAndSalvage();
 *  - EnumerateDir() runs JudgeEntry over every entry, and a non-zero
 *    result trips the assert;
 *  - if judging caused the directory to be copied (dir.copied) and
 *    Testing is off, the old inode is released with IH_DEC and the slot's
 *    inode is updated to the new one;
 *  - the root directory (vnode 1, unique 1) is copied into *rootdir after
 *    it has been judged.
 *
 * dir, dirHandle and path are static.  The recursive call for the parent
 * happens before they are filled in for this directory, so the parent's
 * pass does not clobber this directory's values.
 *
 * NOTE(review): EnumerateDir() is called inside assert(); if this is the
 * standard <assert.h> macro, an NDEBUG build would skip the enumeration.
 */
3055 SalvageDir(char *name, VolumeId rwVid, struct VnodeInfo *dirVnodeInfo,
3056 IHandle_t * alinkH, int i, struct DirSummary *rootdir,
3059 static struct DirSummary dir;
3060 static struct DirHandle dirHandle;
3061 struct VnodeEssence *parent;
3062 static char path[MAXPATHLEN];
3065 if (dirVnodeInfo->vnodes[i].salvaged)
3066 return; /* already salvaged */
3069 dirVnodeInfo->vnodes[i].salvaged = 1;
3071 if (dirVnodeInfo->inodes[i] == 0)
3072 return; /* Not allocated to a directory */
3074 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3075 if (dirVnodeInfo->vnodes[i].parent) {
3076 Log("Bad parent, vnode 1; %s...\n",
3077 (Testing ? "skipping" : "salvaging"));
3078 dirVnodeInfo->vnodes[i].parent = 0;
3079 dirVnodeInfo->vnodes[i].changed = 1;
3082 parent = CheckVnodeNumber(dirVnodeInfo->vnodes[i].parent);
3083 if (parent && parent->salvaged == 0)
3084 SalvageDir(name, rwVid, dirVnodeInfo, alinkH,
3085 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3086 rootdir, rootdirfound);
3089 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3090 dir.unique = dirVnodeInfo->vnodes[i].unique;
3093 dir.parent = dirVnodeInfo->vnodes[i].parent;
3094 dir.haveDot = dir.haveDotDot = 0;
3095 dir.ds_linkH = alinkH;
3096 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, fileSysDevice,
3097 dirVnodeInfo->inodes[i]);
3099 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3102 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3103 (Testing ? "skipping" : "salvaging"));
3106 CopyAndSalvage(&dir);
3110 dirHandle = dir.dirHandle;
3113 GetDirName(bitNumberToVnodeNumber(i, vLarge),
3114 &dirVnodeInfo->vnodes[i], path);
3117 /* If enumeration failed for random reasons, we will probably delete
3118 * too much stuff, so we guard against this instead.
3120 assert(EnumerateDir(&dirHandle, JudgeEntry, &dir) == 0);
3123 /* Delete the old directory if it was copied in order to salvage.
3124 * CopyOnWrite has written the new inode # to the disk, but we still
3125 * have the old one in our local structure here. Thus, we idec the
3129 if (dir.copied && !Testing) {
3130 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3132 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3135 /* Remember rootdir DirSummary _after_ it has been judged */
3136 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3137 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3145 SalvageVolume(register struct InodeSummary *rwIsp, IHandle_t * alinkH)
3147 /* This routine, for now, will only be called for read-write volumes */
3149 int BlocksInVolume = 0, FilesInVolume = 0;
3150 register VnodeClass class;
3151 struct DirSummary rootdir, oldrootdir;
3152 struct VnodeInfo *dirVnodeInfo;
3153 struct VnodeDiskObject vnode;
3154 VolumeDiskData volHeader;
3156 int orphaned, rootdirfound = 0;
3157 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3158 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3159 struct VnodeEssence *vep;
3162 afs_sfsize_t nBytes;
3164 VnodeId LFVnode, ThisVnode;
3165 Unique LFUnique, ThisUnique;
3168 vid = rwIsp->volSummary->header.id;
3169 IH_INIT(h, fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3170 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3171 assert(nBytes == sizeof(volHeader));
3172 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3173 assert(volHeader.destroyMe != DESTROY_ME);
3174 /* (should not have gotten this far with DESTROY_ME flag still set!) */
3176 DistilVnodeEssence(vid, vLarge, rwIsp->volSummary->header.largeVnodeIndex,
3178 DistilVnodeEssence(vid, vSmall, rwIsp->volSummary->header.smallVnodeIndex,
3181 dirVnodeInfo = &vnodeInfo[vLarge];
3182 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3183 SalvageDir(volHeader.name, vid, dirVnodeInfo, alinkH, i, &rootdir,
3187 nt_sync(fileSysDevice);
3189 sync(); /* This used to be done lower level, for every dir */
3196 /* Parse each vnode looking for orphaned vnodes and
3197 * connect them to the tree as orphaned (if requested).
3199 oldrootdir = rootdir;
3200 for (class = 0; class < nVNODECLASSES; class++) {
3201 for (v = 0; v < vnodeInfo[class].nVnodes; v++) {
3202 vep = &(vnodeInfo[class].vnodes[v]);
3203 ThisVnode = bitNumberToVnodeNumber(v, class);
3204 ThisUnique = vep->unique;
3206 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3207 continue; /* Ignore unused, claimed, and root vnodes */
3209 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3210 * entry in this vnode had incremented the parent link count (In
3211 * JudgeEntry()). We need to go to the parent and decrement that
3212 * link count. But if the parent's unique is zero, then the parent
3213 * link count was not incremented in JudgeEntry().
3215 if (class == vLarge) { /* directory vnode */
3216 pv = vnodeIdToBitNumber(vep->parent);
3217 if (vnodeInfo[vLarge].vnodes[pv].unique != 0)
3218 vnodeInfo[vLarge].vnodes[pv].count++;
3222 continue; /* If no rootdir, can't attach orphaned files */
3224 /* Here we attach orphaned files and directories into the
3225 * root directory, LFVnode, making sure link counts stay correct.
3227 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3228 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3229 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3231 /* Update this orphaned vnode's info. Its parent info and
3232 * link count (do for orphaned directories and files).
3234 vep->parent = LFVnode; /* Parent is the root dir */
3235 vep->unique = LFUnique;
3238 vep->count--; /* Inc link count (root dir will pt to it) */
3240 /* If this orphaned vnode is a directory, change '..'.
3241 * The name of the orphaned dir/file is unknown, so we
3242 * build a unique name. No need to CopyOnWrite the directory
3243 * since it is not connected to tree in BK or RO volume and
3244 * won't be visible there.
3246 if (class == vLarge) {
3250 /* Remove and recreate the ".." entry in this orphaned directory */
3251 SetSalvageDirHandle(&dh, vid, fileSysDevice,
3252 vnodeInfo[class].inodes[v]);
3254 pa.Unique = LFUnique;
3255 assert(Delete(&dh, "..") == 0);
3256 assert(Create(&dh, "..", &pa) == 0);
3258 /* The original parent's link count was decremented above.
3259 * Here we increment the new parent's link count.
3261 pv = vnodeIdToBitNumber(LFVnode);
3262 vnodeInfo[vLarge].vnodes[pv].count--;
3266 /* Go to the root dir and add this entry. The link count of the
3267 * root dir was incremented when ".." was created. Try 10 times.
3269 for (j = 0; j < 10; j++) {
3270 pa.Vnode = ThisVnode;
3271 pa.Unique = ThisUnique;
3273 (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
3275 vLarge) ? "__ORPHANDIR__" :
3276 "__ORPHANFILE__"), ThisVnode,
3279 CopyOnWrite(&rootdir);
3280 code = Create(&rootdir.dirHandle, npath, &pa);
3284 ThisUnique += 50; /* Try creating a different file */
3287 Log("Attaching orphaned %s to volume's root dir as %s\n",
3288 ((class == vLarge) ? "directory" : "file"), npath);
3290 } /* for each vnode in the class */
3291 } /* for each class of vnode */
3293 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
3295 if (!oldrootdir.copied && rootdir.copied) {
3297 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
3300 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
3303 DFlush(); /* Flush the changes */
3304 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
3305 Log("Cannot attach orphaned files and directories: Root directory not found\n");
3306 orphans = ORPH_IGNORE;
3309 /* Write out all changed vnodes. Orphaned files and directories
3310 * will get removed here also (if requested).
3312 for (class = 0; class < nVNODECLASSES; class++) {
3313 int nVnodes = vnodeInfo[class].nVnodes;
3314 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3315 struct VnodeEssence *vnodes = vnodeInfo[class].vnodes;
3316 FilesInVolume += vnodeInfo[class].nAllocatedVnodes;
3317 BlocksInVolume += vnodeInfo[class].volumeBlockCount;
3318 for (i = 0; i < nVnodes; i++) {
3319 register struct VnodeEssence *vnp = &vnodes[i];
3320 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
3322 /* If the vnode is good but is unclaimed (not listed in
3323 * any directory entries), then it is orphaned.
3326 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(vnodeNumber))) {
3327 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
3331 if (vnp->changed || vnp->count) {
3335 IH_IREAD(vnodeInfo[class].handle,
3336 vnodeIndexOffset(vcp, vnodeNumber),
3337 (char *)&vnode, sizeof(vnode));
3338 assert(nBytes == sizeof(vnode));
3340 vnode.parent = vnp->parent;
3341 oldCount = vnode.linkCount;
3342 vnode.linkCount = vnode.linkCount - vnp->count;
3345 orphaned = IsVnodeOrphaned(vnodeNumber);
3347 if (!vnp->todelete) {
3348 /* Orphans should have already been attached (if requested) */
3349 assert(orphans != ORPH_ATTACH);
3350 oblocks += vnp->blockCount;
3353 if (((orphans == ORPH_REMOVE) || vnp->todelete)
3355 BlocksInVolume -= vnp->blockCount;
3357 if (VNDISK_GET_INO(&vnode)) {
3359 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
3362 memset(&vnode, 0, sizeof(vnode));
3364 } else if (vnp->count) {
3366 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
3369 vnode.modeBits = vnp->modeBits;
3372 vnode.dataVersion++;
3375 IH_IWRITE(vnodeInfo[class].handle,
3376 vnodeIndexOffset(vcp, vnodeNumber),
3377 (char *)&vnode, sizeof(vnode));