2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
93 #include <sys/param.h>
97 #endif /* ITIMER_REAL */
103 #include <sys/stat.h>
108 #include <WINNT/afsevent.h>
110 #if defined(AFS_AIX_ENV) || defined(AFS_SUN4_ENV)
111 #define WCOREDUMP(x) (x & 0200)
114 #include <afs/afsint.h>
115 #include <afs/assert.h>
116 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
117 #if defined(AFS_VFSINCL_ENV)
118 #include <sys/vnode.h>
120 #include <sys/fs/ufs_inode.h>
122 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
123 #include <ufs/ufs/dinode.h>
124 #include <ufs/ffs/fs.h>
126 #include <ufs/inode.h>
129 #else /* AFS_VFSINCL_ENV */
131 #include <ufs/inode.h>
132 #else /* AFS_OSF_ENV */
133 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
134 #include <sys/inode.h>
137 #endif /* AFS_VFSINCL_ENV */
138 #endif /* AFS_SGI_ENV */
141 #include <sys/lockf.h>
145 #include <checklist.h>
147 #if defined(AFS_SGI_ENV)
152 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
155 #include <sys/mnttab.h>
156 #include <sys/mntent.h>
161 #endif /* AFS_SGI_ENV */
162 #endif /* AFS_HPUX_ENV */
167 #include <afs/osi_inode.h>
170 #include <afs/afsutil.h>
171 #include <afs/fileutil.h>
172 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
180 #include <afs/afssyscalls.h>
184 #include "partition.h"
185 #include "daemon_com.h"
187 #include "salvsync.h"
188 #include "viceinode.h"
190 #include "volinodes.h" /* header magic number, etc. stuff */
191 #include "vol-salvage.h"
196 /*@+fcnmacros +macrofcndecl@*/
199 extern off64_t afs_lseek(int FD, off64_t O, int F);
200 #endif /*S_SPLINT_S */
201 #define afs_lseek(FD, O, F) lseek64(FD, (off64_t) (O), F)
202 #define afs_stat stat64
203 #define afs_fstat fstat64
204 #define afs_open open64
205 #define afs_fopen fopen64
206 #else /* !O_LARGEFILE */
208 extern off_t afs_lseek(int FD, off_t O, int F);
209 #endif /*S_SPLINT_S */
210 #define afs_lseek(FD, O, F) lseek(FD, (off_t) (O), F)
211 #define afs_stat stat
212 #define afs_fstat fstat
213 #define afs_open open
214 #define afs_fopen fopen
215 #endif /* !O_LARGEFILE */
216 /*@=fcnmacros =macrofcndecl@*/
219 extern void *calloc();
221 static char *TimeStamp(time_t clock, int precision);
224 int debug; /* -d flag */
225 extern int Testing; /* -n flag */
226 int ListInodeOption; /* -i flag */
227 int ShowRootFiles; /* -r flag */
228 int RebuildDirs; /* -sal flag */
229 int Parallel = 4; /* -para X flag */
230 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
231 int forceR = 0; /* -b flag */
232 int ShowLog = 0; /* -showlog flag */
233 int ShowSuid = 0; /* -showsuid flag */
234 int ShowMounts = 0; /* -showmounts flag */
235 int orphans = ORPH_IGNORE; /* -orphans option */
240 int useSyslog = 0; /* -syslog flag */
241 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
250 #define MAXPARALLEL 32
252 int OKToZap; /* -o flag */
253 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
254 * in the volume header */
256 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
258 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
260 Device fileSysDevice; /* The device number of the current
261 * partition being salvaged */
265 char *fileSysPath; /* The path of the mounted partition currently
266 * being salvaged, i.e. the directory
267 * containing the volume headers */
269 char *fileSysPathName; /* NT needs this to make name pretty in log. */
270 IHandle_t *VGLinkH; /* Link handle for current volume group. */
271 int VGLinkH_cnt; /* # of references to lnk handle. */
272 struct DiskPartition64 *fileSysPartition; /* Partition being salvaged */
274 char *fileSysDeviceName; /* The block device where the file system
275 * being salvaged was mounted */
276 char *filesysfulldev;
278 int VolumeChanged; /* Set by any routine which would change the volume in
279 * a way which would require callbacks to be broken if the
280 * volume was put back on line by an active file server */
282 VolumeDiskData VolInfo; /* A copy of the last good or salvaged volume header dealt with */
284 int nVolumesInInodeFile; /* Number of read-write volumes summarized */
285 int inodeFd; /* File descriptor for inode file */
288 struct VnodeInfo vnodeInfo[nVNODECLASSES];
291 struct VolumeSummary *volumeSummaryp; /* Holds all the volumes in a part */
292 int nVolumes; /* Number of volumes (read-write and read-only)
293 * in volume summary */
299 /* Forward declarations */
300 /*@printflike@*/ void Log(const char *format, ...);
301 /*@printflike@*/ void Abort(const char *format, ...);
302 static int IsVnodeOrphaned(VnodeId vnode);
304 /* Uniquifier stored in the Inode */
309 return (u & 0x3fffff);
311 #if defined(AFS_SGI_EXMAG)
312 return (u & SGI_UNIQMASK);
315 #endif /* AFS_SGI_EXMAG */
/*
 * Classify an errno value as a hard error or a possibly transient one.
 *
 * aerror: the errno value to classify.
 *
 * Returns 1 when aerror is EPERM, ENXIO or ENOENT, and 0 for any other
 * value.
 */
int
BadError(int aerror)
{
    switch (aerror) {
    case EPERM:
    case ENXIO:
    case ENOENT:
	return 1;
    default:
	return 0;		/* otherwise may be transient, e.g. EMFILE */
    }
}
329 char *save_args[MAX_ARGS];
331 extern pthread_t main_thread;
332 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
335 /* Get the salvage lock if not already held. Hold until process exits. */
337 ObtainSalvageLock(void)
343 (FD_t)CreateFile(AFSDIR_SERVER_SLVGLOCK_FILEPATH, 0, 0, NULL,
344 OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
345 if (salvageLock == INVALID_FD) {
347 "salvager: There appears to be another salvager running! Aborted.\n");
352 afs_open(AFSDIR_SERVER_SLVGLOCK_FILEPATH, O_CREAT | O_RDWR, 0666);
353 if (salvageLock < 0) {
355 "salvager: can't open salvage lock file %s, aborting\n",
356 AFSDIR_SERVER_SLVGLOCK_FILEPATH);
359 #ifdef AFS_DARWIN_ENV
360 if (flock(salvageLock, LOCK_EX) == -1) {
362 if (lockf(salvageLock, F_LOCK, 0) == -1) {
365 "salvager: There appears to be another salvager running! Aborted.\n");
372 #ifdef AFS_SGI_XFS_IOPS_ENV
/* Check if the given partition is mounted. For XFS, the root inode is not a
 * constant. So we check the hard way.
 *
 * part: path compared (exact strcmp) against the mnt_dir field of every
 *       entry in the MOUNTED table.
 *
 * Returns 1 when an entry matches, 0 when the table holds no such entry.
 * Asserts that the MOUNTED table could be opened.
 */
int
IsPartitionMounted(char *part)
{
    FILE *mntfp;
    struct mntent *mntent;
    int found = 0;

    /* Open the table outside of assert(): an assignment buried in the
     * assert expression disappears entirely when NDEBUG is defined. */
    mntfp = setmntent(MOUNTED, "r");
    assert(mntfp != NULL);

    while ((mntent = getmntent(mntfp)) != NULL) {
	if (!strcmp(part, mntent->mnt_dir)) {
	    found = 1;
	    break;
	}
    }

    /* Always release the stream, whether or not a match was found. */
    endmntent(mntfp);

    /* Previously "mntent ? 1 : 1", which reported every path as mounted. */
    return found;
}
392 /* Check if the given inode is the root of the filesystem. */
393 #ifndef AFS_SGI_XFS_IOPS_ENV
395 IsRootInode(struct afs_stat *status)
398 * The root inode is not a fixed value in XFS partitions. So we need to
399 * see if the partition is in the list of mounted partitions. This only
400 * affects the SalvageFileSys path, so we check there.
402 return (status->st_ino == ROOTINODE);
407 #ifndef AFS_NAMEI_ENV
408 /* We don't want to salvage big files filesystems, since we can't put volumes on
412 CheckIfBigFilesFS(char *mountPoint, char *devName)
414 struct superblock fs;
417 if (strncmp(devName, "/dev/", 5)) {
418 (void)sprintf(name, "/dev/%s", devName);
420 (void)strcpy(name, devName);
423 if (ReadSuper(&fs, name) < 0) {
424 Log("Unable to read superblock. Not salvaging partition %s.\n",
428 if (IsBigFilesFileSystem(&fs)) {
429 Log("Partition %s is a big files filesystem, not salvaging.\n",
439 #define HDSTR "\\Device\\Harddisk"
440 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
442 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
447 static int dowarn = 1;
449 if (!QueryDosDevice(p1->devName, res, RES_LEN - 1))
451 if (strncmp(res, HDSTR, HDLEN)) {
454 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
455 res, HDSTR, p1->devName);
459 d1 = atoi(&res[HDLEN]);
461 if (!QueryDosDevice(p2->devName, res, RES_LEN - 1))
463 if (strncmp(res, HDSTR, HDLEN)) {
466 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
467 res, HDSTR, p2->devName);
471 d2 = atoi(&res[HDLEN]);
476 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
479 /* This assumes that two partitions with the same device number divided by
480 * PartsPerDisk are on the same disk.
483 SalvageFileSysParallel(struct DiskPartition64 *partP)
486 struct DiskPartition64 *partP;
487 int pid; /* Pid for this job */
488 int jobnumb; /* Log file job number */
489 struct job *nextjob; /* Next partition on disk to salvage */
491 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
492 struct job *thisjob = 0;
493 static int numjobs = 0;
494 static int jobcount = 0;
500 char logFileName[256];
504 /* We have a partition to salvage. Copy it into thisjob */
505 thisjob = (struct job *)malloc(sizeof(struct job));
507 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
510 memset(thisjob, 0, sizeof(struct job));
511 thisjob->partP = partP;
512 thisjob->jobnumb = jobcount;
514 } else if (jobcount == 0) {
515 /* We are asking to wait for all jobs (partp == 0), yet we never
518 Log("No file system partitions named %s* found; not salvaged\n",
519 VICE_PARTITION_PREFIX);
523 if (debug || Parallel == 1) {
525 SalvageFileSys(thisjob->partP, 0);
532 /* Check to see if thisjob is for a disk that we are already
533 * salvaging. If it is, link it in as the next job to do. The
534 * jobs array has 1 entry per disk being salvaged. numjobs is
535 * the total number of disks currently being salvaged. In
536 * order to keep the jobs array compact, when a disk is
537 * completed, the highest element in the jobs array is moved
538 * down to the now open slot.
540 for (j = 0; j < numjobs; j++) {
541 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
542 /* On same disk, add it to this list and return */
543 thisjob->nextjob = jobs[j]->nextjob;
544 jobs[j]->nextjob = thisjob;
551 /* Loop until we start thisjob or until all existing jobs are finished */
552 while (thisjob || (!partP && (numjobs > 0))) {
553 startjob = -1; /* No new job to start */
555 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
556 /* Either the max jobs are running or we have to wait for all
557 * the jobs to finish. In either case, we wait for at least one
558 * job to finish. When it's done, clean up after it.
560 pid = wait(&wstatus);
562 for (j = 0; j < numjobs; j++) { /* Find which job it is */
563 if (pid == jobs[j]->pid)
567 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
568 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
571 numjobs--; /* job no longer running */
572 oldjob = jobs[j]; /* remember */
573 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
574 free(oldjob); /* free the old job */
576 /* If there is another partition on the disk to salvage, then
577 * say we will start it (startjob). If not, then put thisjob there
578 * and say we will start it.
580 if (jobs[j]) { /* Another partitions to salvage */
581 startjob = j; /* Will start it */
582 } else { /* There is not another partition to salvage */
584 jobs[j] = thisjob; /* Add thisjob */
586 startjob = j; /* Will start it */
588 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
589 startjob = -1; /* Don't start it - already running */
593 /* We don't have to wait for a job to complete */
595 jobs[numjobs] = thisjob; /* Add this job */
597 startjob = numjobs; /* Will start it */
601 /* Start up a new salvage job on a partition in job slot "startjob" */
602 if (startjob != -1) {
604 Log("Starting salvage of file system partition %s\n",
605 jobs[startjob]->partP->name);
607 /* For NT, we not only fork, but re-exec the salvager. Pass in the
608 * commands and pass the child job number via the data path.
611 nt_SalvagePartition(jobs[startjob]->partP->name,
612 jobs[startjob]->jobnumb);
613 jobs[startjob]->pid = pid;
618 jobs[startjob]->pid = pid;
624 for (fd = 0; fd < 16; fd++)
631 openlog("salvager", LOG_PID, useSyslogFacility);
635 (void)afs_snprintf(logFileName, sizeof logFileName,
637 AFSDIR_SERVER_SLVGLOG_FILEPATH,
638 jobs[startjob]->jobnumb);
639 logFile = afs_fopen(logFileName, "w");
644 SalvageFileSys1(jobs[startjob]->partP, 0);
649 } /* while ( thisjob || (!partP && numjobs > 0) ) */
651 /* If waited for all jobs to complete, now collect log files and return */
653 if (!useSyslog) /* if syslogging - no need to collect */
656 for (i = 0; i < jobcount; i++) {
657 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
658 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
659 if ((passLog = afs_fopen(logFileName, "r"))) {
660 while (fgets(buf, sizeof(buf), passLog)) {
665 (void)unlink(logFileName);
674 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
676 if (!canfork || debug || Fork() == 0) {
677 SalvageFileSys1(partP, singleVolumeNumber);
678 if (canfork && !debug) {
683 Wait("SalvageFileSys");
687 get_DevName(char *pbuffer, char *wpath)
689 char pbuf[128], *ptr;
690 strcpy(pbuf, pbuffer);
691 ptr = (char *)strrchr(pbuf, '/');
697 ptr = (char *)strrchr(pbuffer, '/');
699 strcpy(pbuffer, ptr + 1);
706 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
709 char inodeListPath[256];
710 static char tmpDevName[100];
711 static char wpath[100];
712 struct VolumeSummary *vsp, *esp;
715 fileSysPartition = partP;
716 fileSysDevice = fileSysPartition->device;
717 fileSysPathName = VPartitionPath(fileSysPartition);
720 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
721 (void)sprintf(fileSysPath, "%s\\", fileSysPathName);
722 name = partP->devName;
724 fileSysPath = fileSysPathName;
725 strcpy(tmpDevName, partP->devName);
726 name = get_DevName(tmpDevName, wpath);
727 fileSysDeviceName = name;
728 filesysfulldev = wpath;
731 VLockPartition(partP->name);
732 if (singleVolumeNumber || ForceSalvage)
735 ForceSalvage = UseTheForceLuke(fileSysPath);
737 if (singleVolumeNumber) {
738 /* salvageserver already setup fssync conn for us */
739 if ((programType != salvageServer) && !VConnectFS()) {
740 Abort("Couldn't connect to file server\n");
742 AskOffline(singleVolumeNumber, partP->name);
745 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
746 partP->name, name, (Testing ? "(READONLY mode)" : ""));
748 Log("***Forced salvage of all volumes on this partition***\n");
753 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
760 assert((dirp = opendir(fileSysPath)) != NULL);
761 while ((dp = readdir(dirp))) {
762 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
763 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
765 Log("Removing old salvager temp files %s\n", dp->d_name);
766 strcpy(npath, fileSysPath);
768 strcat(npath, dp->d_name);
774 tdir = (tmpdir ? tmpdir : fileSysPath);
776 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
777 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
779 snprintf(inodeListPath, 255, "%s/salvage.inodes.%s.%d", tdir, name,
782 if (GetInodeSummary(inodeListPath, singleVolumeNumber) < 0) {
783 unlink(inodeListPath);
787 /* Using nt_unlink here since we're really using the delete on close
788 * semantics of unlink. In most places in the salvager, we really do
789 * mean to unlink the file at that point. Those places have been
790 * modified to actually do that so that the NT crt can be used there.
793 _open_osfhandle((intptr_t)nt_open(inodeListPath, O_RDWR, 0), O_RDWR);
794 nt_unlink(inodeListPath); /* NT's crt unlink won't if file is open. */
796 inodeFd = afs_open(inodeListPath, O_RDONLY);
797 unlink(inodeListPath);
800 Abort("Temporary file %s is missing...\n", inodeListPath);
801 if (ListInodeOption) {
805 /* enumerate volumes in the partition.
806 * figure out sets of read-only + rw volumes.
807 * salvage each set, read-only volumes first, then read-write.
808 * Fix up inodes on last volume in set (whether it is read-write
811 GetVolumeSummary(singleVolumeNumber);
813 for (i = j = 0, vsp = volumeSummaryp, esp = vsp + nVolumes;
814 i < nVolumesInInodeFile; i = j) {
815 VolumeId rwvid = inodeSummary[i].RWvolumeId;
817 j < nVolumesInInodeFile && inodeSummary[j].RWvolumeId == rwvid;
819 VolumeId vid = inodeSummary[j].volumeId;
820 struct VolumeSummary *tsp;
821 /* Scan volume list (from partition root directory) looking for the
822 * current rw volume number in the volume list from the inode scan.
823 * If there is one here that is not in the inode volume list,
825 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
827 DeleteExtraVolumeHeaderFile(vsp);
829 /* Now match up the volume summary info from the root directory with the
830 * entry in the volume list obtained from scanning inodes */
831 inodeSummary[j].volSummary = NULL;
832 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
833 if (tsp->header.id == vid) {
834 inodeSummary[j].volSummary = tsp;
840 /* Salvage the group of volumes (several read-only + 1 read/write)
841 * starting with the current read-only volume we're looking at.
843 SalvageVolumeGroup(&inodeSummary[i], j - i);
846 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
847 for (; vsp < esp; vsp++) {
849 DeleteExtraVolumeHeaderFile(vsp);
852 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
853 RemoveTheForce(fileSysPath);
855 if (!Testing && singleVolumeNumber) {
856 AskOnline(singleVolumeNumber, fileSysPartition->name);
858 /* Step through the volumeSummary list and set all volumes on-line.
859 * The volumes were taken off-line in GetVolumeSummary.
861 for (j = 0; j < nVolumes; j++) {
862 AskOnline(volumeSummaryp[j].header.id, fileSysPartition->name);
866 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
867 fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
870 close(inodeFd); /* SalvageVolumeGroup was the last which needed it. */
874 DeleteExtraVolumeHeaderFile(register struct VolumeSummary *vsp)
877 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", vsp->fileName, (Testing ? "would have been " : ""));
879 unlink(vsp->fileName);
884 CompareInodes(const void *_p1, const void *_p2)
886 register const struct ViceInodeInfo *p1 = _p1;
887 register const struct ViceInodeInfo *p2 = _p2;
888 if (p1->u.vnode.vnodeNumber == INODESPECIAL
889 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
890 VolumeId p1rwid, p2rwid;
892 (p1->u.vnode.vnodeNumber ==
893 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
895 (p2->u.vnode.vnodeNumber ==
896 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
901 if (p1->u.vnode.vnodeNumber == INODESPECIAL
902 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
903 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
904 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
905 if (p1->u.vnode.volumeId == p1rwid)
907 if (p2->u.vnode.volumeId == p2rwid)
909 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
911 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
912 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
913 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
915 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
917 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
919 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
921 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
923 /* The following tests are reversed, so that the most desirable
924 * of several similar inodes comes first */
925 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
927 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
928 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
932 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
933 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
938 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
940 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
941 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
945 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
946 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
951 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
953 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
954 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
958 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
959 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
964 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
966 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
967 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
971 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
972 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
981 CountVolumeInodes(register struct ViceInodeInfo *ip, int maxInodes,
982 register struct InodeSummary *summary)
984 VolumeId volume = ip->u.vnode.volumeId;
985 VolumeId rwvolume = volume;
986 register int n, nSpecial;
987 register Unique maxunique;
990 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
992 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
994 rwvolume = ip->u.special.parentId;
995 /* This isn't quite right, as there could (in error) be different
996 * parent inodes in different special vnodes */
998 if (maxunique < ip->u.vnode.vnodeUniquifier)
999 maxunique = ip->u.vnode.vnodeUniquifier;
1003 summary->volumeId = volume;
1004 summary->RWvolumeId = rwvolume;
1005 summary->nInodes = n;
1006 summary->nSpecialInodes = nSpecial;
1007 summary->maxUniquifier = maxunique;
1011 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1013 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1014 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1015 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1020 * Collect list of inodes in file named by path. If a truly fatal error,
1021 * unlink the file and abort. For lesser errors, return -1. The file will
1022 * be unlinked by the caller.
1025 GetInodeSummary(char *path, VolumeId singleVolumeNumber)
1027 struct afs_stat status;
1029 struct ViceInodeInfo *ip;
1030 struct InodeSummary summary;
1031 char summaryFileName[50];
1034 char *dev = fileSysPath;
1035 char *wpath = fileSysPath;
1037 char *dev = fileSysDeviceName;
1038 char *wpath = filesysfulldev;
1040 char *part = fileSysPath;
1043 /* This file used to come from vfsck; cobble it up ourselves now... */
1045 ListViceInodes(dev, fileSysPath, path,
1046 singleVolumeNumber ? OnlyOneVolume : 0,
1047 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1049 Log("*** I/O error %d when writing a tmp inode file %s; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, path, dev);
1053 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1055 if (forceSal && !ForceSalvage) {
1056 Log("***Forced salvage of all volumes on this partition***\n");
1059 inodeFd = afs_open(path, O_RDWR);
1060 if (inodeFd == -1 || afs_fstat(inodeFd, &status) == -1) {
1062 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1064 tdir = (tmpdir ? tmpdir : part);
1066 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1067 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp"));
1069 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1070 "%s/salvage.temp.%d", tdir, getpid());
1072 summaryFile = afs_fopen(summaryFileName, "a+");
1073 if (summaryFile == NULL) {
1076 Abort("Unable to create inode summary file\n");
1078 if (!canfork || debug || Fork() == 0) {
1080 unsigned long st_size=(unsigned long) status.st_size;
1081 nInodes = st_size / sizeof(struct ViceInodeInfo);
1083 fclose(summaryFile);
1085 unlink(summaryFileName);
1086 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1087 RemoveTheForce(fileSysPath);
1089 struct VolumeSummary *vsp;
1092 GetVolumeSummary(singleVolumeNumber);
1094 for (i = 0, vsp = volumeSummaryp; i < nVolumes; i++) {
1096 DeleteExtraVolumeHeaderFile(vsp);
1099 Log("%s vice inodes on %s; not salvaged\n",
1100 singleVolumeNumber ? "No applicable" : "No", dev);
1103 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1105 fclose(summaryFile);
1108 unlink(summaryFileName);
1110 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1113 if (read(inodeFd, ip, st_size) != st_size) {
1114 fclose(summaryFile);
1117 unlink(summaryFileName);
1118 Abort("Unable to read inode table; %s not salvaged\n", dev);
1120 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1121 if (afs_lseek(inodeFd, 0, SEEK_SET) == -1
1122 || write(inodeFd, ip, st_size) != st_size) {
1123 fclose(summaryFile);
1126 unlink(summaryFileName);
1127 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1131 CountVolumeInodes(ip, nInodes, &summary);
1132 if (fwrite(&summary, sizeof(summary), 1, summaryFile) != 1) {
1133 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1134 fclose(summaryFile);
1138 summary.index += (summary.nInodes);
1139 nInodes -= summary.nInodes;
1140 ip += summary.nInodes;
1142 /* Following fflush is not fclose, because if it was debug mode would not work */
1143 if (fflush(summaryFile) == EOF || fsync(fileno(summaryFile)) == -1) {
1144 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1145 fclose(summaryFile);
1149 if (canfork && !debug) {
1154 if (Wait("Inode summary") == -1) {
1155 fclose(summaryFile);
1158 unlink(summaryFileName);
1159 Exit(1); /* salvage of this partition aborted */
1162 assert(afs_fstat(fileno(summaryFile), &status) != -1);
1163 if (status.st_size != 0) {
1165 unsigned long st_status=(unsigned long)status.st_size;
1166 inodeSummary = (struct InodeSummary *)malloc(st_status);
1167 assert(inodeSummary != NULL);
1168 /* For GNU we need to do lseek to get the file pointer moved. */
1169 assert(afs_lseek(fileno(summaryFile), 0, SEEK_SET) == 0);
1170 ret = read(fileno(summaryFile), inodeSummary, st_status);
1171 assert(ret == st_status);
1173 nVolumesInInodeFile =(unsigned long)(status.st_size) / sizeof(struct InodeSummary);
1174 Log("%d nVolumesInInodeFile %d \n",nVolumesInInodeFile,(unsigned long)(status.st_size));
1175 fclose(summaryFile);
1177 unlink(summaryFileName);
1181 /* Comparison routine for volume sort.
1182 This is setup so that a read-write volume comes immediately before
1183 any read-only clones of that volume */
1185 CompareVolumes(const void *_p1, const void *_p2)
1187 register const struct VolumeSummary *p1 = _p1;
1188 register const struct VolumeSummary *p2 = _p2;
1189 if (p1->header.parent != p2->header.parent)
1190 return p1->header.parent < p2->header.parent ? -1 : 1;
1191 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1193 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1195 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1199 GetVolumeSummary(VolumeId singleVolumeNumber)
1202 afs_int32 nvols = 0;
1203 struct VolumeSummary *vsp, vs;
1204 struct VolumeDiskHeader diskHeader;
1207 /* Get headers from volume directory */
1208 dirp = opendir(fileSysPath);
1210 Abort("Can't read directory %s; not salvaged\n", fileSysPath);
1211 if (!singleVolumeNumber) {
1212 while ((dp = readdir(dirp))) {
1213 char *p = dp->d_name;
1214 p = strrchr(dp->d_name, '.');
1215 if (p != NULL && strcmp(p, VHDREXT) == 0) {
1218 sprintf(name, "%s/%s", fileSysPath, dp->d_name);
1219 if ((fd = afs_open(name, O_RDONLY)) != -1
1220 && read(fd, (char *)&diskHeader, sizeof(diskHeader))
1221 == sizeof(diskHeader)
1222 && diskHeader.stamp.magic == VOLUMEHEADERMAGIC) {
1223 DiskToVolumeHeader(&vs.header, &diskHeader);
1231 dirp = opendir("."); /* No rewinddir for NT */
1238 (struct VolumeSummary *)malloc(nvols *
1239 sizeof(struct VolumeSummary));
1242 (struct VolumeSummary *)malloc(20 * sizeof(struct VolumeSummary));
1243 assert(volumeSummaryp != NULL);
1246 vsp = volumeSummaryp;
1247 while ((dp = readdir(dirp))) {
1248 char *p = dp->d_name;
1249 p = strrchr(dp->d_name, '.');
1250 if (p != NULL && strcmp(p, VHDREXT) == 0) {
1254 sprintf(name, "%s/%s", fileSysPath, dp->d_name);
1255 if ((fd = afs_open(name, O_RDONLY)) == -1
1256 || read(fd, &diskHeader, sizeof(diskHeader))
1257 != sizeof(diskHeader)
1258 || diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
1263 if (!singleVolumeNumber) {
1265 Log("%s/%s is not a legitimate volume header file; %sdeleted\n", fileSysPathName, dp->d_name, (Testing ? "it would have been " : ""));
1270 char nameShouldBe[64];
1271 DiskToVolumeHeader(&vsp->header, &diskHeader);
1272 if (singleVolumeNumber && vsp->header.id == singleVolumeNumber
1273 && vsp->header.parent != singleVolumeNumber) {
1274 if (programType == salvageServer) {
1275 #ifdef SALVSYNC_BUILD_CLIENT
1276 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1277 vsp->header.id, vsp->header.parent);
1278 if (SALVSYNC_LinkVolume(vsp->header.parent,
1280 fileSysPartition->name,
1282 Log("schedule request failed\n");
1285 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1287 Log("%u is a read-only volume; not salvaged\n",
1288 singleVolumeNumber);
1292 if (!singleVolumeNumber
1293 || (vsp->header.id == singleVolumeNumber
1294 || vsp->header.parent == singleVolumeNumber)) {
1295 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1296 VFORMAT, afs_cast_uint32(vsp->header.id));
1297 if (singleVolumeNumber
1298 && vsp->header.id != singleVolumeNumber)
1299 AskOffline(vsp->header.id, fileSysPartition->name);
1300 if (strcmp(nameShouldBe, dp->d_name)) {
1302 Log("Volume header file %s is incorrectly named; %sdeleted (it will be recreated later, if necessary)\n", dp->d_name, (Testing ? "it would have been " : ""));
1306 vsp->fileName = ToString(dp->d_name);
1316 qsort(volumeSummaryp, nVolumes, sizeof(struct VolumeSummary),
1320 /* Find the link table. This should be associated with the RW volume or, if
1321 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
/*
 * FindLinkHandle
 *
 * Search the special inodes of a volume group for the link table.
 *
 * isp       - array of inode summaries, one per volume in the group
 * nVols     - number of entries of isp[] to search
 * allInodes - base address that each isp[i].index is added to in order to
 *             reach that volume's first inode record
 *
 * Volumes are visited in index order; the inodeNumber of the first special
 * inode whose type is VI_LINKTABLE is returned as soon as it is seen.
 */
1324 FindLinkHandle(register struct InodeSummary *isp, int nVols,
1325 struct ViceInodeInfo *allInodes)
1328 struct ViceInodeInfo *ip;
1330 for (i = 0; i < nVols; i++) {
/* Only the first nSpecialInodes records of each volume are examined. */
1331 ip = allInodes + isp[i].index;
1332 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1333 if (ip[j].u.special.type == VI_LINKTABLE)
1334 return ip[j].inodeNumber;
/*
 * CreateLinkTable
 *
 * Make sure the volume group has a usable, freshly initialised link table
 * and leave the global handle VGLinkH pointing at it.
 *
 * isp - inode summary of the volume the table is created for; its volumeId
 *       and RWvolumeId are passed to IH_CREATE and RWvolumeId is used in
 *       all diagnostics
 * ino - candidate link table inode; tested with VALID_INO() before the
 *       IH_CREATE call below and again after it
 *
 * The table is truncated to sizeof(struct versionStamp) + sizeof(short)
 * and a version stamp (LINKTABLEMAGIC / LINKTABLEVERSION) is written to it.
 * Failures to open, truncate or write end in Abort().
 */
1341 CreateLinkTable(register struct InodeSummary *isp, Inode ino)
1343 struct versionStamp version;
1346 if (!VALID_INO(ino))
1348 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
1349 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1350 if (!VALID_INO(ino))
1352 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1353 isp->RWvolumeId, errno);
/* Bind the global link table handle to the inode and open it. */
1354 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1355 fdP = IH_OPEN(VGLinkH);
1357 Abort("Can't open link table for volume %u (error = %d)\n",
1358 isp->RWvolumeId, errno);
/* Size the file to hold the version stamp plus one short. */
1360 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1361 Abort("Can't truncate link table for volume %u (error = %d)\n",
1362 isp->RWvolumeId, errno);
1364 version.magic = LINKTABLEMAGIC;
1365 version.version = LINKTABLEVERSION;
/* NOTE(review): the Abort below reports a failed FDH_WRITE, yet its text
 * says "truncate"; the message looks copied from the FDH_TRUNC case. */
1367 if (FDH_WRITE(fdP, (char *)&version, sizeof(version))
1369 Abort("Can't truncate link table for volume %u (error = %d)\n",
1370 isp->RWvolumeId, errno);
1372 FDH_REALLYCLOSE(fdP);
1374 /* If the volume summary exits (i.e., the V*.vol header file exists),
1375 * then set this inode there as well.
1377 if (isp->volSummary)
1378 isp->volSummary->header.linkTable = ino;
/*
 * Thread body: unpack the SVGParms_t passed through the untyped arg and run
 * DoSalvageVolumeGroup() on the inode summary array and count it carries.
 * NOTE(review): presumably this is nt_SVG, the start routine handed to
 * pthread_create() by SalvageVolumeGroup -- confirm.
 */
1387 SVGParms_t *parms = (SVGParms_t *) arg;
1388 DoSalvageVolumeGroup(parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * SalvageVolumeGroup
 *
 * Salvage one volume group on a separate thread and wait for it.
 *
 * isp   - inode summaries of the volume group
 * nVols - number of entries at isp
 *
 * The arguments are packed into parms, a joinable thread is started on
 * nt_SVG with &parms as its argument, and pthread_join() is called on it.
 * Each pthread setup failure has its own log message naming
 * isp->RWvolumeId.
 */
1393 SalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1396 pthread_attr_t tattr;
1400 /* Initialize per volume global variables, even if later code does so */
1404 memset(&VolInfo, 0, sizeof(VolInfo));
/* Arguments handed to the thread. */
1406 parms.svgp_inodeSummaryp = isp;
1407 parms.svgp_count = nVols;
1408 code = pthread_attr_init(&tattr);
1410 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
/* The thread must be joinable because it is joined below. */
1414 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1416 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1419 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1421 Log("Failed to create thread to salvage volume group %u\n",
/* The result of the join is deliberately discarded. */
1425 (void)pthread_join(tid, NULL);
1427 #endif /* AFS_NT40_ENV */
/*
 * DoSalvageVolumeGroup
 *
 * Salvage one volume group, described by the nVols inode summaries that
 * start at isp.  isp[0] is the entry tested for being the read/write
 * volume.
 *
 * Stages visible in this function:
 *   - unless ForceSalvage is set, QuickCheck() is consulted first;
 *   - when canfork is set and debug is not, Fork() is called and the
 *     process that gets a non-zero result calls Wait();
 *   - the group's ViceInodeInfo records are read from inodeFd into a
 *     malloc'd buffer;
 *   - under AFS_NAMEI_ENV the link table is located with FindLinkHandle()
 *     and opened, or recreated with CreateLinkTable() when it is missing or
 *     cannot be opened;
 *   - volumes are processed from index nVols - 1 down to salvageTo, each in
 *     two passes (check = 1, then check = 0) through
 *     SalvageVolumeHeaderFile() and SalvageVnodes();
 *   - link counts that are still non-zero are applied with IH_DEC / IH_INC;
 *   - SalvageVolume() is run and VGLinkH is released.
 */
1430 DoSalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1432 struct ViceInodeInfo *inodes, *allInodes, *ip;
1433 int i, totalInodes, size, salvageTo;
1437 int dec_VGLinkH = 0;
1439 FdHandle_t *fdP = NULL;
/* A read/write volume is present when the first summary is its own RW
 * parent and has at least one special inode. */
1442 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1443 && isp->nSpecialInodes > 0);
1444 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1445 if (!ForceSalvage && QuickCheck(isp, nVols))
1448 if (ShowMounts && !haveRWvolume)
1450 if (canfork && !debug && Fork() != 0) {
1451 (void)Wait("Salvage volume group");
/* Total number of inode records across every volume of the group. */
1454 for (i = 0, totalInodes = 0; i < nVols; i++)
1455 totalInodes += isp[i].nInodes;
1456 size = totalInodes * sizeof(struct ViceInodeInfo);
/* NOTE(review): the malloc result is used on the very next line without a
 * NULL check. */
1457 inodes = (struct ViceInodeInfo *)malloc(size);
1458 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1459 * for the partition, if all the inodes
1460 * had been read into memory */
1462 (inodeFd, isp->index * sizeof(struct ViceInodeInfo),
/* NOTE(review): this read() is an argument of assert(); a build with
 * NDEBUG defined would not perform it at all. */
1464 assert(read(inodeFd, inodes, size) == size);
1466 /* Don't try to salvage a read write volume if there isn't one on this
1468 salvageTo = haveRWvolume ? 0 : 1;
1470 #ifdef AFS_NAMEI_ENV
1471 ino = FindLinkHandle(isp, nVols, allInodes);
1472 if (VALID_INO(ino)) {
1473 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1474 fdP = IH_OPEN(VGLinkH);
1476 if (!VALID_INO(ino) || fdP == NULL) {
1477 Log("%s link table for volume %u.\n",
1478 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1480 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1483 struct ViceInodeInfo *ip;
1484 CreateLinkTable(isp, ino);
1485 fdP = IH_OPEN(VGLinkH);
1486 /* Sync fake 1 link counts to the link table, now that it exists */
/* Only the non-special inodes (index nSpecialInodes and up) of each
 * volume get a count recorded. */
1488 for (i = 0; i < nVols; i++) {
1489 ip = allInodes + isp[i].index;
1490 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1492 nt_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1494 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1502 FDH_REALLYCLOSE(fdP);
1504 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1507 /* Salvage in reverse order--read/write volume last; this way any
1508 * Inodes not referenced by the time we salvage the read/write volume
1509 * can be picked up by the read/write volume */
1510 /* ACTUALLY, that's not done right now--the inodes just vanish */
1511 for (i = nVols - 1; i >= salvageTo; i--) {
1513 struct InodeSummary *lisp = &isp[i];
1514 #ifdef AFS_NAMEI_ENV
1515 /* If only the RO is present on this partition, the link table
1516 * shows up as a RW volume special file. Need to make sure the
1517 * salvager doesn't try to salvage the non-existent RW.
1519 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1520 /* If this only special inode is the link table, continue */
1521 if (inodes->u.special.type == VI_LINKTABLE) {
1528 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1529 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1530 /* Check inodes twice. The second time do things seriously. This
1531 * way the whole RO volume can be deleted, below, if anything goes wrong */
1532 for (check = 1; check >= 0; check--) {
1534 if (SalvageVolumeHeaderFile(lisp, allInodes, rw, check, &deleteMe)
1536 MaybeZapVolume(lisp, "Volume header", deleteMe, check);
1537 if (rw && deleteMe) {
1538 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1539 * volume won't be called */
1545 if (rw && check == 1)
1547 if (SalvageVnodes(isp, lisp, allInodes, check) == -1) {
1548 MaybeZapVolume(lisp, "Vnode index", 0, check);
1554 /* Fix actual inode counts */
1556 Log("totalInodes %d\n",totalInodes);
/* totalInodes doubles as the loop counter here and is consumed. */
1557 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1558 static int TraceBadLinkCounts = 0;
1559 #ifdef AFS_NAMEI_ENV
/* The link table inode itself is set aside: its adjustment is its link
 * count minus VGLinkH_cnt, applied after this loop. */
1560 if (VGLinkH->ih_ino == ip->inodeNumber) {
1561 dec_VGLinkH = ip->linkCount - VGLinkH_cnt;
1562 VGLinkH_p1 = ip->u.param[0];
1563 continue; /* Deal with this last. */
1566 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1567 TraceBadLinkCounts--; /* Limit reports, per volume */
1568 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(NULL, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
/* A count that is still positive is released with IH_DEC. */
1570 while (ip->linkCount > 0) {
1571 /* below used to assert, not break */
1573 if (IH_DEC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1574 Log("idec failed. inode %s errno %d\n",
1575 PrintInode(NULL, ip->inodeNumber), errno);
/* A negative count is made up with IH_INC. */
1581 while (ip->linkCount < 0) {
1582 /* these used to be asserts */
1584 if (IH_INC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1585 Log("iinc failed. inode %s errno %d\n",
1586 PrintInode(NULL, ip->inodeNumber), errno);
1593 #ifdef AFS_NAMEI_ENV
/* Now apply the adjustment that was set aside for the link table inode. */
1594 while (dec_VGLinkH > 0) {
1595 if (IH_DEC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1596 Log("idec failed on link table, errno = %d\n", errno);
1600 while (dec_VGLinkH < 0) {
1601 if (IH_INC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1602 Log("iinc failed on link table, errno = %d\n", errno);
1609 /* Directory consistency checks on the rw volume */
1611 SalvageVolume(isp, VGLinkH);
1612 IH_RELEASE(VGLinkH);
1614 if (canfork && !debug) {
/*
 * QuickCheck
 *
 * Inspect the volume info header of each volume in a group to decide
 * whether the group needs a real salvage.
 *
 * isp   - inode summaries of the volume group
 * nVols - number of entries at isp
 *
 * A header passes when it can be read in full, carries VOLUMEINFOMAGIC, has
 * dontSalvage == DONT_SALVAGE, and has needsSalvaged and destroyMe both 0.
 * A passing header that is still marked inUse is changed to inUse = 0 and
 * inService = 1 and then written back.
 *
 * NOTE(review): the caller treats a non-zero result (when ForceSalvage is
 * off) as the condition for its shortcut -- presumably "no salvage needed";
 * confirm the exact return values.
 */
1621 QuickCheck(register struct InodeSummary *isp, int nVols)
1623 /* Check headers BEFORE forking */
1627 for (i = 0; i < nVols; i++) {
1628 struct VolumeSummary *vs = isp[i].volSummary;
1629 VolumeDiskData volHeader;
1631 /* Don't salvage just because phantom rw volume is there... */
1632 /* (If a read-only volume exists, read/write inodes must also exist) */
1633 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
/* Read the volume info file named by the V*.vol header summary. */
1637 IH_INIT(h, fileSysDevice, vs->header.parent, vs->header.volumeInfo);
1638 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
1639 == sizeof(volHeader)
1640 && volHeader.stamp.magic == VOLUMEINFOMAGIC
1641 && volHeader.dontSalvage == DONT_SALVAGE
1642 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
/* Clear a stale in-use mark and put the volume back in service. */
1643 if (volHeader.inUse != 0) {
1644 volHeader.inUse = 0;
1645 volHeader.inService = 1;
1647 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
1648 != sizeof(volHeader)) {
1664 /* SalvageVolumeHeaderFile
1666 * Salvage the top level V*.vol header file. Make sure the special files
1667 * exist and that there are no duplicates.
1669 * Calls SalvageHeader for each possible type of volume special file.
/*
 * Parameters:
 *   isp      - inode summary of the volume; volumeId and RWvolumeId seed the
 *              header that is rebuilt in tempHeader
 *   inodes   - inode records, indexed here as inodes[isp->index + i]
 *   RW       - non-zero for the read/write volume; when it is zero on a
 *              non-check pass with a volume summary present,
 *              ClearROInUseBit() is called
 *   check    - non-zero for a check-only pass (link counts are left alone
 *              and the "damaged" message omits "; repairing")
 *   deleteMe - passed through to SalvageHeader()
 *
 * The special inodes are first screened for duplicate and out-of-range
 * types, then each known type is handed to SalvageHeader().  Finally the
 * V*.vol header file is created if the volume has no summary, or reopened
 * for rewriting if the stored header differs from tempHeader, and the
 * header is written out.
 *
 * NOTE(review): when the summary is missing, the malloc'd VolumeSummary is
 * dereferenced on the next line with no NULL check, and a failed open of
 * the header file is only caught by assert().
 */
1673 SalvageVolumeHeaderFile(register struct InodeSummary *isp,
1674 register struct ViceInodeInfo *inodes, int RW,
1675 int check, int *deleteMe)
1679 register struct ViceInodeInfo *ip;
1680 int allinodesobsolete = 1;
1681 struct VolumeDiskHeader diskHeader;
/* Start from a clean header carrying only the stamp and the two ids. */
1685 memset(&tempHeader, 0, sizeof(tempHeader));
1686 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
1687 tempHeader.stamp.version = VOLUMEHEADERVERSION;
1688 tempHeader.id = isp->volumeId;
1689 tempHeader.parent = isp->RWvolumeId;
1690 /* Check for duplicates (inodes are sorted by type field) */
1691 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
1692 ip = &inodes[isp->index + i];
1693 if (ip->u.special.type == (ip + 1)->u.special.type) {
1695 Log("Duplicate special inodes in volume header; salvage of volume %u aborted\n", isp->volumeId);
/* Record each recognised, non-obsolete special inode in the slot that
 * stuff[type - 1] points at. */
1699 for (i = 0; i < isp->nSpecialInodes; i++) {
1700 ip = &inodes[isp->index + i];
1701 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
1703 Log("Rubbish header inode\n");
1706 Log("Rubbish header inode; deleted\n");
1707 } else if (!stuff[ip->u.special.type - 1].obsolete) {
1708 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
1709 if (!check && ip->u.special.type != VI_LINKTABLE)
1710 ip->linkCount--; /* Keep the inode around */
1711 allinodesobsolete = 0;
1715 if (allinodesobsolete) {
1722 VGLinkH_cnt++; /* one for every header. */
1724 if (!RW && !check && isp->volSummary) {
1725 ClearROInUseBit(isp->volSummary);
1729 for (i = 0; i < MAXINODETYPE; i++) {
1730 if (stuff[i].inodeType == VI_LINKTABLE) {
1731 /* Gross hack: SalvageHeader does a bcmp on the volume header.
1732 * And we may have recreated the link table earlier, so set the
1733 * RW header as well.
1735 if (VALID_INO(VGLinkH->ih_ino)) {
1736 *stuff[i].inode = VGLinkH->ih_ino;
1740 if (SalvageHeader(&stuff[i], isp, check, deleteMe) == -1 && check)
1744 if (isp->volSummary == NULL) {
1746 char headerName[64];
1747 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_cast_uint32(isp->volumeId));
1748 (void)afs_snprintf(path, sizeof path, "%s/%s", fileSysPath, headerName);
1750 Log("No header file for volume %u\n", isp->volumeId);
1754 Log("No header file for volume %u; %screating %s\n",
1755 isp->volumeId, (Testing ? "it would have been " : ""),
1757 headerFd = afs_open(path, O_RDWR | O_CREAT | O_TRUNC, 0644);
1758 assert(headerFd != -1);
1759 isp->volSummary = (struct VolumeSummary *)
1760 malloc(sizeof(struct VolumeSummary));
1761 isp->volSummary->fileName = ToString(headerName);
1764 char headerName[64];
1765 /* hack: these two fields are obsolete... */
1766 isp->volSummary->header.volumeAcl = 0;
1767 isp->volSummary->header.volumeMountTable = 0;
/* The stored header is compared against the rebuilt one. */
1770 (&isp->volSummary->header, &tempHeader,
1771 sizeof(struct VolumeHeader))) {
1772 /* We often remove the name before calling us, so we make a fake one up */
/* NOTE(review): strcpy into the 64-byte headerName is not length-checked. */
1773 if (isp->volSummary->fileName) {
1774 strcpy(headerName, isp->volSummary->fileName);
1776 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_cast_uint32(isp->volumeId));
1777 isp->volSummary->fileName = ToString(headerName);
1779 (void)afs_snprintf(path, sizeof path, "%s/%s", fileSysPath, headerName);
1781 Log("Header file %s is damaged or no longer valid%s\n", path,
1782 (check ? "" : "; repairing"));
/* Reopen the existing header file, truncating it for the rewrite. */
1786 headerFd = afs_open(path, O_RDWR | O_TRUNC, 0644);
1787 assert(headerFd != -1);
/* The in-memory summary takes over the rebuilt header. */
1791 memcpy(&isp->volSummary->header, &tempHeader,
1792 sizeof(struct VolumeHeader));
1795 Log("It would have written a new header file for volume %u\n",
/* Convert to the on-disk layout and write it; a short write is logged. */
1798 VolumeHeaderToDisk(&diskHeader, &tempHeader);
1799 if (write(headerFd, &diskHeader, sizeof(struct VolumeDiskHeader))
1800 != sizeof(struct VolumeDiskHeader)) {
1801 Log("Couldn't rewrite volume header file!\n");
/* Bind the summary's volume info handle to the volumeInfo inode. */
1808 IH_INIT(isp->volSummary->volumeInfoHandle, fileSysDevice, isp->RWvolumeId,
1809 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader
 *
 * Validate one special file of a volume header and rebuild it when it is
 * missing or corrupt.
 *
 * sp    - descriptor of the special file: its inodeType, a pointer to the
 *         inode number slot, a description for messages, the expected size
 *         and the expected version stamp
 * isp   - inode summary of the volume the file belongs to
 * check - non-zero for a check-only pass; recreating a header in this mode
 *         is reported as an internal error
 *
 * A zero inode number leads to IH_CREATE of a new special inode.  The file
 * is then opened and read, and its magic compared with sp->stamp.magic.
 * When it is recreated (and Testing is off) it is truncated and rewritten:
 * a VI_VOLINFO file gets a placeholder volume record named "bogus.<id>",
 * the rewrite for any other type is just the version stamp.  For
 * VI_VOLINFO the resulting record is also copied into the global VolInfo
 * and a summary line is logged.
 */
1814 SalvageHeader(register struct stuff *sp, struct InodeSummary *isp, int check,
1818 VolumeDiskData volumeInfo;
1819 struct versionStamp fileHeader;
1828 #ifndef AFS_NAMEI_ENV
1829 if (sp->inodeType == VI_LINKTABLE)
/* A zero slot means the volume header has no inode for this special file. */
1832 if (*(sp->inode) == 0) {
1834 Log("Missing inode in volume header (%s)\n", sp->description);
1838 Log("Missing inode in volume header (%s); %s\n", sp->description,
1839 (Testing ? "it would have recreated it" : "recreating"));
1842 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
1843 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
1844 if (!VALID_INO(*(sp->inode)))
1846 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
1847 sp->description, errno);
1852 IH_INIT(specH, fileSysDevice, isp->RWvolumeId, *(sp->inode));
1853 fdP = IH_OPEN(specH);
/* With OKToZap set, a failed open whose errno BadError() flags takes the
 * destroy-the-volume path instead of aborting. */
1854 if (OKToZap && (fdP == NULL) && BadError(errno)) {
1855 /* bail out early and destroy the volume */
1857 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
1864 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
1865 sp->description, errno);
/* The file must yield exactly sp->size bytes and carry the expected magic. */
1868 && (FDH_READ(fdP, (char *)&header, sp->size) != sp->size
1869 || header.fileHeader.magic != sp->stamp.magic)) {
1871 Log("Part of the header (%s) is corrupted\n", sp->description);
1872 FDH_REALLYCLOSE(fdP);
1876 Log("Part of the header (%s) is corrupted; recreating\n",
/* A volume info file flagged DESTROY_ME is handled separately. */
1880 if (sp->inodeType == VI_VOLINFO
1881 && header.volumeInfo.destroyMe == DESTROY_ME) {
1884 FDH_REALLYCLOSE(fdP);
1888 if (recreate && !Testing) {
1891 ("Internal error: recreating volume header (%s) in check mode\n",
1893 code = FDH_TRUNC(fdP, 0);
1895 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
1896 sp->description, errno);
1898 /* The following code should be moved into vutil.c */
1899 if (sp->inodeType == VI_VOLINFO) {
/* Build a placeholder volume record: out of service, not blessed, with a
 * made-up name and the current time as creation date. */
1901 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
1902 header.volumeInfo.stamp = sp->stamp;
1903 header.volumeInfo.id = isp->volumeId;
1904 header.volumeInfo.parentId = isp->RWvolumeId;
1905 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
1906 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
1907 isp->volumeId, isp->volumeId);
1908 header.volumeInfo.inService = 0;
1909 header.volumeInfo.blessed = 0;
1910 /* The + 1000 is a hack in case there are any files out in venus caches */
1911 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
1912 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
1913 header.volumeInfo.needsCallback = 0;
1914 gettimeofday(&tp, 0);
1915 header.volumeInfo.creationDate = tp.tv_sec;
1916 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
1918 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
1919 sp->description, errno);
1922 FDH_WRITE(fdP, (char *)&header.volumeInfo,
1923 sizeof(header.volumeInfo));
1924 if (code != sizeof(header.volumeInfo)) {
1927 ("Unable to write volume header file (%s) (errno = %d)\n",
1928 sp->description, errno);
1929 Abort("Unable to write entire volume header file (%s)\n",
/* Any other special file is rewritten as just its version stamp. */
1933 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
1935 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
1936 sp->description, errno);
1938 code = FDH_WRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp));
1939 if (code != sizeof(sp->stamp)) {
1942 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
1943 sp->description, errno);
1945 ("Unable to write entire version stamp in volume header file (%s)\n",
1950 FDH_REALLYCLOSE(fdP);
/* Remember the volume record globally and log when it was last updated,
 * or when it was created if updateDate is zero. */
1952 if (sp->inodeType == VI_VOLINFO) {
1953 VolInfo = header.volumeInfo;
1956 if (VolInfo.updateDate) {
1957 strcpy(update, TimeStamp(VolInfo.updateDate, 0));
1959 Log("%s (%u) %supdated %s\n", VolInfo.name, VolInfo.id,
1960 (Testing ? "it would have been " : ""), update);
1962 strcpy(update, TimeStamp(VolInfo.creationDate, 0));
1964 Log("%s (%u) not updated (created %s)\n", VolInfo.name,
1965 VolInfo.id, update);
/*
 * SalvageVnodes
 *
 * Salvage both vnode indexes (small, then large) of one volume against the
 * non-special inodes of the group's read/write summary.
 *
 * rwIsp   - summary whose inodes are matched against; its special inodes
 *           are skipped
 * thisIsp - summary of the volume whose indexes are salvaged; must have a
 *           volSummary, which is dereferenced unconditionally
 * inodes  - inode records, indexed from rwIsp->index
 * check   - passed through to SalvageIndex()
 *
 * Returns 0 when both SalvageIndex() results are 0, otherwise -1.
 */
1975 SalvageVnodes(register struct InodeSummary *rwIsp,
1976 register struct InodeSummary *thisIsp,
1977 register struct ViceInodeInfo *inodes, int check)
1979 int ilarge, ismall, ioffset, RW, nInodes;
1980 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
/* The volume counts as read/write when it is the RW summary itself. */
1983 RW = (rwIsp == thisIsp);
1984 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
1986 SalvageIndex(thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
1987 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
/* A failed small index is acted on at once only during a check pass. */
1988 if (check && ismall == -1)
1991 SalvageIndex(thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
1992 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
1993 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex
 *
 * Walk one vnode index file and reconcile every in-use vnode with the
 * inode records found on disk.
 *
 * ino        - inode of the vnode index file to open
 * class      - vnode class; selects the VnodeClassInfo entry that supplies
 *              the slot size and the expected vnode magic
 * RW         - non-zero when the index belongs to the read/write volume
 * ip         - inode records to match against; the loops below compare
 *              their u.vnode.vnodeNumber with the current vnode number
 * nInodes    - number of records at ip
 * volSummary - supplies header.id and header.parent for this volume
 * check      - non-zero for a check-only pass; the function asserts that no
 *              vnode was modified in that mode
 *
 * For a vnode with a matching inode record the uniquifier, data version,
 * inode number and length are compared and corrected.  A vnode without a
 * matching record is zeroed.  A modified vnode is written back only when
 * Testing is off, and VolumeChanged is then set.
 */
1997 SalvageIndex(Inode ino, VnodeClass class, int RW,
1998 register struct ViceInodeInfo *ip, int nInodes,
1999 struct VolumeSummary *volSummary, int check)
2001 VolumeId volumeNumber;
2002 char buf[SIZEOF_LARGEDISKVNODE];
2003 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2005 StreamHandle_t *file;
2006 struct VnodeClassInfo *vcp;
2008 afs_fsize_t vnodeLength;
2009 int vnodeIndex, nVnodes;
2010 afs_ino_str_t stmp1, stmp2;
2014 volumeNumber = volSummary->header.id;
/* Both the inode open and the stream open are hard requirements. */
2015 IH_INIT(handle, fileSysDevice, volSummary->header.parent, ino);
2016 fdP = IH_OPEN(handle);
2017 assert(fdP != NULL);
2018 file = FDH_FDOPEN(fdP, "r+");
2019 assert(file != NULL);
2020 vcp = &VnodeClassInfo[class];
2021 size = OS_SIZE(fdP->fd_fd);
/* The file is an array of diskSize-byte slots.  The first slot is not
 * counted as a vnode, hence the -1 and the seek past it below. */
2023 nVnodes = (size / vcp->diskSize) - 1;
2025 assert((nVnodes + 1) * vcp->diskSize == size);
/* NOTE(review): the seek is an argument of assert(); a build with NDEBUG
 * defined would not perform it. */
2026 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
/* One slot is read per iteration; vNull slots are skipped. */
2030 for (vnodeIndex = 0;
2031 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2032 nVnodes--, vnodeIndex++) {
2033 if (vnode->type != vNull) {
2034 int vnodeChanged = 0;
2035 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2036 /* Log programs that belong to root (potentially suid root);
2037 * don't bother for read-only or backup volumes */
2038 #ifdef notdef /* This is done elsewhere */
2039 if (ShowRootFiles && RW && vnode->owner == 0 && vnodeNumber != 1)
2040 Log("OWNER IS ROOT %s %u dir %u vnode %u author %u owner %u mode %o\n", VolInfo.name, volumeNumber, vnode->parent, vnodeNumber, vnode->author, vnode->owner, vnode->modeBits);
/* Vnode that records no inode at all. */
2042 if (VNDISK_GET_INO(vnode) == 0) {
2044 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2045 memset(vnode, 0, vcp->diskSize);
2049 if (vcp->magic != vnode->vnodeMagic) {
2050 /* bad magic #, probably partially created vnode */
2051 Log("Partially allocated vnode %d deleted.\n",
2053 memset(vnode, 0, vcp->diskSize);
2057 /* ****** Should do a bit more salvage here: e.g. make sure
2058 * vnode type matches what it should be given the index */
/* Loops while the current inode record belongs to a lower vnode number. */
2059 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2060 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2061 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2062 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2069 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2070 /* The following doesn't work, because the version number
2071 * is not maintained correctly by the file server */
2072 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2073 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2075 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2081 /* For RW volume, look for vnode with matching inode number;
2082 * if no such match, take the first determined by our sort
2084 register struct ViceInodeInfo *lip = ip;
2085 register int lnInodes = nInodes;
2087 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2088 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2097 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2098 /* "Matching" inode */
/* vu and vd come from the vnode, iu and id from the inode record. */
2102 vu = vnode->uniquifier;
2103 iu = ip->u.vnode.vnodeUniquifier;
2104 vd = vnode->dataVersion;
2105 id = ip->u.vnode.inodeDataVersion;
2107 * Because of the possibility of the uniquifier overflows (> 4M)
2108 * we compare them modulo the low 22-bits; we shouldn't worry
2109 * about mismatching since they shouldn't to many old
2110 * uniquifiers of the same vnode...
2112 if (IUnique(vu) != IUnique(iu)) {
2114 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2117 vnode->uniquifier = iu;
2118 #ifdef AFS_3DISPARES
2119 vnode->dataVersion = (id >= vd ?
2122 1887437 ? vd : id) :
2125 1887437 ? id : vd));
2127 #if defined(AFS_SGI_EXMAG)
2128 vnode->dataVersion = (id >= vd ?
2131 15099494 ? vd : id) :
2134 15099494 ? id : vd));
2136 vnode->dataVersion = (id > vd ? id : vd);
2137 #endif /* AFS_SGI_EXMAG */
2138 #endif /* AFS_3DISPARES */
2141 /* don't bother checking for vd > id any more, since
2142 * partial file transfers always result in this state,
2143 * and you can't do much else anyway (you've already
2144 * found the best data you can) */
2145 #ifdef AFS_3DISPARES
2146 if (!vnodeIsDirectory(vnodeNumber)
2147 && ((vd < id && (id - vd) < 1887437)
2148 || ((vd > id && (vd - id) > 1887437)))) {
2150 #if defined(AFS_SGI_EXMAG)
2151 if (!vnodeIsDirectory(vnodeNumber)
2152 && ((vd < id && (id - vd) < 15099494)
2153 || ((vd > id && (vd - id) > 15099494)))) {
2155 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2156 #endif /* AFS_SGI_EXMAG */
2159 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2160 vnode->dataVersion = id;
/* The inode number stored in the vnode disagrees with the matched record. */
2165 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2168 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2170 VNDISK_SET_INO(vnode, ip->inodeNumber);
2175 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2177 VNDISK_SET_INO(vnode, ip->inodeNumber);
/* The length stored in the vnode must equal the inode's byte count. */
2180 VNDISK_GET_LEN(vnodeLength, vnode);
2181 if (ip->byteCount != vnodeLength) {
2184 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2189 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2190 VNDISK_SET_LEN(vnode, ip->byteCount);
2194 ip->linkCount--; /* Keep the inode around */
2197 } else { /* no matching inode */
2198 if (VNDISK_GET_INO(vnode) != 0
2199 || vnode->type == vDirectory) {
2200 /* No matching inode--get rid of the vnode */
2202 if (VNDISK_GET_INO(vnode)) {
2204 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)));
2208 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2213 if (VNDISK_GET_INO(vnode)) {
2215 time_t serverModifyTime = vnode->serverModifyTime;
2216 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2220 time_t serverModifyTime = vnode->serverModifyTime;
2221 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2224 memset(vnode, 0, vcp->diskSize);
2227 /* Should not reach here because we checked for
2228 * (inodeNumber == 0) above. And where we zero the vnode,
2229 * we also goto vnodeDone.
2233 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2237 } /* VNDISK_GET_INO(vnode) != 0 */
/* A check pass must never have modified a vnode. */
2239 assert(!(vnodeChanged && check));
/* Write the corrected vnode back to its slot unless only testing. */
2240 if (vnodeChanged && !Testing) {
2242 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2243 (char *)vnode, vcp->diskSize)
2245 VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber
 *
 * Look up the in-memory VnodeEssence for a vnode number.
 *
 * The vnode number is split into its class (which selects the vnodeInfo
 * entry) and its bit number (the index into that entry's vnodes array).
 *
 * Returns a pointer to the entry, or NULL when the bit number is not below
 * that class's nVnodes.
 */
2256 struct VnodeEssence *
2257 CheckVnodeNumber(VnodeId vnodeNumber)
2260 struct VnodeInfo *vip;
2263 class = vnodeIdToClass(vnodeNumber);
2264 vip = &vnodeInfo[class];
2265 offset = vnodeIdToBitNumber(vnodeNumber);
2266 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite
 *
 * Give a directory a private copy of its data before it is modified.
 *
 * dir - summary of the directory; dir->copied and the global Testing are
 *       tested first, before any work is done
 *
 * The directory's vnode is read from the large vnode index and a new inode
 * is created with IH_CREATE (the vnode's dataVersion is bumped as part of
 * that call's argument list).  CopyInode() copies the old inode into the
 * new one, the vnode is pointed at the new inode and written back, and
 * dir->dirHandle is re-targeted with SetSalvageDirHandle().
 *
 * Every step is guarded by assert().
 * NOTE(review): CopyInode() is itself called inside assert(), so a build
 * with NDEBUG defined would skip the copy.
 */
2270 CopyOnWrite(register struct DirSummary *dir)
2272 /* Copy the directory unconditionally if we are going to change it:
2273 * not just if was cloned.
2275 struct VnodeDiskObject vnode;
2276 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2277 Inode oldinode, newinode;
2280 if (dir->copied || Testing)
2282 DFlush(); /* Well justified paranoia... */
/* Load the directory's vnode; a short read aborts via assert(). */
2285 IH_IREAD(vnodeInfo[vLarge].handle,
2286 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2288 assert(code == sizeof(vnode));
2289 oldinode = VNDISK_GET_INO(&vnode);
2290 /* Increment the version number by a whole lot to avoid problems with
2291 * clients that were promised new version numbers--but the file server
2292 * crashed before the versions were written to disk.
2295 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2296 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2298 assert(VALID_INO(newinode));
2299 assert(CopyInode(fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2301 VNDISK_SET_INO(&vnode, newinode);
2303 IH_IWRITE(vnodeInfo[vLarge].handle,
2304 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2306 assert(code == sizeof(vnode));
2308 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2309 fileSysDevice, newinode);
2310 /* Don't delete the original inode right away, because the directory is
2311 * still being scanned.
2317 * This function should either successfully create a new dir, or give up
2318 * and leave things the way they were. In particular, if it fails to write
2319 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage
 *
 * Rebuild a damaged directory into a new inode and switch the directory's
 * vnode over to it.
 *
 * dir - summary of the directory being salvaged; on success its dirHandle
 *       is replaced by the handle of the rebuilt directory
 *
 * The directory's vnode is read from the large vnode index, a new inode is
 * created, and DirSalvage() rebuilds the entries into it, using the
 * directory's own vnode number and the parent recorded in the vnode (or
 * the directory itself when no parent is recorded).  If the rebuild fails,
 * or DirOK() rejects the result, the new inode is released with IH_DEC.
 * Otherwise the vnode gets the new inode number and length and is written
 * back, the data is synced, and the old inode is released with IH_DEC.
 */
2323 CopyAndSalvage(register struct DirSummary *dir)
2325 struct VnodeDiskObject vnode;
2326 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2327 Inode oldinode, newinode;
2332 afs_int32 parentUnique = 1;
2333 struct VnodeEssence *vnodeEssence;
2338 Log("Salvaging directory %u...\n", dir->vnodeNumber);
/* Load the directory's vnode; a short read aborts via assert(). */
2340 IH_IREAD(vnodeInfo[vLarge].handle,
2341 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2343 assert(lcode == sizeof(vnode));
2344 oldinode = VNDISK_GET_INO(&vnode);
2345 /* Increment the version number by a whole lot to avoid problems with
2346 * clients that were promised new version numbers--but the file server
2347 * crashed before the versions were written to disk.
2350 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2351 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2353 assert(VALID_INO(newinode));
2354 SetSalvageDirHandle(&newdir, dir->rwVid, fileSysDevice, newinode);
2356 /* Assign . and .. vnode numbers from dir and vnode.parent.
2357 * The uniquifier for . is in the vnode.
2358 * The uniquifier for .. might be set to a bogus value of 1 and
2359 * the salvager will later clean it up.
2361 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(vnode.parent))) {
2362 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2365 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2367 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2372 /* didn't really build the new directory properly, let's just give up. */
2373 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
/* NOTE(review): code was just overwritten by IH_DEC, so the value logged
 * below is the IH_DEC result, not what the directory salvage returned. */
2374 Log("Directory salvage returned code %d, continuing.\n", code);
/* NOTE(review): this message has no trailing newline, unlike the others. */
2376 Log("also failed to decrement link count on new inode");
2380 Log("Checking the results of the directory salvage...\n");
2381 if (!DirOK(&newdir)) {
2382 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2383 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
/* The rebuilt directory is good: point the vnode at it and store its
 * length. */
2388 VNDISK_SET_INO(&vnode, newinode);
2389 length = Length(&newdir);
2390 VNDISK_SET_LEN(&vnode, length);
2392 IH_IWRITE(vnodeInfo[vLarge].handle,
2393 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2395 assert(lcode == sizeof(vnode));
2398 nt_sync(fileSysDevice);
2400 sync(); /* this is slow, but hopefully rarely called. We don't have
2401 * an open FD on the file itself to fsync.
2405 vnodeInfo[vLarge].handle->ih_synced = 1;
2407 /* make sure old directory file is really closed */
/* NOTE(review): the IH_OPEN result is passed straight to FDH_REALLYCLOSE
 * with no NULL check -- confirm the macro tolerates a failed open. */
2408 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2409 FDH_REALLYCLOSE(fdP);
/* Drop the reference to the old directory inode. */
2411 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2413 dir->dirHandle = newdir;
2417 JudgeEntry(struct DirSummary *dir, char *name, VnodeId vnodeNumber,
2420 struct VnodeEssence *vnodeEssence;
2421 afs_int32 dirOrphaned, todelete;
2423 dirOrphaned = IsVnodeOrphaned(dir->vnodeNumber);
2425 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2426 if (vnodeEssence == NULL) {
2428 Log("dir vnode %u: invalid entry deleted: %s/%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2432 assert(Delete(&dir->dirHandle, name) == 0);
2437 #ifndef AFS_NAMEI_ENV
2438 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2439 * mount inode for the partition. If this inode were deleted, it would crash
2442 if (vnodeEssence->InodeNumber == 0) {
2443 Log("dir vnode %d: invalid entry: %s/%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2446 assert(Delete(&dir->dirHandle, name) == 0);
2453 if (!(vnodeNumber & 1) && !Showmode
2454 && !(vnodeEssence->count || vnodeEssence->unique
2455 || vnodeEssence->modeBits)) {
2456 Log("dir vnode %u: invalid entry: %s/%s (vnode %u, unique %u)%s\n",
2457 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
2458 vnodeNumber, unique,
2459 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
2464 assert(Delete(&dir->dirHandle, name) == 0);
2470 /* Check if the Uniquifiers match. If not, change the directory entry
2471 * so its unique matches the vnode unique. Delete if the unique is zero
2472 * or if the directory is orphaned.
2474 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
2475 if (!vnodeEssence->unique
2476 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
2477 /* This is an orphaned directory. Don't delete the . or ..
2478 * entry. Otherwise, it will get created in the next
2479 * salvage and deleted again here. So Just skip it.
2484 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
2487 Log("dir vnode %u: %s/%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
2491 fid.Vnode = vnodeNumber;
2492 fid.Unique = vnodeEssence->unique;
2494 assert(Delete(&dir->dirHandle, name) == 0);
2496 assert(Create(&dir->dirHandle, name, &fid) == 0);
2499 return 0; /* no need to continue */
2502 if (strcmp(name, ".") == 0) {
2503 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
2506 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
2509 assert(Delete(&dir->dirHandle, ".") == 0);
2510 fid.Vnode = dir->vnodeNumber;
2511 fid.Unique = dir->unique;
2512 assert(Create(&dir->dirHandle, ".", &fid) == 0);
2515 vnodeNumber = fid.Vnode; /* Get the new Essence */
2516 unique = fid.Unique;
2517 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2520 } else if (strcmp(name, "..") == 0) {
2523 struct VnodeEssence *dotdot;
2524 pa.Vnode = dir->parent;
2525 dotdot = CheckVnodeNumber(pa.Vnode);
2526 assert(dotdot != NULL); /* XXX Should not be assert */
2527 pa.Unique = dotdot->unique;
2529 pa.Vnode = dir->vnodeNumber;
2530 pa.Unique = dir->unique;
2532 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
2534 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
2537 assert(Delete(&dir->dirHandle, "..") == 0);
2538 assert(Create(&dir->dirHandle, "..", &pa) == 0);
2541 vnodeNumber = pa.Vnode; /* Get the new Essence */
2543 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2545 dir->haveDotDot = 1;
2546 } else if (strncmp(name, ".__afs", 6) == 0) {
2548 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
2552 assert(Delete(&dir->dirHandle, name) == 0);
2554 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
2555 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
2558 if (ShowSuid && (vnodeEssence->modeBits & 06000))
2559 Log("FOUND suid/sgid file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
2560 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
2561 && !(vnodeEssence->modeBits & 0111)) {
2567 IH_INIT(ihP, fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
2568 vnodeEssence->InodeNumber);
2571 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
2575 size = FDH_SIZE(fdP);
2577 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, size, vnodeNumber);
2578 FDH_REALLYCLOSE(fdP);
2585 code = FDH_READ(fdP, buf, size);
2588 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
2589 Log("Volume %u (%s) mount point %s/%s to '%s' invalid, %s to symbolic link\n",
2590 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
2591 Testing ? "would convert" : "converted");
2592 vnodeEssence->modeBits |= 0111;
2593 vnodeEssence->changed = 1;
2594 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s/%s to '%s'\n",
2595 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
2596 dir->name ? dir->name : "??", name, buf);
2598 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
2599 dir->vname, vnodeNumber, size, code);
2601 FDH_REALLYCLOSE(fdP);
2604 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
2605 Log("FOUND root file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
2606 if (vnodeIdToClass(vnodeNumber) == vLarge
2607 && vnodeEssence->name == NULL) {
2609 if ((n = (char *)malloc(strlen(name) + 1)))
2611 vnodeEssence->name = n;
2614 /* The directory entry points to the vnode. Check to see if the
2615 * vnode points back to the directory. If not, then let the
2616 * directory claim it (else it might end up orphaned). Vnodes
2617 * already claimed by another directory are deleted from this
2618 * directory: hardlinks to the same vnode are not allowed
2619 * from different directories.
2621 if (vnodeEssence->parent != dir->vnodeNumber) {
2622 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
2623 /* Vnode does not point back to this directory.
2624 * Orphaned dirs cannot claim a file (it may belong to
2625 * another non-orphaned dir).
2628 Log("dir vnode %u: %s/%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
2630 vnodeEssence->parent = dir->vnodeNumber;
2631 vnodeEssence->changed = 1;
2633 /* Vnode was claimed by another directory */
2636 Log("dir vnode %u: %s/%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
2637 } else if (vnodeNumber == 1) {
2638 Log("dir vnode %d: %s/%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
2640 Log("dir vnode %u: %s/%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
2645 assert(Delete(&dir->dirHandle, name) == 0);
2650 /* This directory claims the vnode */
2651 vnodeEssence->claimed = 1;
2653 vnodeEssence->count--;
/*
 * DistilVnodeEssence - read the vnode index of one vnode class and condense
 * every allocated on-disk vnode into an in-memory VnodeEssence record.
 *
 *   rwVId - volume id used to initialise the index inode handle
 *   class - vnode class; selects the vnodeInfo[] and VnodeClassInfo[] entries
 *   ino   - inode of the vnode index file to read
 *   maxu  - in/out: raised to the largest uniquifier found in this index
 *
 * Side effects: fills vnodeInfo[class] (handle, nVnodes, vnodes, inodes for
 * the large class, volumeBlockCount, nAllocatedVnodes).
 *
 * NOTE(review): this excerpt omits some source lines (return type, braces,
 * a few declarations, and whatever happens when nVnodes <= 0), so only the
 * visible statements are described here.
 */
2658 DistilVnodeEssence(VolumeId rwVId, VnodeClass class, Inode ino, Unique * maxu)
2660 register struct VnodeInfo *vip = &vnodeInfo[class];
2661 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
2662 char buf[SIZEOF_LARGEDISKVNODE];
2663 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2665 StreamHandle_t *file;
2670 IH_INIT(vip->handle, fileSysDevice, rwVId, ino);
2671 fdP = IH_OPEN(vip->handle);
2672 assert(fdP != NULL);
2673 file = FDH_FDOPEN(fdP, "r+");
2674 assert(file != NULL);
2675 size = OS_SIZE(fdP->fd_fd);
/* One diskSize-sized slot at the start of the index is not counted as a
 * vnode (presumably the index header -- confirm); the seek below skips it. */
2677 vip->nVnodes = (size / vcp->diskSize) - 1;
2678 if (vip->nVnodes > 0) {
/* NOTE(review): the seek and both allocations below are performed inside
 * assert(); confirm the assert in use here is never compiled out. */
2679 assert((vip->nVnodes + 1) * vcp->diskSize == size);
2680 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
2681 assert((vip->vnodes = (struct VnodeEssence *)
2682 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
2683 if (class == vLarge) {
2684 assert((vip->inodes = (Inode *)
2685 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
2694 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
/* Read at most nVnodes records; the loop also stops on a short read. */
2695 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
2696 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2697 nVnodes--, vnodeIndex++) {
/* Slots of type vNull are unallocated; their essence stays as calloc
 * left it (all zero). */
2698 if (vnode->type != vNull) {
2699 register struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
2700 afs_fsize_t vnodeLength;
2701 vip->nAllocatedVnodes++;
2702 vep->count = vnode->linkCount;
2703 VNDISK_GET_LEN(vnodeLength, vnode);
2704 vep->blockCount = nBlocks(vnodeLength);
2705 vip->volumeBlockCount += vep->blockCount;
2706 vep->parent = vnode->parent;
2707 vep->unique = vnode->uniquifier;
2708 if (*maxu < vnode->uniquifier)
2709 *maxu = vnode->uniquifier;
2710 vep->modeBits = vnode->modeBits;
2711 vep->InodeNumber = VNDISK_GET_INO(vnode);
2712 vep->type = vnode->type;
2713 vep->author = vnode->author;
2714 vep->owner = vnode->owner;
2715 vep->group = vnode->group;
/* Directories are only expected in the large-vnode index; their inode
 * number is also kept in vip->inodes[], which SalvageDir reads. */
2716 if (vnode->type == vDirectory) {
2717 assert(class == vLarge);
2718 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName - build the path of a directory vnode into `path`.
 *
 * When the vnode has a parent, a recorded name, and the parent's essence can
 * be found, the parent's path is produced first by recursion and vp->name is
 * then appended to it.
 *
 * NOTE(review): the base case, the separator handling and the return
 * statements are on lines not shown in this excerpt.  The visible strcat()
 * does not check the capacity of `path`; confirm callers pass a buffer large
 * enough for the deepest path.
 */
2727 GetDirName(VnodeId vnode, struct VnodeEssence *vp, char *path)
2729 struct VnodeEssence *parentvp;
2735 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(vp->parent))
2736 && GetDirName(vp->parent, parentvp, path)) {
2738 strcat(path, vp->name);
2744 /* To determine if a vnode is orphaned or not, the vnode and all its parent
2745 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
 * Returns 1 (orphaned) as soon as the vnode, or any ancestor reached by
 * following vep->parent, has no essence or is not claimed; returns 0 when
 * the walk ends at the case the code treats as the always-claimed root
 * directory.  The walk is recursive, one call per ancestor.
2748 IsVnodeOrphaned(VnodeId vnode)
2750 struct VnodeEssence *vep;
2753 return (1); /* Vnode zero does not exist */
2755 return (0); /* The root dir vnode is always claimed */
2756 vep = CheckVnodeNumber(vnode); /* Get the vnode essence */
2757 if (!vep || !vep->claimed)
2758 return (1); /* Vnode is not claimed - it is orphaned */
2760 return (IsVnodeOrphaned(vep->parent));
/*
 * SalvageDir - salvage the directory stored in slot i of the large-vnode
 * table, making sure its parent directory has been salvaged first.
 *
 *   name         - passed through unchanged to the recursive call
 *   rwVid        - read-write volume id (used when releasing a copied inode)
 *   dirVnodeInfo - large-vnode table; vnodes[i] / inodes[i] describe the dir
 *   alinkH       - link handle stored in the DirSummary and used by IH_DEC
 *   i            - index (bit number) of the directory in dirVnodeInfo
 *   rootdir      - out: receives a copy of the DirSummary of vnode 1,
 *                  unique 1 after its entries have been judged
 *   rootdirfound - forwarded to the recursive call (its declaration is on a
 *                  line not shown in this excerpt)
 *
 * dir, dirHandle and path are static, so the function is not reentrant; the
 * recursive call for the parent happens before any of them is filled in.
 */
2764 SalvageDir(char *name, VolumeId rwVid, struct VnodeInfo *dirVnodeInfo,
2765 IHandle_t * alinkH, int i, struct DirSummary *rootdir,
2768 static struct DirSummary dir;
2769 static struct DirHandle dirHandle;
2770 struct VnodeEssence *parent;
2771 static char path[MAXPATHLEN];
2774 if (dirVnodeInfo->vnodes[i].salvaged)
2775 return; /* already salvaged */
/* Mark the slot before recursing into the parent so that a cycle of parent
 * pointers cannot recurse forever. */
2778 dirVnodeInfo->vnodes[i].salvaged = 1;
2780 if (dirVnodeInfo->inodes[i] == 0)
2781 return; /* Not allocated to a directory */
/* Vnode 1 must have parent 0: a nonzero parent is logged and cleared.  The
 * lookup and recursive call further down salvage a not-yet-salvaged parent
 * before this directory is examined. */
2783 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
2784 if (dirVnodeInfo->vnodes[i].parent) {
2785 Log("Bad parent, vnode 1; %s...\n",
2786 (Testing ? "skipping" : "salvaging"));
2787 dirVnodeInfo->vnodes[i].parent = 0;
2788 dirVnodeInfo->vnodes[i].changed = 1;
2791 parent = CheckVnodeNumber(dirVnodeInfo->vnodes[i].parent);
2792 if (parent && parent->salvaged == 0)
2793 SalvageDir(name, rwVid, dirVnodeInfo, alinkH,
2794 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
2795 rootdir, rootdirfound);
2798 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
2799 dir.unique = dirVnodeInfo->vnodes[i].unique;
2802 dir.parent = dirVnodeInfo->vnodes[i].parent;
2803 dir.haveDot = dir.haveDotDot = 0;
2804 dir.ds_linkH = alinkH;
2805 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, fileSysDevice,
2806 dirVnodeInfo->inodes[i]);
/* With RebuildDirs set (and not Testing) DirOK is not consulted and the
 * directory is treated as not OK. */
2808 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
2811 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
2812 (Testing ? "skipping" : "salvaging"));
2815 CopyAndSalvage(&dir);
/* Keep a copy of the handle: it is the one enumerated below and the one
 * whose inode is released if dir.copied is set afterwards. */
2819 dirHandle = dir.dirHandle;
2822 GetDirName(bitNumberToVnodeNumber(i, vLarge),
2823 &dirVnodeInfo->vnodes[i], path);
2826 /* If enumeration failed for random reasons, we will probably delete
2827 * too much stuff, so we guard against this instead.
2829 assert(EnumerateDir(&dirHandle, JudgeEntry, &dir) == 0);
2832 /* Delete the old directory if it was copied in order to salvage.
2833 * CopyOnWrite has written the new inode # to the disk, but we still
2834 * have the old one in our local structure here. Thus, we idec the
2838 if (dir.copied && !Testing) {
2839 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
2841 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
2844 /* Remember rootdir DirSummary _after_ it has been judged */
2845 if (dir.vnodeNumber == 1 && dir.unique == 1) {
2846 memcpy(rootdir, &dir, sizeof(struct DirSummary));
/*
 * SalvageVolume - salvage the vnodes and directories of one volume.
 *
 *   rwIsp  - inode summary of the volume; supplies the volume id and the
 *            volume-info, large-index and small-index inodes
 *   alinkH - link handle passed to SalvageDir and used when releasing inodes
 *
 * Phases visible in this excerpt:
 *   1. read the volume header and assert it is sane;
 *   2. distil both vnode indexes into vnodeInfo[];
 *   3. salvage every slot of the large (directory) vnode table;
 *   4. walk all vnodes looking for unclaimed ones and, when
 *      orphans == ORPH_ATTACH, attach them to the root directory under a
 *      generated __ORPHANDIR__/__ORPHANFILE__ name;
 *   5. write back every vnode that changed or whose link count is off,
 *      clearing vnodes that are to be removed;
 *   6. free recorded names, fix the header's file/block counts and
 *      uniquifier, clear the in-use/needs-salvage flags, write the header.
 *
 * NOTE(review): `orphans` is assigned ORPH_IGNORE below but is not declared
 * in the visible part of this function; if it is file-scope state the change
 * outlives this call -- confirm that is intended.
 * NOTE(review): many source lines (braces, some declarations and conditions)
 * are missing from this excerpt; comments describe visible statements only.
 */
2854 SalvageVolume(register struct InodeSummary *rwIsp, IHandle_t * alinkH)
2856 /* This routine, for now, will only be called for read-write volumes */
2858 int BlocksInVolume = 0, FilesInVolume = 0;
2859 register VnodeClass class;
2860 struct DirSummary rootdir, oldrootdir;
2861 struct VnodeInfo *dirVnodeInfo;
2862 struct VnodeDiskObject vnode;
2863 VolumeDiskData volHeader;
2865 int orphaned, rootdirfound = 0;
2866 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
2867 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
2868 struct VnodeEssence *vep;
2871 afs_sfsize_t nBytes;
2873 VnodeId LFVnode, ThisVnode;
2874 Unique LFUnique, ThisUnique;
/* Load the volume header; a short read, a bad magic number or a pending
 * DESTROY_ME flag is treated as a fatal assertion failure. */
2877 vid = rwIsp->volSummary->header.id;
2878 IH_INIT(h, fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
2879 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
2880 assert(nBytes == sizeof(volHeader));
2881 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
2882 assert(volHeader.destroyMe != DESTROY_ME);
2883 /* (should not have gotten this far with DESTROY_ME flag still set!) */
2885 DistilVnodeEssence(vid, vLarge, rwIsp->volSummary->header.largeVnodeIndex,
2887 DistilVnodeEssence(vid, vSmall, rwIsp->volSummary->header.smallVnodeIndex,
/* Salvage every directory slot; SalvageDir copies the judged root directory
 * summary into rootdir. */
2890 dirVnodeInfo = &vnodeInfo[vLarge];
2891 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
2892 SalvageDir(volHeader.name, vid, dirVnodeInfo, alinkH, i, &rootdir,
2896 nt_sync(fileSysDevice);
2898 sync(); /* This used to be done lower level, for every dir */
2905 /* Parse each vnode looking for orphaned vnodes and
2906 * connect them to the tree as orphaned (if requested).
2908 oldrootdir = rootdir;
2909 for (class = 0; class < nVNODECLASSES; class++) {
2910 for (v = 0; v < vnodeInfo[class].nVnodes; v++) {
2911 vep = &(vnodeInfo[class].vnodes[v]);
2912 ThisVnode = bitNumberToVnodeNumber(v, class);
2913 ThisUnique = vep->unique;
2915 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
2916 continue; /* Ignore unused, claimed, and root vnodes */
2918 /* This vnode is orphaned. If it is a directory vnode, then the '..'
2919 * entry in this vnode had incremented the parent link count (In
2920 * JudgeEntry()). We need to go to the parent and decrement that
2921 * link count. But if the parent's unique is zero, then the parent
2922 * link count was not incremented in JudgeEntry().
2924 if (class == vLarge) { /* directory vnode */
2925 pv = vnodeIdToBitNumber(vep->parent);
2926 if (vnodeInfo[vLarge].vnodes[pv].unique != 0)
2927 vnodeInfo[vLarge].vnodes[pv].count++;
2931 continue; /* If no rootdir, can't attach orphaned files */
2933 /* Here we attach orphaned files and directories into the
2934 * root directory, LFVnode, making sure link counts stay correct.
2936 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
2937 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
2938 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
2940 /* Update this orphaned vnode's info. Its parent info and
2941 * link count (do for orphaned directories and files).
2943 vep->parent = LFVnode; /* Parent is the root dir */
2944 vep->unique = LFUnique;
/* count is subtracted from the on-disk linkCount when vnodes are written
 * back further down, so decrementing it here raises the final link count. */
2947 vep->count--; /* Inc link count (root dir will pt to it) */
2949 /* If this orphaned vnode is a directory, change '..'.
2950 * The name of the orphaned dir/file is unknown, so we
2951 * build a unique name. No need to CopyOnWrite the directory
2952 * since it is not connected to tree in BK or RO volume and
2953 * won't be visible there.
2955 if (class == vLarge) {
2959 /* Remove and recreate the ".." entry in this orphaned directory */
2960 SetSalvageDirHandle(&dh, vid, fileSysDevice,
2961 vnodeInfo[class].inodes[v]);
2963 pa.Unique = LFUnique;
2964 assert(Delete(&dh, "..") == 0);
2965 assert(Create(&dh, "..", &pa) == 0);
2967 /* The original parent's link count was decremented above.
2968 * Here we increment the new parent's link count.
2970 pv = vnodeIdToBitNumber(LFVnode);
2971 vnodeInfo[vLarge].vnodes[pv].count--;
2975 /* Go to the root dir and add this entry. The link count of the
2976 * root dir was incremented when ".." was created. Try 10 times.
2978 for (j = 0; j < 10; j++) {
2979 pa.Vnode = ThisVnode;
2980 pa.Unique = ThisUnique;
2982 (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
2984 vLarge) ? "__ORPHANDIR__" :
2985 "__ORPHANFILE__"), ThisVnode,
2988 CopyOnWrite(&rootdir);
2989 code = Create(&rootdir.dirHandle, npath, &pa);
2993 ThisUnique += 50; /* Try creating a different file */
2996 Log("Attaching orphaned %s to volume's root dir as %s\n",
2997 ((class == vLarge) ? "directory" : "file"), npath);
2999 } /* for each vnode in the class */
3000 } /* for each class of vnode */
3002 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
3004 if (!oldrootdir.copied && rootdir.copied) {
3006 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
3009 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
3012 DFlush(); /* Flush the changes */
3013 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
3014 Log("Cannot attach orphaned files and directories: Root directory not found\n");
3015 orphans = ORPH_IGNORE;
3018 /* Write out all changed vnodes. Orphaned files and directories
3019 * will get removed here also (if requested).
3021 for (class = 0; class < nVNODECLASSES; class++) {
3022 int nVnodes = vnodeInfo[class].nVnodes;
3023 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3024 struct VnodeEssence *vnodes = vnodeInfo[class].vnodes;
3025 FilesInVolume += vnodeInfo[class].nAllocatedVnodes;
3026 BlocksInVolume += vnodeInfo[class].volumeBlockCount;
3027 for (i = 0; i < nVnodes; i++) {
3028 register struct VnodeEssence *vnp = &vnodes[i];
3029 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
3031 /* If the vnode is good but is unclaimed (not listed in
3032 * any directory entries), then it is orphaned.
3035 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(vnodeNumber))) {
3036 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
3040 if (vnp->changed || vnp->count) {
3044 IH_IREAD(vnodeInfo[class].handle,
3045 vnodeIndexOffset(vcp, vnodeNumber),
3046 (char *)&vnode, sizeof(vnode));
3047 assert(nBytes == sizeof(vnode));
3049 vnode.parent = vnp->parent;
3050 oldCount = vnode.linkCount;
/* vnp->count started as the on-disk link count (DistilVnodeEssence) and was
 * decremented for each reference found, so what is left is the amount by
 * which the on-disk count is too high (negative when it is too low). */
3051 vnode.linkCount = vnode.linkCount - vnp->count;
3054 orphaned = IsVnodeOrphaned(vnodeNumber);
3056 if (!vnp->todelete) {
3057 /* Orphans should have already been attached (if requested) */
3058 assert(orphans != ORPH_ATTACH);
3059 oblocks += vnp->blockCount;
3062 if (((orphans == ORPH_REMOVE) || vnp->todelete)
3064 BlocksInVolume -= vnp->blockCount;
3066 if (VNDISK_GET_INO(&vnode)) {
3068 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
3071 memset(&vnode, 0, sizeof(vnode));
3073 } else if (vnp->count) {
3075 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
3078 vnode.modeBits = vnp->modeBits;
3081 vnode.dataVersion++;
3084 IH_IWRITE(vnodeInfo[class].handle,
3085 vnodeIndexOffset(vcp, vnodeNumber),
3086 (char *)&vnode, sizeof(vnode));
3087 assert(nBytes == sizeof(vnode));
3093 if (!Showmode && ofiles) {
3094 Log("%s %d orphaned files and directories (approx. %u KB)\n",
3096 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
3100 for (class = 0; class < nVNODECLASSES; class++) {
3101 register struct VnodeInfo *vip = &vnodeInfo[class];
3102 for (i = 0; i < vip->nVnodes; i++)
3103 if (vip->vnodes[i].name)
3104 free(vip->vnodes[i].name);
3111 /* Set correct resource utilization statistics */
3112 volHeader.filecount = FilesInVolume;
3113 volHeader.diskused = BlocksInVolume;
3115 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
3116 if (volHeader.uniquifier < (maxunique + 1)) {
3118 Log("Volume uniquifier is too low; fixed\n");
3119 /* Plus 2,000 in case there are workstations out there with
3120 * cached vnodes that have since been deleted
3122 volHeader.uniquifier = (maxunique + 1 + 2000);
3125 /* Turn off the inUse bit; the volume's been salvaged! */
3126 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
3127 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
3128 volHeader.inService = 1; /* allow service again */
3129 volHeader.needsCallback = (VolumeChanged != 0);
3130 volHeader.dontSalvage = DONT_SALVAGE;
3133 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
3134 assert(nBytes == sizeof(volHeader));
3137 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
3138 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
3139 FilesInVolume, BlocksInVolume);
3141 IH_RELEASE(vnodeInfo[vSmall].handle);
3142 IH_RELEASE(vnodeInfo[vLarge].handle);
/*
 * ClearROInUseBit - reset the salvage-related flags in a volume header.
 *
 * Reads the header through summary->volumeInfoHandle, asserts the read was
 * complete and the magic number matches, clears inUse and needsSalvaged,
 * sets inService and dontSalvage = DONT_SALVAGE, and writes the header back
 * (asserting a complete write).
 *
 * NOTE(review): a source line is missing between the flag updates and the
 * write in this excerpt; if it is a condition the write may not always run.
 */
3148 ClearROInUseBit(struct VolumeSummary *summary)
3150 IHandle_t *h = summary->volumeInfoHandle;
3151 afs_sfsize_t nBytes;
3153 VolumeDiskData volHeader;
3155 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3156 assert(nBytes == sizeof(volHeader));
3157 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3158 volHeader.inUse = 0;
3159 volHeader.needsSalvaged = 0;
3160 volHeader.inService = 1;
3161 volHeader.dontSalvage = DONT_SALVAGE;
3163 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
3164 assert(nBytes == sizeof(volHeader));
3169 * Possibly delete the volume.
3171 * deleteMe - Always do so, only a partial volume.
/*
 * Behaviour visible in this excerpt:
 *  - for a read-only volume, or whenever deleteMe is set, and provided the
 *    volume has a header file name, one of two explanations is logged and
 *    the header file is unlink()ed;
 *  - otherwise, unless `check` is set, the failure is logged using `message`
 *    as the prefix and the salvage is aborted through Abort().
 * NOTE(review): the declaration of `check`, and the conditions selecting
 * which explanation is logged and whether unlink() runs, are on lines not
 * shown here.  The result of unlink() is not examined in the visible code.
 */
3174 MaybeZapVolume(register struct InodeSummary *isp, char *message, int deleteMe,
3177 if (readOnly(isp) || deleteMe) {
3178 if (isp->volSummary && isp->volSummary->fileName) {
3181 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
3183 Log("It will be deleted on this server (you may find it elsewhere)\n");
3186 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
3188 Log("it will be deleted instead. It should be recloned.\n");
3191 unlink(isp->volSummary->fileName);
3193 } else if (!check) {
3194 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
3196 Abort("Salvage of volume %u aborted\n", isp->volumeId);
/*
 * AskOffline - ask the file server to take a volume offline for salvaging.
 *
 *   volumeId  - volume to take offline
 *   partition - partition name passed to the FSYNC request
 *
 * Issues FSYNC_VolOp(FSYNC_VOL_OFF, FSYNC_SALVAGE) up to three times.
 * SYNC_DENIED and SYNC_BAD_COMMAND are logged and abort the salvage
 * immediately; any other failure is logged, the FSYNC client is shut down
 * with FSYNC_clientFinis() and the request is retried.  If the last result
 * is still not SYNC_OK the salvage is aborted.
 *
 * NOTE(review): the #else/#endif lines of the DEMAND_ATTACH_ENABLE
 * conditionals, and whatever re-initialises the FSYNC client before a retry,
 * are not visible in this excerpt.
 */
3202 AskOffline(VolumeId volumeId, char * partition)
3206 for (i = 0; i < 3; i++) {
3207 code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_OFF, FSYNC_SALVAGE, NULL);
3209 if (code == SYNC_OK) {
3211 } else if (code == SYNC_DENIED) {
3212 #ifdef DEMAND_ATTACH_ENABLE
3213 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
3215 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
3217 Abort("Salvage aborted\n");
3218 } else if (code == SYNC_BAD_COMMAND) {
3219 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
3221 #ifdef DEMAND_ATTACH_ENABLE
3222 Log("AskOffline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
3224 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
3226 Abort("Salvage aborted\n");
3229 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
3230 FSYNC_clientFinis();
3234 if (code != SYNC_OK) {
3235 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
3236 Abort("Salvage aborted\n");
/*
 * AskOnline - ask the file server to bring a volume back online.
 *
 *   volumeId  - volume to bring online
 *   partition - partition name passed to the FSYNC request
 *
 * Issues FSYNC_VolOp(FSYNC_VOL_ON, FSYNC_WHATEVER) up to three times.  In
 * contrast to AskOffline, no Abort() call is visible here: a denial is
 * logged as "trying again", a protocol mismatch is logged, and other
 * failures shut down the FSYNC client before the next attempt.
 *
 * NOTE(review): how the loop exits on SYNC_OK or SYNC_BAD_COMMAND is on
 * lines not shown in this excerpt.
 */
3241 AskOnline(VolumeId volumeId, char *partition)
3245 for (i = 0; i < 3; i++) {
3246 code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
3248 if (code == SYNC_OK) {
3250 } else if (code == SYNC_DENIED) {
3251 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, partition);
3252 } else if (code == SYNC_BAD_COMMAND) {
3253 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
3255 #ifdef DEMAND_ATTACH_ENABLE
3256 Log("AskOnline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
3258 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
/* NOTE(review): the message below says "take volume offline" although this
 * is the online request -- it looks copied from AskOffline; consider
 * correcting the wording. */
3263 Log("AskOnline: request for fileserver to take volume offline failed; trying again...\n");
3264 FSYNC_clientFinis();
/*
 * CopyInode - copy the contents of inode1 into inode2 on `device`.
 *
 *   device   - device holding both inodes
 *   inode1   - source inode
 *   inode2   - destination inode
 *   rwvolume - volume id used to initialise both inode handles
 *
 * Data is moved in chunks of sizeof(buf) with FDH_READ/FDH_WRITE until the
 * read returns a value <= 0; every write is asserted to be complete.  Both
 * descriptors are closed with FDH_REALLYCLOSE afterwards.
 *
 * NOTE(review): the source descriptor is asserted non-NULL, but no matching
 * check on destFdP is visible (one source line is missing between the open
 * and the copy loop in this excerpt) -- confirm.  The declaration of buf,
 * any check of the final read result, and the release of the two IHandles
 * are likewise not visible.
 */
3271 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
3273 /* Volume parameter is passed in case iopen is upgraded in future to
3274 * require a volume Id to be passed
3277 IHandle_t *srcH, *destH;
3278 FdHandle_t *srcFdP, *destFdP;
3281 IH_INIT(srcH, device, rwvolume, inode1);
3282 srcFdP = IH_OPEN(srcH);
3283 assert(srcFdP != NULL);
3284 IH_INIT(destH, device, rwvolume, inode2);
3285 destFdP = IH_OPEN(destH);
3287 while ((n = FDH_READ(srcFdP, buf, sizeof(buf))) > 0)
3288 assert(FDH_WRITE(destFdP, buf, n) == n);
3290 FDH_REALLYCLOSE(srcFdP);
3291 FDH_REALLYCLOSE(destFdP);
/*
 * PrintInodeList - log every record of the inode file open on inodeFd.
 *
 * The file size comes from afs_fstat(); the whole file is read into one
 * malloc'd buffer and each ViceInodeInfo record is logged with its inode
 * number, link count, byte count and four parameters.
 *
 * NOTE(review): the fstat and read calls are made inside assert(), so they
 * rely on assert never being compiled out.  No free() of buf is visible in
 * this excerpt -- confirm it exists on a line not shown.
 */
3298 PrintInodeList(void)
3300 register struct ViceInodeInfo *ip;
3301 struct ViceInodeInfo *buf;
3302 struct afs_stat status;
3303 register int nInodes;
3305 assert(afs_fstat(inodeFd, &status) == 0);
3306 buf = (struct ViceInodeInfo *)malloc(status.st_size);
3307 assert(buf != NULL);
/* Integer division: a trailing partial record, if any, is not printed. */
3308 nInodes = status.st_size / sizeof(struct ViceInodeInfo);
3309 assert(read(inodeFd, buf, status.st_size) == status.st_size);
3310 for (ip = buf; nInodes--; ip++) {
3311 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
3312 PrintInode(NULL, ip->inodeNumber), ip->linkCount,
3313 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
3314 ip->u.param[2], ip->u.param[3]);
/*
 * PrintInodeSummary - log one line for each of the nVolumesInInodeFile
 * entries of inodeSummary[].  The trailing ", volSummary" in the format has
 * no conversion, so nothing is printed for that field.
 */
3320 PrintInodeSummary(void)
3323 struct InodeSummary *isp;
3325 for (i = 0; i < nVolumesInInodeFile; i++) {
3326 isp = &inodeSummary[i];
3327 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/*
 * PrintVolumeSummary - log the header file name of each of the nVolumes
 * entries starting at volumeSummaryp.  "header" and "wouldNeedCallback" in
 * the format are literal text; only fileName is actually printed.
 */
3332 PrintVolumeSummary(void)
3335 struct VolumeSummary *vsp;
3337 for (i = 0, vsp = volumeSummaryp; i < nVolumes; vsp++, i++) {
3338 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
3348 assert(0); /* Fork is never executed in the NT code path */
3352 #ifdef AFS_DEMAND_ATTACH_FS
3353 if ((f == 0) && (programType == salvageServer)) {
3354 /* we are a salvageserver child */
3355 #ifdef FSSYNC_BUILD_CLIENT
3356 VChildProcReconnectFS_r();
3358 #ifdef SALVSYNC_BUILD_CLIENT
3362 #endif /* AFS_DEMAND_ATTACH_FS */
3363 #endif /* !AFS_NT40_ENV */
3373 #ifdef AFS_DEMAND_ATTACH_FS
3374 if (programType == salvageServer) {
3375 #ifdef SALVSYNC_BUILD_CLIENT
3378 #ifdef FSSYNC_BUILD_CLIENT
3382 #endif /* AFS_DEMAND_ATTACH_FS */
3385 if (main_thread != pthread_self())
3386 pthread_exit((void *)code);
3399 pid = wait(&status);
3401 if (WCOREDUMP(status))
3402 Log("\"%s\" core dumped!\n", prog);
3403 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp - format `clock` as local time, "MM/DD/YYYY HH:MM:SS" or
 * "MM/DD/YYYY HH:MM".
 *
 *   clock     - time to format
 *   precision - presumably selects the format with seconds; the test itself
 *               is on a line not shown in this excerpt
 *
 * The text is written into a static 20-byte buffer, which is exactly enough
 * for the 19-character long form plus the terminating NUL.  Because of the
 * static buffer (and localtime()) the result is overwritten by the next call
 * and the function is not safe to call from several threads at once.
 */
3409 TimeStamp(time_t clock, int precision)
3412 static char timestamp[20];
3413 lt = localtime(&clock);
3415 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M:%S", lt);
3417 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M", lt);
/*
 * CheckLogFile - rotate and (re)open the salvager log.
 *
 *   log_path - path of the log file
 *
 * Visible behaviour: the current log is renamed to "<log_path>.old", the log
 * is opened in append mode into the logFile variable, and a failed open
 * falls back to stdout according to the comment on that branch.
 *
 * NOTE(review): the conditions around these steps (including what the
 * AFS_NT40_ENV section does) are on lines not shown.  oldSlvgLog is filled
 * with unbounded strcpy/strcat; a log_path within 4 characters of
 * AFSDIR_PATH_MAX would overflow it.
 */
3422 CheckLogFile(char * log_path)
3424 char oldSlvgLog[AFSDIR_PATH_MAX];
3426 #ifndef AFS_NT40_ENV
3433 strcpy(oldSlvgLog, log_path);
3434 strcat(oldSlvgLog, ".old");
3436 renamefile(log_path, oldSlvgLog);
3437 logFile = afs_fopen(log_path, "a");
3439 if (!logFile) { /* still nothing, use stdout */
3443 #ifndef AFS_NAMEI_ENV
3444 AFS_DEBUG_IOPS_LOG(logFile);
3449 #ifndef AFS_NT40_ENV
/*
 * TimeStampLogFile - give the log file a second, timestamped name.
 *
 *   log_path - path of the existing log file
 *
 * Builds "<log_path>.YYYY-MM-DD.HH:MM:SS" from the local time of `now` and
 * hard-links the log to that name.  The link() result is deliberately
 * ignored, as the comments below state.
 *
 * NOTE(review): the line that sets `now` is not visible in this excerpt.
 */
3451 TimeStampLogFile(char * log_path)
3453 char stampSlvgLog[AFSDIR_PATH_MAX];
3458 lt = localtime(&now);
3459 (void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
3460 "%s.%04d-%02d-%02d.%02d:%02d:%02d",
3461 log_path, lt->tm_year + 1900,
3462 lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
3465 /* try to link the logfile to a timestamped filename */
3466 /* if it fails, oh well, nothing we can do */
3467 link(log_path, stampSlvgLog);
3476 #ifndef AFS_NT40_ENV
3478 printf("Can't show log since using syslog.\n");
3487 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
3490 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
3493 while (fgets(line, sizeof(line), logFile))
/*
 * Log - printf-style logging for the salvager.
 *
 *   format, ... - printf-style format string and arguments
 *
 * The message is formatted into the local buffer tmp, bounded by sizeof tmp
 * (longer messages are cut off by afs_vsnprintf).  It is then either sent to
 * syslog at LOG_INFO or written to logFile prefixed with a TimeStamp() of
 * the current time.
 *
 * NOTE(review): the condition choosing between syslog and logFile, and the
 * declaration of tmp, are on lines not shown in this excerpt.
 */
3500 Log(const char *format, ...)
3506 va_start(args, format);
3507 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
3509 #ifndef AFS_NT40_ENV
3511 syslog(LOG_INFO, "%s", tmp);
3515 gettimeofday(&now, 0);
3516 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort - log a final printf-style message before the salvage is abandoned.
 *
 *   format, ... - printf-style format string and arguments
 *
 * The message is formatted into tmp (bounded by sizeof tmp) and sent to
 * syslog at LOG_INFO or written to logFile.  Unlike Log(), no timestamp is
 * prepended on the logFile path.
 *
 * NOTE(review): the statements that actually terminate the program are on
 * lines not shown in this excerpt.
 */
3522 Abort(const char *format, ...)
3527 va_start(args, format);
3528 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
3530 #ifndef AFS_NT40_ENV
3532 syslog(LOG_INFO, "%s", tmp);
3536 fprintf(logFile, "%s", tmp);
3551 p = (char *)malloc(strlen(s) + 1);
3557 /* Remove the FORCESALVAGE file */
/*
 *   path - directory in which "FORCESALVAGE" is looked for
 *
 * The file is unlinked only when not Testing and ForceSalvage is set, and
 * only if afs_stat() finds it.  The unlink() result is not checked.
 * NOTE(review): target is built with unbounded strcpy/strcat; its
 * declaration (and therefore its size) is on a line not shown here.
 */
3559 RemoveTheForce(char *path)
3562 struct afs_stat force; /* so we can use afs_stat to find it */
3563 strcpy(target,path);
3564 strcat(target,"/FORCESALVAGE");
3565 if (!Testing && ForceSalvage) {
3566 if (afs_stat(target,&force) == 0) unlink(target);
3570 #ifndef AFS_AIX32_ENV
3572 * UseTheForceLuke - see if we can use the force
/*
 *   path - directory in which "FORCESALVAGE" is looked for
 *
 * Returns nonzero when afs_stat() succeeds on "<path>/FORCESALVAGE", zero
 * otherwise.
 * NOTE(review): target is built with unbounded strcpy/strcat; its
 * declaration (and therefore its size) is on a line not shown here.
 */
3575 UseTheForceLuke(char *path)
3577 struct afs_stat force;
3579 strcpy(target,path);
3580 strcat(target,"/FORCESALVAGE");
3582 return (afs_stat(target, &force) == 0);
3586 * UseTheForceLuke - see if we can use the force
3589 * The VRMIX fsck will not muck with the filesystem it is supposedly
3590 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
3591 * muck directly with the root inode, which is within the normal
3593 * ListViceInodes() has a side effect of setting ForceSalvage if
3594 * it detects a need, based on root inode examination.
/*
 * Alternate definition (presumably the AFS_AIX32_ENV branch; the #else line
 * is not visible in this excerpt).  `path` is not used by the visible code
 * and the function reports 0; the preceding comment explains that
 * ForceSalvage is set elsewhere, as a side effect of ListViceInodes().
 */
3597 UseTheForceLuke(char *path)
3600 return 0; /* sorry OB1 */
3605 /* NT support routines */
3607 static char execpathname[MAX_PATH];
/*
 * nt_SalvagePartition - spawn a child salvager process for one partition.
 *
 *   partName - partition name, copied into the job record
 *   jobn     - job number stored in the job record
 *
 * On first use the path of the running executable is cached in
 * execpathname.  A job record (magic, number, partition) is then handed to
 * the child through spawnprocveb() along with save_args.
 *
 * NOTE(review): GetModuleFileName is given a size of MAX_PATH - 1, yet the
 * failure test compares the result with the literal 1023; unless
 * MAX_PATH - 1 happens to be 1023 a truncated path is not detected -- confirm
 * and compare against the size actually passed.
 * NOTE(review): strcpy into job.cj_part is unbounded; confirm partName can
 * never exceed that field.  The handling after the failure test and the
 * return statement are on lines not shown in this excerpt.
 */
3609 nt_SalvagePartition(char *partName, int jobn)
3614 if (!*execpathname) {
3615 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
3616 if (!n || n == 1023)
3619 job.cj_magic = SALVAGER_MAGIC;
3620 job.cj_number = jobn;
3621 (void)strcpy(job.cj_part, partName);
3622 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage - child-side setup from the job record passed by
 * the parent salvager.
 *
 *   datap - pointer to the data block received from the parent, interpreted
 *           as a childJob_t
 *   len   - size of that block; must equal sizeof(childJob_t)
 *
 * The record is checked for size and for SALVAGER_MAGIC, then a per-job log
 * file named "<salvage log path>.<n>" is opened for writing into logFile.
 *
 * NOTE(review): the actions taken when a check fails, the argument supplying
 * <n>, and the handling of a failed afs_fopen are on lines not shown in this
 * excerpt.  logname is filled with an unbounded sprintf.
 */
3627 nt_SetupPartitionSalvage(void *datap, int len)
3629 childJob_t *jobp = (childJob_t *) datap;
3630 char logname[AFSDIR_PATH_MAX];
3632 if (len != sizeof(childJob_t))
3634 if (jobp->cj_magic != SALVAGER_MAGIC)
3639 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
3641 logFile = afs_fopen(logname, "w");
3649 #endif /* AFS_NT40_ENV */