2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
91 #include <sys/param.h>
95 #endif /* ITIMER_REAL */
101 #include <sys/stat.h>
106 #include <WINNT/afsevent.h>
109 #define WCOREDUMP(x) ((x) & 0200)
112 #include <afs/afsint.h>
113 #include <afs/assert.h>
114 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
115 #if defined(AFS_VFSINCL_ENV)
116 #include <sys/vnode.h>
118 #include <sys/fs/ufs_inode.h>
120 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
121 #include <ufs/ufs/dinode.h>
122 #include <ufs/ffs/fs.h>
124 #include <ufs/inode.h>
127 #else /* AFS_VFSINCL_ENV */
129 #include <ufs/inode.h>
130 #else /* AFS_OSF_ENV */
131 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
132 #include <sys/inode.h>
135 #endif /* AFS_VFSINCL_ENV */
136 #endif /* AFS_SGI_ENV */
139 #include <sys/lockf.h>
143 #include <checklist.h>
145 #if defined(AFS_SGI_ENV)
150 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
153 #include <sys/mnttab.h>
154 #include <sys/mntent.h>
159 #endif /* AFS_SGI_ENV */
160 #endif /* AFS_HPUX_ENV */
165 #include <afs/osi_inode.h>
169 #include <afs/afsutil.h>
170 #include <afs/fileutil.h>
171 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
179 #include <afs/afssyscalls.h>
183 #include "partition.h"
184 #include "daemon_com.h"
186 #include "salvsync.h"
187 #include "viceinode.h"
189 #include "volinodes.h" /* header magic number, etc. stuff */
190 #include "vol-salvage.h"
191 #include "vol_internal.h"
/*
 * Large-file shims.  The afs_lseek, afs_stat, afs_fstat, afs_open and
 * afs_fopen names are mapped onto the 64-bit entry points (lseek64, stat64,
 * fstat64, open64, fopen64) in the first branch and onto the plain libc
 * names in the branch labelled !O_LARGEFILE.  The extern prototypes are
 * closed by an endif labelled S_SPLINT_S; the opening directive is elided
 * in this view.
 */
197 /*@+fcnmacros +macrofcndecl@*/
200 extern off64_t afs_lseek(int FD, off64_t O, int F);
201 #endif /*S_SPLINT_S */
202 #define afs_lseek(FD, O, F) lseek64(FD, (off64_t) (O), F)
203 #define afs_stat stat64
204 #define afs_fstat fstat64
205 #define afs_open open64
206 #define afs_fopen fopen64
207 #else /* !O_LARGEFILE */
/* Fallback branch: same names, ordinary off_t based calls. */
209 extern off_t afs_lseek(int FD, off_t O, int F);
210 #endif /*S_SPLINT_S */
211 #define afs_lseek(FD, O, F) lseek(FD, (off_t) (O), F)
212 #define afs_stat stat
213 #define afs_fstat fstat
214 #define afs_open open
215 #define afs_fopen fopen
216 #endif /* !O_LARGEFILE */
217 /*@=fcnmacros =macrofcndecl@*/
220 extern void *calloc();
222 static char *TimeStamp(time_t clock, int precision);
225 int debug; /* -d flag */
226 extern int Testing; /* -n flag */
227 int ListInodeOption; /* -i flag */
228 int ShowRootFiles; /* -r flag */
229 int RebuildDirs; /* -sal flag */
230 int Parallel = 4; /* -para X flag */
231 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
232 int forceR = 0; /* -b flag */
233 int ShowLog = 0; /* -showlog flag */
234 int ShowSuid = 0; /* -showsuid flag */
235 int ShowMounts = 0; /* -showmounts flag */
236 int orphans = ORPH_IGNORE; /* -orphans option */
241 int useSyslog = 0; /* -syslog flag */
242 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
251 #define MAXPARALLEL 32
253 int OKToZap; /* -o flag */
254 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
255 * in the volume header */
257 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
259 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
261 Device fileSysDevice; /* The device number of the current
262 * partition being salvaged */
266 char *fileSysPath; /* The path of the mounted partition currently
267 * being salvaged, i.e. the directory
268 * containing the volume headers */
270 char *fileSysPathName; /* NT needs this to make name pretty in log. */
271 IHandle_t *VGLinkH; /* Link handle for current volume group. */
272 int VGLinkH_cnt; /* # of references to lnk handle. */
273 struct DiskPartition64 *fileSysPartition; /* Partition being salvaged */
275 char *fileSysDeviceName; /* The block device where the file system
276 * being salvaged was mounted */
277 char *filesysfulldev;
279 int VolumeChanged; /* Set by any routine which would change the volume in
280 * a way which would require callbacks to be broken if the
281 * volume was put back on line by an active file server */
283 VolumeDiskData VolInfo; /* A copy of the last good or salvaged volume header dealt with */
285 int nVolumesInInodeFile; /* Number of read-write volumes summarized */
286 int inodeFd; /* File descriptor for inode file */
289 struct VnodeInfo vnodeInfo[nVNODECLASSES];
292 struct VolumeSummary *volumeSummaryp; /* Holds all the volumes in a part */
293 int nVolumes; /* Number of volumes (read-write and read-only)
294 * in volume summary */
300 /* Forward declarations */
301 /*@printflike@*/ void Log(const char *format, ...);
302 /*@printflike@*/ void Abort(const char *format, ...);
303 static int IsVnodeOrphaned(VnodeId vnode);
305 /* Uniquifier stored in the Inode */
310 return (u & 0x3fffff);
312 #if defined(AFS_SGI_EXMAG)
313 return (u & SGI_UNIQMASK);
316 #endif /* AFS_SGI_EXMAG */
/*
 * Classify an errno-style code.  The visible test singles out EPERM, ENXIO
 * and ENOENT; any other code reaches the final return 0, which the existing
 * comment describes as possibly transient.
 * NOTE(review): the statement guarded by the if is elided in this view;
 * presumably it returns nonzero to mark a hard error -- confirm.
 */
321 BadError(register int aerror)
323 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
325 return 0; /* otherwise may be transient, e.g. EMFILE */
330 char *save_args[MAX_ARGS];
332 extern pthread_t main_thread;
333 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
336 /* Get the salvage lock if not already held. Hold until process exits. */
/*
 * The lock is a handle/descriptor on AFSDIR_SERVER_SLVGLOCK_FILEPATH kept in
 * salvageLock.  Two platform variants are visible below; the preprocessor
 * lines selecting between them and the error-exit statements are elided in
 * this view.
 */
338 ObtainSalvageLock(void)
/* CreateFile variant: OPEN_ALWAYS opens the lock file, creating it if absent. */
344 (FD_t)CreateFile(AFSDIR_SERVER_SLVGLOCK_FILEPATH, 0, 0, NULL,
345 OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
/* An invalid handle is reported as another salvager already running. */
346 if (salvageLock == INVALID_FD) {
348 "salvager: There appears to be another salvager running! Aborted.\n");
/* afs_open variant: create the lock file read/write (mode 0666) if needed. */
353 afs_open(AFSDIR_SERVER_SLVGLOCK_FILEPATH, O_CREAT | O_RDWR, 0666);
354 if (salvageLock < 0) {
356 "salvager: can't open salvage lock file %s, aborting\n",
357 AFSDIR_SERVER_SLVGLOCK_FILEPATH);
/*
 * Take the lock itself: flock(LOCK_EX) under AFS_DARWIN_ENV, lockf(F_LOCK)
 * in the other visible branch.  A -1 result is reported as a competing
 * salvager.
 */
360 #ifdef AFS_DARWIN_ENV
361 if (flock(salvageLock, LOCK_EX) == -1) {
363 if (lockf(salvageLock, F_LOCK, 0) == -1) {
366 "salvager: There appears to be another salvager running! Aborted.\n");
373 #ifdef AFS_SGI_XFS_IOPS_ENV
374 /* Check if the given partition is mounted. For XFS, the root inode is not a
375 * constant. So we check the hard way.
/*
 * Walk the mount table opened with setmntent(MOUNTED, "r") and compare each
 * entry mnt_dir against part with strcmp.
 *
 * NOTE(review): both arms of the conditional in the return statement are 1,
 * so the visible return yields 1 whether or not the loop found a matching
 * entry.  This looks unintended (a 1 : 0 result would reflect the search);
 * confirm against the caller before changing.
 * NOTE(review): the setmntent call sits inside assert(); if this assert can
 * be compiled out, mntfp would never be assigned.
 */
378 IsPartitionMounted(char *part)
381 struct mntent *mntent;
383 assert(mntfp = setmntent(MOUNTED, "r"));
/* getmntent returns NULL at end of table, which also ends the loop. */
384 while (mntent = getmntent(mntfp)) {
385 if (!strcmp(part, mntent->mnt_dir))
390 return mntent ? 1 : 1;
393 /* Check if the given inode is the root of the filesystem. */
/*
 * Guarded by ifndef AFS_SGI_XFS_IOPS_ENV.  status is a stat result
 * (struct afs_stat); the answer is the truth value of comparing its st_ino
 * with the ROOTINODE constant.
 */
394 #ifndef AFS_SGI_XFS_IOPS_ENV
396 IsRootInode(struct afs_stat *status)
399 * The root inode is not a fixed value in XFS partitions. So we need to
400 * see if the partition is in the list of mounted partitions. This only
401 * affects the SalvageFileSys path, so we check there.
403 return (status->st_ino == ROOTINODE);
408 #ifndef AFS_NAMEI_ENV
409 /* We don't want to salvage big files filesystems, since we can't put volumes on
/*
 * Decide whether the partition backing devName is a big-files filesystem.
 * The device path is normalised into name, its superblock is read with
 * ReadSuper, and IsBigFilesFileSystem is consulted; both failure cases are
 * logged as reasons for not salvaging.  The return statements are elided in
 * this view.
 *
 * NOTE(review): name is filled with sprintf/strcpy without a length bound;
 * its declaration is not visible here -- confirm it is large enough for any
 * devName.
 */
413 CheckIfBigFilesFS(char *mountPoint, char *devName)
415 struct superblock fs;
/* strncmp is nonzero when devName lacks the five character /dev/ prefix. */
418 if (strncmp(devName, "/dev/", 5)) {
419 (void)sprintf(name, "/dev/%s", devName);
421 (void)strcpy(name, devName);
424 if (ReadSuper(&fs, name) < 0) {
425 Log("Unable to read superblock. Not salvaging partition %s.\n",
429 if (IsBigFilesFileSystem(&fs)) {
430 Log("Partition %s is a big files filesystem, not salvaging.\n",
/*
 * SameDisk: do two partitions share a physical disk?  Two variants are
 * visible: a function built on QueryDosDevice and, at the end, a macro that
 * compares device numbers divided by PartsPerDisk.  The preprocessor lines
 * choosing between them are elided in this view.
 */
440 #define HDSTR "\\Device\\Harddisk"
441 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
443 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
/* dowarn is static, so its value persists across calls. */
448 static int dowarn = 1;
/* Resolve the first device name; res receives the result. */
450 if (!QueryDosDevice(p1->devName, res, RES_LEN - 1))
/* A result that does not start with HDSTR is logged as unexpected. */
452 if (strncmp(res, HDSTR, HDLEN)) {
455 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
456 res, HDSTR, p1->devName);
/* The disk number is parsed from the text right after the HDSTR prefix. */
460 d1 = atoi(&res[HDLEN]);
/* Same steps for the second partition. */
462 if (!QueryDosDevice(p2->devName, res, RES_LEN - 1))
464 if (strncmp(res, HDSTR, HDLEN)) {
467 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
468 res, HDSTR, p2->devName);
472 d2 = atoi(&res[HDLEN]);
/* Macro variant: integer division groups PartsPerDisk device numbers per disk. */
477 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
480 /* This assumes that two partitions with the same device number divided by
481 * PartsPerDisk are on the same disk.
/*
 * Queue one partition for salvage, running up to Parallel jobs at once.
 *
 * partP: partition to salvage.  The loop conditions below treat a null partP
 *        as a request to wait until every outstanding job has finished.
 *
 * State kept across calls (all static): jobs[] holds one list head per disk
 * being salvaged, numjobs counts the occupied slots, and jobcount numbers
 * the jobs; each job number is also the suffix of that job log file.
 * Partitions that SameDisk reports as sharing a disk with a running job are
 * chained through nextjob instead of being started in parallel.
 *
 * When debug is set or Parallel is 1 the partition is handed to
 * SalvageFileSys directly.  Otherwise a slot is found, waiting on a child
 * with wait() when numjobs has reached Parallel or when draining, and the
 * job in slot startjob is started.  The child opens its own log file and
 * calls SalvageFileSys1.  Once all jobs are done and syslog is not in use,
 * the per-job log files are read back line by line and unlinked.
 *
 * NOTE(review): jobs[] has MAXPARALLEL slots; this view does not show
 * Parallel being limited to that value -- confirm it is clamped elsewhere.
 * NOTE(review): the result of afs_fopen for the per-job log is not checked
 * in the visible lines.
 */
484 SalvageFileSysParallel(struct DiskPartition64 *partP)
487 struct DiskPartition64 *partP;
488 int pid; /* Pid for this job */
489 int jobnumb; /* Log file job number */
490 struct job *nextjob; /* Next partition on disk to salvage */
492 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
493 struct job *thisjob = 0;
494 static int numjobs = 0;
495 static int jobcount = 0;
501 char logFileName[256];
505 /* We have a partition to salvage. Copy it into thisjob */
506 thisjob = (struct job *)malloc(sizeof(struct job));
508 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
511 memset(thisjob, 0, sizeof(struct job));
512 thisjob->partP = partP;
513 thisjob->jobnumb = jobcount;
515 } else if (jobcount == 0) {
516 /* We are asking to wait for all jobs (partP == 0), yet we never
519 Log("No file system partitions named %s* found; not salvaged\n",
520 VICE_PARTITION_PREFIX);
524 if (debug || Parallel == 1) {
526 SalvageFileSys(thisjob->partP, 0);
533 /* Check to see if thisjob is for a disk that we are already
534 * salvaging. If it is, link it in as the next job to do. The
535 * jobs array has 1 entry per disk being salvaged. numjobs is
536 * the total number of disks currently being salvaged. In
537 * order to keep the jobs array compact, when a disk is
538 * completed, the highest element in the jobs array is moved
539 * down to the now open slot.
541 for (j = 0; j < numjobs; j++) {
542 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
543 /* On same disk, add it to this list and return */
544 thisjob->nextjob = jobs[j]->nextjob;
545 jobs[j]->nextjob = thisjob;
552 /* Loop until we start thisjob or until all existing jobs are finished */
553 while (thisjob || (!partP && (numjobs > 0))) {
554 startjob = -1; /* No new job to start */
556 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
557 /* Either the max jobs are running or we have to wait for all
558 * the jobs to finish. In either case, we wait for at least one
559 * job to finish. When it's done, clean up after it.
561 pid = wait(&wstatus);
563 for (j = 0; j < numjobs; j++) { /* Find which job it is */
564 if (pid == jobs[j]->pid)
568 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
569 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
572 numjobs--; /* job no longer running */
573 oldjob = jobs[j]; /* remember */
574 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
575 free(oldjob); /* free the old job */
577 /* If there is another partition on the disk to salvage, then
578 * say we will start it (startjob). If not, then put thisjob there
579 * and say we will start it.
581 if (jobs[j]) { /* Another partition to salvage */
582 startjob = j; /* Will start it */
583 } else { /* There is not another partition to salvage */
585 jobs[j] = thisjob; /* Add thisjob */
587 startjob = j; /* Will start it */
589 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
590 startjob = -1; /* Don't start it - already running */
594 /* We don't have to wait for a job to complete */
596 jobs[numjobs] = thisjob; /* Add this job */
598 startjob = numjobs; /* Will start it */
602 /* Start up a new salvage job on a partition in job slot "startjob" */
603 if (startjob != -1) {
605 Log("Starting salvage of file system partition %s\n",
606 jobs[startjob]->partP->name);
608 /* For NT, we not only fork, but re-exec the salvager. Pass in the
609 * commands and pass the child job number via the data path.
612 nt_SalvagePartition(jobs[startjob]->partP->name,
613 jobs[startjob]->jobnumb);
614 jobs[startjob]->pid = pid;
619 jobs[startjob]->pid = pid;
625 for (fd = 0; fd < 16; fd++)
632 openlog("salvager", LOG_PID, useSyslogFacility);
636 (void)afs_snprintf(logFileName, sizeof logFileName,
638 AFSDIR_SERVER_SLVGLOG_FILEPATH,
639 jobs[startjob]->jobnumb);
640 logFile = afs_fopen(logFileName, "w");
645 SalvageFileSys1(jobs[startjob]->partP, 0);
650 } /* while ( thisjob || (!partP && numjobs > 0) ) */
652 /* If waited for all jobs to complete, now collect log files and return */
654 if (!useSyslog) /* if syslogging - no need to collect */
657 for (i = 0; i < jobcount; i++) {
658 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
659 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
660 if ((passLog = afs_fopen(logFileName, "r"))) {
661 while (fgets(buf, sizeof(buf), passLog)) {
666 (void)unlink(logFileName);
/*
 * Salvage one partition (or one volume on it when singleVolumeNumber is
 * nonzero) by delegating to SalvageFileSys1.  The work runs in this process
 * when forking is unavailable (canfork is false) or debug is set; otherwise
 * it runs in the process for which Fork() returns 0, and the remaining path
 * calls Wait with the label SalvageFileSys.
 */
675 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
677 if (!canfork || debug || Fork() == 0) {
678 SalvageFileSys1(partP, singleVolumeNumber);
/* True only on the forked path; the guarded statements are elided in this view. */
679 if (canfork && !debug) {
684 Wait("SalvageFileSys");
/*
 * Split a device path held in pbuffer.  The visible lines copy pbuffer into
 * a local scratch buffer, locate the last slash in both the copy and the
 * original, and then overwrite pbuffer with the text after its last slash.
 * How wpath is filled and what is returned are elided in this view.
 *
 * NOTE(review): strcpy into pbuf has no length bound; pbuf is 128 bytes, so
 * a longer pbuffer would overflow it.
 * NOTE(review): the final strcpy copies from ptr + 1, which points inside
 * pbuffer, into pbuffer itself.  Source and destination overlap, which
 * strcpy does not define; memmove with an explicit length would be the safe
 * form.
 */
688 get_DevName(char *pbuffer, char *wpath)
690 char pbuf[128], *ptr;
691 strcpy(pbuf, pbuffer);
692 ptr = (char *)strrchr(pbuf, '/');
698 ptr = (char *)strrchr(pbuffer, '/');
700 strcpy(pbuffer, ptr + 1);
/*
 * Salvage a single partition, or one volume group on it when
 * singleVolumeNumber is nonzero.
 *
 * Steps visible below, in order:
 *  - record the partition in the fileSys* globals and lock it with
 *    VLockPartition;
 *  - set ForceSalvage from UseTheForceLuke(fileSysPath);
 *  - for a single volume, connect to the file server (unless running as the
 *    salvageServer) and request the volume offline with AskOffline;
 *  - remove leftover salvage.inodes. and salvage.temp. files from the
 *    partition directory;
 *  - build the inode list file with GetInodeSummary, open it into inodeFd
 *    and unlink the path straight away;
 *  - load volume headers with GetVolumeSummary, then walk inodeSummary in
 *    runs sharing one RWvolumeId, deleting header files with no inode data
 *    and passing each run to SalvageVolumeGroup;
 *  - remove the FORCESALVAGE marker for whole-partition runs, request
 *    volumes back online with AskOnline, log completion, close inodeFd.
 *
 * Several preprocessor lines and closing braces are elided in this view, so
 * platform-specific alternatives appear back to back.
 */
707 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
710 char inodeListPath[256];
711 static char tmpDevName[100];
712 static char wpath[100];
713 struct VolumeSummary *vsp, *esp;
716 fileSysPartition = partP;
717 fileSysDevice = fileSysPartition->device;
718 fileSysPathName = VPartitionPath(fileSysPartition);
721 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
722 (void)sprintf(fileSysPath, "%s\\", fileSysPathName);
723 name = partP->devName;
725 fileSysPath = fileSysPathName;
/* NOTE(review): unbounded copy into the 100 byte tmpDevName -- confirm devName length. */
726 strcpy(tmpDevName, partP->devName);
727 name = get_DevName(tmpDevName, wpath);
728 fileSysDeviceName = name;
729 filesysfulldev = wpath;
732 VLockPartition(partP->name);
733 if (singleVolumeNumber || ForceSalvage)
736 ForceSalvage = UseTheForceLuke(fileSysPath);
738 if (singleVolumeNumber) {
739 /* salvageserver already setup fssync conn for us */
740 if ((programType != salvageServer) && !VConnectFS()) {
741 Abort("Couldn't connect to file server\n");
743 AskOffline(singleVolumeNumber, partP->name);
746 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
747 partP->name, name, (Testing ? "(READONLY mode)" : ""));
749 Log("***Forced salvage of all volumes on this partition***\n");
754 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
/*
 * NOTE(review): opendir is called inside assert(); if this assert can be
 * compiled out, dirp would never be assigned.
 */
761 assert((dirp = opendir(fileSysPath)) != NULL);
762 while ((dp = readdir(dirp))) {
/* 15 and 13 are the lengths of the two prefixes being matched. */
763 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
764 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
766 Log("Removing old salvager temp files %s\n", dp->d_name);
/* NOTE(review): npath is built with strcpy/strcat, no bound visible here. */
767 strcpy(npath, fileSysPath);
769 strcat(npath, dp->d_name);
/* Temporary files go to tmpdir when it is set, else to the partition itself. */
775 tdir = (tmpdir ? tmpdir : fileSysPath);
777 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
/*
 * NOTE(review): strncpy with 255 does not terminate inodeListPath when the
 * source is 255 bytes or longer, and the _tempnam result is not checked.
 */
778 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
780 snprintf(inodeListPath, 255, "%s/salvage.inodes.%s.%d", tdir, name,
/* A negative result means the partition is not salvaged; the list file is removed. */
783 if (GetInodeSummary(inodeListPath, singleVolumeNumber) < 0) {
784 unlink(inodeListPath);
788 /* Using nt_unlink here since we're really using the delete on close
789 * semantics of unlink. In most places in the salvager, we really do
790 * mean to unlink the file at that point. Those places have been
791 * modified to actually do that so that the NT crt can be used there.
794 _open_osfhandle((intptr_t)nt_open(inodeListPath, O_RDWR, 0), O_RDWR);
795 nt_unlink(inodeListPath); /* NT's crt unlink won't if file is open. */
797 inodeFd = afs_open(inodeListPath, O_RDONLY);
798 unlink(inodeListPath);
801 Abort("Temporary file %s is missing...\n", inodeListPath);
802 if (ListInodeOption) {
806 /* enumerate volumes in the partition.
807 * figure out sets of read-only + rw volumes.
808 * salvage each set, read-only volumes first, then read-write.
809 * Fix up inodes on last volume in set (whether it is read-write
812 GetVolumeSummary(singleVolumeNumber);
814 for (i = j = 0, vsp = volumeSummaryp, esp = vsp + nVolumes;
815 i < nVolumesInInodeFile; i = j) {
816 VolumeId rwvid = inodeSummary[i].RWvolumeId;
818 j < nVolumesInInodeFile && inodeSummary[j].RWvolumeId == rwvid;
820 VolumeId vid = inodeSummary[j].volumeId;
821 struct VolumeSummary *tsp;
822 /* Scan volume list (from partition root directory) looking for the
823 * current rw volume number in the volume list from the inode scan.
824 * If there is one here that is not in the inode volume list,
826 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
828 DeleteExtraVolumeHeaderFile(vsp);
830 /* Now match up the volume summary info from the root directory with the
831 * entry in the volume list obtained from scanning inodes */
832 inodeSummary[j].volSummary = NULL;
833 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
834 if (tsp->header.id == vid) {
835 inodeSummary[j].volSummary = tsp;
841 /* Salvage the group of volumes (several read-only + 1 read/write)
842 * starting with the current read-only volume we're looking at.
844 SalvageVolumeGroup(&inodeSummary[i], j - i);
847 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
848 for (; vsp < esp; vsp++) {
850 DeleteExtraVolumeHeaderFile(vsp);
853 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
854 RemoveTheForce(fileSysPath);
856 if (!Testing && singleVolumeNumber) {
857 AskOnline(singleVolumeNumber, fileSysPartition->name);
859 /* Step through the volumeSummary list and set all volumes on-line.
860 * The volumes were taken off-line in GetVolumeSummary.
862 for (j = 0; j < nVolumes; j++) {
863 AskOnline(volumeSummaryp[j].header.id, fileSysPartition->name);
867 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
868 fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
871 close(inodeFd); /* SalvageVolumeGroup was the last which needed it. */
/*
 * Report, and outside Testing mode presumably remove, a volume header file
 * that has no inode data behind it.  The path is fileSysPath joined with
 * vsp->fileName.  The message wording switches to a would-have-been form
 * when Testing is set, and an unlink failure is logged with errno.  The
 * unlink call and its guards are elided in this view.
 *
 * NOTE(review): path is filled by sprintf without a bound; its declaration
 * is not visible here.
 */
875 DeleteExtraVolumeHeaderFile(register struct VolumeSummary *vsp)
878 sprintf(path, "%s/%s", fileSysPath, vsp->fileName);
881 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
884 Log("Unable to unlink %s (errno = %d)\n", path, errno);
/*
 * qsort comparison routine over struct ViceInodeInfo records (it is passed
 * to qsort by GetInodeSummary).
 *
 * Ordering keys visible below:
 *  1. When either record is a special inode (vnodeNumber == INODESPECIAL),
 *     each side gets a read-write id: parentId for a special inode,
 *     otherwise volumeId.  Special inodes of one volume are ordered by
 *     u.special.type.
 *  2. volumeId, then vnodeNumber.
 *  3. vnodeUniquifier, then inodeDataVersion.  The existing comment says
 *     these tests are reversed so the most desirable inode sorts first.
 *     The extra range checks compare against 90 percent and 10 percent of
 *     the limits named in the trailing comments.
 *     NOTE(review): these look like wrap-around guards -- confirm.
 *
 * Most return statements are elided in this view, so the sign returned on
 * each branch cannot be read off here.
 */
891 CompareInodes(const void *_p1, const void *_p2)
893 register const struct ViceInodeInfo *p1 = _p1;
894 register const struct ViceInodeInfo *p2 = _p2;
/* Special-inode handling applies when at least one side is special. */
895 if (p1->u.vnode.vnodeNumber == INODESPECIAL
896 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
897 VolumeId p1rwid, p2rwid;
899 (p1->u.vnode.vnodeNumber ==
900 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
902 (p2->u.vnode.vnodeNumber ==
903 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
/* Both special: same volume orders by type, else by volumeId. */
908 if (p1->u.vnode.vnodeNumber == INODESPECIAL
909 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
910 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
911 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
912 if (p1->u.vnode.volumeId == p1rwid)
914 if (p2->u.vnode.volumeId == p2rwid)
916 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
/* Exactly one side is special. */
918 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
919 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
920 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
/* Ordinary key comparison: volume first, then vnode number. */
922 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
924 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
926 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
928 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
930 /* The following tests are reversed, so that the most desirable
931 * of several similar inodes comes first */
932 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
934 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
935 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
939 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
940 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
945 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
947 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
948 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
952 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
953 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
/* Same pattern again for the data version. */
958 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
960 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
961 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
965 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
966 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
971 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
973 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
974 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
978 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
979 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/*
 * Summarise the leading run of inodes in ip that belong to one volume.
 *
 * ip:        first inode record of the run.
 * maxInodes: upper bound on how many records may be examined.
 * summary:   receives volumeId, RWvolumeId, nInodes, nSpecialInodes and
 *            maxUniquifier for the run.
 *
 * The scan stops at the first record whose volumeId differs from the first
 * one, or after maxInodes records.  A special inode supplies the read-write
 * volume id through its parentId; other inodes feed the running maximum
 * uniquifier.  The counter updates, the initial values of n, nSpecial and
 * maxunique, and the return statement are elided in this view.
 */
988 CountVolumeInodes(register struct ViceInodeInfo *ip, int maxInodes,
989 register struct InodeSummary *summary)
991 VolumeId volume = ip->u.vnode.volumeId;
/* Default: the volume is its own read-write parent until a special inode says otherwise. */
992 VolumeId rwvolume = volume;
993 register int n, nSpecial;
994 register Unique maxunique;
997 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
999 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1001 rwvolume = ip->u.special.parentId;
1002 /* This isn't quite right, as there could (in error) be different
1003 * parent inodes in different special vnodes */
1005 if (maxunique < ip->u.vnode.vnodeUniquifier)
1006 maxunique = ip->u.vnode.vnodeUniquifier;
1010 summary->volumeId = volume;
1011 summary->RWvolumeId = rwvolume;
1012 summary->nInodes = n;
1013 summary->nSpecialInodes = nSpecial;
1014 summary->maxUniquifier = maxunique;
/*
 * Inode filter used when a single volume is being salvaged (GetInodeSummary
 * hands it to ListViceInodes).  Returns nonzero when the inode belongs to
 * singleVolumeNumber: a special inode is matched on its parentId, any other
 * inode on its volumeId.  rock is not referenced in the visible lines.
 */
1018 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1020 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1021 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1022 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1027 * Collect list of inodes in file named by path. If a truly fatal error,
1028 * unlink the file and abort. For lesser errors, return -1. The file will
1029 * be unlinked by the caller.
/*
 * Build the inode list file at path and the in-memory inodeSummary table.
 *
 * path:               file that ListViceInodes fills with inode records.
 * singleVolumeNumber: when nonzero, OnlyOneVolume is passed as the filter so
 *                     only that volume group is listed.
 *
 * Steps visible below:
 *  - ListViceInodes writes the raw inode records; a negative result is
 *    logged or aborts;
 *  - the file is opened into inodeFd and its size read with afs_fstat;
 *  - a summary file salvage.temp.<pid> is created in tmpdir or on the
 *    partition;
 *  - in the forked process (or in-line when canfork is false or debug is
 *    set) the records are read into memory, sorted with CompareInodes,
 *    written back over the file, and one InodeSummary per volume is appended
 *    to the summary file using CountVolumeInodes;
 *  - the waiting side reads the summary file into inodeSummary, sets
 *    nVolumesInInodeFile, then closes and unlinks the summary file.
 *
 * Many braces, declarations and return statements are elided in this view.
 */
1032 GetInodeSummary(char *path, VolumeId singleVolumeNumber)
1034 struct afs_stat status;
1036 struct ViceInodeInfo *ip;
1037 struct InodeSummary summary;
1038 char summaryFileName[50];
/* Platform variants of dev/wpath follow; the selecting directives are elided. */
1041 char *dev = fileSysPath;
1042 char *wpath = fileSysPath;
1044 char *dev = fileSysDeviceName;
1045 char *wpath = filesysfulldev;
1047 char *part = fileSysPath;
1050 /* This file used to come from vfsck; cobble it up ourselves now... */
1052 ListViceInodes(dev, fileSysPath, path,
1053 singleVolumeNumber ? OnlyOneVolume : 0,
1054 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1056 Log("*** I/O error %d when writing a tmp inode file %s; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, path, dev);
1060 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
/* ListViceInodes can request a forced salvage through forceSal. */
1062 if (forceSal && !ForceSalvage) {
1063 Log("***Forced salvage of all volumes on this partition***\n");
1066 inodeFd = afs_open(path, O_RDWR);
1067 if (inodeFd == -1 || afs_fstat(inodeFd, &status) == -1) {
1069 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1071 tdir = (tmpdir ? tmpdir : part);
1073 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
/*
 * NOTE(review): summaryFileName is 50 bytes and this strcpy is unbounded;
 * a long tdir would overflow it.  The _tempnam result is also unchecked.
 */
1074 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp"));
/* The afs_snprintf variant is bounded but can truncate a long tdir silently. */
1076 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1077 "%s/salvage.temp.%d", tdir, getpid());
1079 summaryFile = afs_fopen(summaryFileName, "a+");
1080 if (summaryFile == NULL) {
1083 Abort("Unable to create inode summary file\n");
1085 if (!canfork || debug || Fork() == 0) {
/*
 * NOTE(review): st_size is narrowed to unsigned long here; on a platform
 * where unsigned long is 32 bits a very large inode file would be
 * truncated -- confirm this cannot occur.
 */
1087 unsigned long st_size=(unsigned long) status.st_size;
1088 nInodes = st_size / sizeof(struct ViceInodeInfo);
1090 fclose(summaryFile);
1092 unlink(summaryFileName);
1093 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1094 RemoveTheForce(fileSysPath);
1096 struct VolumeSummary *vsp;
1099 GetVolumeSummary(singleVolumeNumber);
1101 for (i = 0, vsp = volumeSummaryp; i < nVolumes; i++) {
1103 DeleteExtraVolumeHeaderFile(vsp);
1106 Log("%s vice inodes on %s; not salvaged\n",
1107 singleVolumeNumber ? "No applicable" : "No", dev);
/* NOTE(review): the multiplication is not checked for overflow. */
1110 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1112 fclose(summaryFile);
1115 unlink(summaryFileName);
1117 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
/* A short read is treated as failure. */
1120 if (read(inodeFd, ip, st_size) != st_size) {
1121 fclose(summaryFile);
1124 unlink(summaryFileName);
1125 Abort("Unable to read inode table; %s not salvaged\n", dev);
/* Sort in memory, then rewrite the file from offset 0 in sorted order. */
1127 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1128 if (afs_lseek(inodeFd, 0, SEEK_SET) == -1
1129 || write(inodeFd, ip, st_size) != st_size) {
1130 fclose(summaryFile);
1133 unlink(summaryFileName);
1134 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
/* One summary record per volume; index tracks the offset of its first inode. */
1138 CountVolumeInodes(ip, nInodes, &summary);
1139 if (fwrite(&summary, sizeof(summary), 1, summaryFile) != 1) {
1140 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1141 fclose(summaryFile);
1145 summary.index += (summary.nInodes);
1146 nInodes -= summary.nInodes;
1147 ip += summary.nInodes;
1149 /* Following fflush is not fclose, because if it was debug mode would not work */
1150 if (fflush(summaryFile) == EOF || fsync(fileno(summaryFile)) == -1) {
1151 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1152 fclose(summaryFile);
1156 if (canfork && !debug) {
1161 if (Wait("Inode summary") == -1) {
1162 fclose(summaryFile);
1165 unlink(summaryFileName);
1166 Exit(1); /* salvage of this partition aborted */
/*
 * NOTE(review): the calls inside these assert() statements have side
 * effects; if this assert can be compiled out they would not run.
 */
1169 assert(afs_fstat(fileno(summaryFile), &status) != -1);
1170 if (status.st_size != 0) {
1172 unsigned long st_status=(unsigned long)status.st_size;
1173 inodeSummary = (struct InodeSummary *)malloc(st_status);
1174 assert(inodeSummary != NULL);
1175 /* For GNU we need to do lseek to get the file pointer moved. */
1176 assert(afs_lseek(fileno(summaryFile), 0, SEEK_SET) == 0);
1177 ret = read(fileno(summaryFile), inodeSummary, st_status);
1178 assert(ret == st_status);
1180 nVolumesInInodeFile =(unsigned long)(status.st_size) / sizeof(struct InodeSummary);
/*
 * NOTE(review): the second conversion is %d but the argument is cast to
 * unsigned long; the specifier should match the argument type (for example
 * %lu).
 */
1181 Log("%d nVolumesInInodeFile %d \n",nVolumesInInodeFile,(unsigned long)(status.st_size));
1182 fclose(summaryFile);
1184 unlink(summaryFileName);
1188 /* Comparison routine for volume sort.
1189 This is setup so that a read-write volume comes immediately before
1190 any read-only clones of that volume */
/*
 * Both arguments point at struct VolumeSummary records.  GetVolumeSummary
 * sorts volumeSummaryp with qsort; the comparator argument of that call is
 * elided in this view, presumably this routine.  Records are ordered by
 * header.parent first; within one parent the record whose id equals its
 * parent (the read-write volume) is special-cased, and the remaining
 * read-only volumes are ordered by header.id.  The two return statements
 * for the read-write cases are elided in this view.
 */
1192 CompareVolumes(const void *_p1, const void *_p2)
1194 register const struct VolumeSummary *p1 = _p1;
1195 register const struct VolumeSummary *p2 = _p2;
/* Different volume groups: order by parent id. */
1196 if (p1->header.parent != p2->header.parent)
1197 return p1->header.parent < p2->header.parent ? -1 : 1;
1198 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1200 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1202 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
/*
 * Load the volume header files found in fileSysPath into volumeSummaryp.
 *
 * singleVolumeNumber: zero for a whole-partition salvage, otherwise the
 *                     volume whose group is being salvaged.
 *
 * Whole-partition mode makes a first pass over the directory to find files
 * ending in VHDREXT with a valid VOLUMEHEADERMAGIC, then allocates nvols
 * entries.  Single-volume mode allocates a fixed 20 entries.  The second
 * pass reads each header file:
 *  - unreadable or bad-magic headers are reported and, for whole-partition
 *    runs, unlinked (the unlink call is elided in this view);
 *  - in single-volume mode a request naming a clone is either handed to
 *    SALVSYNC_LinkVolume (salvageServer) or refused as read-only;
 *  - matching volumes other than the requested one are taken offline with
 *    AskOffline;
 *  - a header whose file name differs from the VFORMAT rendering of its id
 *    is reported as incorrectly named;
 *  - otherwise the file name is saved in vsp->fileName.
 * Finally the table is sorted with qsort.
 *
 * NOTE(review): in single-volume mode the table has room for 20 entries; no
 * bound check on the number of entries stored is visible here -- confirm.
 * NOTE(review): name is filled by sprintf without a bound; its declaration
 * is not visible here.
 */
1206 GetVolumeSummary(VolumeId singleVolumeNumber)
1209 afs_int32 nvols = 0;
1210 struct VolumeSummary *vsp, vs;
1211 struct VolumeDiskHeader diskHeader;
1214 /* Get headers from volume directory */
1215 dirp = opendir(fileSysPath);
1217 Abort("Can't read directory %s; not salvaged\n", fileSysPath);
1218 if (!singleVolumeNumber) {
/* Counting pass; vs is scratch space for the decoded header. */
1219 while ((dp = readdir(dirp))) {
1220 char *p = dp->d_name;
1221 p = strrchr(dp->d_name, '.');
1222 if (p != NULL && strcmp(p, VHDREXT) == 0) {
1225 sprintf(name, "%s/%s", fileSysPath, dp->d_name);
1226 if ((fd = afs_open(name, O_RDONLY)) != -1
1227 && read(fd, (char *)&diskHeader, sizeof(diskHeader))
1228 == sizeof(diskHeader)
1229 && diskHeader.stamp.magic == VOLUMEHEADERMAGIC) {
1230 DiskToVolumeHeader(&vs.header, &diskHeader);
/*
 * NOTE(review): this reopens the current directory rather than fileSysPath;
 * the elided lines around it may select a different call on other
 * platforms -- confirm the working directory assumption.
 */
1238 dirp = opendir("."); /* No rewinddir for NT */
1245 (struct VolumeSummary *)malloc(nvols *
1246 sizeof(struct VolumeSummary));
1249 (struct VolumeSummary *)malloc(20 * sizeof(struct VolumeSummary));
1250 assert(volumeSummaryp != NULL);
1253 vsp = volumeSummaryp;
/* Filling pass. */
1254 while ((dp = readdir(dirp))) {
1255 char *p = dp->d_name;
1256 p = strrchr(dp->d_name, '.');
1257 if (p != NULL && strcmp(p, VHDREXT) == 0) {
1261 sprintf(name, "%s/%s", fileSysPath, dp->d_name);
/* Any failure to open, read a full header, or match the magic lands here. */
1262 if ((fd = afs_open(name, O_RDONLY)) == -1
1263 || read(fd, &diskHeader, sizeof(diskHeader))
1264 != sizeof(diskHeader)
1265 || diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
1270 if (!singleVolumeNumber) {
1272 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1275 Log("Unable to unlink %s (errno = %d)\n", name, errno);
1280 char nameShouldBe[64];
1281 DiskToVolumeHeader(&vsp->header, &diskHeader);
/* The requested id exists but its parent differs: the request names a clone. */
1282 if (singleVolumeNumber && vsp->header.id == singleVolumeNumber
1283 && vsp->header.parent != singleVolumeNumber) {
1284 if (programType == salvageServer) {
1285 #ifdef SALVSYNC_BUILD_CLIENT
1286 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1287 vsp->header.id, vsp->header.parent);
1288 if (SALVSYNC_LinkVolume(vsp->header.parent,
1290 fileSysPartition->name,
1292 Log("schedule request failed\n");
1295 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1297 Log("%u is a read-only volume; not salvaged\n",
1298 singleVolumeNumber);
/* Keep the header when salvaging everything or when it belongs to the requested group. */
1302 if (!singleVolumeNumber
1303 || (vsp->header.id == singleVolumeNumber
1304 || vsp->header.parent == singleVolumeNumber)) {
1305 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1306 VFORMAT, afs_printable_uint32_lu(vsp->header.id));
1307 if (singleVolumeNumber
1308 && vsp->header.id != singleVolumeNumber)
1309 AskOffline(vsp->header.id, fileSysPartition->name);
1310 if (strcmp(nameShouldBe, dp->d_name)) {
1312 Log("Volume header file %s is incorrectly named; %sdeleted (it will be recreated later, if necessary)\n", name, (Testing ? "it would have been " : ""));
1315 Log("Unable to unlink %s (errno = %d)\n", name, errno);
1319 vsp->fileName = ToString(dp->d_name);
1329 qsort(volumeSummaryp, nVolumes, sizeof(struct VolumeSummary),
1333 /* Find the link table. This should be associated with the RW volume or, if
1334 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
/* FindLinkHandle
 *
 * Look through the special inodes of every volume in a volume group for
 * the link table of the group.
 *
 * isp       - array of nVols inode summaries
 * nVols     - number of entries in isp
 * allInodes - base pointer that each isp[i].index is added to in order to
 *             reach the inode records of that volume
 *
 * Returns the inodeNumber of the first special inode whose type is
 * VI_LINKTABLE. NOTE(review): the value returned when no link table is
 * found is not visible in this excerpt -- confirm against the full source.
 */
1337 FindLinkHandle(register struct InodeSummary *isp, int nVols,
1338 struct ViceInodeInfo *allInodes)
1341 struct ViceInodeInfo *ip;
1343 for (i = 0; i < nVols; i++) {
/* ip points at the first inode record belonging to volume i */
1344 ip = allInodes + isp[i].index;
/* only the leading nSpecialInodes records of the volume are examined */
1345 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1346 if (ip[j].u.special.type == VI_LINKTABLE)
1347 return ip[j].inodeNumber;
/* CreateLinkTable
 *
 * Prepare the link table of a volume group and leave the shared VGLinkH
 * handle initialized to refer to it.
 *
 * isp - inode summary providing volumeId and RWvolumeId; when it has a
 *       volSummary, the header.linkTable of that summary is set to ino
 * ino - link table inode number; when VALID_INO rejects it, IH_CREATE is
 *       called for a special inode of type VI_LINKTABLE
 *
 * The table is opened, truncated to the size of a version stamp plus one
 * short, and stamped with LINKTABLEMAGIC and LINKTABLEVERSION. Open,
 * truncate and write failures are reported through Abort.
 */
1354 CreateLinkTable(register struct InodeSummary *isp, Inode ino)
1356 struct versionStamp version;
/* no usable inode number was supplied, so a new link table inode is requested */
1359 if (!VALID_INO(ino))
1361 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
1362 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1363 if (!VALID_INO(ino))
1365 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1366 isp->RWvolumeId, errno);
1367 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1368 fdP = IH_OPEN(VGLinkH);
1370 Abort("Can't open link table for volume %u (error = %d)\n",
1371 isp->RWvolumeId, errno);
/* the table is cut down to a version stamp followed by one short */
1373 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1374 Abort("Can't truncate link table for volume %u (error = %d)\n",
1375 isp->RWvolumeId, errno);
1377 version.magic = LINKTABLEMAGIC;
1378 version.version = LINKTABLEVERSION;
/* NOTE(review): the Abort below guards the FDH_WRITE of the version stamp
 * but its text says truncate, apparently copied from the check above --
 * confirm and correct the wording. */
1380 if (FDH_WRITE(fdP, (char *)&version, sizeof(version))
1382 Abort("Can't truncate link table for volume %u (error = %d)\n",
1383 isp->RWvolumeId, errno);
1385 FDH_REALLYCLOSE(fdP);
1387 /* If the volume summary exits (i.e., the V*.vol header file exists),
1388 * then set this inode there as well.
1390 if (isp->volSummary)
1391 isp->volSummary->header.linkTable = ino;
/* NOTE(review): the header of this routine is not visible in this excerpt;
 * it appears to be nt_SVG, the start routine that SalvageVolumeGroup hands
 * to pthread_create -- confirm.
 *
 * Unpacks the SVGParms_t passed through arg and runs DoSalvageVolumeGroup
 * on the inode summary pointer and the count that it carries.
 */
1400 SVGParms_t *parms = (SVGParms_t *) arg;
1401 DoSalvageVolumeGroup(parms->svgp_inodeSummaryp, parms->svgp_count);
/* SalvageVolumeGroup (threaded variant)
 *
 * Runs the salvage of one volume group on a separate thread and waits for
 * that thread to finish. The #endif that closes this section names
 * AFS_NT40_ENV.
 *
 * isp   - inode summaries of the volume group
 * nVols - number of volumes in the group
 *
 * VolInfo is cleared, isp and nVols are packed into parms, and a thread
 * is created with nt_SVG as its start routine and parms as its argument;
 * pthread_join then blocks until that thread ends. A message is logged
 * for a failure of pthread_attr_init, pthread_attr_setdetachstate or
 * pthread_create.
 */
1406 SalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1409 pthread_attr_t tattr;
1413 /* Initialize per volume global variables, even if later code does so */
1417 memset(&VolInfo, 0, sizeof(VolInfo));
/* parms carries the two arguments over to the thread start routine */
1419 parms.svgp_inodeSummaryp = isp;
1420 parms.svgp_count = nVols;
1421 code = pthread_attr_init(&tattr);
1423 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
/* joinable, so that the pthread_join further down can wait for the salvage */
1427 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1429 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1432 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1434 Log("Failed to create thread to salvage volume group %u\n",
1438 (void)pthread_join(tid, NULL);
1440 #endif /* AFS_NT40_ENV */
/* DoSalvageVolumeGroup
 *
 * Salvage the nVols volumes described by isp, which together form one
 * volume group.
 *
 * Visible steps, in order:
 *  - decide whether a read/write volume is present (haveRWvolume) and,
 *    unless ForceSalvage is set, consult QuickCheck;
 *  - when canfork is set and debug is off, call Fork and have the branch
 *    that sees a nonzero result wait in Wait;
 *  - read the inode records of the whole group from inodeFd into a
 *    freshly allocated buffer;
 *  - under AFS_NAMEI_ENV, locate the link table with FindLinkHandle or
 *    recreate it with CreateLinkTable;
 *  - walk the volumes from the last one down to salvageTo, running
 *    SalvageVolumeHeaderFile and SalvageVnodes first with check == 1 and
 *    then with check == 0;
 *  - correct the link counts with IH_DEC and IH_INC, the link table
 *    inode itself being handled last;
 *  - call SalvageVolume for the directory consistency checks and release
 *    VGLinkH.
 *
 * NOTE(review): several statements of this routine, including its return
 * paths, are not visible in this excerpt -- confirm details against the
 * full source.
 */
1443 DoSalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1445 struct ViceInodeInfo *inodes, *allInodes, *ip;
1446 int i, totalInodes, size, salvageTo;
1450 int dec_VGLinkH = 0;
1452 FdHandle_t *fdP = NULL;
/* a RW volume counts as present only when the first summary is the RW
 * volume itself and owns at least one special inode */
1455 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1456 && isp->nSpecialInodes > 0);
1457 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1458 if (!ForceSalvage && QuickCheck(isp, nVols))
1461 if (ShowMounts && !haveRWvolume)
1463 if (canfork && !debug && Fork() != 0) {
1464 (void)Wait("Salvage volume group");
/* size the buffer for the inode records of every volume in the group */
1467 for (i = 0, totalInodes = 0; i < nVols; i++)
1468 totalInodes += isp[i].nInodes;
1469 size = totalInodes * sizeof(struct ViceInodeInfo);
1470 inodes = (struct ViceInodeInfo *)malloc(size);
1471 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1472 * for the partition, if all the inodes
1473 * had been read into memory */
1475 (inodeFd, isp->index * sizeof(struct ViceInodeInfo),
/* NOTE(review): the read is performed inside assert; if this assert is the
 * standard macro, defining NDEBUG would remove the read -- confirm. */
1477 assert(read(inodeFd, inodes, size) == size);
1479 /* Don't try to salvage a read write volume if there isn't one on this
1481 salvageTo = haveRWvolume ? 0 : 1;
1483 #ifdef AFS_NAMEI_ENV
1484 ino = FindLinkHandle(isp, nVols, allInodes);
1485 if (VALID_INO(ino)) {
1486 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1487 fdP = IH_OPEN(VGLinkH);
1489 if (!VALID_INO(ino) || fdP == NULL) {
1490 Log("%s link table for volume %u.\n",
1491 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1493 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1496 struct ViceInodeInfo *ip;
1497 CreateLinkTable(isp, ino);
1498 fdP = IH_OPEN(VGLinkH);
1499 /* Sync fake 1 link counts to the link table, now that it exists */
1501 for (i = 0; i < nVols; i++) {
1502 ip = allInodes + isp[i].index;
/* j starts after the special inodes of volume i */
1503 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1505 nt_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1507 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1515 FDH_REALLYCLOSE(fdP);
1517 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1520 /* Salvage in reverse order--read/write volume last; this way any
1521 * Inodes not referenced by the time we salvage the read/write volume
1522 * can be picked up by the read/write volume */
1523 /* ACTUALLY, that's not done right now--the inodes just vanish */
/* salvageTo is 1 when there is no RW volume, which keeps isp[0] out of
 * this loop */
1524 for (i = nVols - 1; i >= salvageTo; i--) {
1526 struct InodeSummary *lisp = &isp[i];
1527 #ifdef AFS_NAMEI_ENV
1528 /* If only the RO is present on this partition, the link table
1529 * shows up as a RW volume special file. Need to make sure the
1530 * salvager doesn't try to salvage the non-existent RW.
1532 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1533 /* If this only special inode is the link table, continue */
1534 if (inodes->u.special.type == VI_LINKTABLE) {
1541 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1542 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1543 /* Check inodes twice. The second time do things seriously. This
1544 * way the whole RO volume can be deleted, below, if anything goes wrong */
/* the pass with check == 1 runs first, the pass with check == 0 second */
1545 for (check = 1; check >= 0; check--) {
1547 if (SalvageVolumeHeaderFile(lisp, allInodes, rw, check, &deleteMe)
1549 MaybeZapVolume(lisp, "Volume header", deleteMe, check);
1550 if (rw && deleteMe) {
1551 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1552 * volume won't be called */
1558 if (rw && check == 1)
1560 if (SalvageVnodes(isp, lisp, allInodes, check) == -1) {
1561 MaybeZapVolume(lisp, "Vnode index", 0, check);
1567 /* Fix actual inode counts */
1569 Log("totalInodes %d\n",totalInodes);
/* a nonzero ip->linkCount at this point is reported as an incorrect link
 * count; positive values are worked off with IH_DEC and negative values
 * with IH_INC */
1570 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1571 static int TraceBadLinkCounts = 0;
1572 #ifdef AFS_NAMEI_ENV
1573 if (VGLinkH->ih_ino == ip->inodeNumber) {
1574 dec_VGLinkH = ip->linkCount - VGLinkH_cnt;
1575 VGLinkH_p1 = ip->u.param[0];
1576 continue; /* Deal with this last. */
1579 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1580 TraceBadLinkCounts--; /* Limit reports, per volume */
1581 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(NULL, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
1583 while (ip->linkCount > 0) {
1584 /* below used to assert, not break */
1586 if (IH_DEC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1587 Log("idec failed. inode %s errno %d\n",
1588 PrintInode(NULL, ip->inodeNumber), errno);
1594 while (ip->linkCount < 0) {
1595 /* these used to be asserts */
1597 if (IH_INC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1598 Log("iinc failed. inode %s errno %d\n",
1599 PrintInode(NULL, ip->inodeNumber), errno);
1606 #ifdef AFS_NAMEI_ENV
/* now apply the adjustment that was deferred for the link table inode */
1607 while (dec_VGLinkH > 0) {
1608 if (IH_DEC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1609 Log("idec failed on link table, errno = %d\n", errno);
1613 while (dec_VGLinkH < 0) {
1614 if (IH_INC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1615 Log("iinc failed on link table, errno = %d\n", errno);
1622 /* Directory consistency checks on the rw volume */
1624 SalvageVolume(isp, VGLinkH);
1625 IH_RELEASE(VGLinkH);
1627 if (canfork && !debug) {
/* QuickCheck
 *
 * Examine the volume info header of each volume in the group without
 * doing a full salvage.
 *
 * isp   - inode summaries of the volume group
 * nVols - number of entries in isp
 *
 * A header is accepted when it can be read in full, carries
 * VOLUMEINFOMAGIC, has dontSalvage equal to DONT_SALVAGE, and has
 * needsSalvaged and destroyMe both zero. An accepted header that still
 * has inUse set gets inUse = 0 and inService = 1, and IH_IWRITE is used
 * to store the header.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 * DoSalvageVolumeGroup tests the result as a boolean together with
 * !ForceSalvage -- confirm the exact values returned.
 */
1634 QuickCheck(register struct InodeSummary *isp, int nVols)
1636 /* Check headers BEFORE forking */
1640 for (i = 0; i < nVols; i++) {
1641 struct VolumeSummary *vs = isp[i].volSummary;
1642 VolumeDiskData volHeader;
1644 /* Don't salvage just because phantom rw volume is there... */
1645 /* (If a read-only volume exists, read/write inodes must also exist) */
/* first summary without special inodes while other volumes exist: the
 * phantom RW case named above */
1646 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
1650 IH_INIT(h, fileSysDevice, vs->header.parent, vs->header.volumeInfo);
1651 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
1652 == sizeof(volHeader)
1653 && volHeader.stamp.magic == VOLUMEINFOMAGIC
1654 && volHeader.dontSalvage == DONT_SALVAGE
1655 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
/* a header left marked in use is put back into service */
1656 if (volHeader.inUse != 0) {
1657 volHeader.inUse = 0;
1658 volHeader.inService = 1;
1660 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
1661 != sizeof(volHeader)) {
1677 /* SalvageVolumeHeaderFile
1679 * Salvage the top level V*.vol header file. Make sure the special files
1680 * exist and that there are no duplicates.
1682 * Calls SalvageHeader for each possible type of volume special file.
/* SalvageVolumeHeaderFile
 *
 * Check, and when check is zero repair, the V*.vol header file of one
 * volume together with the special inodes that it refers to.
 *
 * isp      - inode summary of the volume; isp->volSummary, when set,
 *            holds the header taken from the existing file
 * inodes   - inode array; the special inodes of the volume are reached
 *            as inodes[isp->index + i] for i below isp->nSpecialInodes
 * RW       - nonzero when the volume is the read/write volume
 * check    - nonzero for the report-only pass
 * deleteMe - passed through to SalvageHeader
 *
 * Outline of the visible logic:
 *  - special inodes that the existing header already references are
 *    remembered in goodspecial;
 *  - a fresh tempHeader is stamped with VOLUMEHEADERMAGIC and
 *    VOLUMEHEADERVERSION, the volume id and the RW parent id;
 *  - adjacent special inodes of the same type are reported as
 *    duplicates; the one recorded in goodspecial is preferred, and when
 *    none is known the salvage of the volume is abandoned;
 *  - each usable special inode is recorded through the stuff table,
 *    out-of-range types being logged as rubbish;
 *  - SalvageHeader is called for every stuff entry;
 *  - a missing header file is created, and one that differs from
 *    tempHeader is rewritten from it;
 *  - the volumeInfoHandle of the summary is initialized last.
 *
 * NOTE(review): return statements and a number of other lines are not
 * visible in this excerpt -- confirm details against the full source.
 */
1686 SalvageVolumeHeaderFile(register struct InodeSummary *isp,
1687 register struct ViceInodeInfo *inodes, int RW,
1688 int check, int *deleteMe)
1692 register struct ViceInodeInfo *ip;
1693 int allinodesobsolete = 1;
1694 struct VolumeDiskHeader diskHeader;
1697 /* keeps track of special inodes that are probably 'good'; they are
1698 * referenced in the vol header, and are included in the given inodes
1703 } goodspecial[MAXINODETYPE];
1708 memset(goodspecial, 0, sizeof(goodspecial));
1710 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
1712 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
1714 Log("cannot allocate memory for inode skip array when salvaging "
1715 "volume %lu; not performing duplicate special inode recovery\n",
1716 afs_printable_uint32_lu(isp->volumeId));
1717 /* still try to perform the salvage; the skip array only does anything
1718 * if we detect duplicate special inodes */
1722 * First, look at the special inodes and see if any are referenced by
1723 * the existing volume header. If we find duplicate special inodes, we
1724 * can use this information to use the referenced inode (it's more
1725 * likely to be the 'good' one), and throw away the duplicates.
1727 if (isp->volSummary && skip) {
1728 /* use tempHeader, so we can use the stuff[] array to easily index
1729 * into the isp->volSummary special inodes */
1730 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
1732 for (i = 0; i < isp->nSpecialInodes; i++) {
1733 ip = &inodes[isp->index + i];
1734 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
1735 /* will get taken care of in a later loop */
1738 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
1739 goodspecial[ip->u.special.type-1].valid = 1;
1740 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
1745 memset(&tempHeader, 0, sizeof(tempHeader));
1746 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
1747 tempHeader.stamp.version = VOLUMEHEADERVERSION;
1748 tempHeader.id = isp->volumeId;
1749 tempHeader.parent = isp->RWvolumeId;
1751 /* Check for duplicates (inodes are sorted by type field) */
1752 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
1753 ip = &inodes[isp->index + i];
1754 if (ip->u.special.type == (ip + 1)->u.special.type) {
1755 afs_ino_str_t stmp1, stmp2;
1757 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
1758 /* Will be caught in the loop below */
1762 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
1763 ip->u.special.type, isp->volumeId,
1764 PrintInode(stmp1, ip->inodeNumber),
1765 PrintInode(stmp2, (ip+1)->inodeNumber));
1767 if (skip && goodspecial[ip->u.special.type-1].valid) {
1768 Inode gi = goodspecial[ip->u.special.type-1].inode;
1771 Log("using special inode referenced by vol header (%s)\n",
1772 PrintInode(stmp1, gi));
1775 /* the volume header references some special inode of
1776 * this type in the inodes array; are we it? */
1777 if (ip->inodeNumber != gi) {
1779 } else if ((ip+1)->inodeNumber != gi) {
1780 /* in case this is the last iteration; we need to
1781 * make sure we check ip+1, too */
1786 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
1794 for (i = 0; i < isp->nSpecialInodes; i++) {
1795 ip = &inodes[isp->index + i];
1796 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
1798 Log("Rubbish header inode %s of type %d\n",
1799 PrintInode(NULL, ip->inodeNumber),
1800 ip->u.special.type);
1806 Log("Rubbish header inode %s of type %d; deleted\n",
1807 PrintInode(NULL, ip->inodeNumber),
1808 ip->u.special.type);
1809 } else if (!stuff[ip->u.special.type - 1].obsolete) {
1810 if (skip && skip[i]) {
1811 if (orphans == ORPH_REMOVE) {
1812 Log("Removing orphan special inode %s of type %d\n",
1813 PrintInode(NULL, ip->inodeNumber), ip->u.special.type);
1816 Log("Ignoring orphan special inode %s of type %d\n",
1817 PrintInode(NULL, ip->inodeNumber), ip->u.special.type);
1818 /* fall through to the ip->linkCount--; line below */
1821 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
1822 allinodesobsolete = 0;
1824 if (!check && ip->u.special.type != VI_LINKTABLE)
1825 ip->linkCount--; /* Keep the inode around */
1833 if (allinodesobsolete) {
1840 VGLinkH_cnt++; /* one for every header. */
1842 if (!RW && !check && isp->volSummary) {
1843 ClearROInUseBit(isp->volSummary);
1847 for (i = 0; i < MAXINODETYPE; i++) {
1848 if (stuff[i].inodeType == VI_LINKTABLE) {
1849 /* Gross hack: SalvageHeader does a bcmp on the volume header.
1850 * And we may have recreated the link table earlier, so set the
1851 * RW header as well.
1853 if (VALID_INO(VGLinkH->ih_ino)) {
1854 *stuff[i].inode = VGLinkH->ih_ino;
1858 if (SalvageHeader(&stuff[i], isp, check, deleteMe) == -1 && check)
1862 if (isp->volSummary == NULL) {
1864 char headerName[64];
1865 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
1866 (void)afs_snprintf(path, sizeof path, "%s/%s", fileSysPath, headerName);
1868 Log("No header file for volume %u\n", isp->volumeId);
1872 Log("No header file for volume %u; %screating %s\n",
1873 isp->volumeId, (Testing ? "it would have been " : ""),
1875 headerFd = afs_open(path, O_RDWR | O_CREAT | O_TRUNC, 0644);
1876 assert(headerFd != -1);
1877 isp->volSummary = (struct VolumeSummary *)
1878 malloc(sizeof(struct VolumeSummary));
1879 isp->volSummary->fileName = ToString(headerName);
1882 char headerName[64];
1883 /* hack: these two fields are obsolete... */
1884 isp->volSummary->header.volumeAcl = 0;
1885 isp->volSummary->header.volumeMountTable = 0;
1888 (&isp->volSummary->header, &tempHeader,
1889 sizeof(struct VolumeHeader))) {
1890 /* We often remove the name before calling us, so we make a fake one up */
1891 if (isp->volSummary->fileName) {
1892 strcpy(headerName, isp->volSummary->fileName);
1894 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
1895 isp->volSummary->fileName = ToString(headerName);
1897 (void)afs_snprintf(path, sizeof path, "%s/%s", fileSysPath, headerName);
1899 Log("Header file %s is damaged or no longer valid%s\n", path,
1900 (check ? "" : "; repairing"));
1904 headerFd = afs_open(path, O_RDWR | O_TRUNC, 0644);
1905 assert(headerFd != -1);
1909 memcpy(&isp->volSummary->header, &tempHeader,
1910 sizeof(struct VolumeHeader));
1913 Log("It would have written a new header file for volume %u\n",
1916 VolumeHeaderToDisk(&diskHeader, &tempHeader);
1917 if (write(headerFd, &diskHeader, sizeof(struct VolumeDiskHeader))
1918 != sizeof(struct VolumeDiskHeader)) {
1919 Log("Couldn't rewrite volume header file!\n");
/* set up the handle for the volume info inode named in the header */
1926 IH_INIT(isp->volSummary->volumeInfoHandle, fileSysDevice, isp->RWvolumeId,
1927 isp->volSummary->header.volumeInfo);
/* SalvageHeader
 *
 * Verify one special inode of a volume header and recreate its contents
 * when they are missing or corrupt.
 *
 * sp    - table entry describing the special file; the code reads its
 *         inodeType, size, stamp, description and inode pointer
 * isp   - inode summary of the volume being salvaged
 * check - nonzero for the report-only pass
 * NOTE(review): the rest of the parameter list is not visible in this
 * excerpt; SalvageVolumeHeaderFile passes deleteMe as a fourth argument.
 *
 * Visible steps:
 *  - an inode number of zero is reported as missing and a new special
 *    inode of type sp->inodeType is requested with IH_CREATE;
 *  - the inode is opened; with OKToZap set, a failure that BadError
 *    flags is logged as destroying the volume, and an Abort covers the
 *    unable-to-open case;
 *  - sp->size bytes are read and the magic is compared with
 *    sp->stamp.magic; a mismatch is reported as corruption;
 *  - when recreate is set and Testing is off, the file is truncated to
 *    zero length and rewritten, either as a complete placeholder volume
 *    info record or as a bare version stamp;
 *  - for VI_VOLINFO the record is copied to VolInfo and its update or
 *    creation time is logged.
 *
 * NOTE(review): return statements and several conditions are not
 * visible in this excerpt -- confirm details against the full source.
 */
1932 SalvageHeader(register struct stuff *sp, struct InodeSummary *isp, int check,
1936 VolumeDiskData volumeInfo;
1937 struct versionStamp fileHeader;
1946 #ifndef AFS_NAMEI_ENV
1947 if (sp->inodeType == VI_LINKTABLE)
/* zero means the header names no inode of this type */
1950 if (*(sp->inode) == 0) {
1952 Log("Missing inode in volume header (%s)\n", sp->description);
1956 Log("Missing inode in volume header (%s); %s\n", sp->description,
1957 (Testing ? "it would have recreated it" : "recreating"));
1960 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
1961 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
1962 if (!VALID_INO(*(sp->inode)))
1964 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
1965 sp->description, errno);
1970 IH_INIT(specH, fileSysDevice, isp->RWvolumeId, *(sp->inode));
1971 fdP = IH_OPEN(specH);
1972 if (OKToZap && (fdP == NULL) && BadError(errno)) {
1973 /* bail out early and destroy the volume */
1975 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
1982 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
1983 sp->description, errno);
/* the file has to supply sp->size bytes and the expected magic */
1986 && (FDH_READ(fdP, (char *)&header, sp->size) != sp->size
1987 || header.fileHeader.magic != sp->stamp.magic)) {
1989 Log("Part of the header (%s) is corrupted\n", sp->description);
1990 FDH_REALLYCLOSE(fdP);
1994 Log("Part of the header (%s) is corrupted; recreating\n",
1998 if (sp->inodeType == VI_VOLINFO
1999 && header.volumeInfo.destroyMe == DESTROY_ME) {
2002 FDH_REALLYCLOSE(fdP);
2006 if (recreate && !Testing) {
2009 ("Internal error: recreating volume header (%s) in check mode\n",
2011 code = FDH_TRUNC(fdP, 0);
2013 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2014 sp->description, errno);
2016 /* The following code should be moved into vutil.c */
/* a recreated volume info record gets a placeholder name and is taken
 * out of service */
2017 if (sp->inodeType == VI_VOLINFO) {
2019 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2020 header.volumeInfo.stamp = sp->stamp;
2021 header.volumeInfo.id = isp->volumeId;
2022 header.volumeInfo.parentId = isp->RWvolumeId;
2023 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2024 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2025 isp->volumeId, isp->volumeId);
2026 header.volumeInfo.inService = 0;
2027 header.volumeInfo.blessed = 0;
2028 /* The + 1000 is a hack in case there are any files out in venus caches */
2029 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2030 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2031 header.volumeInfo.needsCallback = 0;
2032 gettimeofday(&tp, 0);
2033 header.volumeInfo.creationDate = tp.tv_sec;
2034 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
2036 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
2037 sp->description, errno);
2040 FDH_WRITE(fdP, (char *)&header.volumeInfo,
2041 sizeof(header.volumeInfo));
2042 if (code != sizeof(header.volumeInfo)) {
2045 ("Unable to write volume header file (%s) (errno = %d)\n",
2046 sp->description, errno);
2047 Abort("Unable to write entire volume header file (%s)\n",
/* NOTE(review): this second seek and write stores only the version stamp;
 * presumably it is the branch for inode types other than VI_VOLINFO, but
 * the else is not visible in this excerpt -- confirm. */
2051 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
2053 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
2054 sp->description, errno);
2056 code = FDH_WRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp));
2057 if (code != sizeof(sp->stamp)) {
2060 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2061 sp->description, errno);
2063 ("Unable to write entire version stamp in volume header file (%s)\n",
2068 FDH_REALLYCLOSE(fdP);
/* keep the volume info in VolInfo and report when it was last touched */
2070 if (sp->inodeType == VI_VOLINFO) {
2071 VolInfo = header.volumeInfo;
2074 if (VolInfo.updateDate) {
2075 strcpy(update, TimeStamp(VolInfo.updateDate, 0));
2077 Log("%s (%u) %supdated %s\n", VolInfo.name, VolInfo.id,
2078 (Testing ? "it would have been " : ""), update);
2080 strcpy(update, TimeStamp(VolInfo.creationDate, 0));
2082 Log("%s (%u) not updated (created %s)\n", VolInfo.name,
2083 VolInfo.id, update);
/* SalvageVnodes
 *
 * Run SalvageIndex over the small and then the large vnode index of one
 * volume.
 *
 * rwIsp   - inode summary that supplies the inode range: records start
 *           at index + nSpecialInodes and number nInodes - nSpecialInodes
 * thisIsp - inode summary of the volume whose vnode indexes are salvaged
 * inodes  - inode array that ioffset indexes into
 * check   - passed on to SalvageIndex
 *
 * RW is set when rwIsp and thisIsp are the same summary.
 * Returns 0 when ismall and ilarge are both 0, otherwise -1.
 * NOTE(review): the assignments to ismall and ilarge are not visible in
 * this excerpt; presumably they hold the two SalvageIndex results.
 */
2093 SalvageVnodes(register struct InodeSummary *rwIsp,
2094 register struct InodeSummary *thisIsp,
2095 register struct ViceInodeInfo *inodes, int check)
2097 int ilarge, ismall, ioffset, RW, nInodes;
2098 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2101 RW = (rwIsp == thisIsp);
/* only the records after the special inodes are handed to SalvageIndex */
2102 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2104 SalvageIndex(thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2105 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2106 if (check && ismall == -1)
2109 SalvageIndex(thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2110 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2111 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/* SalvageIndex
 *
 * Walk one vnode index file and reconcile each in-use vnode with the
 * inode records of the volume.
 *
 * ino        - inode of the vnode index file to open
 * class      - vnode class; selects the VnodeClassInfo entry (vcp)
 * RW         - read/write flag as computed by the caller
 * ip         - inode records to match against
 * nInodes    - number of records available at ip
 * volSummary - supplies header.id and header.parent
 * check      - nonzero for the report-only pass; the assert near the
 *              end of the loop forbids changing a vnode in that pass
 *
 * For every vnode whose type is not vNull the visible code:
 *  - zeroes vnodes whose magic does not match vcp->magic;
 *  - looks for the inode record carrying the same vnode number,
 *    preferring one whose inode number the vnode already names;
 *  - on a match, brings uniquifier, dataVersion, inode number and
 *    length in line with the inode record and decrements
 *    ip->linkCount so that the inode is kept;
 *  - without a match, deletes vnodes that name an inode and all
 *    directory vnodes;
 *  - stores a changed vnode unless Testing, and sets VolumeChanged.
 *
 * NOTE(review): many statements, including the return paths, are not
 * visible in this excerpt -- confirm details against the full source.
 */
2115 SalvageIndex(Inode ino, VnodeClass class, int RW,
2116 register struct ViceInodeInfo *ip, int nInodes,
2117 struct VolumeSummary *volSummary, int check)
2119 VolumeId volumeNumber;
2120 char buf[SIZEOF_LARGEDISKVNODE];
2121 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2123 StreamHandle_t *file;
2124 struct VnodeClassInfo *vcp;
2126 afs_fsize_t vnodeLength;
2127 int vnodeIndex, nVnodes;
2128 afs_ino_str_t stmp1, stmp2;
2132 volumeNumber = volSummary->header.id;
2133 IH_INIT(handle, fileSysDevice, volSummary->header.parent, ino);
2134 fdP = IH_OPEN(handle);
2135 assert(fdP != NULL);
2136 file = FDH_FDOPEN(fdP, "r+");
2137 assert(file != NULL);
2138 vcp = &VnodeClassInfo[class];
2139 size = OS_SIZE(fdP->fd_fd);
/* one slot of vcp->diskSize bytes at the start of the file is not counted
 * as a vnode; the seek below skips it */
2141 nVnodes = (size / vcp->diskSize) - 1;
2143 assert((nVnodes + 1) * vcp->diskSize == size);
/* NOTE(review): the seek is performed inside assert; if this assert is the
 * standard macro, defining NDEBUG would remove the seek -- confirm. */
2144 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
2148 for (vnodeIndex = 0;
2149 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2150 nVnodes--, vnodeIndex++) {
2151 if (vnode->type != vNull) {
2152 int vnodeChanged = 0;
2153 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2154 /* Log programs that belong to root (potentially suid root);
2155 * don't bother for read-only or backup volumes */
2156 #ifdef notdef /* This is done elsewhere */
2157 if (ShowRootFiles && RW && vnode->owner == 0 && vnodeNumber != 1)
2158 Log("OWNER IS ROOT %s %u dir %u vnode %u author %u owner %u mode %o\n", VolInfo.name, volumeNumber, vnode->parent, vnodeNumber, vnode->author, vnode->owner, vnode->modeBits);
2160 if (VNDISK_GET_INO(vnode) == 0) {
2162 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2163 memset(vnode, 0, vcp->diskSize);
2167 if (vcp->magic != vnode->vnodeMagic) {
2168 /* bad magic #, probably partially created vnode */
2169 Log("Partially allocated vnode %d deleted.\n",
2171 memset(vnode, 0, vcp->diskSize);
2175 /* ****** Should do a bit more salvage here: e.g. make sure
2176 * vnode type matches what it should be given the index */
/* loop while the current inode record belongs to a lower vnode number */
2177 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2178 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2179 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2180 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2187 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2188 /* The following doesn't work, because the version number
2189 * is not maintained correctly by the file server */
2190 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2191 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2193 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2199 /* For RW volume, look for vnode with matching inode number;
2200 * if no such match, take the first determined by our sort
2202 register struct ViceInodeInfo *lip = ip;
2203 register int lnInodes = nInodes;
2205 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2206 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2215 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2216 /* "Matching" inode */
/* vu and vd are taken from the vnode, iu and id from the inode record */
2220 vu = vnode->uniquifier;
2221 iu = ip->u.vnode.vnodeUniquifier;
2222 vd = vnode->dataVersion;
2223 id = ip->u.vnode.inodeDataVersion;
2225 * Because of the possibility of the uniquifier overflows (> 4M)
2226 * we compare them modulo the low 22-bits; we shouldn't worry
2227 * about mismatching since they shouldn't to many old
2228 * uniquifiers of the same vnode...
2230 if (IUnique(vu) != IUnique(iu)) {
2232 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2235 vnode->uniquifier = iu;
2236 #ifdef AFS_3DISPARES
2237 vnode->dataVersion = (id >= vd ?
2240 1887437 ? vd : id) :
2243 1887437 ? id : vd));
2245 #if defined(AFS_SGI_EXMAG)
2246 vnode->dataVersion = (id >= vd ?
2249 15099494 ? vd : id) :
2252 15099494 ? id : vd));
/* plain case: keep the larger of the two data versions */
2254 vnode->dataVersion = (id > vd ? id : vd);
2255 #endif /* AFS_SGI_EXMAG */
2256 #endif /* AFS_3DISPARES */
2259 /* don't bother checking for vd > id any more, since
2260 * partial file transfers always result in this state,
2261 * and you can't do much else anyway (you've already
2262 * found the best data you can) */
2263 #ifdef AFS_3DISPARES
2264 if (!vnodeIsDirectory(vnodeNumber)
2265 && ((vd < id && (id - vd) < 1887437)
2266 || ((vd > id && (vd - id) > 1887437)))) {
2268 #if defined(AFS_SGI_EXMAG)
2269 if (!vnodeIsDirectory(vnodeNumber)
2270 && ((vd < id && (id - vd) < 15099494)
2271 || ((vd > id && (vd - id) > 15099494)))) {
2273 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2274 #endif /* AFS_SGI_EXMAG */
2277 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2278 vnode->dataVersion = id;
/* the vnode names a different inode than the record that was matched */
2283 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2286 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2288 VNDISK_SET_INO(vnode, ip->inodeNumber);
2293 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2295 VNDISK_SET_INO(vnode, ip->inodeNumber);
/* the byte count of the inode record is what the vnode length is set to */
2298 VNDISK_GET_LEN(vnodeLength, vnode);
2299 if (ip->byteCount != vnodeLength) {
2302 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2307 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2308 VNDISK_SET_LEN(vnode, ip->byteCount);
2312 ip->linkCount--; /* Keep the inode around */
2315 } else { /* no matching inode */
/* a vnode that names an inode, and any directory vnode, is removed when
 * no inode record matches it */
2316 if (VNDISK_GET_INO(vnode) != 0
2317 || vnode->type == vDirectory) {
2318 /* No matching inode--get rid of the vnode */
2320 if (VNDISK_GET_INO(vnode)) {
2322 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)));
2326 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2331 if (VNDISK_GET_INO(vnode)) {
2333 time_t serverModifyTime = vnode->serverModifyTime;
2334 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2338 time_t serverModifyTime = vnode->serverModifyTime;
2339 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2342 memset(vnode, 0, vcp->diskSize);
2345 /* Should not reach here because we checked for
2346 * (inodeNumber == 0) above. And where we zero the vnode,
2347 * we also goto vnodeDone.
2351 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2355 } /* VNDISK_GET_INO(vnode) != 0 */
/* nothing may have been modified during the check pass */
2357 assert(!(vnodeChanged && check));
/* a modified vnode goes to its slot in the index unless Testing; the name
 * of the call that takes these arguments is not visible in this excerpt */
2358 if (vnodeChanged && !Testing) {
2360 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2361 (char *)vnode, vcp->diskSize)
2363 VolumeChanged = 1; /* For break call back */
/* CheckVnodeNumber
 *
 * Map a vnode number to its VnodeEssence entry.
 *
 * vnodeNumber - vnode number to look up
 *
 * The class of the vnode selects the vnodeInfo entry and the bit number
 * of the vnode is used as the index into its vnodes array.
 * Returns a pointer to the entry, or NULL when the bit number is not
 * below the nVnodes of that class.
 */
2374 struct VnodeEssence *
2375 CheckVnodeNumber(VnodeId vnodeNumber)
2378 struct VnodeInfo *vip;
2381 class = vnodeIdToClass(vnodeNumber);
2382 vip = &vnodeInfo[class];
2383 offset = vnodeIdToBitNumber(vnodeNumber);
2384 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/* CopyOnWrite
 *
 * Give the directory described by dir a fresh inode holding a copy of its
 * current contents before the directory is changed.
 *
 * dir - directory summary; the code reads copied, vnodeNumber, rwVid,
 *       ds_linkH and dirHandle
 *
 * Visible steps: test dir->copied and Testing, call DFlush, read the
 * directory vnode from the large vnode index, create a new inode with
 * IH_CREATE, copy the old inode into it with CopyInode, store the new
 * inode number in the vnode and write the vnode back, then point the
 * salvage directory handle of dir at the new inode.
 *
 * NOTE(review): CopyInode is invoked inside assert; if this assert is
 * the standard macro, defining NDEBUG would remove the copy -- confirm.
 * NOTE(review): the statement taken when dir->copied or Testing is set,
 * and the return statements, are not visible in this excerpt.
 */
2388 CopyOnWrite(register struct DirSummary *dir)
2390 /* Copy the directory unconditionally if we are going to change it:
2391 * not just if was cloned.
2393 struct VnodeDiskObject vnode;
2394 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2395 Inode oldinode, newinode;
2398 if (dir->copied || Testing)
2400 DFlush(); /* Well justified paranoia... */
/* fetch the directory vnode from the large vnode index */
2403 IH_IREAD(vnodeInfo[vLarge].handle,
2404 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2406 assert(code == sizeof(vnode));
2407 oldinode = VNDISK_GET_INO(&vnode);
2408 /* Increment the version number by a whole lot to avoid problems with
2409 * clients that were promised new version numbers--but the file server
2410 * crashed before the versions were written to disk.
2413 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2414 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2416 assert(VALID_INO(newinode));
2417 assert(CopyInode(fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2419 VNDISK_SET_INO(&vnode, newinode);
2421 IH_IWRITE(vnodeInfo[vLarge].handle,
2422 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2424 assert(code == sizeof(vnode));
2426 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2427 fileSysDevice, newinode);
2428 /* Don't delete the original inode right away, because the directory is
2429 * still being scanned.
2435 * This function should either successfully create a new dir, or give up
2436 * and leave things the way they were. In particular, if it fails to write
2437 * the new dir properly, it should return w/o changing the reference to the
/* CopyAndSalvage
 *
 * Rebuild the directory described by dir into a newly created inode and,
 * when the rebuilt copy passes DirOK, switch the vnode and dir over to it.
 *
 * dir - directory summary; the code reads vnodeNumber, rwVid, ds_linkH
 *       and dirHandle, and replaces dirHandle at the end
 *
 * Visible steps: read the directory vnode from the large vnode index,
 * create a new inode with IH_CREATE, run DirSalvage from the old
 * directory handle into the new one, verify the result with DirOK,
 * store the new inode number and length in the vnode and write it back,
 * sync, drop one link on the old inode with IH_DEC and install newdir as
 * dir->dirHandle. On the two failure paths the new inode is given up
 * with IH_DEC instead.
 *
 * NOTE(review): return statements and the conditions guarding the
 * failure paths are not visible in this excerpt.
 */
2441 CopyAndSalvage(register struct DirSummary *dir)
2443 struct VnodeDiskObject vnode;
2444 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2445 Inode oldinode, newinode;
/* 1 is the fallback uniquifier for the parent entry */
2450 afs_int32 parentUnique = 1;
2451 struct VnodeEssence *vnodeEssence;
2456 Log("Salvaging directory %u...\n", dir->vnodeNumber);
/* fetch the directory vnode from the large vnode index */
2458 IH_IREAD(vnodeInfo[vLarge].handle,
2459 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2461 assert(lcode == sizeof(vnode));
2462 oldinode = VNDISK_GET_INO(&vnode);
2463 /* Increment the version number by a whole lot to avoid problems with
2464 * clients that were promised new version numbers--but the file server
2465 * crashed before the versions were written to disk.
2468 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2469 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2471 assert(VALID_INO(newinode));
2472 SetSalvageDirHandle(&newdir, dir->rwVid, fileSysDevice, newinode);
2474 /* Assign . and .. vnode numbers from dir and vnode.parent.
2475 * The uniquifier for . is in the vnode.
2476 * The uniquifier for .. might be set to a bogus value of 1 and
2477 * the salvager will later clean it up.
2479 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(vnode.parent))) {
2480 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2483 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2485 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2490 /* didn't really build the new directory properly, let's just give up. */
2491 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2492 Log("Directory salvage returned code %d, continuing.\n", code);
/* NOTE(review): unlike the neighbouring messages, the text below has no
 * trailing newline -- confirm whether that is intended. */
2494 Log("also failed to decrement link count on new inode");
2498 Log("Checking the results of the directory salvage...\n");
2499 if (!DirOK(&newdir)) {
2500 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2501 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
/* the rebuilt directory is accepted: point the vnode at it and record
 * its length */
2506 VNDISK_SET_INO(&vnode, newinode);
2507 length = Length(&newdir);
2508 VNDISK_SET_LEN(&vnode, length);
2510 IH_IWRITE(vnodeInfo[vLarge].handle,
2511 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2513 assert(lcode == sizeof(vnode));
2516 nt_sync(fileSysDevice);
2518 sync(); /* this is slow, but hopefully rarely called. We don't have
2519 * an open FD on the file itself to fsync.
2523 vnodeInfo[vLarge].handle->ih_synced = 1;
2525 /* make sure old directory file is really closed */
/* NOTE(review): the result of IH_OPEN is handed to FDH_REALLYCLOSE with no
 * NULL check in between -- confirm that a failed open is harmless here. */
2526 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2527 FDH_REALLYCLOSE(fdP);
2529 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2531 dir->dirHandle = newdir;
2535 JudgeEntry(void *dirVal, char *name, afs_int32 vnodeNumber,
2538 struct DirSummary *dir = (struct DirSummary *)dirVal;
2539 struct VnodeEssence *vnodeEssence;
2540 afs_int32 dirOrphaned, todelete;
2542 dirOrphaned = IsVnodeOrphaned(dir->vnodeNumber);
2544 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2545 if (vnodeEssence == NULL) {
2547 Log("dir vnode %u: invalid entry deleted: %s/%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2551 assert(Delete(&dir->dirHandle, name) == 0);
2556 #ifndef AFS_NAMEI_ENV
2557 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2558 * mount inode for the partition. If this inode were deleted, it would crash
2561 if (vnodeEssence->InodeNumber == 0) {
2562 Log("dir vnode %d: invalid entry: %s/%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2565 assert(Delete(&dir->dirHandle, name) == 0);
2572 if (!(vnodeNumber & 1) && !Showmode
2573 && !(vnodeEssence->count || vnodeEssence->unique
2574 || vnodeEssence->modeBits)) {
2575 Log("dir vnode %u: invalid entry: %s/%s (vnode %u, unique %u)%s\n",
2576 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
2577 vnodeNumber, unique,
2578 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
2583 assert(Delete(&dir->dirHandle, name) == 0);
2589 /* Check if the Uniquifiers match. If not, change the directory entry
2590 * so its unique matches the vnode unique. Delete if the unique is zero
2591 * or if the directory is orphaned.
2593 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
2594 if (!vnodeEssence->unique
2595 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
2596 /* This is an orphaned directory. Don't delete the . or ..
2597 * entry. Otherwise, it will get created in the next
2598 * salvage and deleted again here. So Just skip it.
2603 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
2606 Log("dir vnode %u: %s/%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
2610 fid.Vnode = vnodeNumber;
2611 fid.Unique = vnodeEssence->unique;
2613 assert(Delete(&dir->dirHandle, name) == 0);
2615 assert(Create(&dir->dirHandle, name, &fid) == 0);
2618 return 0; /* no need to continue */
2621 if (strcmp(name, ".") == 0) {
2622 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
2625 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
2628 assert(Delete(&dir->dirHandle, ".") == 0);
2629 fid.Vnode = dir->vnodeNumber;
2630 fid.Unique = dir->unique;
2631 assert(Create(&dir->dirHandle, ".", &fid) == 0);
2634 vnodeNumber = fid.Vnode; /* Get the new Essence */
2635 unique = fid.Unique;
2636 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2639 } else if (strcmp(name, "..") == 0) {
2642 struct VnodeEssence *dotdot;
2643 pa.Vnode = dir->parent;
2644 dotdot = CheckVnodeNumber(pa.Vnode);
2645 assert(dotdot != NULL); /* XXX Should not be assert */
2646 pa.Unique = dotdot->unique;
2648 pa.Vnode = dir->vnodeNumber;
2649 pa.Unique = dir->unique;
2651 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
2653 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
2656 assert(Delete(&dir->dirHandle, "..") == 0);
2657 assert(Create(&dir->dirHandle, "..", &pa) == 0);
2660 vnodeNumber = pa.Vnode; /* Get the new Essence */
2662 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2664 dir->haveDotDot = 1;
2665 } else if (strncmp(name, ".__afs", 6) == 0) {
2667 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
2671 assert(Delete(&dir->dirHandle, name) == 0);
2673 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
2674 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
2677 if (ShowSuid && (vnodeEssence->modeBits & 06000))
2678 Log("FOUND suid/sgid file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
2679 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
2680 && !(vnodeEssence->modeBits & 0111)) {
2686 IH_INIT(ihP, fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
2687 vnodeEssence->InodeNumber);
2690 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
2694 size = FDH_SIZE(fdP);
2696 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, size, vnodeNumber);
2697 FDH_REALLYCLOSE(fdP);
2704 code = FDH_READ(fdP, buf, size);
2707 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
2708 Log("Volume %u (%s) mount point %s/%s to '%s' invalid, %s to symbolic link\n",
2709 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
2710 Testing ? "would convert" : "converted");
2711 vnodeEssence->modeBits |= 0111;
2712 vnodeEssence->changed = 1;
2713 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s/%s to '%s'\n",
2714 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
2715 dir->name ? dir->name : "??", name, buf);
2717 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
2718 dir->vname, vnodeNumber, size, code);
2720 FDH_REALLYCLOSE(fdP);
2723 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
2724 Log("FOUND root file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
2725 if (vnodeIdToClass(vnodeNumber) == vLarge
2726 && vnodeEssence->name == NULL) {
2728 if ((n = (char *)malloc(strlen(name) + 1)))
2730 vnodeEssence->name = n;
2733 /* The directory entry points to the vnode. Check to see if the
2734 * vnode points back to the directory. If not, then let the
2735 * directory claim it (else it might end up orphaned). Vnodes
2736 * already claimed by another directory are deleted from this
2737 * directory: hardlinks to the same vnode are not allowed
2738 * from different directories.
2740 if (vnodeEssence->parent != dir->vnodeNumber) {
2741 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
2742 /* Vnode does not point back to this directory.
2743 * Orphaned dirs cannot claim a file (it may belong to
2744 * another non-orphaned dir).
2747 Log("dir vnode %u: %s/%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
2749 vnodeEssence->parent = dir->vnodeNumber;
2750 vnodeEssence->changed = 1;
2752 /* Vnode was claimed by another directory */
2755 Log("dir vnode %u: %s/%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
2756 } else if (vnodeNumber == 1) {
2757 Log("dir vnode %d: %s/%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
2759 Log("dir vnode %u: %s/%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
2764 assert(Delete(&dir->dirHandle, name) == 0);
2769 /* This directory claims the vnode */
2770 vnodeEssence->claimed = 1;
2772 vnodeEssence->count--;
2777 DistilVnodeEssence(VolumeId rwVId, VnodeClass class, Inode ino, Unique * maxu)
2779 register struct VnodeInfo *vip = &vnodeInfo[class];
2780 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
2781 char buf[SIZEOF_LARGEDISKVNODE];
2782 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2784 StreamHandle_t *file;
2789 IH_INIT(vip->handle, fileSysDevice, rwVId, ino);
2790 fdP = IH_OPEN(vip->handle);
2791 assert(fdP != NULL);
2792 file = FDH_FDOPEN(fdP, "r+");
2793 assert(file != NULL);
2794 size = OS_SIZE(fdP->fd_fd);
2796 vip->nVnodes = (size / vcp->diskSize) - 1;
2797 if (vip->nVnodes > 0) {
2798 assert((vip->nVnodes + 1) * vcp->diskSize == size);
2799 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
2800 assert((vip->vnodes = (struct VnodeEssence *)
2801 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
2802 if (class == vLarge) {
2803 assert((vip->inodes = (Inode *)
2804 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
2813 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
2814 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
2815 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2816 nVnodes--, vnodeIndex++) {
2817 if (vnode->type != vNull) {
2818 register struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
2819 afs_fsize_t vnodeLength;
2820 vip->nAllocatedVnodes++;
2821 vep->count = vnode->linkCount;
2822 VNDISK_GET_LEN(vnodeLength, vnode);
2823 vep->blockCount = nBlocks(vnodeLength);
2824 vip->volumeBlockCount += vep->blockCount;
2825 vep->parent = vnode->parent;
2826 vep->unique = vnode->uniquifier;
2827 if (*maxu < vnode->uniquifier)
2828 *maxu = vnode->uniquifier;
2829 vep->modeBits = vnode->modeBits;
2830 vep->InodeNumber = VNDISK_GET_INO(vnode);
2831 vep->type = vnode->type;
2832 vep->author = vnode->author;
2833 vep->owner = vnode->owner;
2834 vep->group = vnode->group;
2835 if (vnode->type == vDirectory) {
2836 if (class != vLarge) {
2837 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2838 vip->nAllocatedVnodes--;
2839 memset(vnode, 0, sizeof(vnode));
2840 IH_IWRITE(vnodeInfo[vSmall].handle,
2841 vnodeIndexOffset(vcp, vnodeNumber),
2842 (char *)&vnode, sizeof(vnode));
2845 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
2854 GetDirName(VnodeId vnode, struct VnodeEssence *vp, char *path)
2856 struct VnodeEssence *parentvp;
2862 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(vp->parent))
2863 && GetDirName(vp->parent, parentvp, path)) {
2865 strcat(path, vp->name);
2871 /* To determine if a vnode is orhpaned or not, the vnode and all its parent
2872 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
2875 IsVnodeOrphaned(VnodeId vnode)
2877 struct VnodeEssence *vep;
2880 return (1); /* Vnode zero does not exist */
2882 return (0); /* The root dir vnode is always claimed */
2883 vep = CheckVnodeNumber(vnode); /* Get the vnode essence */
2884 if (!vep || !vep->claimed)
2885 return (1); /* Vnode is not claimed - it is orphaned */
2887 return (IsVnodeOrphaned(vep->parent));
2891 SalvageDir(char *name, VolumeId rwVid, struct VnodeInfo *dirVnodeInfo,
2892 IHandle_t * alinkH, int i, struct DirSummary *rootdir,
2895 static struct DirSummary dir;
2896 static struct DirHandle dirHandle;
2897 struct VnodeEssence *parent;
2898 static char path[MAXPATHLEN];
2901 if (dirVnodeInfo->vnodes[i].salvaged)
2902 return; /* already salvaged */
2905 dirVnodeInfo->vnodes[i].salvaged = 1;
2907 if (dirVnodeInfo->inodes[i] == 0)
2908 return; /* Not allocated to a directory */
2910 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
2911 if (dirVnodeInfo->vnodes[i].parent) {
2912 Log("Bad parent, vnode 1; %s...\n",
2913 (Testing ? "skipping" : "salvaging"));
2914 dirVnodeInfo->vnodes[i].parent = 0;
2915 dirVnodeInfo->vnodes[i].changed = 1;
2918 parent = CheckVnodeNumber(dirVnodeInfo->vnodes[i].parent);
2919 if (parent && parent->salvaged == 0)
2920 SalvageDir(name, rwVid, dirVnodeInfo, alinkH,
2921 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
2922 rootdir, rootdirfound);
2925 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
2926 dir.unique = dirVnodeInfo->vnodes[i].unique;
2929 dir.parent = dirVnodeInfo->vnodes[i].parent;
2930 dir.haveDot = dir.haveDotDot = 0;
2931 dir.ds_linkH = alinkH;
2932 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, fileSysDevice,
2933 dirVnodeInfo->inodes[i]);
2935 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
2938 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
2939 (Testing ? "skipping" : "salvaging"));
2942 CopyAndSalvage(&dir);
2946 dirHandle = dir.dirHandle;
2949 GetDirName(bitNumberToVnodeNumber(i, vLarge),
2950 &dirVnodeInfo->vnodes[i], path);
2953 /* If enumeration failed for random reasons, we will probably delete
2954 * too much stuff, so we guard against this instead.
2956 assert(EnumerateDir(&dirHandle, JudgeEntry, &dir) == 0);
2959 /* Delete the old directory if it was copied in order to salvage.
2960 * CopyOnWrite has written the new inode # to the disk, but we still
2961 * have the old one in our local structure here. Thus, we idec the
2965 if (dir.copied && !Testing) {
2966 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
2968 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
2971 /* Remember rootdir DirSummary _after_ it has been judged */
2972 if (dir.vnodeNumber == 1 && dir.unique == 1) {
2973 memcpy(rootdir, &dir, sizeof(struct DirSummary));
2981 SalvageVolume(register struct InodeSummary *rwIsp, IHandle_t * alinkH)
2983 /* This routine, for now, will only be called for read-write volumes */
2985 int BlocksInVolume = 0, FilesInVolume = 0;
2986 register VnodeClass class;
2987 struct DirSummary rootdir, oldrootdir;
2988 struct VnodeInfo *dirVnodeInfo;
2989 struct VnodeDiskObject vnode;
2990 VolumeDiskData volHeader;
2992 int orphaned, rootdirfound = 0;
2993 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
2994 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
2995 struct VnodeEssence *vep;
2998 afs_sfsize_t nBytes;
3000 VnodeId LFVnode, ThisVnode;
3001 Unique LFUnique, ThisUnique;
3004 vid = rwIsp->volSummary->header.id;
3005 IH_INIT(h, fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3006 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3007 assert(nBytes == sizeof(volHeader));
3008 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3009 assert(volHeader.destroyMe != DESTROY_ME);
3010 /* (should not have gotten this far with DESTROY_ME flag still set!) */
3012 DistilVnodeEssence(vid, vLarge, rwIsp->volSummary->header.largeVnodeIndex,
3014 DistilVnodeEssence(vid, vSmall, rwIsp->volSummary->header.smallVnodeIndex,
3017 dirVnodeInfo = &vnodeInfo[vLarge];
3018 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3019 SalvageDir(volHeader.name, vid, dirVnodeInfo, alinkH, i, &rootdir,
3023 nt_sync(fileSysDevice);
3025 sync(); /* This used to be done lower level, for every dir */
3032 /* Parse each vnode looking for orphaned vnodes and
3033 * connect them to the tree as orphaned (if requested).
3035 oldrootdir = rootdir;
3036 for (class = 0; class < nVNODECLASSES; class++) {
3037 for (v = 0; v < vnodeInfo[class].nVnodes; v++) {
3038 vep = &(vnodeInfo[class].vnodes[v]);
3039 ThisVnode = bitNumberToVnodeNumber(v, class);
3040 ThisUnique = vep->unique;
3042 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3043 continue; /* Ignore unused, claimed, and root vnodes */
3045 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3046 * entry in this vnode had incremented the parent link count (In
3047 * JudgeEntry()). We need to go to the parent and decrement that
3048 * link count. But if the parent's unique is zero, then the parent
3049 * link count was not incremented in JudgeEntry().
3051 if (class == vLarge) { /* directory vnode */
3052 pv = vnodeIdToBitNumber(vep->parent);
3053 if (vnodeInfo[vLarge].vnodes[pv].unique != 0)
3054 vnodeInfo[vLarge].vnodes[pv].count++;
3058 continue; /* If no rootdir, can't attach orphaned files */
3060 /* Here we attach orphaned files and directories into the
3061 * root directory, LFVnode, making sure link counts stay correct.
3063 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3064 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3065 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3067 /* Update this orphaned vnode's info. Its parent info and
3068 * link count (do for orphaned directories and files).
3070 vep->parent = LFVnode; /* Parent is the root dir */
3071 vep->unique = LFUnique;
3074 vep->count--; /* Inc link count (root dir will pt to it) */
3076 /* If this orphaned vnode is a directory, change '..'.
3077 * The name of the orphaned dir/file is unknown, so we
3078 * build a unique name. No need to CopyOnWrite the directory
3079 * since it is not connected to tree in BK or RO volume and
3080 * won't be visible there.
3082 if (class == vLarge) {
3086 /* Remove and recreate the ".." entry in this orphaned directory */
3087 SetSalvageDirHandle(&dh, vid, fileSysDevice,
3088 vnodeInfo[class].inodes[v]);
3090 pa.Unique = LFUnique;
3091 assert(Delete(&dh, "..") == 0);
3092 assert(Create(&dh, "..", &pa) == 0);
3094 /* The original parent's link count was decremented above.
3095 * Here we increment the new parent's link count.
3097 pv = vnodeIdToBitNumber(LFVnode);
3098 vnodeInfo[vLarge].vnodes[pv].count--;
3102 /* Go to the root dir and add this entry. The link count of the
3103 * root dir was incremented when ".." was created. Try 10 times.
3105 for (j = 0; j < 10; j++) {
3106 pa.Vnode = ThisVnode;
3107 pa.Unique = ThisUnique;
3109 (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
3111 vLarge) ? "__ORPHANDIR__" :
3112 "__ORPHANFILE__"), ThisVnode,
3115 CopyOnWrite(&rootdir);
3116 code = Create(&rootdir.dirHandle, npath, &pa);
3120 ThisUnique += 50; /* Try creating a different file */
3123 Log("Attaching orphaned %s to volume's root dir as %s\n",
3124 ((class == vLarge) ? "directory" : "file"), npath);
3126 } /* for each vnode in the class */
3127 } /* for each class of vnode */
3129 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
3131 if (!oldrootdir.copied && rootdir.copied) {
3133 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
3136 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
3139 DFlush(); /* Flush the changes */
3140 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
3141 Log("Cannot attach orphaned files and directories: Root directory not found\n");
3142 orphans = ORPH_IGNORE;
3145 /* Write out all changed vnodes. Orphaned files and directories
3146 * will get removed here also (if requested).
3148 for (class = 0; class < nVNODECLASSES; class++) {
3149 int nVnodes = vnodeInfo[class].nVnodes;
3150 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3151 struct VnodeEssence *vnodes = vnodeInfo[class].vnodes;
3152 FilesInVolume += vnodeInfo[class].nAllocatedVnodes;
3153 BlocksInVolume += vnodeInfo[class].volumeBlockCount;
3154 for (i = 0; i < nVnodes; i++) {
3155 register struct VnodeEssence *vnp = &vnodes[i];
3156 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
3158 /* If the vnode is good but is unclaimed (not listed in
3159 * any directory entries), then it is orphaned.
3162 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(vnodeNumber))) {
3163 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
3167 if (vnp->changed || vnp->count) {
3171 IH_IREAD(vnodeInfo[class].handle,
3172 vnodeIndexOffset(vcp, vnodeNumber),
3173 (char *)&vnode, sizeof(vnode));
3174 assert(nBytes == sizeof(vnode));
3176 vnode.parent = vnp->parent;
3177 oldCount = vnode.linkCount;
3178 vnode.linkCount = vnode.linkCount - vnp->count;
3181 orphaned = IsVnodeOrphaned(vnodeNumber);
3183 if (!vnp->todelete) {
3184 /* Orphans should have already been attached (if requested) */
3185 assert(orphans != ORPH_ATTACH);
3186 oblocks += vnp->blockCount;
3189 if (((orphans == ORPH_REMOVE) || vnp->todelete)
3191 BlocksInVolume -= vnp->blockCount;
3193 if (VNDISK_GET_INO(&vnode)) {
3195 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
3198 memset(&vnode, 0, sizeof(vnode));
3200 } else if (vnp->count) {
3202 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
3205 vnode.modeBits = vnp->modeBits;
3208 vnode.dataVersion++;
3211 IH_IWRITE(vnodeInfo[class].handle,
3212 vnodeIndexOffset(vcp, vnodeNumber),
3213 (char *)&vnode, sizeof(vnode));
3214 assert(nBytes == sizeof(vnode));
3220 if (!Showmode && ofiles) {
3221 Log("%s %d orphaned files and directories (approx. %u KB)\n",
3223 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
3227 for (class = 0; class < nVNODECLASSES; class++) {
3228 register struct VnodeInfo *vip = &vnodeInfo[class];
3229 for (i = 0; i < vip->nVnodes; i++)
3230 if (vip->vnodes[i].name)
3231 free(vip->vnodes[i].name);
3238 /* Set correct resource utilization statistics */
3239 volHeader.filecount = FilesInVolume;
3240 volHeader.diskused = BlocksInVolume;
3242 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
3243 if (volHeader.uniquifier < (maxunique + 1)) {
3245 Log("Volume uniquifier is too low; fixed\n");
3246 /* Plus 2,000 in case there are workstations out there with
3247 * cached vnodes that have since been deleted
3249 volHeader.uniquifier = (maxunique + 1 + 2000);
3252 /* Turn off the inUse bit; the volume's been salvaged! */
3253 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
3254 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
3255 volHeader.inService = 1; /* allow service again */
3256 volHeader.needsCallback = (VolumeChanged != 0);
3257 volHeader.dontSalvage = DONT_SALVAGE;
3260 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
3261 assert(nBytes == sizeof(volHeader));
3264 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
3265 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
3266 FilesInVolume, BlocksInVolume);
3268 IH_RELEASE(vnodeInfo[vSmall].handle);
3269 IH_RELEASE(vnodeInfo[vLarge].handle);
3275 ClearROInUseBit(struct VolumeSummary *summary)
3277 IHandle_t *h = summary->volumeInfoHandle;
3278 afs_sfsize_t nBytes;
3280 VolumeDiskData volHeader;
3282 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3283 assert(nBytes == sizeof(volHeader));
3284 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3285 volHeader.inUse = 0;
3286 volHeader.needsSalvaged = 0;
3287 volHeader.inService = 1;
3288 volHeader.dontSalvage = DONT_SALVAGE;
3290 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
3291 assert(nBytes == sizeof(volHeader));
3296 * Possibly delete the volume.
3298 * deleteMe - Always do so, only a partial volume.
3301 MaybeZapVolume(register struct InodeSummary *isp, char *message, int deleteMe,
3304 if (readOnly(isp) || deleteMe) {
3305 if (isp->volSummary && isp->volSummary->fileName) {
3308 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
3310 Log("It will be deleted on this server (you may find it elsewhere)\n");
3313 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
3315 Log("it will be deleted instead. It should be recloned.\n");
3319 sprintf(path, "%s/%s", fileSysPath, isp->volSummary->fileName);
3321 Log("Unable to unlink %s (errno = %d)\n", path, errno);
3325 } else if (!check) {
3326 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
3328 Abort("Salvage of volume %u aborted\n", isp->volumeId);
3334 AskOffline(VolumeId volumeId, char * partition)
3338 for (i = 0; i < 3; i++) {
3339 code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_OFF, FSYNC_SALVAGE, NULL);
3341 if (code == SYNC_OK) {
3343 } else if (code == SYNC_DENIED) {
3344 #ifdef DEMAND_ATTACH_ENABLE
3345 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
3347 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
3349 Abort("Salvage aborted\n");
3350 } else if (code == SYNC_BAD_COMMAND) {
3351 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
3353 #ifdef DEMAND_ATTACH_ENABLE
3354 Log("AskOffline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
3356 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
3358 Abort("Salvage aborted\n");
3361 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
3362 FSYNC_clientFinis();
3366 if (code != SYNC_OK) {
3367 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
3368 Abort("Salvage aborted\n");
3371 #ifdef AFS_DEMAND_ATTACH_FS
3372 /* set inUse = programType in the volume header. We do this in case
3373 * the fileserver restarts/crashes while we are salvaging.
3374 * Otherwise, the fileserver could attach the volume again on
3375 * startup while we are salvaging, which would be very bad, or
3376 * schedule another salvage while we are salvaging, which would be
3381 char name[VMAXPATHLEN];
3382 struct VolumeHeader header;
3383 struct VolumeDiskHeader diskHeader;
3384 struct VolumeDiskData volHeader;
3386 afs_snprintf(name, sizeof(name), "%s/" VFORMAT, fileSysPathName,
3387 afs_printable_uint32_lu(volumeId));
3389 fd = afs_open(name, O_RDONLY);
3393 if (read(fd, &diskHeader, sizeof(diskHeader)) != sizeof(diskHeader) ||
3394 diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
3401 DiskToVolumeHeader(&header, &diskHeader);
3403 IH_INIT(h, fileSysDevice, header.parent, header.volumeInfo);
3404 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
3405 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
3411 volHeader.inUse = programType;
3413 /* If we can't re-write the header, bail out and error. We don't
3414 * assert when reading the header, since it's possible the
3415 * header isn't really there (when there's no data associated
3416 * with the volume; we just delete the vol header file in that
3417 * case). But if it's there enough that we can read it, but
3418 * somehow we cannot write to it to signify we're salvaging it,
3419 * we've got a big problem and we cannot continue. */
3420 assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
3424 #endif /* AFS_DEMAND_ATTACH_FS */
3428 AskOnline(VolumeId volumeId, char *partition)
3432 for (i = 0; i < 3; i++) {
3433 code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
3435 if (code == SYNC_OK) {
3437 } else if (code == SYNC_DENIED) {
3438 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, partition);
3439 } else if (code == SYNC_BAD_COMMAND) {
3440 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
3442 #ifdef DEMAND_ATTACH_ENABLE
3443 Log("AskOnline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
3445 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
3450 Log("AskOnline: request for fileserver to take volume offline failed; trying again...\n");
3451 FSYNC_clientFinis();
3458 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
3460 /* Volume parameter is passed in case iopen is upgraded in future to
3461 * require a volume Id to be passed
3464 IHandle_t *srcH, *destH;
3465 FdHandle_t *srcFdP, *destFdP;
3468 IH_INIT(srcH, device, rwvolume, inode1);
3469 srcFdP = IH_OPEN(srcH);
3470 assert(srcFdP != NULL);
3471 IH_INIT(destH, device, rwvolume, inode2);
3472 destFdP = IH_OPEN(destH);
3474 while ((n = FDH_READ(srcFdP, buf, sizeof(buf))) > 0)
3475 assert(FDH_WRITE(destFdP, buf, n) == n);
3477 FDH_REALLYCLOSE(srcFdP);
3478 FDH_REALLYCLOSE(destFdP);
3485 PrintInodeList(void)
3487 register struct ViceInodeInfo *ip;
3488 struct ViceInodeInfo *buf;
3489 struct afs_stat status;
3490 register int nInodes;
3492 assert(afs_fstat(inodeFd, &status) == 0);
3493 buf = (struct ViceInodeInfo *)malloc(status.st_size);
3494 assert(buf != NULL);
3495 nInodes = status.st_size / sizeof(struct ViceInodeInfo);
3496 assert(read(inodeFd, buf, status.st_size) == status.st_size);
3497 for (ip = buf; nInodes--; ip++) {
3498 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
3499 PrintInode(NULL, ip->inodeNumber), ip->linkCount,
3500 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
3501 ip->u.param[2], ip->u.param[3]);
3507 PrintInodeSummary(void)
3510 struct InodeSummary *isp;
3512 for (i = 0; i < nVolumesInInodeFile; i++) {
3513 isp = &inodeSummary[i];
3514 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
3519 PrintVolumeSummary(void)
3522 struct VolumeSummary *vsp;
3524 for (i = 0, vsp = volumeSummaryp; i < nVolumes; vsp++, i++) {
3525 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
3535 assert(0); /* Fork is never executed in the NT code path */
3539 #ifdef AFS_DEMAND_ATTACH_FS
3540 if ((f == 0) && (programType == salvageServer)) {
3541 /* we are a salvageserver child */
3542 #ifdef FSSYNC_BUILD_CLIENT
3543 VChildProcReconnectFS_r();
3545 #ifdef SALVSYNC_BUILD_CLIENT
3549 #endif /* AFS_DEMAND_ATTACH_FS */
3550 #endif /* !AFS_NT40_ENV */
3560 #ifdef AFS_DEMAND_ATTACH_FS
3561 if (programType == salvageServer) {
3562 #ifdef SALVSYNC_BUILD_CLIENT
3565 #ifdef FSSYNC_BUILD_CLIENT
3569 #endif /* AFS_DEMAND_ATTACH_FS */
3572 if (main_thread != pthread_self())
3573 pthread_exit((void *)code);
3586 pid = wait(&status);
3588 if (WCOREDUMP(status))
3589 Log("\"%s\" core dumped!\n", prog);
3590 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * Format `clock` as local time.
 *
 * precision != 0  ->  "MM/DD/YYYY HH:MM:SS"
 * precision == 0  ->  "MM/DD/YYYY HH:MM"
 *
 * Returns a pointer to a static buffer that the next call overwrites.
 * If the time cannot be converted or does not fit the buffer, a dashed
 * placeholder of the same width is returned instead.
 */
char *
TimeStamp(time_t clock, int precision)
{
    static char timestamp[20];
    const char *format = precision ? "%m/%d/%Y %H:%M:%S" : "%m/%d/%Y %H:%M";
    struct tm *lt;

    lt = localtime(&clock);
    if (lt == NULL
	|| strftime(timestamp, sizeof(timestamp), format, lt) == 0) {
	/* localtime() returns NULL for times it cannot represent, and
	 * strftime() leaves the buffer unspecified when the text does not
	 * fit; never hand either of those back to the caller. */
	strcpy(timestamp,
	       precision ? "--/--/---- --:--:--" : "--/--/---- --:--");
    }
    return timestamp;
}
3609 CheckLogFile(char * log_path)
3611 char oldSlvgLog[AFSDIR_PATH_MAX];
3613 #ifndef AFS_NT40_ENV
3620 strcpy(oldSlvgLog, log_path);
3621 strcat(oldSlvgLog, ".old");
3623 renamefile(log_path, oldSlvgLog);
3624 logFile = afs_fopen(log_path, "a");
3626 if (!logFile) { /* still nothing, use stdout */
3630 #ifndef AFS_NAMEI_ENV
3631 AFS_DEBUG_IOPS_LOG(logFile);
3636 #ifndef AFS_NT40_ENV
3638 TimeStampLogFile(char * log_path)
3640 char stampSlvgLog[AFSDIR_PATH_MAX];
3645 lt = localtime(&now);
3646 (void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
3647 "%s.%04d-%02d-%02d.%02d:%02d:%02d",
3648 log_path, lt->tm_year + 1900,
3649 lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
3652 /* try to link the logfile to a timestamped filename */
3653 /* if it fails, oh well, nothing we can do */
3654 link(log_path, stampSlvgLog);
3663 #ifndef AFS_NT40_ENV
3665 printf("Can't show log since using syslog.\n");
3676 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
3679 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
3682 while (fgets(line, sizeof(line), logFile))
/*
 * Log - printf-style logging routine.
 *
 * format  printf-style format string, followed by its arguments
 *
 * The message is first formatted into `tmp` (bounded by sizeof tmp).
 * Two sinks are visible below: syslog at LOG_INFO, and logFile with a
 * leading timestamp.  The condition that chooses between them is not
 * visible in this excerpt.
 */
3689 Log(const char *format, ...)
3695 va_start(args, format);
3696 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
3698 #ifndef AFS_NT40_ENV
/* "%s" keeps any percent signs in the formatted message literal. */
3700 syslog(LOG_INFO, "%s", tmp);
3704 gettimeofday(&now, 0);
/* Second argument 1 is the `precision` parameter of TimeStamp(). */
3705 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort - printf-style reporting routine used on fatal paths.
 *
 * format  printf-style format string, followed by its arguments
 *
 * The message is formatted into `tmp` (bounded by sizeof tmp) and then
 * reported through syslog at LOG_INFO or written to logFile.  Unlike
 * Log(), the logFile line carries no timestamp.  What happens after the
 * message is emitted is not visible in this excerpt.
 */
3711 Abort(const char *format, ...)
3716 va_start(args, format);
3717 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
3719 #ifndef AFS_NT40_ENV
/* "%s" keeps any percent signs in the formatted message literal. */
3721 syslog(LOG_INFO, "%s", tmp);
3725 fprintf(logFile, "%s", tmp);
3740 p = (char *)malloc(strlen(s) + 1);
3746 /* Remove the FORCESALVAGE file */
/*
 * path  directory in which the FORCESALVAGE marker is looked for
 *
 * The marker "<path>/FORCESALVAGE" is unlinked only when Testing is off,
 * ForceSalvage is on, and afs_stat() finds the file.
 */
3748 RemoveTheForce(char *path)
3751 struct afs_stat force; /* so we can use afs_stat to find it */
/*
 * NOTE(review): unbounded strcpy/strcat into `target`, whose declaration
 * is not visible in this excerpt -- confirm it is large enough for
 * `path` plus the 13-character suffix and the NUL.
 */
3752 strcpy(target,path);
3753 strcat(target,"/FORCESALVAGE");
3754 if (!Testing && ForceSalvage) {
/* The unlink() result is not checked. */
3755 if (afs_stat(target,&force) == 0) unlink(target);
3759 #ifndef AFS_AIX32_ENV
3761 * UseTheForceLuke - see if we can use the force
/*
 * Variant built when AFS_AIX32_ENV is not defined.
 *
 * path  directory in which the FORCESALVAGE marker is looked for
 *
 * Builds "<path>/FORCESALVAGE" in `target` and returns nonzero exactly
 * when afs_stat() succeeds on that name, zero otherwise.
 */
3764 UseTheForceLuke(char *path)
3766 struct afs_stat force;
/*
 * NOTE(review): unbounded strcpy/strcat into `target`, whose declaration
 * is not visible in this excerpt -- confirm its size covers `path` plus
 * the suffix.
 */
3768 strcpy(target,path);
3769 strcat(target,"/FORCESALVAGE");
3771 return (afs_stat(target, &force) == 0);
3775 * UseTheForceLuke - see if we can use the force
3778 * The VRMIX fsck will not muck with the filesystem it is supposedly
3779 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
3780 * muck directly with the root inode, which is within the normal
3782 * ListViceInodes() has a side effect of setting ForceSalvage if
3783 * it detects a need, based on root inode examination.
/*
 * Second definition of UseTheForceLuke; presumably the alternative branch
 * of the AFS_AIX32_ENV test (the branch directive is not visible in this
 * excerpt -- TODO confirm).  `path` is not used by the visible code and
 * the function reports 0 unconditionally.
 */
3786 UseTheForceLuke(char *path)
3789 return 0; /* sorry OB1 */
3794 /* NT support routines */
/* Cached path of the running executable; filled in on first use below. */
3796 static char execpathname[MAX_PATH];
/*
 * nt_SalvagePartition - hand a salvage job for one partition to
 * spawnprocveb().
 *
 * partName  partition name, copied into the cj_part field of the job
 * jobn      job number, stored in the cj_number field of the job
 *
 * On first use the path of the running executable is looked up and kept
 * in execpathname.  The job record is then tagged with SALVAGER_MAGIC and
 * passed, together with save_args, to spawnprocveb(); the result is kept
 * in `pid`.  The return statements are not visible in this excerpt.
 */
3798 nt_SalvagePartition(char *partName, int jobn)
3803 if (!*execpathname) {
3804 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
/*
 * GetModuleFileName() returns 0 on failure and the buffer size it was
 * given when the path had to be truncated.  The size passed above is
 * MAX_PATH - 1, so that is the value to test for; the former literal
 * 1023 could only match a truncated result if MAX_PATH were 1024.
 */
3805 if (!n || n == MAX_PATH - 1)
3808 job.cj_magic = SALVAGER_MAGIC;
3809 job.cj_number = jobn;
/*
 * NOTE(review): unbounded copy; the size of cj_part is not visible in
 * this excerpt -- confirm partition names always fit.
 */
3810 (void)strcpy(job.cj_part, partName);
3811 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage - validate a job record and open its log file.
 *
 * datap  pointer to the data block, interpreted as a childJob_t
 * len    size in bytes of that block
 *
 * The block is checked for the exact size of childJob_t and for the
 * SALVAGER_MAGIC tag.  What is done when a check fails is not visible in
 * this excerpt.
 */
3816 nt_SetupPartitionSalvage(void *datap, int len)
3818 childJob_t *jobp = (childJob_t *) datap;
3819 char logname[AFSDIR_PATH_MAX];
/* Reject a block whose size does not match the expected record. */
3821 if (len != sizeof(childJob_t))
/* Reject a block that does not carry the expected tag. */
3823 if (jobp->cj_magic != SALVAGER_MAGIC)
/*
 * Log name is the salvager log path, a dot, and an integer; the integer
 * argument is on a line not visible in this excerpt.
 */
3828 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
/* Mode "w": an existing file of that name is truncated. */
3830 logFile = afs_fopen(logname, "w");
3838 #endif /* AFS_NT40_ENV */