2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
91 #include <sys/param.h>
95 #endif /* ITIMER_REAL */
101 #include <sys/stat.h>
106 #include <WINNT/afsevent.h>
109 #define WCOREDUMP(x) ((x) & 0200)
112 #include <afs/afsint.h>
113 #include <afs/assert.h>
114 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
115 #if defined(AFS_VFSINCL_ENV)
116 #include <sys/vnode.h>
118 #include <sys/fs/ufs_inode.h>
120 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
121 #include <ufs/ufs/dinode.h>
122 #include <ufs/ffs/fs.h>
124 #include <ufs/inode.h>
127 #else /* AFS_VFSINCL_ENV */
129 #include <ufs/inode.h>
130 #else /* AFS_OSF_ENV */
131 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
132 #include <sys/inode.h>
135 #endif /* AFS_VFSINCL_ENV */
136 #endif /* AFS_SGI_ENV */
139 #include <sys/lockf.h>
143 #include <checklist.h>
145 #if defined(AFS_SGI_ENV)
150 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
153 #include <sys/mnttab.h>
154 #include <sys/mntent.h>
159 #endif /* AFS_SGI_ENV */
160 #endif /* AFS_HPUX_ENV */
165 #include <afs/osi_inode.h>
169 #include <afs/afsutil.h>
170 #include <afs/fileutil.h>
171 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
179 #include <afs/afssyscalls.h>
183 #include "partition.h"
184 #include "daemon_com.h"
186 #include "salvsync.h"
187 #include "viceinode.h"
189 #include "volinodes.h" /* header magic number, etc. stuff */
190 #include "vol-salvage.h"
191 #include "vol_internal.h"
197 /*@+fcnmacros +macrofcndecl@*/
200 extern off64_t afs_lseek(int FD, off64_t O, int F);
201 #endif /*S_SPLINT_S */
202 #define afs_lseek(FD, O, F) lseek64(FD, (off64_t) (O), F)
203 #define afs_stat stat64
204 #define afs_fstat fstat64
205 #define afs_open open64
206 #define afs_fopen fopen64
207 #else /* !O_LARGEFILE */
209 extern off_t afs_lseek(int FD, off_t O, int F);
210 #endif /*S_SPLINT_S */
211 #define afs_lseek(FD, O, F) lseek(FD, (off_t) (O), F)
212 #define afs_stat stat
213 #define afs_fstat fstat
214 #define afs_open open
215 #define afs_fopen fopen
216 #endif /* !O_LARGEFILE */
217 /*@=fcnmacros =macrofcndecl@*/
220 extern void *calloc();
222 static char *TimeStamp(time_t clock, int precision);
225 int debug; /* -d flag */
226 extern int Testing; /* -n flag */
227 int ListInodeOption; /* -i flag */
228 int ShowRootFiles; /* -r flag */
229 int RebuildDirs; /* -sal flag */
230 int Parallel = 4; /* -para X flag */
231 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
232 int forceR = 0; /* -b flag */
233 int ShowLog = 0; /* -showlog flag */
234 int ShowSuid = 0; /* -showsuid flag */
235 int ShowMounts = 0; /* -showmounts flag */
236 int orphans = ORPH_IGNORE; /* -orphans option */
241 int useSyslog = 0; /* -syslog flag */
242 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
251 #define MAXPARALLEL 32
253 int OKToZap; /* -o flag */
254 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
255 * in the volume header */
257 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
259 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
261 Device fileSysDevice; /* The device number of the current
262 * partition being salvaged */
266 char *fileSysPath; /* The path of the mounted partition currently
267 * being salvaged, i.e. the directory
268 * containing the volume headers */
270 char *fileSysPathName; /* NT needs this to make name pretty in log. */
271 IHandle_t *VGLinkH; /* Link handle for current volume group. */
272 int VGLinkH_cnt; /* # of references to lnk handle. */
273 struct DiskPartition64 *fileSysPartition; /* Partition being salvaged */
275 char *fileSysDeviceName; /* The block device where the file system
276 * being salvaged was mounted */
277 char *filesysfulldev;
279 int VolumeChanged; /* Set by any routine which would change the volume in
280 * a way which would require callbacks to be broken if the
281 * volume was put back on line by an active file server */
283 VolumeDiskData VolInfo; /* A copy of the last good or salvaged volume header dealt with */
285 int nVolumesInInodeFile; /* Number of read-write volumes summarized */
286 int inodeFd; /* File descriptor for inode file */
289 struct VnodeInfo vnodeInfo[nVNODECLASSES];
292 struct VolumeSummary *volumeSummaryp; /* Holds all the volumes in a part */
293 int nVolumes; /* Number of volumes (read-write and read-only)
294 * in volume summary */
300 /* Forward declarations */
301 /*@printflike@*/ void Log(const char *format, ...);
302 /*@printflike@*/ void Abort(const char *format, ...);
303 static int IsVnodeOrphaned(VnodeId vnode);
305 /* Uniquifier stored in the Inode */
310 return (u & 0x3fffff);
312 #if defined(AFS_SGI_EXMAG)
313 return (u & SGI_UNIQMASK);
316 #endif /* AFS_SGI_EXMAG */
/*
 * BadError: classify an errno-style code.  EPERM, ENXIO and ENOENT are
 * singled out by the test below; any other value reaches the final
 * "return 0", which the existing remark describes as possibly transient
 * (e.g. EMFILE).
 *
 * aerror - error code to classify.
 */
321 BadError(register int aerror)
323 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
325 return 0; /* otherwise may be transient, e.g. EMFILE */
330 char *save_args[MAX_ARGS];
332 extern pthread_t main_thread;
333 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
336 /* Get the salvage lock if not already held. Hold until process exits. */
/*
 * ObtainSalvageLock: open the salvager lock file named by
 * AFSDIR_SERVER_SLVGLOCK_FILEPATH and lock it; the result is checked through
 * salvageLock.  Two platform variants appear below: a CreateFile() form
 * tested against INVALID_FD, and an afs_open() form followed by flock()
 * (under AFS_DARWIN_ENV) or lockf().
 */
338 ObtainSalvageLock(void)
/* Win32 open: OPEN_ALWAYS opens the file, creating it when absent. */
344 (FD_t)CreateFile(AFSDIR_SERVER_SLVGLOCK_FILEPATH, 0, 0, NULL,
345 OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
346 if (salvageLock == INVALID_FD) {
348 "salvager: There appears to be another salvager running! Aborted.\n");
/* POSIX open: O_CREAT creates the file when absent, with mode 0666. */
353 afs_open(AFSDIR_SERVER_SLVGLOCK_FILEPATH, O_CREAT | O_RDWR, 0666);
354 if (salvageLock < 0) {
356 "salvager: can't open salvage lock file %s, aborting\n",
357 AFSDIR_SERVER_SLVGLOCK_FILEPATH);
/* NOTE(review): LOCK_EX without LOCK_NB and F_LOCK are the blocking request
 * forms, so a -1 result here presumably means an error rather than "lock is
 * held by someone else" - confirm the message below matches that case. */
360 #ifdef AFS_DARWIN_ENV
361 if (flock(salvageLock, LOCK_EX) == -1) {
363 if (lockf(salvageLock, F_LOCK, 0) == -1) {
366 "salvager: There appears to be another salvager running! Aborted.\n");
373 #ifdef AFS_SGI_XFS_IOPS_ENV
374 /* Check if the given partition is mounted. For XFS, the root inode is not a
375 * constant. So we check the hard way.
/*
 * IsPartitionMounted: scan the mounted-filesystem table (MOUNTED) with
 * setmntent() and getmntent(), comparing each entry's mnt_dir to part.
 *
 * part - mount directory to look for.
 */
378 IsPartitionMounted(char *part)
381 struct mntent *mntent;
/* NOTE(review): the setmntent() call sits inside assert(); if this assert
 * can be compiled out, mntfp is never opened - confirm against afs/assert.h. */
383 assert(mntfp = setmntent(MOUNTED, "r"));
384 while (mntent = getmntent(mntfp)) {
385 if (!strcmp(part, mntent->mnt_dir))
/* NOTE(review): both arms of this conditional are 1, so the result is 1
 * whether or not a matching entry was found.  Confirm whether the
 * "not found" arm was meant to be 0. */
390 return mntent ? 1 : 1;
393 /* Check if the given inode is the root of the filesystem. */
394 #ifndef AFS_SGI_XFS_IOPS_ENV
/*
 * IsRootInode: nonzero when the stat result's st_ino equals ROOTINODE.
 *
 * status - stat result to test.
 */
396 IsRootInode(struct afs_stat *status)
399 * The root inode is not a fixed value in XFS partitions. So we need to
400 * see if the partition is in the list of mounted partitions. This only
401 * affects the SalvageFileSys path, so we check there.
403 return (status->st_ino == ROOTINODE);
408 #ifndef AFS_NAMEI_ENV
409 /* We don't want to salvage big files filesystems, since we can't put volumes on
413 CheckIfBigFilesFS(char *mountPoint, char *devName)
/*
 * Builds a device path in name from devName, reads its superblock with
 * ReadSuper(), and logs a refusal when the read fails or when
 * IsBigFilesFileSystem() reports a big-files filesystem.
 */
415 struct superblock fs;
/* strncmp() is nonzero when devName lacks the "/dev/" prefix, so the prefix
 * is added in that case; presumably devName is copied as-is otherwise.
 * NOTE(review): sprintf() and strcpy() into name are unbounded - confirm the
 * size of name covers the longest devName. */
418 if (strncmp(devName, "/dev/", 5)) {
419 (void)sprintf(name, "/dev/%s", devName);
421 (void)strcpy(name, devName);
424 if (ReadSuper(&fs, name) < 0) {
425 Log("Unable to read superblock. Not salvaging partition %s.\n",
429 if (IsBigFilesFileSystem(&fs)) {
430 Log("Partition %s is a big files filesystem, not salvaging.\n",
440 #define HDSTR "\\Device\\Harddisk"
441 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
/*
 * SameDisk (NT variant): for each partition, QueryDosDevice() maps devName
 * to a device string in res.  The string is expected to begin with HDSTR,
 * and the digits that follow the prefix are parsed with atoi() into d1 and
 * d2.  A WARNING is logged when the prefix does not match.
 * NOTE(review): the result is presumably the comparison of d1 and d2 -
 * confirm.
 *
 * p1, p2 - partitions whose devName fields are looked up.
 */
443 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
448 static int dowarn = 1;
450 if (!QueryDosDevice(p1->devName, res, RES_LEN - 1))
452 if (strncmp(res, HDSTR, HDLEN)) {
455 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
456 res, HDSTR, p1->devName);
460 d1 = atoi(&res[HDLEN]);
462 if (!QueryDosDevice(p2->devName, res, RES_LEN - 1))
464 if (strncmp(res, HDSTR, HDLEN)) {
467 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
468 res, HDSTR, p2->devName);
472 d2 = atoi(&res[HDLEN]);
477 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
480 /* This assumes that two partitions with the same device number divided by
481 * PartsPerDisk are on the same disk.
484 SalvageFileSysParallel(struct DiskPartition64 *partP)
487 struct DiskPartition64 *partP;
488 int pid; /* Pid for this job */
489 int jobnumb; /* Log file job number */
490 struct job *nextjob; /* Next partition on disk to salvage */
492 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
493 struct job *thisjob = 0;
494 static int numjobs = 0;
495 static int jobcount = 0;
501 char logFileName[256];
505 /* We have a partition to salvage. Copy it into thisjob */
506 thisjob = (struct job *)malloc(sizeof(struct job));
508 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
511 memset(thisjob, 0, sizeof(struct job));
512 thisjob->partP = partP;
513 thisjob->jobnumb = jobcount;
515 } else if (jobcount == 0) {
516 /* We are asking to wait for all jobs (partp == 0), yet we never
519 Log("No file system partitions named %s* found; not salvaged\n",
520 VICE_PARTITION_PREFIX);
524 if (debug || Parallel == 1) {
526 SalvageFileSys(thisjob->partP, 0);
533 /* Check to see if thisjob is for a disk that we are already
534 * salvaging. If it is, link it in as the next job to do. The
535 * jobs array has 1 entry per disk being salvaged. numjobs is
536 * the total number of disks currently being salvaged. In
537 * order to keep the jobs array compact, when a disk is
538 * completed, the highest element in the jobs array is moved
539 * down to the now open slot.
541 for (j = 0; j < numjobs; j++) {
542 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
543 /* On same disk, add it to this list and return */
544 thisjob->nextjob = jobs[j]->nextjob;
545 jobs[j]->nextjob = thisjob;
552 /* Loop until we start thisjob or until all existing jobs are finished */
553 while (thisjob || (!partP && (numjobs > 0))) {
554 startjob = -1; /* No new job to start */
556 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
557 /* Either the max jobs are running or we have to wait for all
558 * the jobs to finish. In either case, we wait for at least one
559 * job to finish. When it's done, clean up after it.
561 pid = wait(&wstatus);
563 for (j = 0; j < numjobs; j++) { /* Find which job it is */
564 if (pid == jobs[j]->pid)
568 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
569 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
572 numjobs--; /* job no longer running */
573 oldjob = jobs[j]; /* remember */
574 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
575 free(oldjob); /* free the old job */
577 /* If there is another partition on the disk to salvage, then
578 * say we will start it (startjob). If not, then put thisjob there
579 * and say we will start it.
581 if (jobs[j]) { /* Another partitions to salvage */
582 startjob = j; /* Will start it */
583 } else { /* There is not another partition to salvage */
585 jobs[j] = thisjob; /* Add thisjob */
587 startjob = j; /* Will start it */
589 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
590 startjob = -1; /* Don't start it - already running */
594 /* We don't have to wait for a job to complete */
596 jobs[numjobs] = thisjob; /* Add this job */
598 startjob = numjobs; /* Will start it */
602 /* Start up a new salvage job on a partition in job slot "startjob" */
603 if (startjob != -1) {
605 Log("Starting salvage of file system partition %s\n",
606 jobs[startjob]->partP->name);
608 /* For NT, we not only fork, but re-exec the salvager. Pass in the
609 * commands and pass the child job number via the data path.
612 nt_SalvagePartition(jobs[startjob]->partP->name,
613 jobs[startjob]->jobnumb);
614 jobs[startjob]->pid = pid;
619 jobs[startjob]->pid = pid;
625 for (fd = 0; fd < 16; fd++)
632 openlog("salvager", LOG_PID, useSyslogFacility);
636 (void)afs_snprintf(logFileName, sizeof logFileName,
638 AFSDIR_SERVER_SLVGLOG_FILEPATH,
639 jobs[startjob]->jobnumb);
640 logFile = afs_fopen(logFileName, "w");
645 SalvageFileSys1(jobs[startjob]->partP, 0);
650 } /* while ( thisjob || (!partP && numjobs > 0) ) */
652 /* If waited for all jobs to complete, now collect log files and return */
654 if (!useSyslog) /* if syslogging - no need to collect */
657 for (i = 0; i < jobcount; i++) {
658 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
659 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
660 if ((passLog = afs_fopen(logFileName, "r"))) {
661 while (fgets(buf, sizeof(buf), passLog)) {
666 (void)unlink(logFileName);
/*
 * SalvageFileSys: run SalvageFileSys1() for one partition.
 *
 * partP              - partition to salvage.
 * singleVolumeNumber - passed through unchanged to SalvageFileSys1().
 *
 * The work happens in the current process when canfork is false or debug is
 * set; otherwise Fork() is called and the branch where it returns 0 does the
 * work.  Wait("SalvageFileSys") is presumably the parent's side - confirm.
 */
675 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
677 if (!canfork || debug || Fork() == 0) {
678 SalvageFileSys1(partP, singleVolumeNumber);
679 if (canfork && !debug) {
684 Wait("SalvageFileSys");
/*
 * get_DevName: reduce the device path in pbuffer to its last component.
 * A copy of the path is taken in pbuf and its last '/' located; pbuffer is
 * then overwritten in place with whatever follows its own last '/'.
 *
 * pbuffer - device path, rewritten in place.
 * wpath   - NOTE(review): presumably receives the directory part; confirm
 *           against the statement that writes it.
 */
688 get_DevName(char *pbuffer, char *wpath)
690 char pbuf[128], *ptr;
/* NOTE(review): pbuf holds 128 bytes and this copy is unbounded.  The caller
 * in this file passes a 100-byte buffer, which fits - confirm other callers. */
691 strcpy(pbuf, pbuffer);
692 ptr = (char *)strrchr(pbuf, '/');
698 ptr = (char *)strrchr(pbuffer, '/');
/* NOTE(review): ptr points into pbuffer, so source and destination overlap;
 * strcpy() is undefined for overlapping objects.  memmove() with an explicit
 * length is the usual replacement. */
700 strcpy(pbuffer, ptr + 1);
707 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
710 char inodeListPath[256];
711 static char tmpDevName[100];
712 static char wpath[100];
713 struct VolumeSummary *vsp, *esp;
716 fileSysPartition = partP;
717 fileSysDevice = fileSysPartition->device;
718 fileSysPathName = VPartitionPath(fileSysPartition);
721 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
722 (void)sprintf(fileSysPath, "%s\\", fileSysPathName);
723 name = partP->devName;
725 fileSysPath = fileSysPathName;
726 strcpy(tmpDevName, partP->devName);
727 name = get_DevName(tmpDevName, wpath);
728 fileSysDeviceName = name;
729 filesysfulldev = wpath;
732 VLockPartition(partP->name);
733 if (singleVolumeNumber || ForceSalvage)
736 ForceSalvage = UseTheForceLuke(fileSysPath);
738 if (singleVolumeNumber) {
739 /* salvageserver already setup fssync conn for us */
740 if ((programType != salvageServer) && !VConnectFS()) {
741 Abort("Couldn't connect to file server\n");
743 AskOffline(singleVolumeNumber, partP->name);
746 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
747 partP->name, name, (Testing ? "(READONLY mode)" : ""));
749 Log("***Forced salvage of all volumes on this partition***\n");
754 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
761 assert((dirp = opendir(fileSysPath)) != NULL);
762 while ((dp = readdir(dirp))) {
763 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
764 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
766 Log("Removing old salvager temp files %s\n", dp->d_name);
767 strcpy(npath, fileSysPath);
769 strcat(npath, dp->d_name);
775 tdir = (tmpdir ? tmpdir : fileSysPath);
777 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
778 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
780 snprintf(inodeListPath, 255, "%s/salvage.inodes.%s.%d", tdir, name,
783 if (GetInodeSummary(inodeListPath, singleVolumeNumber) < 0) {
784 unlink(inodeListPath);
788 /* Using nt_unlink here since we're really using the delete on close
789 * semantics of unlink. In most places in the salvager, we really do
790 * mean to unlink the file at that point. Those places have been
791 * modified to actually do that so that the NT crt can be used there.
794 _open_osfhandle((intptr_t)nt_open(inodeListPath, O_RDWR, 0), O_RDWR);
795 nt_unlink(inodeListPath); /* NT's crt unlink won't if file is open. */
797 inodeFd = afs_open(inodeListPath, O_RDONLY);
798 unlink(inodeListPath);
801 Abort("Temporary file %s is missing...\n", inodeListPath);
802 if (ListInodeOption) {
806 /* enumerate volumes in the partition.
807 * figure out sets of read-only + rw volumes.
808 * salvage each set, read-only volumes first, then read-write.
809 * Fix up inodes on last volume in set (whether it is read-write
812 GetVolumeSummary(singleVolumeNumber);
814 for (i = j = 0, vsp = volumeSummaryp, esp = vsp + nVolumes;
815 i < nVolumesInInodeFile; i = j) {
816 VolumeId rwvid = inodeSummary[i].RWvolumeId;
818 j < nVolumesInInodeFile && inodeSummary[j].RWvolumeId == rwvid;
820 VolumeId vid = inodeSummary[j].volumeId;
821 struct VolumeSummary *tsp;
822 /* Scan volume list (from partition root directory) looking for the
823 * current rw volume number in the volume list from the inode scan.
824 * If there is one here that is not in the inode volume list,
826 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
828 DeleteExtraVolumeHeaderFile(vsp);
830 /* Now match up the volume summary info from the root directory with the
831 * entry in the volume list obtained from scanning inodes */
832 inodeSummary[j].volSummary = NULL;
833 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
834 if (tsp->header.id == vid) {
835 inodeSummary[j].volSummary = tsp;
841 /* Salvage the group of volumes (several read-only + 1 read/write)
842 * starting with the current read-only volume we're looking at.
844 SalvageVolumeGroup(&inodeSummary[i], j - i);
847 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
848 for (; vsp < esp; vsp++) {
850 DeleteExtraVolumeHeaderFile(vsp);
853 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
854 RemoveTheForce(fileSysPath);
856 if (!Testing && singleVolumeNumber) {
857 AskOnline(singleVolumeNumber, fileSysPartition->name);
859 /* Step through the volumeSummary list and set all volumes on-line.
860 * The volumes were taken off-line in GetVolumeSummary.
862 for (j = 0; j < nVolumes; j++) {
863 AskOnline(volumeSummaryp[j].header.id, fileSysPartition->name);
867 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
868 fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
871 close(inodeFd); /* SalvageVolumeGroup was the last which needed it. */
/*
 * DeleteExtraVolumeHeaderFile: deal with a volume header file that has no
 * matching inode data.  Builds the path from fileSysPath and vsp->fileName,
 * logs that the header is (or, when Testing, would have been) deleted, calls
 * VDestroyVolumeDiskHeader(), and finally unlink()s the path, tolerating
 * ENOENT.
 *
 * vsp - summary entry for the header file to remove.
 */
875 DeleteExtraVolumeHeaderFile(register struct VolumeSummary *vsp)
/* NOTE(review): sprintf() into path is unbounded - confirm the size of path
 * covers fileSysPath plus the longest header file name. */
878 sprintf(path, "%s/%s", fileSysPath, vsp->fileName);
881 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
884 code = VDestroyVolumeDiskHeader(fileSysPartition, vsp->header.id, vsp->header.parent);
886 Log("Error %ld destroying volume disk header for volume %lu\n",
887 afs_printable_int32_ld(code),
888 afs_printable_uint32_lu(vsp->header.id));
891 /* make sure we actually delete the fileName file; ENOENT
892 * is fine, since VDestroyVolumeDiskHeader probably already
894 if (unlink(path) && errno != ENOENT) {
895 Log("Unable to unlink %s (errno = %d)\n", path, errno);
/*
 * CompareInodes: qsort() comparator over struct ViceInodeInfo records.
 *
 * When either record is a special inode (vnodeNumber == INODESPECIAL), each
 * side is first mapped to a read-write volume id: parentId for a special
 * inode, volumeId otherwise.  Two special inodes of the same volume are
 * ordered by u.special.type.
 *
 * Ordinary records are tested on volumeId, then vnodeNumber.  For records
 * that still tie, vnodeUniquifier and then inodeDataVersion are examined with
 * the sense reversed so that the preferred inode sorts first.  The literal
 * thresholds (90% and 10% of the field range) pick out the case where one
 * value is near the top of the range and the other near the bottom.
 * NOTE(review): that case looks like counter wrap-around handling - confirm.
 */
902 CompareInodes(const void *_p1, const void *_p2)
904 register const struct ViceInodeInfo *p1 = _p1;
905 register const struct ViceInodeInfo *p2 = _p2;
906 if (p1->u.vnode.vnodeNumber == INODESPECIAL
907 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
908 VolumeId p1rwid, p2rwid;
910 (p1->u.vnode.vnodeNumber ==
911 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
913 (p2->u.vnode.vnodeNumber ==
914 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
919 if (p1->u.vnode.vnodeNumber == INODESPECIAL
920 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
921 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
922 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
923 if (p1->u.vnode.volumeId == p1rwid)
925 if (p2->u.vnode.volumeId == p2rwid)
927 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
929 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
930 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
931 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
933 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
935 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
937 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
939 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
941 /* The following tests are reversed, so that the most desirable
942 * of several similar inodes comes first */
943 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
945 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
946 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
950 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
951 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
956 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
958 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
959 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
963 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
964 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
969 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
971 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
972 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
976 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
977 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
982 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
984 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
985 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
989 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
990 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/*
 * CountVolumeInodes: walk consecutive entries of ip that share the first
 * entry's volumeId (at most maxInodes of them) and fill in *summary.
 *
 * ip        - first inode record of the volume; assumes the array is grouped
 *             by volume id (the caller in this file sorts it with
 *             CompareInodes first) - TODO confirm for other callers.
 * maxInodes - number of records available starting at ip.
 * summary   - receives volumeId, RWvolumeId, nInodes, nSpecialInodes and
 *             maxUniquifier.
 */
999 CountVolumeInodes(register struct ViceInodeInfo *ip, int maxInodes,
1000 register struct InodeSummary *summary)
1002 VolumeId volume = ip->u.vnode.volumeId;
1003 VolumeId rwvolume = volume;
1004 register int n, nSpecial;
1005 register Unique maxunique;
/* maxInodes-- is tested first, so ip is not read once the count is used up. */
1008 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1010 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1012 rwvolume = ip->u.special.parentId;
1013 /* This isn't quite right, as there could (in error) be different
1014 * parent inodes in different special vnodes */
1016 if (maxunique < ip->u.vnode.vnodeUniquifier)
1017 maxunique = ip->u.vnode.vnodeUniquifier;
1021 summary->volumeId = volume;
1022 summary->RWvolumeId = rwvolume;
1023 summary->nInodes = n;
1024 summary->nSpecialInodes = nSpecial;
1025 summary->maxUniquifier = maxunique;
/*
 * OnlyOneVolume: filter predicate passed to ListViceInodes() when a single
 * volume is being salvaged.  Returns nonzero when the inode belongs to
 * singleVolumeNumber: special inodes are matched on parentId, all others on
 * volumeId.
 *
 * inodeinfo          - inode record to test.
 * singleVolumeNumber - volume id to match.
 * rock               - unused here.
 */
1029 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1031 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1032 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1033 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1038 * Collect list of inodes in file named by path. If a truly fatal error,
1039 * unlink the file and abort. For lesser errors, return -1. The file will
1040 * be unlinked by the caller.
/*
 * GetInodeSummary: build the sorted inode table and the per-volume summary.
 *
 * path               - temporary file that ListViceInodes() fills with
 *                      struct ViceInodeInfo records.
 * singleVolumeNumber - when nonzero, ListViceInodes() is given OnlyOneVolume
 *                      as its filter together with this id.
 *
 * Outline: list inodes into path; open it as inodeFd; create a summary file
 * under tmpdir (or the partition directory when tmpdir is unset); then, in a
 * forked child unless canfork is false or debug is set, read the whole table,
 * qsort() it with CompareInodes, write it back, and append one
 * struct InodeSummary per volume via CountVolumeInodes().  Afterwards the
 * summary file is read back into inodeSummary and nVolumesInInodeFile is set.
 */
1043 GetInodeSummary(char *path, VolumeId singleVolumeNumber)
1045 struct afs_stat status;
1047 struct ViceInodeInfo *ip;
1048 struct InodeSummary summary;
1049 char summaryFileName[50];
1052 char *dev = fileSysPath;
1053 char *wpath = fileSysPath;
1055 char *dev = fileSysDeviceName;
1056 char *wpath = filesysfulldev;
1058 char *part = fileSysPath;
1061 /* This file used to come from vfsck; cobble it up ourselves now... */
1063 ListViceInodes(dev, fileSysPath, path,
1064 singleVolumeNumber ? OnlyOneVolume : 0,
1065 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1067 Log("*** I/O error %d when writing a tmp inode file %s; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, path, dev);
1071 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1073 if (forceSal && !ForceSalvage) {
1074 Log("***Forced salvage of all volumes on this partition***\n");
1077 inodeFd = afs_open(path, O_RDWR);
1078 if (inodeFd == -1 || afs_fstat(inodeFd, &status) == -1) {
1080 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1082 tdir = (tmpdir ? tmpdir : part);
1084 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
/* NOTE(review): summaryFileName is 50 bytes and this strcpy() is unbounded;
 * the _tempnam() result is also not released here - confirm. */
1085 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp"));
1087 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1088 "%s/salvage.temp.%d", tdir, getpid());
1090 summaryFile = afs_fopen(summaryFileName, "a+");
1091 if (summaryFile == NULL) {
1094 Abort("Unable to create inode summary file\n");
/* Fork() is only reached when canfork is set and debug is clear; the branch
 * below runs either in-process or where Fork() returned 0. */
1096 if (!canfork || debug || Fork() == 0) {
1098 unsigned long st_size=(unsigned long) status.st_size;
1099 nInodes = st_size / sizeof(struct ViceInodeInfo);
1101 fclose(summaryFile);
1103 unlink(summaryFileName);
1104 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1105 RemoveTheForce(fileSysPath);
1107 struct VolumeSummary *vsp;
1110 GetVolumeSummary(singleVolumeNumber);
1112 for (i = 0, vsp = volumeSummaryp; i < nVolumes; i++) {
1114 DeleteExtraVolumeHeaderFile(vsp);
1117 Log("%s vice inodes on %s; not salvaged\n",
1118 singleVolumeNumber ? "No applicable" : "No", dev);
1121 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1123 fclose(summaryFile);
1126 unlink(summaryFileName);
1128 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1131 if (read(inodeFd, ip, st_size) != st_size) {
1132 fclose(summaryFile);
1135 unlink(summaryFileName);
1136 Abort("Unable to read inode table; %s not salvaged\n", dev);
1138 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1139 if (afs_lseek(inodeFd, 0, SEEK_SET) == -1
1140 || write(inodeFd, ip, st_size) != st_size) {
1141 fclose(summaryFile);
1144 unlink(summaryFileName);
1145 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1149 CountVolumeInodes(ip, nInodes, &summary);
1150 if (fwrite(&summary, sizeof(summary), 1, summaryFile) != 1) {
1151 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1152 fclose(summaryFile);
/* Advance past the volume just summarized: index, remaining count and the
 * record pointer all move by summary.nInodes. */
1156 summary.index += (summary.nInodes);
1157 nInodes -= summary.nInodes;
1158 ip += summary.nInodes;
1160 /* Use fflush rather than fclose here, because fclose would break debug mode */
1161 if (fflush(summaryFile) == EOF || fsync(fileno(summaryFile)) == -1) {
1162 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1163 fclose(summaryFile);
1167 if (canfork && !debug) {
1172 if (Wait("Inode summary") == -1) {
1173 fclose(summaryFile);
1176 unlink(summaryFileName);
1177 Exit(1); /* salvage of this partition aborted */
/* NOTE(review): the afs_fstat() and afs_lseek() calls below sit inside
 * assert(); if this assert can be compiled out the calls disappear with it -
 * confirm against afs/assert.h. */
1180 assert(afs_fstat(fileno(summaryFile), &status) != -1);
1181 if (status.st_size != 0) {
1183 unsigned long st_status=(unsigned long)status.st_size;
1184 inodeSummary = (struct InodeSummary *)malloc(st_status);
1185 assert(inodeSummary != NULL);
1186 /* For GNU we need to do lseek to get the file pointer moved. */
1187 assert(afs_lseek(fileno(summaryFile), 0, SEEK_SET) == 0);
1188 ret = read(fileno(summaryFile), inodeSummary, st_status);
1189 assert(ret == st_status);
1191 nVolumesInInodeFile =(unsigned long)(status.st_size) / sizeof(struct InodeSummary);
/* NOTE(review): both conversions are %d but the second argument is cast to
 * unsigned long; %lu would match that argument. */
1192 Log("%d nVolumesInInodeFile %d \n",nVolumesInInodeFile,(unsigned long)(status.st_size));
1193 fclose(summaryFile);
1195 unlink(summaryFileName);
1199 /* Comparison routine for volume sort.
1200 This is set up so that a read-write volume comes immediately before
1201 any read-only clones of that volume */
/*
 * qsort() comparator over struct VolumeSummary: orders by header.parent
 * first, then tests whether either entry is the read-write volume
 * (id == parent), and otherwise orders two read-only volumes by header.id.
 * NOTE(review): the return expressions below yield only -1 or 1, never 0,
 * even for identical keys - confirm that is intended.
 */
1203 CompareVolumes(const void *_p1, const void *_p2)
1205 register const struct VolumeSummary *p1 = _p1;
1206 register const struct VolumeSummary *p2 = _p2;
1207 if (p1->header.parent != p2->header.parent)
1208 return p1->header.parent < p2->header.parent ? -1 : 1;
1209 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1211 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1213 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
/*
 * GetVolumeSummary: read the volume header files (names ending in VHDREXT)
 * found in fileSysPath into the volumeSummaryp array and sort the array with
 * qsort().
 *
 * singleVolumeNumber - 0 to consider every header; otherwise only headers
 *                      whose id or parent equals this volume id are
 *                      processed, and the other members of that group are
 *                      passed to AskOffline().
 *
 * A header that cannot be read or lacks VOLUMEHEADERMAGIC is reported as
 * deleted when all volumes are being scanned; a header whose file name
 * differs from the VFORMAT rendering of its id is likewise reported as
 * deleted (both say "would have been" when Testing).
 */
1217 GetVolumeSummary(VolumeId singleVolumeNumber)
1220 afs_int32 nvols = 0;
1221 struct VolumeSummary *vsp, vs;
1222 struct VolumeDiskHeader diskHeader;
1225 /* Get headers from volume directory */
1226 dirp = opendir(fileSysPath);
1228 Abort("Can't read directory %s; not salvaged\n", fileSysPath);
/* First pass (all-volumes case only): read each candidate header. */
1229 if (!singleVolumeNumber) {
1230 while ((dp = readdir(dirp))) {
1231 char *p = dp->d_name;
1232 p = strrchr(dp->d_name, '.');
1233 if (p != NULL && strcmp(p, VHDREXT) == 0) {
1236 sprintf(name, "%s/%s", fileSysPath, dp->d_name);
1237 if ((fd = afs_open(name, O_RDONLY)) != -1
1238 && read(fd, (char *)&diskHeader, sizeof(diskHeader))
1239 == sizeof(diskHeader)
1240 && diskHeader.stamp.magic == VOLUMEHEADERMAGIC) {
1241 DiskToVolumeHeader(&vs.header, &diskHeader);
/* NOTE(review): the directory is re-opened as "." although the first open
 * used fileSysPath; this relies on the working directory being the
 * partition directory - confirm. */
1249 dirp = opendir("."); /* No rewinddir for NT */
1256 (struct VolumeSummary *)malloc(nvols *
1257 sizeof(struct VolumeSummary));
/* NOTE(review): fixed capacity of 20 entries on this path; confirm the fill
 * loop below cannot store more than 20 summaries. */
1260 (struct VolumeSummary *)malloc(20 * sizeof(struct VolumeSummary));
1261 assert(volumeSummaryp != NULL);
1264 vsp = volumeSummaryp;
1265 while ((dp = readdir(dirp))) {
1266 char *p = dp->d_name;
1267 p = strrchr(dp->d_name, '.');
1268 if (p != NULL && strcmp(p, VHDREXT) == 0) {
1272 sprintf(name, "%s/%s", fileSysPath, dp->d_name);
1273 if ((fd = afs_open(name, O_RDONLY)) == -1
1274 || read(fd, &diskHeader, sizeof(diskHeader))
1275 != sizeof(diskHeader)
1276 || diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
1281 if (!singleVolumeNumber) {
1283 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1286 Log("Unable to unlink %s (errno = %d)\n", name, errno);
1291 char nameShouldBe[64];
1292 DiskToVolumeHeader(&vsp->header, &diskHeader);
/* The requested id names a volume whose parent is a different id, i.e. the
 * request is for a clone rather than the read-write volume. */
1293 if (singleVolumeNumber && vsp->header.id == singleVolumeNumber
1294 && vsp->header.parent != singleVolumeNumber) {
1295 if (programType == salvageServer) {
1296 #ifdef SALVSYNC_BUILD_CLIENT
1297 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1298 vsp->header.id, vsp->header.parent);
1299 if (SALVSYNC_LinkVolume(vsp->header.parent,
1301 fileSysPartition->name,
1303 Log("schedule request failed\n");
1306 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1308 Log("%u is a read-only volume; not salvaged\n",
1309 singleVolumeNumber);
1313 if (!singleVolumeNumber
1314 || (vsp->header.id == singleVolumeNumber
1315 || vsp->header.parent == singleVolumeNumber)) {
1316 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1317 VFORMAT, afs_printable_uint32_lu(vsp->header.id));
1318 if (singleVolumeNumber
1319 && vsp->header.id != singleVolumeNumber)
1320 AskOffline(vsp->header.id, fileSysPartition->name);
1321 if (strcmp(nameShouldBe, dp->d_name)) {
1323 Log("Volume header file %s is incorrectly named; %sdeleted (it will be recreated later, if necessary)\n", name, (Testing ? "it would have been " : ""));
1326 Log("Unable to unlink %s (errno = %d)\n", name, errno);
1330 vsp->fileName = ToString(dp->d_name);
1340 qsort(volumeSummaryp, nVolumes, sizeof(struct VolumeSummary),
1344 /* Find the link table. This should be associated with the RW volume or, if
1345 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
1348 FindLinkHandle(register struct InodeSummary *isp, int nVols,
1349 struct ViceInodeInfo *allInodes)
1352 struct ViceInodeInfo *ip;
1354 for (i = 0; i < nVols; i++) {
1355 ip = allInodes + isp[i].index;
1356 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1357 if (ip[j].u.special.type == VI_LINKTABLE)
1358 return ip[j].inodeNumber;
1365 CreateLinkTable(register struct InodeSummary *isp, Inode ino)
1367 struct versionStamp version;
1370 if (!VALID_INO(ino))
1372 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
1373 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1374 if (!VALID_INO(ino))
1376 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1377 isp->RWvolumeId, errno);
1378 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1379 fdP = IH_OPEN(VGLinkH);
1381 Abort("Can't open link table for volume %u (error = %d)\n",
1382 isp->RWvolumeId, errno);
1384 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1385 Abort("Can't truncate link table for volume %u (error = %d)\n",
1386 isp->RWvolumeId, errno);
1388 version.magic = LINKTABLEMAGIC;
1389 version.version = LINKTABLEVERSION;
1391 if (FDH_WRITE(fdP, (char *)&version, sizeof(version))
1393 Abort("Can't truncate link table for volume %u (error = %d)\n",
1394 isp->RWvolumeId, errno);
1396 FDH_REALLYCLOSE(fdP);
1398 /* If the volume summary exists (i.e., the V*.vol header file exists),
1399 * then set this inode there as well.
1401 if (isp->volSummary)
1402 isp->volSummary->header.linkTable = ino;
1411 SVGParms_t *parms = (SVGParms_t *) arg;
1412 DoSalvageVolumeGroup(parms->svgp_inodeSummaryp, parms->svgp_count);
1417 SalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1420 pthread_attr_t tattr;
1424 /* Initialize per volume global variables, even if later code does so */
1428 memset(&VolInfo, 0, sizeof(VolInfo));
1430 parms.svgp_inodeSummaryp = isp;
1431 parms.svgp_count = nVols;
1432 code = pthread_attr_init(&tattr);
1434 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1438 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1440 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1443 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1445 Log("Failed to create thread to salvage volume group %u\n",
1449 (void)pthread_join(tid, NULL);
1451 #endif /* AFS_NT40_ENV */
1454 DoSalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1456 struct ViceInodeInfo *inodes, *allInodes, *ip;
1457 int i, totalInodes, size, salvageTo;
1461 int dec_VGLinkH = 0;
1463 FdHandle_t *fdP = NULL;
1466 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1467 && isp->nSpecialInodes > 0);
1468 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1469 if (!ForceSalvage && QuickCheck(isp, nVols))
1472 if (ShowMounts && !haveRWvolume)
1474 if (canfork && !debug && Fork() != 0) {
1475 (void)Wait("Salvage volume group");
1478 for (i = 0, totalInodes = 0; i < nVols; i++)
1479 totalInodes += isp[i].nInodes;
1480 size = totalInodes * sizeof(struct ViceInodeInfo);
1481 inodes = (struct ViceInodeInfo *)malloc(size);
1482 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1483 * for the partition, if all the inodes
1484 * had been read into memory */
1486 (inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1488 assert(read(inodeFd, inodes, size) == size);
1490 /* Don't try to salvage a read write volume if there isn't one on this
1492 salvageTo = haveRWvolume ? 0 : 1;
1494 #ifdef AFS_NAMEI_ENV
1495 ino = FindLinkHandle(isp, nVols, allInodes);
1496 if (VALID_INO(ino)) {
1497 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1498 fdP = IH_OPEN(VGLinkH);
1500 if (!VALID_INO(ino) || fdP == NULL) {
1501 Log("%s link table for volume %u.\n",
1502 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1504 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1507 struct ViceInodeInfo *ip;
1508 CreateLinkTable(isp, ino);
1509 fdP = IH_OPEN(VGLinkH);
1510 /* Sync fake 1 link counts to the link table, now that it exists */
1512 for (i = 0; i < nVols; i++) {
1513 ip = allInodes + isp[i].index;
1514 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1516 nt_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1518 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1526 FDH_REALLYCLOSE(fdP);
1528 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1531 /* Salvage in reverse order--read/write volume last; this way any
1532 * Inodes not referenced by the time we salvage the read/write volume
1533 * can be picked up by the read/write volume */
1534 /* ACTUALLY, that's not done right now--the inodes just vanish */
1535 for (i = nVols - 1; i >= salvageTo; i--) {
1537 struct InodeSummary *lisp = &isp[i];
1538 #ifdef AFS_NAMEI_ENV
1539 /* If only the RO is present on this partition, the link table
1540 * shows up as a RW volume special file. Need to make sure the
1541 * salvager doesn't try to salvage the non-existent RW.
1543 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1544 /* If this only special inode is the link table, continue */
1545 if (inodes->u.special.type == VI_LINKTABLE) {
1552 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1553 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1554 /* Check inodes twice. The second time do things seriously. This
1555 * way the whole RO volume can be deleted, below, if anything goes wrong */
1556 for (check = 1; check >= 0; check--) {
1558 if (SalvageVolumeHeaderFile(lisp, allInodes, rw, check, &deleteMe)
1560 MaybeZapVolume(lisp, "Volume header", deleteMe, check);
1561 if (rw && deleteMe) {
1562 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1563 * volume won't be called */
1569 if (rw && check == 1)
1571 if (SalvageVnodes(isp, lisp, allInodes, check) == -1) {
1572 MaybeZapVolume(lisp, "Vnode index", 0, check);
1578 /* Fix actual inode counts */
1580 Log("totalInodes %d\n",totalInodes);
1581 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1582 static int TraceBadLinkCounts = 0;
1583 #ifdef AFS_NAMEI_ENV
1584 if (VGLinkH->ih_ino == ip->inodeNumber) {
1585 dec_VGLinkH = ip->linkCount - VGLinkH_cnt;
1586 VGLinkH_p1 = ip->u.param[0];
1587 continue; /* Deal with this last. */
1590 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1591 TraceBadLinkCounts--; /* Limit reports, per volume */
1592 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(NULL, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
1594 while (ip->linkCount > 0) {
1595 /* below used to assert, not break */
1597 if (IH_DEC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1598 Log("idec failed. inode %s errno %d\n",
1599 PrintInode(NULL, ip->inodeNumber), errno);
1605 while (ip->linkCount < 0) {
1606 /* these used to be asserts */
1608 if (IH_INC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1609 Log("iinc failed. inode %s errno %d\n",
1610 PrintInode(NULL, ip->inodeNumber), errno);
1617 #ifdef AFS_NAMEI_ENV
1618 while (dec_VGLinkH > 0) {
1619 if (IH_DEC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1620 Log("idec failed on link table, errno = %d\n", errno);
1624 while (dec_VGLinkH < 0) {
1625 if (IH_INC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1626 Log("iinc failed on link table, errno = %d\n", errno);
1633 /* Directory consistency checks on the rw volume */
1635 SalvageVolume(isp, VGLinkH);
1636 IH_RELEASE(VGLinkH);
1638 if (canfork && !debug) {
1645 QuickCheck(register struct InodeSummary *isp, int nVols)
1647 /* Check headers BEFORE forking */
1651 for (i = 0; i < nVols; i++) {
1652 struct VolumeSummary *vs = isp[i].volSummary;
1653 VolumeDiskData volHeader;
1655 /* Don't salvage just because phantom rw volume is there... */
1656 /* (If a read-only volume exists, read/write inodes must also exist) */
1657 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
1661 IH_INIT(h, fileSysDevice, vs->header.parent, vs->header.volumeInfo);
1662 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
1663 == sizeof(volHeader)
1664 && volHeader.stamp.magic == VOLUMEINFOMAGIC
1665 && volHeader.dontSalvage == DONT_SALVAGE
1666 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
1667 if (volHeader.inUse != 0) {
1668 volHeader.inUse = 0;
1669 volHeader.inService = 1;
1671 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
1672 != sizeof(volHeader)) {
1688 /* SalvageVolumeHeaderFile
1690 * Salvage the top level V*.vol header file. Make sure the special files
1691 * exist and that there are no duplicates.
1693 * Calls SalvageHeader for each possible type of volume special file.
1697 SalvageVolumeHeaderFile(register struct InodeSummary *isp,
1698 register struct ViceInodeInfo *inodes, int RW,
1699 int check, int *deleteMe)
1702 register struct ViceInodeInfo *ip;
1703 int allinodesobsolete = 1;
1704 struct VolumeDiskHeader diskHeader;
1705 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
1708 /* keeps track of special inodes that are probably 'good'; they are
1709 * referenced in the vol header, and are included in the given inodes
1714 } goodspecial[MAXINODETYPE];
1719 memset(goodspecial, 0, sizeof(goodspecial));
1721 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
1723 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
1725 Log("cannot allocate memory for inode skip array when salvaging "
1726 "volume %lu; not performing duplicate special inode recovery\n",
1727 afs_printable_uint32_lu(isp->volumeId));
1728 /* still try to perform the salvage; the skip array only does anything
1729 * if we detect duplicate special inodes */
1733 * First, look at the special inodes and see if any are referenced by
1734 * the existing volume header. If we find duplicate special inodes, we
1735 * can use this information to use the referenced inode (it's more
1736 * likely to be the 'good' one), and throw away the duplicates.
1738 if (isp->volSummary && skip) {
1739 /* use tempHeader, so we can use the stuff[] array to easily index
1740 * into the isp->volSummary special inodes */
1741 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
1743 for (i = 0; i < isp->nSpecialInodes; i++) {
1744 ip = &inodes[isp->index + i];
1745 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
1746 /* will get taken care of in a later loop */
1749 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
1750 goodspecial[ip->u.special.type-1].valid = 1;
1751 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
1756 memset(&tempHeader, 0, sizeof(tempHeader));
1757 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
1758 tempHeader.stamp.version = VOLUMEHEADERVERSION;
1759 tempHeader.id = isp->volumeId;
1760 tempHeader.parent = isp->RWvolumeId;
1762 /* Check for duplicates (inodes are sorted by type field) */
1763 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
1764 ip = &inodes[isp->index + i];
1765 if (ip->u.special.type == (ip + 1)->u.special.type) {
1766 afs_ino_str_t stmp1, stmp2;
1768 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
1769 /* Will be caught in the loop below */
1773 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
1774 ip->u.special.type, isp->volumeId,
1775 PrintInode(stmp1, ip->inodeNumber),
1776 PrintInode(stmp2, (ip+1)->inodeNumber));
1778 if (skip && goodspecial[ip->u.special.type-1].valid) {
1779 Inode gi = goodspecial[ip->u.special.type-1].inode;
1782 Log("using special inode referenced by vol header (%s)\n",
1783 PrintInode(stmp1, gi));
1786 /* the volume header references some special inode of
1787 * this type in the inodes array; are we it? */
1788 if (ip->inodeNumber != gi) {
1790 } else if ((ip+1)->inodeNumber != gi) {
1791 /* in case this is the last iteration; we need to
1792 * make sure we check ip+1, too */
1797 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
1805 for (i = 0; i < isp->nSpecialInodes; i++) {
1806 ip = &inodes[isp->index + i];
1807 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
1809 Log("Rubbish header inode %s of type %d\n",
1810 PrintInode(NULL, ip->inodeNumber),
1811 ip->u.special.type);
1817 Log("Rubbish header inode %s of type %d; deleted\n",
1818 PrintInode(NULL, ip->inodeNumber),
1819 ip->u.special.type);
1820 } else if (!stuff[ip->u.special.type - 1].obsolete) {
1821 if (skip && skip[i]) {
1822 if (orphans == ORPH_REMOVE) {
1823 Log("Removing orphan special inode %s of type %d\n",
1824 PrintInode(NULL, ip->inodeNumber), ip->u.special.type);
1827 Log("Ignoring orphan special inode %s of type %d\n",
1828 PrintInode(NULL, ip->inodeNumber), ip->u.special.type);
1829 /* fall through to the ip->linkCount--; line below */
1832 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
1833 allinodesobsolete = 0;
1835 if (!check && ip->u.special.type != VI_LINKTABLE)
1836 ip->linkCount--; /* Keep the inode around */
1844 if (allinodesobsolete) {
1851 VGLinkH_cnt++; /* one for every header. */
1853 if (!RW && !check && isp->volSummary) {
1854 ClearROInUseBit(isp->volSummary);
1858 for (i = 0; i < MAXINODETYPE; i++) {
1859 if (stuff[i].inodeType == VI_LINKTABLE) {
1860 /* Gross hack: SalvageHeader does a bcmp on the volume header.
1861 * And we may have recreated the link table earlier, so set the
1862 * RW header as well.
1864 if (VALID_INO(VGLinkH->ih_ino)) {
1865 *stuff[i].inode = VGLinkH->ih_ino;
1869 if (SalvageHeader(&stuff[i], isp, check, deleteMe) == -1 && check)
1873 if (isp->volSummary == NULL) {
1875 char headerName[64];
1876 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
1877 (void)afs_snprintf(path, sizeof path, "%s/%s", fileSysPath, headerName);
1879 Log("No header file for volume %u\n", isp->volumeId);
1883 Log("No header file for volume %u; %screating %s\n",
1884 isp->volumeId, (Testing ? "it would have been " : ""),
1886 isp->volSummary = (struct VolumeSummary *)
1887 malloc(sizeof(struct VolumeSummary));
1888 isp->volSummary->fileName = ToString(headerName);
1890 writefunc = VCreateVolumeDiskHeader;
1893 char headerName[64];
1894 /* hack: these two fields are obsolete... */
1895 isp->volSummary->header.volumeAcl = 0;
1896 isp->volSummary->header.volumeMountTable = 0;
1899 (&isp->volSummary->header, &tempHeader,
1900 sizeof(struct VolumeHeader))) {
1901 /* We often remove the name before calling us, so we make a fake one up */
1902 if (isp->volSummary->fileName) {
1903 strcpy(headerName, isp->volSummary->fileName);
1905 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
1906 isp->volSummary->fileName = ToString(headerName);
1908 (void)afs_snprintf(path, sizeof path, "%s/%s", fileSysPath, headerName);
1910 Log("Header file %s is damaged or no longer valid%s\n", path,
1911 (check ? "" : "; repairing"));
1915 writefunc = VWriteVolumeDiskHeader;
1919 memcpy(&isp->volSummary->header, &tempHeader,
1920 sizeof(struct VolumeHeader));
1923 Log("It would have written a new header file for volume %u\n",
1927 VolumeHeaderToDisk(&diskHeader, &tempHeader);
1928 code = (*writefunc)(&diskHeader, fileSysPartition);
1930 Log("Error %ld writing volume header file for volume %lu\n",
1931 afs_printable_int32_ld(code),
1932 afs_printable_uint32_lu(diskHeader.id));
1937 IH_INIT(isp->volSummary->volumeInfoHandle, fileSysDevice, isp->RWvolumeId,
1938 isp->volSummary->header.volumeInfo);
1943 SalvageHeader(register struct stuff *sp, struct InodeSummary *isp, int check,
1947 VolumeDiskData volumeInfo;
1948 struct versionStamp fileHeader;
1957 #ifndef AFS_NAMEI_ENV
1958 if (sp->inodeType == VI_LINKTABLE)
1961 if (*(sp->inode) == 0) {
1963 Log("Missing inode in volume header (%s)\n", sp->description);
1967 Log("Missing inode in volume header (%s); %s\n", sp->description,
1968 (Testing ? "it would have recreated it" : "recreating"));
1971 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
1972 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
1973 if (!VALID_INO(*(sp->inode)))
1975 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
1976 sp->description, errno);
1981 IH_INIT(specH, fileSysDevice, isp->RWvolumeId, *(sp->inode));
1982 fdP = IH_OPEN(specH);
1983 if (OKToZap && (fdP == NULL) && BadError(errno)) {
1984 /* bail out early and destroy the volume */
1986 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
1993 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
1994 sp->description, errno);
1997 && (FDH_READ(fdP, (char *)&header, sp->size) != sp->size
1998 || header.fileHeader.magic != sp->stamp.magic)) {
2000 Log("Part of the header (%s) is corrupted\n", sp->description);
2001 FDH_REALLYCLOSE(fdP);
2005 Log("Part of the header (%s) is corrupted; recreating\n",
2009 if (sp->inodeType == VI_VOLINFO
2010 && header.volumeInfo.destroyMe == DESTROY_ME) {
2013 FDH_REALLYCLOSE(fdP);
2017 if (recreate && !Testing) {
2020 ("Internal error: recreating volume header (%s) in check mode\n",
2022 code = FDH_TRUNC(fdP, 0);
2024 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2025 sp->description, errno);
2027 /* The following code should be moved into vutil.c */
2028 if (sp->inodeType == VI_VOLINFO) {
2030 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2031 header.volumeInfo.stamp = sp->stamp;
2032 header.volumeInfo.id = isp->volumeId;
2033 header.volumeInfo.parentId = isp->RWvolumeId;
2034 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2035 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2036 isp->volumeId, isp->volumeId);
2037 header.volumeInfo.inService = 0;
2038 header.volumeInfo.blessed = 0;
2039 /* The + 1000 is a hack in case there are any files out in venus caches */
2040 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2041 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2042 header.volumeInfo.needsCallback = 0;
2043 gettimeofday(&tp, 0);
2044 header.volumeInfo.creationDate = tp.tv_sec;
2045 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
2047 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
2048 sp->description, errno);
2051 FDH_WRITE(fdP, (char *)&header.volumeInfo,
2052 sizeof(header.volumeInfo));
2053 if (code != sizeof(header.volumeInfo)) {
2056 ("Unable to write volume header file (%s) (errno = %d)\n",
2057 sp->description, errno);
2058 Abort("Unable to write entire volume header file (%s)\n",
2062 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
2064 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
2065 sp->description, errno);
2067 code = FDH_WRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp));
2068 if (code != sizeof(sp->stamp)) {
2071 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2072 sp->description, errno);
2074 ("Unable to write entire version stamp in volume header file (%s)\n",
2079 FDH_REALLYCLOSE(fdP);
2081 if (sp->inodeType == VI_VOLINFO) {
2082 VolInfo = header.volumeInfo;
2085 if (VolInfo.updateDate) {
2086 strcpy(update, TimeStamp(VolInfo.updateDate, 0));
2088 Log("%s (%u) %supdated %s\n", VolInfo.name, VolInfo.id,
2089 (Testing ? "it would have been " : ""), update);
2091 strcpy(update, TimeStamp(VolInfo.creationDate, 0));
2093 Log("%s (%u) not updated (created %s)\n", VolInfo.name,
2094 VolInfo.id, update);
2104 SalvageVnodes(register struct InodeSummary *rwIsp,
2105 register struct InodeSummary *thisIsp,
2106 register struct ViceInodeInfo *inodes, int check)
2108 int ilarge, ismall, ioffset, RW, nInodes;
2109 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2112 RW = (rwIsp == thisIsp);
2113 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2115 SalvageIndex(thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2116 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2117 if (check && ismall == -1)
2120 SalvageIndex(thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2121 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2122 return (ilarge == 0 && ismall == 0 ? 0 : -1);
2126 SalvageIndex(Inode ino, VnodeClass class, int RW,
2127 register struct ViceInodeInfo *ip, int nInodes,
2128 struct VolumeSummary *volSummary, int check)
2130 VolumeId volumeNumber;
2131 char buf[SIZEOF_LARGEDISKVNODE];
2132 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2134 StreamHandle_t *file;
2135 struct VnodeClassInfo *vcp;
2137 afs_fsize_t vnodeLength;
2138 int vnodeIndex, nVnodes;
2139 afs_ino_str_t stmp1, stmp2;
2143 volumeNumber = volSummary->header.id;
2144 IH_INIT(handle, fileSysDevice, volSummary->header.parent, ino);
2145 fdP = IH_OPEN(handle);
2146 assert(fdP != NULL);
2147 file = FDH_FDOPEN(fdP, "r+");
2148 assert(file != NULL);
2149 vcp = &VnodeClassInfo[class];
2150 size = OS_SIZE(fdP->fd_fd);
2152 nVnodes = (size / vcp->diskSize) - 1;
2154 assert((nVnodes + 1) * vcp->diskSize == size);
2155 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
2159 for (vnodeIndex = 0;
2160 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2161 nVnodes--, vnodeIndex++) {
2162 if (vnode->type != vNull) {
2163 int vnodeChanged = 0;
2164 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2165 /* Log programs that belong to root (potentially suid root);
2166 * don't bother for read-only or backup volumes */
2167 #ifdef notdef /* This is done elsewhere */
2168 if (ShowRootFiles && RW && vnode->owner == 0 && vnodeNumber != 1)
2169 Log("OWNER IS ROOT %s %u dir %u vnode %u author %u owner %u mode %o\n", VolInfo.name, volumeNumber, vnode->parent, vnodeNumber, vnode->author, vnode->owner, vnode->modeBits);
2171 if (VNDISK_GET_INO(vnode) == 0) {
2173 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2174 memset(vnode, 0, vcp->diskSize);
2178 if (vcp->magic != vnode->vnodeMagic) {
2179 /* bad magic #, probably partially created vnode */
2180 Log("Partially allocated vnode %d deleted.\n",
2182 memset(vnode, 0, vcp->diskSize);
2186 /* ****** Should do a bit more salvage here: e.g. make sure
2187 * vnode type matches what it should be given the index */
2188 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2189 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2190 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2191 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2198 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2199 /* The following doesn't work, because the version number
2200 * is not maintained correctly by the file server */
2201 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2202 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2204 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2210 /* For RW volume, look for vnode with matching inode number;
2211 * if no such match, take the first determined by our sort
2213 register struct ViceInodeInfo *lip = ip;
2214 register int lnInodes = nInodes;
2216 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2217 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2226 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2227 /* "Matching" inode */
2231 vu = vnode->uniquifier;
2232 iu = ip->u.vnode.vnodeUniquifier;
2233 vd = vnode->dataVersion;
2234 id = ip->u.vnode.inodeDataVersion;
2236 * Because of the possibility of the uniquifier overflows (> 4M)
2237 * we compare them modulo the low 22-bits; we shouldn't worry
2238 * about mismatching since there shouldn't be too many old
2239 * uniquifiers of the same vnode...
2241 if (IUnique(vu) != IUnique(iu)) {
2243 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2246 vnode->uniquifier = iu;
2247 #ifdef AFS_3DISPARES
2248 vnode->dataVersion = (id >= vd ?
2251 1887437 ? vd : id) :
2254 1887437 ? id : vd));
2256 #if defined(AFS_SGI_EXMAG)
2257 vnode->dataVersion = (id >= vd ?
2260 15099494 ? vd : id) :
2263 15099494 ? id : vd));
2265 vnode->dataVersion = (id > vd ? id : vd);
2266 #endif /* AFS_SGI_EXMAG */
2267 #endif /* AFS_3DISPARES */
2270 /* don't bother checking for vd > id any more, since
2271 * partial file transfers always result in this state,
2272 * and you can't do much else anyway (you've already
2273 * found the best data you can) */
2274 #ifdef AFS_3DISPARES
2275 if (!vnodeIsDirectory(vnodeNumber)
2276 && ((vd < id && (id - vd) < 1887437)
2277 || ((vd > id && (vd - id) > 1887437)))) {
2279 #if defined(AFS_SGI_EXMAG)
2280 if (!vnodeIsDirectory(vnodeNumber)
2281 && ((vd < id && (id - vd) < 15099494)
2282 || ((vd > id && (vd - id) > 15099494)))) {
2284 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2285 #endif /* AFS_SGI_EXMAG */
2288 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2289 vnode->dataVersion = id;
2294 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2297 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2299 VNDISK_SET_INO(vnode, ip->inodeNumber);
2304 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2306 VNDISK_SET_INO(vnode, ip->inodeNumber);
2309 VNDISK_GET_LEN(vnodeLength, vnode);
2310 if (ip->byteCount != vnodeLength) {
2313 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2318 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2319 VNDISK_SET_LEN(vnode, ip->byteCount);
2323 ip->linkCount--; /* Keep the inode around */
2326 } else { /* no matching inode */
2327 if (VNDISK_GET_INO(vnode) != 0
2328 || vnode->type == vDirectory) {
2329 /* No matching inode--get rid of the vnode */
2331 if (VNDISK_GET_INO(vnode)) {
2333 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)));
2337 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2342 if (VNDISK_GET_INO(vnode)) {
2344 time_t serverModifyTime = vnode->serverModifyTime;
2345 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2349 time_t serverModifyTime = vnode->serverModifyTime;
2350 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2353 memset(vnode, 0, vcp->diskSize);
2356 /* Should not reach here because we checked for
2357 * (inodeNumber == 0) above. And where we zero the vnode,
2358 * we also goto vnodeDone.
2362 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2366 } /* VNDISK_GET_INO(vnode) != 0 */
2368 assert(!(vnodeChanged && check));
2369 if (vnodeChanged && !Testing) {
2371 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2372 (char *)vnode, vcp->diskSize)
2374 VolumeChanged = 1; /* For break call back */
2385 struct VnodeEssence *
2386 CheckVnodeNumber(VnodeId vnodeNumber)
2389 struct VnodeInfo *vip;
2392 class = vnodeIdToClass(vnodeNumber);
2393 vip = &vnodeInfo[class];
2394 offset = vnodeIdToBitNumber(vnodeNumber);
2395 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
2399 CopyOnWrite(register struct DirSummary *dir)
2401 /* Copy the directory unconditionally if we are going to change it:
2402 * not just if it was cloned.
2404 struct VnodeDiskObject vnode;
2405 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2406 Inode oldinode, newinode;
2409 if (dir->copied || Testing)
2411 DFlush(); /* Well justified paranoia... */
2414 IH_IREAD(vnodeInfo[vLarge].handle,
2415 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2417 assert(code == sizeof(vnode));
2418 oldinode = VNDISK_GET_INO(&vnode);
2419 /* Increment the version number by a whole lot to avoid problems with
2420 * clients that were promised new version numbers--but the file server
2421 * crashed before the versions were written to disk.
2424 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2425 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2427 assert(VALID_INO(newinode));
2428 assert(CopyInode(fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2430 VNDISK_SET_INO(&vnode, newinode);
2432 IH_IWRITE(vnodeInfo[vLarge].handle,
2433 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2435 assert(code == sizeof(vnode));
2437 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2438 fileSysDevice, newinode);
2439 /* Don't delete the original inode right away, because the directory is
2440 * still being scanned.
2446 * This function should either successfully create a new dir, or give up
2447 * and leave things the way they were. In particular, if it fails to write
2448 * the new dir properly, it should return w/o changing the reference to the
2452 CopyAndSalvage(register struct DirSummary *dir)
2454 struct VnodeDiskObject vnode;
2455 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2456 Inode oldinode, newinode;
2461 afs_int32 parentUnique = 1;
2462 struct VnodeEssence *vnodeEssence;
2467 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2469 IH_IREAD(vnodeInfo[vLarge].handle,
2470 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2472 assert(lcode == sizeof(vnode));
2473 oldinode = VNDISK_GET_INO(&vnode);
2474 /* Increment the version number by a whole lot to avoid problems with
2475 * clients that were promised new version numbers--but the file server
2476 * crashed before the versions were written to disk.
2479 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2480 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2482 assert(VALID_INO(newinode));
2483 SetSalvageDirHandle(&newdir, dir->rwVid, fileSysDevice, newinode);
2485 /* Assign . and .. vnode numbers from dir and vnode.parent.
2486 * The uniquifier for . is in the vnode.
2487 * The uniquifier for .. might be set to a bogus value of 1 and
2488 * the salvager will later clean it up.
2490 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(vnode.parent))) {
2491 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2494 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2496 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2501 /* didn't really build the new directory properly, let's just give up. */
2502 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2503 Log("Directory salvage returned code %d, continuing.\n", code);
2505 Log("also failed to decrement link count on new inode");
2509 Log("Checking the results of the directory salvage...\n");
2510 if (!DirOK(&newdir)) {
2511 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2512 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2517 VNDISK_SET_INO(&vnode, newinode);
2518 length = Length(&newdir);
2519 VNDISK_SET_LEN(&vnode, length);
2521 IH_IWRITE(vnodeInfo[vLarge].handle,
2522 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2524 assert(lcode == sizeof(vnode));
2527 nt_sync(fileSysDevice);
2529 sync(); /* this is slow, but hopefully rarely called. We don't have
2530 * an open FD on the file itself to fsync.
2534 vnodeInfo[vLarge].handle->ih_synced = 1;
2536 /* make sure old directory file is really closed */
2537 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2538 FDH_REALLYCLOSE(fdP);
2540 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2542 dir->dirHandle = newdir;
2546 JudgeEntry(void *dirVal, char *name, afs_int32 vnodeNumber,
2549 struct DirSummary *dir = (struct DirSummary *)dirVal;
2550 struct VnodeEssence *vnodeEssence;
2551 afs_int32 dirOrphaned, todelete;
2553 dirOrphaned = IsVnodeOrphaned(dir->vnodeNumber);
2555 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2556 if (vnodeEssence == NULL) {
2558 Log("dir vnode %u: invalid entry deleted: %s/%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2562 assert(Delete(&dir->dirHandle, name) == 0);
2567 #ifndef AFS_NAMEI_ENV
2568 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2569 * mount inode for the partition. If this inode were deleted, it would crash
2572 if (vnodeEssence->InodeNumber == 0) {
2573 Log("dir vnode %d: invalid entry: %s/%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2576 assert(Delete(&dir->dirHandle, name) == 0);
2583 if (!(vnodeNumber & 1) && !Showmode
2584 && !(vnodeEssence->count || vnodeEssence->unique
2585 || vnodeEssence->modeBits)) {
2586 Log("dir vnode %u: invalid entry: %s/%s (vnode %u, unique %u)%s\n",
2587 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
2588 vnodeNumber, unique,
2589 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
2594 assert(Delete(&dir->dirHandle, name) == 0);
2600 /* Check if the Uniquifiers match. If not, change the directory entry
2601 * so its unique matches the vnode unique. Delete if the unique is zero
2602 * or if the directory is orphaned.
2604 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
2605 if (!vnodeEssence->unique
2606 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
2607 /* This is an orphaned directory. Don't delete the . or ..
2608 * entry. Otherwise, it will get created in the next
2609 * salvage and deleted again here. So just skip it.
2614 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
2617 Log("dir vnode %u: %s/%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
2621 fid.Vnode = vnodeNumber;
2622 fid.Unique = vnodeEssence->unique;
2624 assert(Delete(&dir->dirHandle, name) == 0);
2626 assert(Create(&dir->dirHandle, name, &fid) == 0);
2629 return 0; /* no need to continue */
2632 if (strcmp(name, ".") == 0) {
2633 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
2636 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
2639 assert(Delete(&dir->dirHandle, ".") == 0);
2640 fid.Vnode = dir->vnodeNumber;
2641 fid.Unique = dir->unique;
2642 assert(Create(&dir->dirHandle, ".", &fid) == 0);
2645 vnodeNumber = fid.Vnode; /* Get the new Essence */
2646 unique = fid.Unique;
2647 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2650 } else if (strcmp(name, "..") == 0) {
2653 struct VnodeEssence *dotdot;
2654 pa.Vnode = dir->parent;
2655 dotdot = CheckVnodeNumber(pa.Vnode);
2656 assert(dotdot != NULL); /* XXX Should not be assert */
2657 pa.Unique = dotdot->unique;
2659 pa.Vnode = dir->vnodeNumber;
2660 pa.Unique = dir->unique;
2662 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
2664 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
2667 assert(Delete(&dir->dirHandle, "..") == 0);
2668 assert(Create(&dir->dirHandle, "..", &pa) == 0);
2671 vnodeNumber = pa.Vnode; /* Get the new Essence */
2673 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2675 dir->haveDotDot = 1;
2676 } else if (strncmp(name, ".__afs", 6) == 0) {
2678 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
2682 assert(Delete(&dir->dirHandle, name) == 0);
2684 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
2685 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
2688 if (ShowSuid && (vnodeEssence->modeBits & 06000))
2689 Log("FOUND suid/sgid file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
2690 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
2691 && !(vnodeEssence->modeBits & 0111)) {
2697 IH_INIT(ihP, fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
2698 vnodeEssence->InodeNumber);
2701 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
2705 size = FDH_SIZE(fdP);
2707 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, size, vnodeNumber);
2708 FDH_REALLYCLOSE(fdP);
2715 code = FDH_READ(fdP, buf, size);
2718 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
2719 Log("Volume %u (%s) mount point %s/%s to '%s' invalid, %s to symbolic link\n",
2720 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
2721 Testing ? "would convert" : "converted");
2722 vnodeEssence->modeBits |= 0111;
2723 vnodeEssence->changed = 1;
2724 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s/%s to '%s'\n",
2725 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
2726 dir->name ? dir->name : "??", name, buf);
2728 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
2729 dir->vname, vnodeNumber, size, code);
2731 FDH_REALLYCLOSE(fdP);
2734 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
2735 Log("FOUND root file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
2736 if (vnodeIdToClass(vnodeNumber) == vLarge
2737 && vnodeEssence->name == NULL) {
2739 if ((n = (char *)malloc(strlen(name) + 1)))
2741 vnodeEssence->name = n;
2744 /* The directory entry points to the vnode. Check to see if the
2745 * vnode points back to the directory. If not, then let the
2746 * directory claim it (else it might end up orphaned). Vnodes
2747 * already claimed by another directory are deleted from this
2748 * directory: hardlinks to the same vnode are not allowed
2749 * from different directories.
2751 if (vnodeEssence->parent != dir->vnodeNumber) {
2752 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
2753 /* Vnode does not point back to this directory.
2754 * Orphaned dirs cannot claim a file (it may belong to
2755 * another non-orphaned dir).
2758 Log("dir vnode %u: %s/%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
2760 vnodeEssence->parent = dir->vnodeNumber;
2761 vnodeEssence->changed = 1;
2763 /* Vnode was claimed by another directory */
2766 Log("dir vnode %u: %s/%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
2767 } else if (vnodeNumber == 1) {
2768 Log("dir vnode %d: %s/%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
2770 Log("dir vnode %u: %s/%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
2775 assert(Delete(&dir->dirHandle, name) == 0);
2780 /* This directory claims the vnode */
2781 vnodeEssence->claimed = 1;
2783 vnodeEssence->count--;
2788 DistilVnodeEssence(VolumeId rwVId, VnodeClass class, Inode ino, Unique * maxu)
2790 register struct VnodeInfo *vip = &vnodeInfo[class];
2791 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
2792 char buf[SIZEOF_LARGEDISKVNODE];
2793 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2795 StreamHandle_t *file;
2800 IH_INIT(vip->handle, fileSysDevice, rwVId, ino);
2801 fdP = IH_OPEN(vip->handle);
2802 assert(fdP != NULL);
2803 file = FDH_FDOPEN(fdP, "r+");
2804 assert(file != NULL);
2805 size = OS_SIZE(fdP->fd_fd);
2807 vip->nVnodes = (size / vcp->diskSize) - 1;
2808 if (vip->nVnodes > 0) {
2809 assert((vip->nVnodes + 1) * vcp->diskSize == size);
2810 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
2811 assert((vip->vnodes = (struct VnodeEssence *)
2812 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
2813 if (class == vLarge) {
2814 assert((vip->inodes = (Inode *)
2815 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
2824 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
2825 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
2826 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2827 nVnodes--, vnodeIndex++) {
2828 if (vnode->type != vNull) {
2829 register struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
2830 afs_fsize_t vnodeLength;
2831 vip->nAllocatedVnodes++;
2832 vep->count = vnode->linkCount;
2833 VNDISK_GET_LEN(vnodeLength, vnode);
2834 vep->blockCount = nBlocks(vnodeLength);
2835 vip->volumeBlockCount += vep->blockCount;
2836 vep->parent = vnode->parent;
2837 vep->unique = vnode->uniquifier;
2838 if (*maxu < vnode->uniquifier)
2839 *maxu = vnode->uniquifier;
2840 vep->modeBits = vnode->modeBits;
2841 vep->InodeNumber = VNDISK_GET_INO(vnode);
2842 vep->type = vnode->type;
2843 vep->author = vnode->author;
2844 vep->owner = vnode->owner;
2845 vep->group = vnode->group;
2846 if (vnode->type == vDirectory) {
2847 if (class != vLarge) {
2848 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2849 vip->nAllocatedVnodes--;
2850 memset(vnode, 0, sizeof(vnode));
2851 IH_IWRITE(vnodeInfo[vSmall].handle,
2852 vnodeIndexOffset(vcp, vnodeNumber),
2853 (char *)&vnode, sizeof(vnode));
2856 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
2865 GetDirName(VnodeId vnode, struct VnodeEssence *vp, char *path)
2867 struct VnodeEssence *parentvp;
2873 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(vp->parent))
2874 && GetDirName(vp->parent, parentvp, path)) {
2876 strcat(path, vp->name);
2882 /* To determine if a vnode is orhpaned or not, the vnode and all its parent
2883 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
2886 IsVnodeOrphaned(VnodeId vnode)
2888 struct VnodeEssence *vep;
2891 return (1); /* Vnode zero does not exist */
2893 return (0); /* The root dir vnode is always claimed */
2894 vep = CheckVnodeNumber(vnode); /* Get the vnode essence */
2895 if (!vep || !vep->claimed)
2896 return (1); /* Vnode is not claimed - it is orphaned */
2898 return (IsVnodeOrphaned(vep->parent));
2902 SalvageDir(char *name, VolumeId rwVid, struct VnodeInfo *dirVnodeInfo,
2903 IHandle_t * alinkH, int i, struct DirSummary *rootdir,
2906 static struct DirSummary dir;
2907 static struct DirHandle dirHandle;
2908 struct VnodeEssence *parent;
2909 static char path[MAXPATHLEN];
2912 if (dirVnodeInfo->vnodes[i].salvaged)
2913 return; /* already salvaged */
2916 dirVnodeInfo->vnodes[i].salvaged = 1;
2918 if (dirVnodeInfo->inodes[i] == 0)
2919 return; /* Not allocated to a directory */
2921 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
2922 if (dirVnodeInfo->vnodes[i].parent) {
2923 Log("Bad parent, vnode 1; %s...\n",
2924 (Testing ? "skipping" : "salvaging"));
2925 dirVnodeInfo->vnodes[i].parent = 0;
2926 dirVnodeInfo->vnodes[i].changed = 1;
2929 parent = CheckVnodeNumber(dirVnodeInfo->vnodes[i].parent);
2930 if (parent && parent->salvaged == 0)
2931 SalvageDir(name, rwVid, dirVnodeInfo, alinkH,
2932 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
2933 rootdir, rootdirfound);
2936 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
2937 dir.unique = dirVnodeInfo->vnodes[i].unique;
2940 dir.parent = dirVnodeInfo->vnodes[i].parent;
2941 dir.haveDot = dir.haveDotDot = 0;
2942 dir.ds_linkH = alinkH;
2943 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, fileSysDevice,
2944 dirVnodeInfo->inodes[i]);
2946 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
2949 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
2950 (Testing ? "skipping" : "salvaging"));
2953 CopyAndSalvage(&dir);
2957 dirHandle = dir.dirHandle;
2960 GetDirName(bitNumberToVnodeNumber(i, vLarge),
2961 &dirVnodeInfo->vnodes[i], path);
2964 /* If enumeration failed for random reasons, we will probably delete
2965 * too much stuff, so we guard against this instead.
2967 assert(EnumerateDir(&dirHandle, JudgeEntry, &dir) == 0);
2970 /* Delete the old directory if it was copied in order to salvage.
2971 * CopyOnWrite has written the new inode # to the disk, but we still
2972 * have the old one in our local structure here. Thus, we idec the
2976 if (dir.copied && !Testing) {
2977 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
2979 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
2982 /* Remember rootdir DirSummary _after_ it has been judged */
2983 if (dir.vnodeNumber == 1 && dir.unique == 1) {
2984 memcpy(rootdir, &dir, sizeof(struct DirSummary));
2992 SalvageVolume(register struct InodeSummary *rwIsp, IHandle_t * alinkH)
2994 /* This routine, for now, will only be called for read-write volumes */
2996 int BlocksInVolume = 0, FilesInVolume = 0;
2997 register VnodeClass class;
2998 struct DirSummary rootdir, oldrootdir;
2999 struct VnodeInfo *dirVnodeInfo;
3000 struct VnodeDiskObject vnode;
3001 VolumeDiskData volHeader;
3003 int orphaned, rootdirfound = 0;
3004 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3005 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3006 struct VnodeEssence *vep;
3009 afs_sfsize_t nBytes;
3011 VnodeId LFVnode, ThisVnode;
3012 Unique LFUnique, ThisUnique;
3015 vid = rwIsp->volSummary->header.id;
3016 IH_INIT(h, fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3017 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3018 assert(nBytes == sizeof(volHeader));
3019 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3020 assert(volHeader.destroyMe != DESTROY_ME);
3021 /* (should not have gotten this far with DESTROY_ME flag still set!) */
3023 DistilVnodeEssence(vid, vLarge, rwIsp->volSummary->header.largeVnodeIndex,
3025 DistilVnodeEssence(vid, vSmall, rwIsp->volSummary->header.smallVnodeIndex,
3028 dirVnodeInfo = &vnodeInfo[vLarge];
3029 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3030 SalvageDir(volHeader.name, vid, dirVnodeInfo, alinkH, i, &rootdir,
3034 nt_sync(fileSysDevice);
3036 sync(); /* This used to be done lower level, for every dir */
3043 /* Parse each vnode looking for orphaned vnodes and
3044 * connect them to the tree as orphaned (if requested).
3046 oldrootdir = rootdir;
3047 for (class = 0; class < nVNODECLASSES; class++) {
3048 for (v = 0; v < vnodeInfo[class].nVnodes; v++) {
3049 vep = &(vnodeInfo[class].vnodes[v]);
3050 ThisVnode = bitNumberToVnodeNumber(v, class);
3051 ThisUnique = vep->unique;
3053 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3054 continue; /* Ignore unused, claimed, and root vnodes */
3056 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3057 * entry in this vnode had incremented the parent link count (In
3058 * JudgeEntry()). We need to go to the parent and decrement that
3059 * link count. But if the parent's unique is zero, then the parent
3060 * link count was not incremented in JudgeEntry().
3062 if (class == vLarge) { /* directory vnode */
3063 pv = vnodeIdToBitNumber(vep->parent);
3064 if (vnodeInfo[vLarge].vnodes[pv].unique != 0)
3065 vnodeInfo[vLarge].vnodes[pv].count++;
3069 continue; /* If no rootdir, can't attach orphaned files */
3071 /* Here we attach orphaned files and directories into the
3072 * root directory, LFVnode, making sure link counts stay correct.
3074 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3075 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3076 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3078 /* Update this orphaned vnode's info. Its parent info and
3079 * link count (do for orphaned directories and files).
3081 vep->parent = LFVnode; /* Parent is the root dir */
3082 vep->unique = LFUnique;
3085 vep->count--; /* Inc link count (root dir will pt to it) */
3087 /* If this orphaned vnode is a directory, change '..'.
3088 * The name of the orphaned dir/file is unknown, so we
3089 * build a unique name. No need to CopyOnWrite the directory
3090 * since it is not connected to tree in BK or RO volume and
3091 * won't be visible there.
3093 if (class == vLarge) {
3097 /* Remove and recreate the ".." entry in this orphaned directory */
3098 SetSalvageDirHandle(&dh, vid, fileSysDevice,
3099 vnodeInfo[class].inodes[v]);
3101 pa.Unique = LFUnique;
3102 assert(Delete(&dh, "..") == 0);
3103 assert(Create(&dh, "..", &pa) == 0);
3105 /* The original parent's link count was decremented above.
3106 * Here we increment the new parent's link count.
3108 pv = vnodeIdToBitNumber(LFVnode);
3109 vnodeInfo[vLarge].vnodes[pv].count--;
3113 /* Go to the root dir and add this entry. The link count of the
3114 * root dir was incremented when ".." was created. Try 10 times.
3116 for (j = 0; j < 10; j++) {
3117 pa.Vnode = ThisVnode;
3118 pa.Unique = ThisUnique;
3120 (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
3122 vLarge) ? "__ORPHANDIR__" :
3123 "__ORPHANFILE__"), ThisVnode,
3126 CopyOnWrite(&rootdir);
3127 code = Create(&rootdir.dirHandle, npath, &pa);
3131 ThisUnique += 50; /* Try creating a different file */
3134 Log("Attaching orphaned %s to volume's root dir as %s\n",
3135 ((class == vLarge) ? "directory" : "file"), npath);
3137 } /* for each vnode in the class */
3138 } /* for each class of vnode */
3140 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
3142 if (!oldrootdir.copied && rootdir.copied) {
3144 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
3147 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
3150 DFlush(); /* Flush the changes */
3151 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
3152 Log("Cannot attach orphaned files and directories: Root directory not found\n");
3153 orphans = ORPH_IGNORE;
3156 /* Write out all changed vnodes. Orphaned files and directories
3157 * will get removed here also (if requested).
3159 for (class = 0; class < nVNODECLASSES; class++) {
3160 int nVnodes = vnodeInfo[class].nVnodes;
3161 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3162 struct VnodeEssence *vnodes = vnodeInfo[class].vnodes;
3163 FilesInVolume += vnodeInfo[class].nAllocatedVnodes;
3164 BlocksInVolume += vnodeInfo[class].volumeBlockCount;
3165 for (i = 0; i < nVnodes; i++) {
3166 register struct VnodeEssence *vnp = &vnodes[i];
3167 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
3169 /* If the vnode is good but is unclaimed (not listed in
3170 * any directory entries), then it is orphaned.
3173 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(vnodeNumber))) {
3174 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
3178 if (vnp->changed || vnp->count) {
3182 IH_IREAD(vnodeInfo[class].handle,
3183 vnodeIndexOffset(vcp, vnodeNumber),
3184 (char *)&vnode, sizeof(vnode));
3185 assert(nBytes == sizeof(vnode));
3187 vnode.parent = vnp->parent;
3188 oldCount = vnode.linkCount;
3189 vnode.linkCount = vnode.linkCount - vnp->count;
3192 orphaned = IsVnodeOrphaned(vnodeNumber);
3194 if (!vnp->todelete) {
3195 /* Orphans should have already been attached (if requested) */
3196 assert(orphans != ORPH_ATTACH);
3197 oblocks += vnp->blockCount;
3200 if (((orphans == ORPH_REMOVE) || vnp->todelete)
3202 BlocksInVolume -= vnp->blockCount;
3204 if (VNDISK_GET_INO(&vnode)) {
3206 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
3209 memset(&vnode, 0, sizeof(vnode));
3211 } else if (vnp->count) {
3213 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
3216 vnode.modeBits = vnp->modeBits;
3219 vnode.dataVersion++;
3222 IH_IWRITE(vnodeInfo[class].handle,
3223 vnodeIndexOffset(vcp, vnodeNumber),
3224 (char *)&vnode, sizeof(vnode));
3225 assert(nBytes == sizeof(vnode));
3231 if (!Showmode && ofiles) {
3232 Log("%s %d orphaned files and directories (approx. %u KB)\n",
3234 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
3238 for (class = 0; class < nVNODECLASSES; class++) {
3239 register struct VnodeInfo *vip = &vnodeInfo[class];
3240 for (i = 0; i < vip->nVnodes; i++)
3241 if (vip->vnodes[i].name)
3242 free(vip->vnodes[i].name);
3249 /* Set correct resource utilization statistics */
3250 volHeader.filecount = FilesInVolume;
3251 volHeader.diskused = BlocksInVolume;
3253 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
3254 if (volHeader.uniquifier < (maxunique + 1)) {
3256 Log("Volume uniquifier is too low; fixed\n");
3257 /* Plus 2,000 in case there are workstations out there with
3258 * cached vnodes that have since been deleted
3260 volHeader.uniquifier = (maxunique + 1 + 2000);
3263 /* Turn off the inUse bit; the volume's been salvaged! */
3264 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
3265 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
3266 volHeader.inService = 1; /* allow service again */
3267 volHeader.needsCallback = (VolumeChanged != 0);
3268 volHeader.dontSalvage = DONT_SALVAGE;
3271 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
3272 assert(nBytes == sizeof(volHeader));
3275 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
3276 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
3277 FilesInVolume, BlocksInVolume);
3279 IH_RELEASE(vnodeInfo[vSmall].handle);
3280 IH_RELEASE(vnodeInfo[vLarge].handle);
3286 ClearROInUseBit(struct VolumeSummary *summary)
3288 IHandle_t *h = summary->volumeInfoHandle;
3289 afs_sfsize_t nBytes;
3291 VolumeDiskData volHeader;
3293 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3294 assert(nBytes == sizeof(volHeader));
3295 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3296 volHeader.inUse = 0;
3297 volHeader.needsSalvaged = 0;
3298 volHeader.inService = 1;
3299 volHeader.dontSalvage = DONT_SALVAGE;
3301 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
3302 assert(nBytes == sizeof(volHeader));
3307 * Possibly delete the volume.
3309 * deleteMe - Always do so, only a partial volume.
3312 MaybeZapVolume(register struct InodeSummary *isp, char *message, int deleteMe,
3315 if (readOnly(isp) || deleteMe) {
3316 if (isp->volSummary && isp->volSummary->fileName) {
3319 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
3321 Log("It will be deleted on this server (you may find it elsewhere)\n");
3324 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
3326 Log("it will be deleted instead. It should be recloned.\n");
3331 sprintf(path, "%s/%s", fileSysPath, isp->volSummary->fileName);
3333 code = VDestroyVolumeDiskHeader(fileSysPartition, isp->volumeId, isp->RWvolumeId);
3335 Log("Error %ld destroying volume disk header for volume %lu\n",
3336 afs_printable_int32_ld(code),
3337 afs_printable_uint32_lu(isp->volumeId));
3340 /* make sure we actually delete the fileName file; ENOENT
3341 * is fine, since VDestroyVolumeDiskHeader probably already
3343 if (unlink(path) && errno != ENOENT) {
3344 Log("Unable to unlink %s (errno = %d)\n", path, errno);
3348 } else if (!check) {
3349 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
3351 Abort("Salvage of volume %u aborted\n", isp->volumeId);
3357 AskOffline(VolumeId volumeId, char * partition)
3361 for (i = 0; i < 3; i++) {
3362 code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_OFF, FSYNC_SALVAGE, NULL);
3364 if (code == SYNC_OK) {
3366 } else if (code == SYNC_DENIED) {
3367 #ifdef DEMAND_ATTACH_ENABLE
3368 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
3370 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
3372 Abort("Salvage aborted\n");
3373 } else if (code == SYNC_BAD_COMMAND) {
3374 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
3376 #ifdef DEMAND_ATTACH_ENABLE
3377 Log("AskOffline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
3379 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
3381 Abort("Salvage aborted\n");
3384 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
3385 FSYNC_clientFinis();
3389 if (code != SYNC_OK) {
3390 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
3391 Abort("Salvage aborted\n");
3394 #ifdef AFS_DEMAND_ATTACH_FS
3395 /* set inUse = programType in the volume header. We do this in case
3396 * the fileserver restarts/crashes while we are salvaging.
3397 * Otherwise, the fileserver could attach the volume again on
3398 * startup while we are salvaging, which would be very bad, or
3399 * schedule another salvage while we are salvaging, which would be
3403 struct VolumeHeader header;
3404 struct VolumeDiskHeader diskHeader;
3405 struct VolumeDiskData volHeader;
3407 code = VReadVolumeDiskHeader(volumeId, fileSysPartition, &diskHeader);
3412 DiskToVolumeHeader(&header, &diskHeader);
3414 IH_INIT(h, fileSysDevice, header.parent, header.volumeInfo);
3415 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
3416 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
3422 volHeader.inUse = programType;
3424 /* If we can't re-write the header, bail out and error. We don't
3425 * assert when reading the header, since it's possible the
3426 * header isn't really there (when there's no data associated
3427 * with the volume; we just delete the vol header file in that
3428 * case). But if it's there enough that we can read it, but
3429 * somehow we cannot write to it to signify we're salvaging it,
3430 * we've got a big problem and we cannot continue. */
3431 assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
3435 #endif /* AFS_DEMAND_ATTACH_FS */
3439 AskOnline(VolumeId volumeId, char *partition)
3443 for (i = 0; i < 3; i++) {
3444 code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
3446 if (code == SYNC_OK) {
3448 } else if (code == SYNC_DENIED) {
3449 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, partition);
3450 } else if (code == SYNC_BAD_COMMAND) {
3451 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
3453 #ifdef DEMAND_ATTACH_ENABLE
3454 Log("AskOnline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
3456 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
3461 Log("AskOnline: request for fileserver to take volume offline failed; trying again...\n");
3462 FSYNC_clientFinis();
3469 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
3471 /* Volume parameter is passed in case iopen is upgraded in future to
3472 * require a volume Id to be passed
3475 IHandle_t *srcH, *destH;
3476 FdHandle_t *srcFdP, *destFdP;
3479 IH_INIT(srcH, device, rwvolume, inode1);
3480 srcFdP = IH_OPEN(srcH);
3481 assert(srcFdP != NULL);
3482 IH_INIT(destH, device, rwvolume, inode2);
3483 destFdP = IH_OPEN(destH);
3485 while ((n = FDH_READ(srcFdP, buf, sizeof(buf))) > 0)
3486 assert(FDH_WRITE(destFdP, buf, n) == n);
3488 FDH_REALLYCLOSE(srcFdP);
3489 FDH_REALLYCLOSE(destFdP);
3496 PrintInodeList(void)
3498 register struct ViceInodeInfo *ip;
3499 struct ViceInodeInfo *buf;
3500 struct afs_stat status;
3501 register int nInodes;
3503 assert(afs_fstat(inodeFd, &status) == 0);
3504 buf = (struct ViceInodeInfo *)malloc(status.st_size);
3505 assert(buf != NULL);
3506 nInodes = status.st_size / sizeof(struct ViceInodeInfo);
3507 assert(read(inodeFd, buf, status.st_size) == status.st_size);
3508 for (ip = buf; nInodes--; ip++) {
3509 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
3510 PrintInode(NULL, ip->inodeNumber), ip->linkCount,
3511 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
3512 ip->u.param[2], ip->u.param[3]);
3518 PrintInodeSummary(void)
3521 struct InodeSummary *isp;
3523 for (i = 0; i < nVolumesInInodeFile; i++) {
3524 isp = &inodeSummary[i];
3525 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
3530 PrintVolumeSummary(void)
3533 struct VolumeSummary *vsp;
3535 for (i = 0, vsp = volumeSummaryp; i < nVolumes; vsp++, i++) {
3536 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
3546 assert(0); /* Fork is never executed in the NT code path */
3550 #ifdef AFS_DEMAND_ATTACH_FS
3551 if ((f == 0) && (programType == salvageServer)) {
3552 /* we are a salvageserver child */
3553 #ifdef FSSYNC_BUILD_CLIENT
3554 VChildProcReconnectFS_r();
3556 #ifdef SALVSYNC_BUILD_CLIENT
3560 #endif /* AFS_DEMAND_ATTACH_FS */
3561 #endif /* !AFS_NT40_ENV */
3571 #ifdef AFS_DEMAND_ATTACH_FS
3572 if (programType == salvageServer) {
3573 #ifdef SALVSYNC_BUILD_CLIENT
3576 #ifdef FSSYNC_BUILD_CLIENT
3580 #endif /* AFS_DEMAND_ATTACH_FS */
3583 if (main_thread != pthread_self())
3584 pthread_exit((void *)code);
3597 pid = wait(&status);
3599 if (WCOREDUMP(status))
3600 Log("\"%s\" core dumped!\n", prog);
3601 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp - render a time value as a local-time string for log lines.
 *
 * clock     - the time to format.
 * precision - non-zero selects "MM/DD/YYYY HH:MM:SS", zero selects
 *             "MM/DD/YYYY HH:MM".
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call, so the result must be used or copied before calling again.  When
 * the time cannot be converted or does not fit the buffer, a fixed
 * placeholder made of dashes is returned instead.
 */
char *
TimeStamp(time_t clock, int precision)
{
    static char timestamp[20];  /* 19 characters of the long form + NUL */
    const char *fmt = precision ? "%m/%d/%Y %H:%M:%S" : "%m/%d/%Y %H:%M";
    struct tm *lt = localtime(&clock);

    /* localtime() returns NULL for a time it cannot represent, and
     * strftime() leaves the buffer indeterminate when it returns 0. */
    if (lt == NULL || strftime(timestamp, sizeof(timestamp), fmt, lt) == 0) {
        strcpy(timestamp,
               precision ? "--/--/---- --:--:--" : "--/--/---- --:--");
    }
    return timestamp;
}
3620 CheckLogFile(char * log_path)
3622 char oldSlvgLog[AFSDIR_PATH_MAX];
3624 #ifndef AFS_NT40_ENV
3631 strcpy(oldSlvgLog, log_path);
3632 strcat(oldSlvgLog, ".old");
3634 renamefile(log_path, oldSlvgLog);
3635 logFile = afs_fopen(log_path, "a");
3637 if (!logFile) { /* still nothing, use stdout */
3641 #ifndef AFS_NAMEI_ENV
3642 AFS_DEBUG_IOPS_LOG(logFile);
3647 #ifndef AFS_NT40_ENV
3649 TimeStampLogFile(char * log_path)
3651 char stampSlvgLog[AFSDIR_PATH_MAX];
3656 lt = localtime(&now);
3657 (void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
3658 "%s.%04d-%02d-%02d.%02d:%02d:%02d",
3659 log_path, lt->tm_year + 1900,
3660 lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
3663 /* try to link the logfile to a timestamped filename */
3664 /* if it fails, oh well, nothing we can do */
3665 link(log_path, stampSlvgLog);
3674 #ifndef AFS_NT40_ENV
3676 printf("Can't show log since using syslog.\n");
3687 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
3690 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
3693 while (fgets(line, sizeof(line), logFile))
/*
 * Log - printf-style logging routine.
 *
 * format, ...: printf-style format string and its arguments.
 *
 * The message is first formatted into tmp with afs_vsnprintf(), which is
 * given sizeof tmp as its limit.  Two destinations are visible:
 *   - syslog() at LOG_INFO priority, compiled only when AFS_NT40_ENV is
 *     not defined;
 *   - logFile, where the message is preceded by the current time as
 *     rendered by TimeStamp() called with precision 1.
 * In both cases the already-formatted text is passed through a "%s"
 * format, so any '%' it contains is not interpreted a second time.
 *
 * NOTE(review): the conditions that select between the two destinations
 * are not part of this listing -- confirm against the full function.
 */
3700 Log(const char *format, ...)
3706 va_start(args, format);
3707 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
3709 #ifndef AFS_NT40_ENV
3711 syslog(LOG_INFO, "%s", tmp);
3715 gettimeofday(&now, 0);
3716 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort - printf-style routine that reports a message before giving up.
 *
 * format, ...: printf-style format string and its arguments.
 *
 * The message is formatted into tmp with afs_vsnprintf(), limited to
 * sizeof tmp.  Two destinations are visible: syslog() at LOG_INFO priority
 * (compiled only when AFS_NT40_ENV is not defined) and logFile, to which
 * the text is written as-is, with no timestamp prefix.
 *
 * NOTE(review): no check that logFile is non-NULL is visible before the
 * fprintf() -- confirm one exists or that logFile is always set by then.
 * NOTE(review): what happens after the message is emitted (presumably
 * process termination, given the name) is not part of this listing.
 */
3722 Abort(const char *format, ...)
3727 va_start(args, format);
3728 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
3730 #ifndef AFS_NT40_ENV
3732 syslog(LOG_INFO, "%s", tmp);
3736 fprintf(logFile, "%s", tmp);
3751 p = (char *)malloc(strlen(s) + 1);
3757 /* Remove the FORCESALVAGE file */
3759 RemoveTheForce(char *path)
3762 struct afs_stat force; /* so we can use afs_stat to find it */
3763 strcpy(target,path);
3764 strcat(target,"/FORCESALVAGE");
3765 if (!Testing && ForceSalvage) {
3766 if (afs_stat(target,&force) == 0) unlink(target);
3770 #ifndef AFS_AIX32_ENV
3772 * UseTheForceLuke - see if we can use the force
3775 UseTheForceLuke(char *path)
3777 struct afs_stat force;
3779 strcpy(target,path);
3780 strcat(target,"/FORCESALVAGE");
3782 return (afs_stat(target, &force) == 0);
3786 * UseTheForceLuke - see if we can use the force
3789 * The VRMIX fsck will not muck with the filesystem it is supposedly
3790 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
3791 * muck directly with the root inode, which is within the normal
3793 * ListViceInodes() has a side effect of setting ForceSalvage if
3794 * it detects a need, based on root inode examination.
3797 UseTheForceLuke(char *path)
3800 return 0; /* sorry OB1 */
3805 /* NT support routines */
3807 static char execpathname[MAX_PATH];
3809 nt_SalvagePartition(char *partName, int jobn)
3814 if (!*execpathname) {
3815 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
3816 if (!n || n == 1023)
3819 job.cj_magic = SALVAGER_MAGIC;
3820 job.cj_number = jobn;
3821 (void)strcpy(job.cj_part, partName);
3822 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage - accept a job description handed over as an
 * opaque buffer and open the log file for that job.
 *
 * datap: buffer interpreted as a childJob_t.
 * len:   size of that buffer in bytes.
 *
 * The buffer is checked in two ways: len must equal sizeof(childJob_t) and
 * the cj_magic field must equal SALVAGER_MAGIC.  The log name is then
 * built as "<AFSDIR_SERVER_SLVGLOG_FILEPATH>.<number>" and opened for
 * writing (truncating any existing file), with the stream stored in
 * logFile.
 *
 * NOTE(review): sprintf() is unbounded; logname holds AFSDIR_PATH_MAX
 * bytes -- confirm the base path leaves room for the ".<number>" suffix or
 * use a bounded formatter.
 * NOTE(review): the actions taken when a check fails, the second argument
 * of the sprintf(), and the handling of a failed afs_fopen() are not part
 * of this listing.
 */
3827 nt_SetupPartitionSalvage(void *datap, int len)
3829 childJob_t *jobp = (childJob_t *) datap;
3830 char logname[AFSDIR_PATH_MAX];
3832 if (len != sizeof(childJob_t))
3834 if (jobp->cj_magic != SALVAGER_MAGIC)
3839 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
3841 logFile = afs_fopen(logname, "w");
3849 #endif /* AFS_NT40_ENV */