2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
91 #include <sys/param.h>
95 #endif /* ITIMER_REAL */
101 #include <sys/stat.h>
106 #include <WINNT/afsevent.h>
108 #if defined(AFS_AIX_ENV) || defined(AFS_SUN4_ENV)
109 #define WCOREDUMP(x) (x & 0200)
112 #include <afs/afsint.h>
113 #include <afs/assert.h>
114 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
115 #if defined(AFS_VFSINCL_ENV)
116 #include <sys/vnode.h>
118 #include <sys/fs/ufs_inode.h>
120 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
121 #include <ufs/ufs/dinode.h>
122 #include <ufs/ffs/fs.h>
124 #include <ufs/inode.h>
127 #else /* AFS_VFSINCL_ENV */
129 #include <ufs/inode.h>
130 #else /* AFS_OSF_ENV */
131 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
132 #include <sys/inode.h>
135 #endif /* AFS_VFSINCL_ENV */
136 #endif /* AFS_SGI_ENV */
139 #include <sys/lockf.h>
143 #include <checklist.h>
145 #if defined(AFS_SGI_ENV)
150 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
153 #include <sys/mnttab.h>
154 #include <sys/mntent.h>
159 #endif /* AFS_SGI_ENV */
160 #endif /* AFS_HPUX_ENV */
165 #include <afs/osi_inode.h>
169 #include <afs/afsutil.h>
170 #include <afs/fileutil.h>
171 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
179 #include <afs/afssyscalls.h>
183 #include "partition.h"
184 #include "daemon_com.h"
186 #include "salvsync.h"
187 #include "viceinode.h"
189 #include "volinodes.h" /* header magic number, etc. stuff */
190 #include "vol-salvage.h"
191 #include "vol_internal.h"
197 /*@+fcnmacros +macrofcndecl@*/
200 extern off64_t afs_lseek(int FD, off64_t O, int F);
201 #endif /*S_SPLINT_S */
202 #define afs_lseek(FD, O, F) lseek64(FD, (off64_t) (O), F)
203 #define afs_stat stat64
204 #define afs_fstat fstat64
205 #define afs_open open64
206 #define afs_fopen fopen64
207 #else /* !O_LARGEFILE */
209 extern off_t afs_lseek(int FD, off_t O, int F);
210 #endif /*S_SPLINT_S */
211 #define afs_lseek(FD, O, F) lseek(FD, (off_t) (O), F)
212 #define afs_stat stat
213 #define afs_fstat fstat
214 #define afs_open open
215 #define afs_fopen fopen
216 #endif /* !O_LARGEFILE */
217 /*@=fcnmacros =macrofcndecl@*/
220 extern void *calloc();
222 static char *TimeStamp(time_t clock, int precision);
225 int debug; /* -d flag */
226 extern int Testing; /* -n flag */
227 int ListInodeOption; /* -i flag */
228 int ShowRootFiles; /* -r flag */
229 int RebuildDirs; /* -sal flag */
230 int Parallel = 4; /* -para X flag */
231 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
232 int forceR = 0; /* -b flag */
233 int ShowLog = 0; /* -showlog flag */
234 int ShowSuid = 0; /* -showsuid flag */
235 int ShowMounts = 0; /* -showmounts flag */
236 int orphans = ORPH_IGNORE; /* -orphans option */
241 int useSyslog = 0; /* -syslog flag */
242 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
251 #define MAXPARALLEL 32
253 int OKToZap; /* -o flag */
254 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
255 * in the volume header */
257 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
259 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
261 Device fileSysDevice; /* The device number of the current
262 * partition being salvaged */
266 char *fileSysPath; /* The path of the mounted partition currently
267 * being salvaged, i.e. the directory
268 * containing the volume headers */
270 char *fileSysPathName; /* NT needs this to make name pretty in log. */
271 IHandle_t *VGLinkH; /* Link handle for current volume group. */
272 int VGLinkH_cnt; /* # of references to lnk handle. */
273 struct DiskPartition64 *fileSysPartition; /* Partition being salvaged */
275 char *fileSysDeviceName; /* The block device where the file system
276 * being salvaged was mounted */
277 char *filesysfulldev;
279 int VolumeChanged; /* Set by any routine which would change the volume in
280 * a way which would require callbacks to be broken if the
281 * volume was put back on line by an active file server */
283 VolumeDiskData VolInfo; /* A copy of the last good or salvaged volume header dealt with */
285 int nVolumesInInodeFile; /* Number of read-write volumes summarized */
286 int inodeFd; /* File descriptor for inode file */
289 struct VnodeInfo vnodeInfo[nVNODECLASSES];
292 struct VolumeSummary *volumeSummaryp; /* Holds all the volumes in a part */
293 int nVolumes; /* Number of volumes (read-write and read-only)
294 * in volume summary */
300 /* Forward declarations */
301 /*@printflike@*/ void Log(const char *format, ...);
302 /*@printflike@*/ void Abort(const char *format, ...);
303 static int IsVnodeOrphaned(VnodeId vnode);
305 /* Uniquifier stored in the Inode */
310 return (u & 0x3fffff);
312 #if defined(AFS_SGI_EXMAG)
313 return (u & SGI_UNIQMASK);
316 #endif /* AFS_SGI_EXMAG */
321 BadError(register int aerror)
323 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
325 return 0; /* otherwise may be transient, e.g. EMFILE */
330 char *save_args[MAX_ARGS];
332 extern pthread_t main_thread;
333 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
336 /* Get the salvage lock if not already held. Hold until process exits. */
338 ObtainSalvageLock(void)
344 (FD_t)CreateFile(AFSDIR_SERVER_SLVGLOCK_FILEPATH, 0, 0, NULL,
345 OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
346 if (salvageLock == INVALID_FD) {
348 "salvager: There appears to be another salvager running! Aborted.\n");
353 afs_open(AFSDIR_SERVER_SLVGLOCK_FILEPATH, O_CREAT | O_RDWR, 0666);
354 if (salvageLock < 0) {
356 "salvager: can't open salvage lock file %s, aborting\n",
357 AFSDIR_SERVER_SLVGLOCK_FILEPATH);
360 #ifdef AFS_DARWIN_ENV
361 if (flock(salvageLock, LOCK_EX) == -1) {
363 if (lockf(salvageLock, F_LOCK, 0) == -1) {
366 "salvager: There appears to be another salvager running! Aborted.\n");
373 #ifdef AFS_SGI_XFS_IOPS_ENV
374 /* Check if the given partition is mounted. For XFS, the root inode is not a
375 * constant. So we check the hard way.
378 IsPartitionMounted(char *part)
381 struct mntent *mntent;
383 assert(mntfp = setmntent(MOUNTED, "r"));
384 while (mntent = getmntent(mntfp)) {
385 if (!strcmp(part, mntent->mnt_dir))
390 return mntent ? 1 : 1;
393 /* Check if the given inode is the root of the filesystem. */
394 #ifndef AFS_SGI_XFS_IOPS_ENV
396 IsRootInode(struct afs_stat *status)
399 * The root inode is not a fixed value in XFS partitions. So we need to
400 * see if the partition is in the list of mounted partitions. This only
401 * affects the SalvageFileSys path, so we check there.
403 return (status->st_ino == ROOTINODE);
408 #ifndef AFS_NAMEI_ENV
409 /* We don't want to salvage big files filesystems, since we can't put volumes on
413 CheckIfBigFilesFS(char *mountPoint, char *devName)
415 struct superblock fs;
418 if (strncmp(devName, "/dev/", 5)) {
419 (void)sprintf(name, "/dev/%s", devName);
421 (void)strcpy(name, devName);
424 if (ReadSuper(&fs, name) < 0) {
425 Log("Unable to read superblock. Not salvaging partition %s.\n",
429 if (IsBigFilesFileSystem(&fs)) {
430 Log("Partition %s is a big files filesystem, not salvaging.\n",
440 #define HDSTR "\\Device\\Harddisk"
441 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
443 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
448 static int dowarn = 1;
450 if (!QueryDosDevice(p1->devName, res, RES_LEN - 1))
452 if (strncmp(res, HDSTR, HDLEN)) {
455 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
456 res, HDSTR, p1->devName);
460 d1 = atoi(&res[HDLEN]);
462 if (!QueryDosDevice(p2->devName, res, RES_LEN - 1))
464 if (strncmp(res, HDSTR, HDLEN)) {
467 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
468 res, HDSTR, p2->devName);
472 d2 = atoi(&res[HDLEN]);
477 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
480 /* This assumes that two partitions with the same device number divided by
481 * PartsPerDisk are on the same disk.
484 SalvageFileSysParallel(struct DiskPartition64 *partP)
487 struct DiskPartition64 *partP;
488 int pid; /* Pid for this job */
489 int jobnumb; /* Log file job number */
490 struct job *nextjob; /* Next partition on disk to salvage */
492 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
493 struct job *thisjob = 0;
494 static int numjobs = 0;
495 static int jobcount = 0;
501 char logFileName[256];
505 /* We have a partition to salvage. Copy it into thisjob */
506 thisjob = (struct job *)malloc(sizeof(struct job));
508 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
511 memset(thisjob, 0, sizeof(struct job));
512 thisjob->partP = partP;
513 thisjob->jobnumb = jobcount;
515 } else if (jobcount == 0) {
516 /* We are asking to wait for all jobs (partp == 0), yet we never
519 Log("No file system partitions named %s* found; not salvaged\n",
520 VICE_PARTITION_PREFIX);
524 if (debug || Parallel == 1) {
526 SalvageFileSys(thisjob->partP, 0);
533 /* Check to see if thisjob is for a disk that we are already
534 * salvaging. If it is, link it in as the next job to do. The
535 * jobs array has 1 entry per disk being salvaged. numjobs is
536 * the total number of disks currently being salvaged. In
537 * order to keep the jobs array compact, when a disk is
538 * completed, the highest element in the jobs array is moved
539 * down to the now open slot.
541 for (j = 0; j < numjobs; j++) {
542 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
543 /* On same disk, add it to this list and return */
544 thisjob->nextjob = jobs[j]->nextjob;
545 jobs[j]->nextjob = thisjob;
552 /* Loop until we start thisjob or until all existing jobs are finished */
553 while (thisjob || (!partP && (numjobs > 0))) {
554 startjob = -1; /* No new job to start */
556 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
557 /* Either the max jobs are running or we have to wait for all
558 * the jobs to finish. In either case, we wait for at least one
559 * job to finish. When it's done, clean up after it.
561 pid = wait(&wstatus);
563 for (j = 0; j < numjobs; j++) { /* Find which job it is */
564 if (pid == jobs[j]->pid)
568 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
569 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
572 numjobs--; /* job no longer running */
573 oldjob = jobs[j]; /* remember */
574 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
575 free(oldjob); /* free the old job */
577 /* If there is another partition on the disk to salvage, then
578 * say we will start it (startjob). If not, then put thisjob there
579 * and say we will start it.
581 if (jobs[j]) { /* Another partitions to salvage */
582 startjob = j; /* Will start it */
583 } else { /* There is not another partition to salvage */
585 jobs[j] = thisjob; /* Add thisjob */
587 startjob = j; /* Will start it */
589 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
590 startjob = -1; /* Don't start it - already running */
594 /* We don't have to wait for a job to complete */
596 jobs[numjobs] = thisjob; /* Add this job */
598 startjob = numjobs; /* Will start it */
602 /* Start up a new salvage job on a partition in job slot "startjob" */
603 if (startjob != -1) {
605 Log("Starting salvage of file system partition %s\n",
606 jobs[startjob]->partP->name);
608 /* For NT, we not only fork, but re-exec the salvager. Pass in the
609 * commands and pass the child job number via the data path.
612 nt_SalvagePartition(jobs[startjob]->partP->name,
613 jobs[startjob]->jobnumb);
614 jobs[startjob]->pid = pid;
619 jobs[startjob]->pid = pid;
625 for (fd = 0; fd < 16; fd++)
632 openlog("salvager", LOG_PID, useSyslogFacility);
636 (void)afs_snprintf(logFileName, sizeof logFileName,
638 AFSDIR_SERVER_SLVGLOG_FILEPATH,
639 jobs[startjob]->jobnumb);
640 logFile = afs_fopen(logFileName, "w");
645 SalvageFileSys1(jobs[startjob]->partP, 0);
650 } /* while ( thisjob || (!partP && numjobs > 0) ) */
652 /* If waited for all jobs to complete, now collect log files and return */
654 if (!useSyslog) /* if syslogging - no need to collect */
657 for (i = 0; i < jobcount; i++) {
658 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
659 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
660 if ((passLog = afs_fopen(logFileName, "r"))) {
661 while (fgets(buf, sizeof(buf), passLog)) {
666 (void)unlink(logFileName);
675 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
677 if (!canfork || debug || Fork() == 0) {
678 SalvageFileSys1(partP, singleVolumeNumber);
679 if (canfork && !debug) {
684 Wait("SalvageFileSys");
688 get_DevName(char *pbuffer, char *wpath)
690 char pbuf[128], *ptr;
691 strcpy(pbuf, pbuffer);
692 ptr = (char *)strrchr(pbuf, '/');
698 ptr = (char *)strrchr(pbuffer, '/');
700 strcpy(pbuffer, ptr + 1);
707 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
710 char inodeListPath[256];
711 static char tmpDevName[100];
712 static char wpath[100];
713 struct VolumeSummary *vsp, *esp;
716 fileSysPartition = partP;
717 fileSysDevice = fileSysPartition->device;
718 fileSysPathName = VPartitionPath(fileSysPartition);
721 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
722 (void)sprintf(fileSysPath, "%s\\", fileSysPathName);
723 name = partP->devName;
725 fileSysPath = fileSysPathName;
726 strcpy(tmpDevName, partP->devName);
727 name = get_DevName(tmpDevName, wpath);
728 fileSysDeviceName = name;
729 filesysfulldev = wpath;
732 VLockPartition(partP->name);
733 if (singleVolumeNumber || ForceSalvage)
736 ForceSalvage = UseTheForceLuke(fileSysPath);
738 if (singleVolumeNumber) {
739 /* salvageserver already setup fssync conn for us */
740 if ((programType != salvageServer) && !VConnectFS()) {
741 Abort("Couldn't connect to file server\n");
743 AskOffline(singleVolumeNumber, partP->name);
746 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
747 partP->name, name, (Testing ? "(READONLY mode)" : ""));
749 Log("***Forced salvage of all volumes on this partition***\n");
754 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
761 assert((dirp = opendir(fileSysPath)) != NULL);
762 while ((dp = readdir(dirp))) {
763 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
764 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
766 Log("Removing old salvager temp files %s\n", dp->d_name);
767 strcpy(npath, fileSysPath);
769 strcat(npath, dp->d_name);
775 tdir = (tmpdir ? tmpdir : fileSysPath);
777 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
778 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
780 snprintf(inodeListPath, 255, "%s/salvage.inodes.%s.%d", tdir, name,
783 if (GetInodeSummary(inodeListPath, singleVolumeNumber) < 0) {
784 unlink(inodeListPath);
788 /* Using nt_unlink here since we're really using the delete on close
789 * semantics of unlink. In most places in the salvager, we really do
790 * mean to unlink the file at that point. Those places have been
791 * modified to actually do that so that the NT crt can be used there.
794 _open_osfhandle((intptr_t)nt_open(inodeListPath, O_RDWR, 0), O_RDWR);
795 nt_unlink(inodeListPath); /* NT's crt unlink won't if file is open. */
797 inodeFd = afs_open(inodeListPath, O_RDONLY);
798 unlink(inodeListPath);
801 Abort("Temporary file %s is missing...\n", inodeListPath);
802 if (ListInodeOption) {
806 /* enumerate volumes in the partition.
807 * figure out sets of read-only + rw volumes.
808 * salvage each set, read-only volumes first, then read-write.
809 * Fix up inodes on last volume in set (whether it is read-write
812 GetVolumeSummary(singleVolumeNumber);
814 for (i = j = 0, vsp = volumeSummaryp, esp = vsp + nVolumes;
815 i < nVolumesInInodeFile; i = j) {
816 VolumeId rwvid = inodeSummary[i].RWvolumeId;
818 j < nVolumesInInodeFile && inodeSummary[j].RWvolumeId == rwvid;
820 VolumeId vid = inodeSummary[j].volumeId;
821 struct VolumeSummary *tsp;
822 /* Scan volume list (from partition root directory) looking for the
823 * current rw volume number in the volume list from the inode scan.
824 * If there is one here that is not in the inode volume list,
826 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
828 DeleteExtraVolumeHeaderFile(vsp);
830 /* Now match up the volume summary info from the root directory with the
831 * entry in the volume list obtained from scanning inodes */
832 inodeSummary[j].volSummary = NULL;
833 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
834 if (tsp->header.id == vid) {
835 inodeSummary[j].volSummary = tsp;
841 /* Salvage the group of volumes (several read-only + 1 read/write)
842 * starting with the current read-only volume we're looking at.
844 SalvageVolumeGroup(&inodeSummary[i], j - i);
847 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
848 for (; vsp < esp; vsp++) {
850 DeleteExtraVolumeHeaderFile(vsp);
853 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
854 RemoveTheForce(fileSysPath);
856 if (!Testing && singleVolumeNumber) {
857 AskOnline(singleVolumeNumber, fileSysPartition->name);
859 /* Step through the volumeSummary list and set all volumes on-line.
860 * The volumes were taken off-line in GetVolumeSummary.
862 for (j = 0; j < nVolumes; j++) {
863 AskOnline(volumeSummaryp[j].header.id, fileSysPartition->name);
867 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
868 fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
871 close(inodeFd); /* SalvageVolumeGroup was the last which needed it. */
875 DeleteExtraVolumeHeaderFile(register struct VolumeSummary *vsp)
878 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", vsp->fileName, (Testing ? "would have been " : ""));
880 unlink(vsp->fileName);
885 CompareInodes(const void *_p1, const void *_p2)
887 register const struct ViceInodeInfo *p1 = _p1;
888 register const struct ViceInodeInfo *p2 = _p2;
889 if (p1->u.vnode.vnodeNumber == INODESPECIAL
890 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
891 VolumeId p1rwid, p2rwid;
893 (p1->u.vnode.vnodeNumber ==
894 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
896 (p2->u.vnode.vnodeNumber ==
897 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
902 if (p1->u.vnode.vnodeNumber == INODESPECIAL
903 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
904 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
905 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
906 if (p1->u.vnode.volumeId == p1rwid)
908 if (p2->u.vnode.volumeId == p2rwid)
910 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
912 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
913 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
914 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
916 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
918 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
920 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
922 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
924 /* The following tests are reversed, so that the most desirable
925 * of several similar inodes comes first */
926 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
928 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
929 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
933 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
934 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
939 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
941 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
942 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
946 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
947 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
952 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
954 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
955 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
959 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
960 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
965 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
967 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
968 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
972 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
973 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
982 CountVolumeInodes(register struct ViceInodeInfo *ip, int maxInodes,
983 register struct InodeSummary *summary)
985 VolumeId volume = ip->u.vnode.volumeId;
986 VolumeId rwvolume = volume;
987 register int n, nSpecial;
988 register Unique maxunique;
991 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
993 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
995 rwvolume = ip->u.special.parentId;
996 /* This isn't quite right, as there could (in error) be different
997 * parent inodes in different special vnodes */
999 if (maxunique < ip->u.vnode.vnodeUniquifier)
1000 maxunique = ip->u.vnode.vnodeUniquifier;
1004 summary->volumeId = volume;
1005 summary->RWvolumeId = rwvolume;
1006 summary->nInodes = n;
1007 summary->nSpecialInodes = nSpecial;
1008 summary->maxUniquifier = maxunique;
1012 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1014 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1015 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1016 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1021 * Collect list of inodes in file named by path. If a truly fatal error,
1022 * unlink the file and abort. For lesser errors, return -1. The file will
1023 * be unlinked by the caller.
1026 GetInodeSummary(char *path, VolumeId singleVolumeNumber)
1028 struct afs_stat status;
1030 struct ViceInodeInfo *ip;
1031 struct InodeSummary summary;
1032 char summaryFileName[50];
1035 char *dev = fileSysPath;
1036 char *wpath = fileSysPath;
1038 char *dev = fileSysDeviceName;
1039 char *wpath = filesysfulldev;
1041 char *part = fileSysPath;
1044 /* This file used to come from vfsck; cobble it up ourselves now... */
1046 ListViceInodes(dev, fileSysPath, path,
1047 singleVolumeNumber ? OnlyOneVolume : 0,
1048 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1050 Log("*** I/O error %d when writing a tmp inode file %s; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, path, dev);
1054 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1056 if (forceSal && !ForceSalvage) {
1057 Log("***Forced salvage of all volumes on this partition***\n");
1060 inodeFd = afs_open(path, O_RDWR);
1061 if (inodeFd == -1 || afs_fstat(inodeFd, &status) == -1) {
1063 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1065 tdir = (tmpdir ? tmpdir : part);
1067 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1068 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp"));
1070 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1071 "%s/salvage.temp.%d", tdir, getpid());
1073 summaryFile = afs_fopen(summaryFileName, "a+");
1074 if (summaryFile == NULL) {
1077 Abort("Unable to create inode summary file\n");
1079 if (!canfork || debug || Fork() == 0) {
1081 unsigned long st_size=(unsigned long) status.st_size;
1082 nInodes = st_size / sizeof(struct ViceInodeInfo);
1084 fclose(summaryFile);
1086 unlink(summaryFileName);
1087 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1088 RemoveTheForce(fileSysPath);
1090 struct VolumeSummary *vsp;
1093 GetVolumeSummary(singleVolumeNumber);
1095 for (i = 0, vsp = volumeSummaryp; i < nVolumes; i++) {
1097 DeleteExtraVolumeHeaderFile(vsp);
1100 Log("%s vice inodes on %s; not salvaged\n",
1101 singleVolumeNumber ? "No applicable" : "No", dev);
1104 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1106 fclose(summaryFile);
1109 unlink(summaryFileName);
1111 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1114 if (read(inodeFd, ip, st_size) != st_size) {
1115 fclose(summaryFile);
1118 unlink(summaryFileName);
1119 Abort("Unable to read inode table; %s not salvaged\n", dev);
1121 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1122 if (afs_lseek(inodeFd, 0, SEEK_SET) == -1
1123 || write(inodeFd, ip, st_size) != st_size) {
1124 fclose(summaryFile);
1127 unlink(summaryFileName);
1128 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1132 CountVolumeInodes(ip, nInodes, &summary);
1133 if (fwrite(&summary, sizeof(summary), 1, summaryFile) != 1) {
1134 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1135 fclose(summaryFile);
1139 summary.index += (summary.nInodes);
1140 nInodes -= summary.nInodes;
1141 ip += summary.nInodes;
1143 /* Following fflush is not fclose, because if it was debug mode would not work */
1144 if (fflush(summaryFile) == EOF || fsync(fileno(summaryFile)) == -1) {
1145 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1146 fclose(summaryFile);
1150 if (canfork && !debug) {
1155 if (Wait("Inode summary") == -1) {
1156 fclose(summaryFile);
1159 unlink(summaryFileName);
1160 Exit(1); /* salvage of this partition aborted */
1163 assert(afs_fstat(fileno(summaryFile), &status) != -1);
1164 if (status.st_size != 0) {
1166 unsigned long st_status=(unsigned long)status.st_size;
1167 inodeSummary = (struct InodeSummary *)malloc(st_status);
1168 assert(inodeSummary != NULL);
1169 /* For GNU we need to do lseek to get the file pointer moved. */
1170 assert(afs_lseek(fileno(summaryFile), 0, SEEK_SET) == 0);
1171 ret = read(fileno(summaryFile), inodeSummary, st_status);
1172 assert(ret == st_status);
1174 nVolumesInInodeFile =(unsigned long)(status.st_size) / sizeof(struct InodeSummary);
1175 Log("%d nVolumesInInodeFile %d \n",nVolumesInInodeFile,(unsigned long)(status.st_size));
1176 fclose(summaryFile);
1178 unlink(summaryFileName);
1182 /* Comparison routine for volume sort.
1183 This is setup so that a read-write volume comes immediately before
1184 any read-only clones of that volume */
1186 CompareVolumes(const void *_p1, const void *_p2)
1188 register const struct VolumeSummary *p1 = _p1;
1189 register const struct VolumeSummary *p2 = _p2;
1190 if (p1->header.parent != p2->header.parent)
1191 return p1->header.parent < p2->header.parent ? -1 : 1;
1192 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1194 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1196 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1200 GetVolumeSummary(VolumeId singleVolumeNumber)
1203 afs_int32 nvols = 0;
1204 struct VolumeSummary *vsp, vs;
1205 struct VolumeDiskHeader diskHeader;
1208 /* Get headers from volume directory */
1209 dirp = opendir(fileSysPath);
1211 Abort("Can't read directory %s; not salvaged\n", fileSysPath);
1212 if (!singleVolumeNumber) {
1213 while ((dp = readdir(dirp))) {
1214 char *p = dp->d_name;
1215 p = strrchr(dp->d_name, '.');
1216 if (p != NULL && strcmp(p, VHDREXT) == 0) {
1219 sprintf(name, "%s/%s", fileSysPath, dp->d_name);
1220 if ((fd = afs_open(name, O_RDONLY)) != -1
1221 && read(fd, (char *)&diskHeader, sizeof(diskHeader))
1222 == sizeof(diskHeader)
1223 && diskHeader.stamp.magic == VOLUMEHEADERMAGIC) {
1224 DiskToVolumeHeader(&vs.header, &diskHeader);
1232 dirp = opendir("."); /* No rewinddir for NT */
1239 (struct VolumeSummary *)malloc(nvols *
1240 sizeof(struct VolumeSummary));
1243 (struct VolumeSummary *)malloc(20 * sizeof(struct VolumeSummary));
1244 assert(volumeSummaryp != NULL);
1247 vsp = volumeSummaryp;
1248 while ((dp = readdir(dirp))) {
1249 char *p = dp->d_name;
1250 p = strrchr(dp->d_name, '.');
1251 if (p != NULL && strcmp(p, VHDREXT) == 0) {
1255 sprintf(name, "%s/%s", fileSysPath, dp->d_name);
1256 if ((fd = afs_open(name, O_RDONLY)) == -1
1257 || read(fd, &diskHeader, sizeof(diskHeader))
1258 != sizeof(diskHeader)
1259 || diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
1264 if (!singleVolumeNumber) {
1266 Log("%s/%s is not a legitimate volume header file; %sdeleted\n", fileSysPathName, dp->d_name, (Testing ? "it would have been " : ""));
1271 char nameShouldBe[64];
1272 DiskToVolumeHeader(&vsp->header, &diskHeader);
1273 if (singleVolumeNumber && vsp->header.id == singleVolumeNumber
1274 && vsp->header.parent != singleVolumeNumber) {
1275 if (programType == salvageServer) {
1276 #ifdef SALVSYNC_BUILD_CLIENT
1277 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1278 vsp->header.id, vsp->header.parent);
1279 if (SALVSYNC_LinkVolume(vsp->header.parent,
1281 fileSysPartition->name,
1283 Log("schedule request failed\n");
1286 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1288 Log("%u is a read-only volume; not salvaged\n",
1289 singleVolumeNumber);
1293 if (!singleVolumeNumber
1294 || (vsp->header.id == singleVolumeNumber
1295 || vsp->header.parent == singleVolumeNumber)) {
1296 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1297 VFORMAT, afs_printable_uint32_lu(vsp->header.id));
1298 if (singleVolumeNumber
1299 && vsp->header.id != singleVolumeNumber)
1300 AskOffline(vsp->header.id, fileSysPartition->name);
1301 if (strcmp(nameShouldBe, dp->d_name)) {
1303 Log("Volume header file %s is incorrectly named; %sdeleted (it will be recreated later, if necessary)\n", dp->d_name, (Testing ? "it would have been " : ""));
1307 vsp->fileName = ToString(dp->d_name);
1317 qsort(volumeSummaryp, nVolumes, sizeof(struct VolumeSummary),
/*
 * FindLinkHandle: scan the special inodes of each of the nVols volume
 * summaries in isp[] and return the inode number of the first one whose
 * special type is VI_LINKTABLE.  The inodes of summary i are located at
 * allInodes + isp[i].index, and only its first nSpecialInodes entries are
 * examined.
 * NOTE(review): the not-found return path is outside this excerpt; the
 * caller tests the result with VALID_INO() -- confirm the sentinel value.
 */
1321 /* Find the link table. This should be associated with the RW volume or, if
1322 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
1325 FindLinkHandle(register struct InodeSummary *isp, int nVols,
1326 struct ViceInodeInfo *allInodes)
1329 struct ViceInodeInfo *ip;
1331 for (i = 0; i < nVols; i++) {
1332 ip = allInodes + isp[i].index;
1333 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1334 if (ip[j].u.special.type == VI_LINKTABLE)
1335 return ip[j].inodeNumber;
/*
 * CreateLinkTable: make sure the volume group described by isp has an
 * initialised link table inode.
 *   isp - inode summary for the group; isp->RWvolumeId names the group and
 *         isp->volSummary (when present) receives the link table inode.
 *   ino - candidate link table inode; when it fails VALID_INO() a new
 *         INODESPECIAL / VI_LINKTABLE inode is requested through IH_CREATE.
 * The inode is attached to the VGLinkH handle, opened, truncated to
 * sizeof(struct versionStamp) + sizeof(short), stamped with
 * LINKTABLEMAGIC / LINKTABLEVERSION and closed again.  Open, truncate and
 * write failures are reported through Abort().
 */
1342 CreateLinkTable(register struct InodeSummary *isp, Inode ino)
1344 struct versionStamp version;
1347 if (!VALID_INO(ino))
1349 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
1350 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1351 if (!VALID_INO(ino))
1353 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1354 isp->RWvolumeId, errno);
1355 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1356 fdP = IH_OPEN(VGLinkH);
1358 Abort("Can't open link table for volume %u (error = %d)\n",
1359 isp->RWvolumeId, errno);
/* Size the file to exactly one version stamp plus one short. */
1361 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1362 Abort("Can't truncate link table for volume %u (error = %d)\n",
1363 isp->RWvolumeId, errno);
1365 version.magic = LINKTABLEMAGIC;
1366 version.version = LINKTABLEVERSION;
1368 if (FDH_WRITE(fdP, (char *)&version, sizeof(version))
/* NOTE(review): this message says "truncate" although the failing call is
 * the FDH_WRITE above -- looks like a copy/paste slip; confirm and reword. */
1370 Abort("Can't truncate link table for volume %u (error = %d)\n",
1371 isp->RWvolumeId, errno);
1373 FDH_REALLYCLOSE(fdP);
1375 /* If the volume summary exits (i.e., the V*.vol header file exists),
1376 * then set this inode there as well.
1378 if (isp->volSummary)
1379 isp->volSummary->header.linkTable = ino;
/*
 * Thread start routine body (nt_SVG is the routine SalvageVolumeGroup hands
 * to pthread_create()): recover the SVGParms_t the caller packed behind the
 * opaque arg and run DoSalvageVolumeGroup() with its summary pointer and
 * volume count.
 * NOTE(review): the signature and return statement are outside this excerpt.
 */
1388 SVGParms_t *parms = (SVGParms_t *) arg;
1389 DoSalvageVolumeGroup(parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * SalvageVolumeGroup: salvage the nVols volumes summarised at isp.
 * VolInfo (per-volume state) is zeroed first.  In the threaded path shown
 * here the arguments are packed into an SVGParms_t and nt_SVG is started on
 * a joinable pthread, which is then joined; each pthread setup failure is
 * logged with the group's RWvolumeId.
 * NOTE(review): the trailing "#endif" for AFS_NT40_ENV suggests this path is
 * conditional and that another branch exists -- not visible here, confirm.
 */
1394 SalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1397 pthread_attr_t tattr;
1401 /* Initialize per volume global variables, even if later code does so */
1405 memset(&VolInfo, 0, sizeof(VolInfo));
1407 parms.svgp_inodeSummaryp = isp;
1408 parms.svgp_count = nVols;
1409 code = pthread_attr_init(&tattr);
1411 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
/* Joinable, so that the pthread_join() below can wait for the worker. */
1415 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1417 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1420 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1422 Log("Failed to create thread to salvage volume group %u\n",
/* Block until the worker finishes; its exit value is discarded. */
1426 (void)pthread_join(tid, NULL);
1428 #endif /* AFS_NT40_ENV */
/*
 * DoSalvageVolumeGroup: salvage one volume group (the nVols summaries
 * starting at isp).  Steps visible in this excerpt:
 *   - QuickCheck() is consulted unless ForceSalvage is set;
 *   - when canfork && !debug the process forks and the parent Wait()s;
 *   - the group's ViceInodeInfo records are read from inodeFd into a
 *     freshly allocated buffer;
 *   - under AFS_NAMEI_ENV the link table is located with FindLinkHandle()
 *     or recreated with CreateLinkTable(), after which a link count of 1 is
 *     stored for every non-special inode;
 *   - volumes are processed from index nVols - 1 down to salvageTo, each
 *     going through a check pass (check == 1) and then a repair pass
 *     (check == 0);
 *   - remaining link-count discrepancies are worked off with IH_DEC /
 *     IH_INC, the link table inode being handled last;
 *   - SalvageVolume() is called for the directory consistency checks and
 *     VGLinkH is released.
 */
1431 DoSalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1433 struct ViceInodeInfo *inodes, *allInodes, *ip;
1434 int i, totalInodes, size, salvageTo;
1438 int dec_VGLinkH = 0;
1440 FdHandle_t *fdP = NULL;
1443 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1444 && isp->nSpecialInodes > 0);
1445 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1446 if (!ForceSalvage && QuickCheck(isp, nVols))
1449 if (ShowMounts && !haveRWvolume)
1451 if (canfork && !debug && Fork() != 0) {
1452 (void)Wait("Salvage volume group");
1455 for (i = 0, totalInodes = 0; i < nVols; i++)
1456 totalInodes += isp[i].nInodes;
1457 size = totalInodes * sizeof(struct ViceInodeInfo);
/* NOTE(review): no NULL check on this malloc is visible before the pointer
 * arithmetic on the next line. */
1458 inodes = (struct ViceInodeInfo *)malloc(size);
1459 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1460 * for the partition, if all the inodes
1461 * had been read into memory */
1463 (inodeFd, isp->index * sizeof(struct ViceInodeInfo),
/* NOTE(review): the read() happens inside assert(); a build with NDEBUG
 * would drop the I/O itself, not just the check. */
1465 assert(read(inodeFd, inodes, size) == size);
1467 /* Don't try to salvage a read write volume if there isn't one on this
1469 salvageTo = haveRWvolume ? 0 : 1;
1471 #ifdef AFS_NAMEI_ENV
1472 ino = FindLinkHandle(isp, nVols, allInodes);
1473 if (VALID_INO(ino)) {
1474 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1475 fdP = IH_OPEN(VGLinkH);
1477 if (!VALID_INO(ino) || fdP == NULL) {
1478 Log("%s link table for volume %u.\n",
1479 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1481 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1484 struct ViceInodeInfo *ip;
1485 CreateLinkTable(isp, ino);
1486 fdP = IH_OPEN(VGLinkH);
1487 /* Sync fake 1 link counts to the link table, now that it exists */
1489 for (i = 0; i < nVols; i++) {
1490 ip = allInodes + isp[i].index;
1491 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1493 nt_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1495 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1503 FDH_REALLYCLOSE(fdP);
1505 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1508 /* Salvage in reverse order--read/write volume last; this way any
1509 * Inodes not referenced by the time we salvage the read/write volume
1510 * can be picked up by the read/write volume */
1511 /* ACTUALLY, that's not done right now--the inodes just vanish */
/* Index 0 is left out of the loop (salvageTo == 1) when no RW volume was
 * detected above. */
1512 for (i = nVols - 1; i >= salvageTo; i--) {
1514 struct InodeSummary *lisp = &isp[i];
1515 #ifdef AFS_NAMEI_ENV
1516 /* If only the RO is present on this partition, the link table
1517 * shows up as a RW volume special file. Need to make sure the
1518 * salvager doesn't try to salvage the non-existent RW.
1520 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1521 /* If this only special inode is the link table, continue */
1522 if (inodes->u.special.type == VI_LINKTABLE) {
1529 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1530 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1531 /* Check inodes twice. The second time do things seriously. This
1532 * way the whole RO volume can be deleted, below, if anything goes wrong */
1533 for (check = 1; check >= 0; check--) {
1535 if (SalvageVolumeHeaderFile(lisp, allInodes, rw, check, &deleteMe)
1537 MaybeZapVolume(lisp, "Volume header", deleteMe, check);
1538 if (rw && deleteMe) {
1539 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1540 * volume won't be called */
1546 if (rw && check == 1)
1548 if (SalvageVnodes(isp, lisp, allInodes, check) == -1) {
1549 MaybeZapVolume(lisp, "Vnode index", 0, check);
1555 /* Fix actual inode counts */
1557 Log("totalInodes %d\n",totalInodes);
/* ip->linkCount is used below as the remaining discrepancy (see the
 * "incorrect by" log): positive values are worked off with IH_DEC,
 * negative ones with IH_INC. */
1558 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1559 static int TraceBadLinkCounts = 0;
1560 #ifdef AFS_NAMEI_ENV
1561 if (VGLinkH->ih_ino == ip->inodeNumber) {
1562 dec_VGLinkH = ip->linkCount - VGLinkH_cnt;
1563 VGLinkH_p1 = ip->u.param[0];
1564 continue; /* Deal with this last. */
1567 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1568 TraceBadLinkCounts--; /* Limit reports, per volume */
1569 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(NULL, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
1571 while (ip->linkCount > 0) {
1572 /* below used to assert, not break */
1574 if (IH_DEC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1575 Log("idec failed. inode %s errno %d\n",
1576 PrintInode(NULL, ip->inodeNumber), errno);
1582 while (ip->linkCount < 0) {
1583 /* these used to be asserts */
1585 if (IH_INC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1586 Log("iinc failed. inode %s errno %d\n",
1587 PrintInode(NULL, ip->inodeNumber), errno);
1594 #ifdef AFS_NAMEI_ENV
/* Apply the adjustment that was deferred for the link table inode itself. */
1595 while (dec_VGLinkH > 0) {
1596 if (IH_DEC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1597 Log("idec failed on link table, errno = %d\n", errno);
1601 while (dec_VGLinkH < 0) {
1602 if (IH_INC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1603 Log("iinc failed on link table, errno = %d\n", errno);
1610 /* Directory consistency checks on the rw volume */
1612 SalvageVolume(isp, VGLinkH);
1613 IH_RELEASE(VGLinkH);
1615 if (canfork && !debug) {
/*
 * QuickCheck: inspect the volume info header of each of the nVols volumes
 * at isp without running a full salvage.  A header passes when it can be
 * read in full, carries VOLUMEINFOMAGIC, has dontSalvage == DONT_SALVAGE and
 * has neither needsSalvaged nor destroyMe set.  A passing header that is
 * still marked inUse is rewritten with inUse cleared and inService set.
 * NOTE(review): the return statements are outside this excerpt; the caller
 * tests the result as a boolean when ForceSalvage is off -- confirm the
 * exact return convention.
 */
1622 QuickCheck(register struct InodeSummary *isp, int nVols)
1624 /* Check headers BEFORE forking */
1628 for (i = 0; i < nVols; i++) {
1629 struct VolumeSummary *vs = isp[i].volSummary;
1630 VolumeDiskData volHeader;
1632 /* Don't salvage just because phantom rw volume is there... */
1633 /* (If a read-only volume exists, read/write inodes must also exist) */
1634 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
/* Read the header from the volumeInfo inode recorded in the volume summary. */
1638 IH_INIT(h, fileSysDevice, vs->header.parent, vs->header.volumeInfo);
1639 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
1640 == sizeof(volHeader)
1641 && volHeader.stamp.magic == VOLUMEINFOMAGIC
1642 && volHeader.dontSalvage == DONT_SALVAGE
1643 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
1644 if (volHeader.inUse != 0) {
1645 volHeader.inUse = 0;
1646 volHeader.inService = 1;
1648 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
1649 != sizeof(volHeader)) {
/*
 * Parameters of SalvageVolumeHeaderFile, as used by the visible code:
 *   isp      - summary of the volume being processed; volumeId / RWvolumeId
 *              seed the rebuilt header and isp->volSummary is (re)populated.
 *   inodes   - base of the inode array; this volume's special inodes start
 *              at inodes[isp->index].
 *   RW       - non-zero for the read/write volume; for other volumes the
 *              repair pass calls ClearROInUseBit().
 *   check    - non-zero on the report-only pass, zero on the repair pass.
 *   deleteMe - forwarded to SalvageHeader().
 * Review notes:
 *   - the malloc() of the replacement VolumeSummary has no visible NULL
 *     check before it is dereferenced;
 *   - failures to open the header file are only caught by assert().
 */
1665 /* SalvageVolumeHeaderFile
1667 * Salvage the top level V*.vol header file. Make sure the special files
1668 * exist and that there are no duplicates.
1670 * Calls SalvageHeader for each possible type of volume special file.
1674 SalvageVolumeHeaderFile(register struct InodeSummary *isp,
1675 register struct ViceInodeInfo *inodes, int RW,
1676 int check, int *deleteMe)
1680 register struct ViceInodeInfo *ip;
1681 int allinodesobsolete = 1;
1682 struct VolumeDiskHeader diskHeader;
1686 memset(&tempHeader, 0, sizeof(tempHeader));
1687 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
1688 tempHeader.stamp.version = VOLUMEHEADERVERSION;
1689 tempHeader.id = isp->volumeId;
1690 tempHeader.parent = isp->RWvolumeId;
1691 /* Check for duplicates (inodes are sorted by type field) */
1692 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
1693 ip = &inodes[isp->index + i];
1694 if (ip->u.special.type == (ip + 1)->u.special.type) {
1696 Log("Duplicate special inodes in volume header; salvage of volume %u aborted\n", isp->volumeId);
/* Record each non-obsolete special inode through its stuff[] slot. */
1700 for (i = 0; i < isp->nSpecialInodes; i++) {
1701 ip = &inodes[isp->index + i];
1702 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
1704 Log("Rubbish header inode\n");
1707 Log("Rubbish header inode; deleted\n");
1708 } else if (!stuff[ip->u.special.type - 1].obsolete) {
1709 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
1710 if (!check && ip->u.special.type != VI_LINKTABLE)
1711 ip->linkCount--; /* Keep the inode around */
1712 allinodesobsolete = 0;
1716 if (allinodesobsolete) {
1723 VGLinkH_cnt++; /* one for every header. */
1725 if (!RW && !check && isp->volSummary) {
1726 ClearROInUseBit(isp->volSummary);
1730 for (i = 0; i < MAXINODETYPE; i++) {
1731 if (stuff[i].inodeType == VI_LINKTABLE) {
1732 /* Gross hack: SalvageHeader does a bcmp on the volume header.
1733 * And we may have recreated the link table earlier, so set the
1734 * RW header as well.
1736 if (VALID_INO(VGLinkH->ih_ino)) {
1737 *stuff[i].inode = VGLinkH->ih_ino;
1741 if (SalvageHeader(&stuff[i], isp, check, deleteMe) == -1 && check)
1745 if (isp->volSummary == NULL) {
1747 char headerName[64];
1748 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
1749 (void)afs_snprintf(path, sizeof path, "%s/%s", fileSysPath, headerName);
1751 Log("No header file for volume %u\n", isp->volumeId);
1755 Log("No header file for volume %u; %screating %s\n",
1756 isp->volumeId, (Testing ? "it would have been " : ""),
1758 headerFd = afs_open(path, O_RDWR | O_CREAT | O_TRUNC, 0644);
1759 assert(headerFd != -1);
1760 isp->volSummary = (struct VolumeSummary *)
1761 malloc(sizeof(struct VolumeSummary));
1762 isp->volSummary->fileName = ToString(headerName);
1765 char headerName[64];
1766 /* hack: these two fields are obsolete... */
1767 isp->volSummary->header.volumeAcl = 0;
1768 isp->volSummary->header.volumeMountTable = 0;
1771 (&isp->volSummary->header, &tempHeader,
1772 sizeof(struct VolumeHeader))) {
1773 /* We often remove the name before calling us, so we make a fake one up */
1774 if (isp->volSummary->fileName) {
/* NOTE(review): unbounded strcpy into the 64-byte headerName; the length of
 * fileName is not checked here. */
1775 strcpy(headerName, isp->volSummary->fileName);
1777 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
1778 isp->volSummary->fileName = ToString(headerName);
1780 (void)afs_snprintf(path, sizeof path, "%s/%s", fileSysPath, headerName);
1782 Log("Header file %s is damaged or no longer valid%s\n", path,
1783 (check ? "" : "; repairing"));
1787 headerFd = afs_open(path, O_RDWR | O_TRUNC, 0644);
1788 assert(headerFd != -1);
1792 memcpy(&isp->volSummary->header, &tempHeader,
1793 sizeof(struct VolumeHeader));
1796 Log("It would have written a new header file for volume %u\n",
/* Convert the rebuilt header to its on-disk form before writing it out. */
1799 VolumeHeaderToDisk(&diskHeader, &tempHeader);
1800 if (write(headerFd, &diskHeader, sizeof(struct VolumeDiskHeader))
1801 != sizeof(struct VolumeDiskHeader)) {
1802 Log("Couldn't rewrite volume header file!\n");
1809 IH_INIT(isp->volSummary->volumeInfoHandle, fileSysDevice, isp->RWvolumeId,
1810 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader: verify, and on the repair pass rebuild, one special header
 * inode of a volume.
 *   sp    - descriptor of the special file; the code reads its inodeType,
 *           inode slot, description, size and stamp.
 *   isp   - summary of the owning volume.
 *   check - non-zero for the report-only pass.
 * Visible behaviour: a zero inode slot is reported and a replacement is
 * requested through IH_CREATE; the inode is opened and its first sp->size
 * bytes must carry sp->stamp.magic, otherwise the part is reported as
 * corrupted.  When recreating (and not Testing) the file is truncated and
 * rewritten: VI_VOLINFO gets a fresh VolumeDiskData named "bogus.<id>",
 * while the other visible write path stores only sp->stamp.  For VI_VOLINFO
 * the header is also copied into VolInfo and its update or creation time is
 * logged.
 * NOTE(review): the remaining parameter(s) and all return statements are
 * outside this excerpt.
 */
1815 SalvageHeader(register struct stuff *sp, struct InodeSummary *isp, int check,
1819 VolumeDiskData volumeInfo;
1820 struct versionStamp fileHeader;
1829 #ifndef AFS_NAMEI_ENV
1830 if (sp->inodeType == VI_LINKTABLE)
1833 if (*(sp->inode) == 0) {
1835 Log("Missing inode in volume header (%s)\n", sp->description);
1839 Log("Missing inode in volume header (%s); %s\n", sp->description,
1840 (Testing ? "it would have recreated it" : "recreating"));
1843 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
1844 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
1845 if (!VALID_INO(*(sp->inode)))
1847 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
1848 sp->description, errno);
1853 IH_INIT(specH, fileSysDevice, isp->RWvolumeId, *(sp->inode));
1854 fdP = IH_OPEN(specH);
1855 if (OKToZap && (fdP == NULL) && BadError(errno)) {
1856 /* bail out early and destroy the volume */
1858 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
1865 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
1866 sp->description, errno);
1869 && (FDH_READ(fdP, (char *)&header, sp->size) != sp->size
1870 || header.fileHeader.magic != sp->stamp.magic)) {
1872 Log("Part of the header (%s) is corrupted\n", sp->description);
1873 FDH_REALLYCLOSE(fdP);
1877 Log("Part of the header (%s) is corrupted; recreating\n",
1881 if (sp->inodeType == VI_VOLINFO
1882 && header.volumeInfo.destroyMe == DESTROY_ME) {
1885 FDH_REALLYCLOSE(fdP);
1889 if (recreate && !Testing) {
1892 ("Internal error: recreating volume header (%s) in check mode\n",
1894 code = FDH_TRUNC(fdP, 0);
1896 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
1897 sp->description, errno);
1899 /* The following code should be moved into vutil.c */
1900 if (sp->inodeType == VI_VOLINFO) {
1902 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
1903 header.volumeInfo.stamp = sp->stamp;
1904 header.volumeInfo.id = isp->volumeId;
1905 header.volumeInfo.parentId = isp->RWvolumeId;
/* The rebuilt header gets a recognisable placeholder name; the warning
 * below tells the operator about it. */
1906 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
1907 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
1908 isp->volumeId, isp->volumeId);
1909 header.volumeInfo.inService = 0;
1910 header.volumeInfo.blessed = 0;
1911 /* The + 1000 is a hack in case there are any files out in venus caches */
1912 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
1913 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
1914 header.volumeInfo.needsCallback = 0;
1915 gettimeofday(&tp, 0);
1916 header.volumeInfo.creationDate = tp.tv_sec;
1917 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
1919 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
1920 sp->description, errno);
1923 FDH_WRITE(fdP, (char *)&header.volumeInfo,
1924 sizeof(header.volumeInfo));
1925 if (code != sizeof(header.volumeInfo)) {
1928 ("Unable to write volume header file (%s) (errno = %d)\n",
1929 sp->description, errno);
1930 Abort("Unable to write entire volume header file (%s)\n",
1934 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
1936 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
1937 sp->description, errno);
1939 code = FDH_WRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp));
1940 if (code != sizeof(sp->stamp)) {
1943 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
1944 sp->description, errno);
1946 ("Unable to write entire version stamp in volume header file (%s)\n",
1951 FDH_REALLYCLOSE(fdP);
1953 if (sp->inodeType == VI_VOLINFO) {
1954 VolInfo = header.volumeInfo;
1957 if (VolInfo.updateDate) {
1958 strcpy(update, TimeStamp(VolInfo.updateDate, 0));
1960 Log("%s (%u) %supdated %s\n", VolInfo.name, VolInfo.id,
1961 (Testing ? "it would have been " : ""), update);
1963 strcpy(update, TimeStamp(VolInfo.creationDate, 0));
1965 Log("%s (%u) not updated (created %s)\n", VolInfo.name,
1966 VolInfo.id, update);
/*
 * SalvageVnodes: run SalvageIndex() over the small and then the large vnode
 * index of thisIsp's volume.
 *   rwIsp   - summary of the read/write volume; it supplies the window of
 *             non-special inodes (start offset and count) handed to
 *             SalvageIndex().
 *   thisIsp - summary of the volume whose indices are salvaged; RW is set
 *             when it is the same object as rwIsp.
 *   inodes  - base of the inode array.
 *   check   - non-zero for the report-only pass.
 * Returns 0 only when both index passes returned 0, otherwise -1.  On the
 * check pass a small-index result of -1 is tested before the large index is
 * processed (the statement it guards is not visible here).
 */
1976 SalvageVnodes(register struct InodeSummary *rwIsp,
1977 register struct InodeSummary *thisIsp,
1978 register struct ViceInodeInfo *inodes, int check)
1980 int ilarge, ismall, ioffset, RW, nInodes;
1981 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
1984 RW = (rwIsp == thisIsp);
1985 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
1987 SalvageIndex(thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
1988 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
1989 if (check && ismall == -1)
1992 SalvageIndex(thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
1993 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
1994 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex: walk one vnode index file and reconcile every non-null
 * vnode with the inode list.
 *   ino        - inode of the index file.
 *   class      - vnode class; selects VnodeClassInfo[class].
 *   RW         - non-zero when salvaging the read/write volume.
 *   ip/nInodes - cursor into, and remaining length of, the inode list; the
 *                loops below compare its vnode numbers against the current
 *                vnode in ascending order.
 *   volSummary - supplies the volume id and the parent used for the handle.
 *   check      - non-zero for the report-only pass; the function asserts
 *                that no vnode was changed while check is set.
 * Visible per-vnode actions: vnodes with inode 0 or a wrong magic are
 * zeroed; uniquifier, data version, inode number and length are reconciled
 * with the matching inode; vnodes without a matching inode are reported and
 * zeroed.  A changed vnode is written back at vnodeIndexOffset() unless
 * Testing, and VolumeChanged is set.
 * NOTE(review): the return statements are outside this excerpt.
 */
1998 SalvageIndex(Inode ino, VnodeClass class, int RW,
1999 register struct ViceInodeInfo *ip, int nInodes,
2000 struct VolumeSummary *volSummary, int check)
2002 VolumeId volumeNumber;
2003 char buf[SIZEOF_LARGEDISKVNODE];
2004 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2006 StreamHandle_t *file;
2007 struct VnodeClassInfo *vcp;
2009 afs_fsize_t vnodeLength;
2010 int vnodeIndex, nVnodes;
2011 afs_ino_str_t stmp1, stmp2;
2015 volumeNumber = volSummary->header.id;
2016 IH_INIT(handle, fileSysDevice, volSummary->header.parent, ino);
2017 fdP = IH_OPEN(handle);
2018 assert(fdP != NULL);
2019 file = FDH_FDOPEN(fdP, "r+");
2020 assert(file != NULL);
2021 vcp = &VnodeClassInfo[class];
2022 size = OS_SIZE(fdP->fd_fd);
/* The first diskSize-sized slot of the index is not a vnode: it is excluded
 * from the count here and skipped by the seek below. */
2024 nVnodes = (size / vcp->diskSize) - 1;
2026 assert((nVnodes + 1) * vcp->diskSize == size);
/* NOTE(review): the seek is performed inside assert(); a build with NDEBUG
 * would drop the seek itself. */
2027 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
2031 for (vnodeIndex = 0;
2032 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2033 nVnodes--, vnodeIndex++) {
2034 if (vnode->type != vNull) {
2035 int vnodeChanged = 0;
2036 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2037 /* Log programs that belong to root (potentially suid root);
2038 * don't bother for read-only or backup volumes */
2039 #ifdef notdef /* This is done elsewhere */
2040 if (ShowRootFiles && RW && vnode->owner == 0 && vnodeNumber != 1)
2041 Log("OWNER IS ROOT %s %u dir %u vnode %u author %u owner %u mode %o\n", VolInfo.name, volumeNumber, vnode->parent, vnodeNumber, vnode->author, vnode->owner, vnode->modeBits);
2043 if (VNDISK_GET_INO(vnode) == 0) {
2045 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2046 memset(vnode, 0, vcp->diskSize);
2050 if (vcp->magic != vnode->vnodeMagic) {
2051 /* bad magic #, probably partially created vnode */
2052 Log("Partially allocated vnode %d deleted.\n",
2054 memset(vnode, 0, vcp->diskSize);
2058 /* ****** Should do a bit more salvage here: e.g. make sure
2059 * vnode type matches what it should be given the index */
/* This loop covers inodes whose vnode number is below the current vnode. */
2060 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2061 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2062 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2063 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2070 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2071 /* The following doesn't work, because the version number
2072 * is not maintained correctly by the file server */
2073 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2074 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2076 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2082 /* For RW volume, look for vnode with matching inode number;
2083 * if no such match, take the first determined by our sort
2085 register struct ViceInodeInfo *lip = ip;
2086 register int lnInodes = nInodes;
2088 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2089 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2098 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2099 /* "Matching" inode */
2103 vu = vnode->uniquifier;
2104 iu = ip->u.vnode.vnodeUniquifier;
2105 vd = vnode->dataVersion;
2106 id = ip->u.vnode.inodeDataVersion;
2108 * Because of the possibility of the uniquifier overflows (> 4M)
2109 * we compare them modulo the low 22-bits; we shouldn't worry
2110 * about mismatching since they shouldn't to many old
2111 * uniquifiers of the same vnode...
2113 if (IUnique(vu) != IUnique(iu)) {
2115 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2118 vnode->uniquifier = iu;
2119 #ifdef AFS_3DISPARES
2120 vnode->dataVersion = (id >= vd ?
2123 1887437 ? vd : id) :
2126 1887437 ? id : vd));
2128 #if defined(AFS_SGI_EXMAG)
2129 vnode->dataVersion = (id >= vd ?
2132 15099494 ? vd : id) :
2135 15099494 ? id : vd));
2137 vnode->dataVersion = (id > vd ? id : vd);
2138 #endif /* AFS_SGI_EXMAG */
2139 #endif /* AFS_3DISPARES */
2142 /* don't bother checking for vd > id any more, since
2143 * partial file transfers always result in this state,
2144 * and you can't do much else anyway (you've already
2145 * found the best data you can) */
2146 #ifdef AFS_3DISPARES
2147 if (!vnodeIsDirectory(vnodeNumber)
2148 && ((vd < id && (id - vd) < 1887437)
2149 || ((vd > id && (vd - id) > 1887437)))) {
2151 #if defined(AFS_SGI_EXMAG)
2152 if (!vnodeIsDirectory(vnodeNumber)
2153 && ((vd < id && (id - vd) < 15099494)
2154 || ((vd > id && (vd - id) > 15099494)))) {
2156 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2157 #endif /* AFS_SGI_EXMAG */
2160 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2161 vnode->dataVersion = id;
2166 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2169 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2171 VNDISK_SET_INO(vnode, ip->inodeNumber);
2176 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2178 VNDISK_SET_INO(vnode, ip->inodeNumber);
2181 VNDISK_GET_LEN(vnodeLength, vnode);
2182 if (ip->byteCount != vnodeLength) {
2185 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2190 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2191 VNDISK_SET_LEN(vnode, ip->byteCount);
2195 ip->linkCount--; /* Keep the inode around */
2198 } else { /* no matching inode */
2199 if (VNDISK_GET_INO(vnode) != 0
2200 || vnode->type == vDirectory) {
2201 /* No matching inode--get rid of the vnode */
2203 if (VNDISK_GET_INO(vnode)) {
2205 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)));
2209 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2214 if (VNDISK_GET_INO(vnode)) {
2216 time_t serverModifyTime = vnode->serverModifyTime;
2217 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2221 time_t serverModifyTime = vnode->serverModifyTime;
2222 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2225 memset(vnode, 0, vcp->diskSize);
2228 /* Should not reach here because we checked for
2229 * (inodeNumber == 0) above. And where we zero the vnode,
2230 * we also goto vnodeDone.
2234 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2238 } /* VNDISK_GET_INO(vnode) != 0 */
/* The report-only pass must never have modified a vnode. */
2240 assert(!(vnodeChanged && check));
2241 if (vnodeChanged && !Testing) {
2243 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2244 (char *)vnode, vcp->diskSize)
2246 VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber: map a vnode number to its VnodeEssence entry.
 * The class comes from vnodeIdToClass() and the slot from
 * vnodeIdToBitNumber().  Returns NULL when the slot is not below
 * vnodeInfo[class].nVnodes, otherwise a pointer into that class's vnodes[]
 * array.
 */
2257 struct VnodeEssence *
2258 CheckVnodeNumber(VnodeId vnodeNumber)
2261 struct VnodeInfo *vip;
2264 class = vnodeIdToClass(vnodeNumber);
2265 vip = &vnodeInfo[class];
2266 offset = vnodeIdToBitNumber(vnodeNumber);
2267 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite: give directory dir its own copy of its data inode before the
 * salvager modifies it.  The test on dir->copied || Testing guards the rest
 * of the function (the guarded statement is not visible here).  Otherwise
 * the directory's vnode is read from the large vnode index, a new inode is
 * requested through IH_CREATE and filled by CopyInode(), the vnode is
 * pointed at the new inode and written back, and the salvage dir handle is
 * re-targeted at the copy.
 * Review notes:
 *   - CopyInode() is invoked inside assert(), so a build with NDEBUG would
 *     skip the copy itself;
 *   - vnode.dataVersion is bumped ("+=") inside the IH_CREATE argument
 *     list; the increment amount is outside this excerpt.
 */
2271 CopyOnWrite(register struct DirSummary *dir)
2273 /* Copy the directory unconditionally if we are going to change it:
2274 * not just if was cloned.
2276 struct VnodeDiskObject vnode;
2277 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2278 Inode oldinode, newinode;
2281 if (dir->copied || Testing)
2283 DFlush(); /* Well justified paranoia... */
/* Fetch the directory's on-disk vnode to learn its current inode. */
2286 IH_IREAD(vnodeInfo[vLarge].handle,
2287 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2289 assert(code == sizeof(vnode));
2290 oldinode = VNDISK_GET_INO(&vnode);
2291 /* Increment the version number by a whole lot to avoid problems with
2292 * clients that were promised new version numbers--but the file server
2293 * crashed before the versions were written to disk.
2296 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2297 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2299 assert(VALID_INO(newinode));
2300 assert(CopyInode(fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2302 VNDISK_SET_INO(&vnode, newinode);
2304 IH_IWRITE(vnodeInfo[vLarge].handle,
2305 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2307 assert(code == sizeof(vnode));
2309 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2310 fileSysDevice, newinode);
2311 /* Don't delete the original inode right away, because the directory is
2312 * still being scanned.
2318 * This function should either successfully create a new dir, or give up
2319 * and leave things the way they were. In particular, if it fails to write
2320 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage: rebuild directory dir into a fresh inode.
 * The vnode is read from the large vnode index, a new inode is requested
 * through IH_CREATE and DirSalvage() builds the new directory in it; the
 * "." and ".." entries are derived from dir->vnodeNumber and vnode.parent
 * (falling back to the directory itself when parent is 0, and to a parent
 * uniquifier of 1 when none is known).  If DirSalvage() fails or DirOK()
 * rejects the result, the new inode's link count is dropped with IH_DEC and
 * the old directory stays in place.  On success the vnode is updated with
 * the new inode and length and written back, the filesystem is synced, the
 * old directory file is closed and its inode released with IH_DEC, and
 * dir->dirHandle is switched to the new directory.
 */
2324 CopyAndSalvage(register struct DirSummary *dir)
2326 struct VnodeDiskObject vnode;
2327 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2328 Inode oldinode, newinode;
2333 afs_int32 parentUnique = 1;
2334 struct VnodeEssence *vnodeEssence;
2339 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2341 IH_IREAD(vnodeInfo[vLarge].handle,
2342 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2344 assert(lcode == sizeof(vnode));
2345 oldinode = VNDISK_GET_INO(&vnode);
2346 /* Increment the version number by a whole lot to avoid problems with
2347 * clients that were promised new version numbers--but the file server
2348 * crashed before the versions were written to disk.
2351 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2352 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2354 assert(VALID_INO(newinode));
2355 SetSalvageDirHandle(&newdir, dir->rwVid, fileSysDevice, newinode);
2357 /* Assign . and .. vnode numbers from dir and vnode.parent.
2358 * The uniquifier for . is in the vnode.
2359 * The uniquifier for .. might be set to a bogus value of 1 and
2360 * the salvager will later clean it up.
2362 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(vnode.parent))) {
2363 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2366 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2368 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2373 /* didn't really build the new directory properly, let's just give up. */
2374 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2375 Log("Directory salvage returned code %d, continuing.\n", code);
2377 Log("also failed to decrement link count on new inode");
2381 Log("Checking the results of the directory salvage...\n");
2382 if (!DirOK(&newdir)) {
2383 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2384 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
/* The rebuilt directory passed DirOK(): commit it to the vnode. */
2389 VNDISK_SET_INO(&vnode, newinode);
2390 length = Length(&newdir);
2391 VNDISK_SET_LEN(&vnode, length);
2393 IH_IWRITE(vnodeInfo[vLarge].handle,
2394 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2396 assert(lcode == sizeof(vnode));
2399 nt_sync(fileSysDevice);
2401 sync(); /* this is slow, but hopefully rarely called. We don't have
2402 * an open FD on the file itself to fsync.
2406 vnodeInfo[vLarge].handle->ih_synced = 1;
2408 /* make sure old directory file is really closed */
/* NOTE(review): the IH_OPEN result is handed to FDH_REALLYCLOSE without a
 * NULL check. */
2409 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2410 FDH_REALLYCLOSE(fdP);
2412 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2414 dir->dirHandle = newdir;
2418 JudgeEntry(void *dirVal, char *name, afs_int32 vnodeNumber,
2421 struct DirSummary *dir = (struct DirSummary *)dirVal;
2422 struct VnodeEssence *vnodeEssence;
2423 afs_int32 dirOrphaned, todelete;
2425 dirOrphaned = IsVnodeOrphaned(dir->vnodeNumber);
2427 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2428 if (vnodeEssence == NULL) {
2430 Log("dir vnode %u: invalid entry deleted: %s/%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2434 assert(Delete(&dir->dirHandle, name) == 0);
2439 #ifndef AFS_NAMEI_ENV
2440 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2441 * mount inode for the partition. If this inode were deleted, it would crash
2444 if (vnodeEssence->InodeNumber == 0) {
2445 Log("dir vnode %d: invalid entry: %s/%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2448 assert(Delete(&dir->dirHandle, name) == 0);
2455 if (!(vnodeNumber & 1) && !Showmode
2456 && !(vnodeEssence->count || vnodeEssence->unique
2457 || vnodeEssence->modeBits)) {
2458 Log("dir vnode %u: invalid entry: %s/%s (vnode %u, unique %u)%s\n",
2459 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
2460 vnodeNumber, unique,
2461 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
2466 assert(Delete(&dir->dirHandle, name) == 0);
2472 /* Check if the Uniquifiers match. If not, change the directory entry
2473 * so its unique matches the vnode unique. Delete if the unique is zero
2474 * or if the directory is orphaned.
2476 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
2477 if (!vnodeEssence->unique
2478 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
2479 /* This is an orphaned directory. Don't delete the . or ..
2480 * entry. Otherwise, it will get created in the next
2481 * salvage and deleted again here. So Just skip it.
2486 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
2489 Log("dir vnode %u: %s/%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
2493 fid.Vnode = vnodeNumber;
2494 fid.Unique = vnodeEssence->unique;
2496 assert(Delete(&dir->dirHandle, name) == 0);
2498 assert(Create(&dir->dirHandle, name, &fid) == 0);
2501 return 0; /* no need to continue */
2504 if (strcmp(name, ".") == 0) {
2505 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
2508 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
2511 assert(Delete(&dir->dirHandle, ".") == 0);
2512 fid.Vnode = dir->vnodeNumber;
2513 fid.Unique = dir->unique;
2514 assert(Create(&dir->dirHandle, ".", &fid) == 0);
2517 vnodeNumber = fid.Vnode; /* Get the new Essence */
2518 unique = fid.Unique;
2519 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2522 } else if (strcmp(name, "..") == 0) {
2525 struct VnodeEssence *dotdot;
2526 pa.Vnode = dir->parent;
2527 dotdot = CheckVnodeNumber(pa.Vnode);
2528 assert(dotdot != NULL); /* XXX Should not be assert */
2529 pa.Unique = dotdot->unique;
2531 pa.Vnode = dir->vnodeNumber;
2532 pa.Unique = dir->unique;
2534 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
2536 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
2539 assert(Delete(&dir->dirHandle, "..") == 0);
2540 assert(Create(&dir->dirHandle, "..", &pa) == 0);
2543 vnodeNumber = pa.Vnode; /* Get the new Essence */
2545 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2547 dir->haveDotDot = 1;
2548 } else if (strncmp(name, ".__afs", 6) == 0) {
2550 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
2554 assert(Delete(&dir->dirHandle, name) == 0);
2556 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
2557 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
2560 if (ShowSuid && (vnodeEssence->modeBits & 06000))
2561 Log("FOUND suid/sgid file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
2562 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
2563 && !(vnodeEssence->modeBits & 0111)) {
2569 IH_INIT(ihP, fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
2570 vnodeEssence->InodeNumber);
2573 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
2577 size = FDH_SIZE(fdP);
2579 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, size, vnodeNumber);
2580 FDH_REALLYCLOSE(fdP);
2587 code = FDH_READ(fdP, buf, size);
2590 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
2591 Log("Volume %u (%s) mount point %s/%s to '%s' invalid, %s to symbolic link\n",
2592 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
2593 Testing ? "would convert" : "converted");
2594 vnodeEssence->modeBits |= 0111;
2595 vnodeEssence->changed = 1;
2596 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s/%s to '%s'\n",
2597 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
2598 dir->name ? dir->name : "??", name, buf);
2600 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
2601 dir->vname, vnodeNumber, size, code);
2603 FDH_REALLYCLOSE(fdP);
2606 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
2607 Log("FOUND root file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
2608 if (vnodeIdToClass(vnodeNumber) == vLarge
2609 && vnodeEssence->name == NULL) {
2611 if ((n = (char *)malloc(strlen(name) + 1)))
2613 vnodeEssence->name = n;
2616 /* The directory entry points to the vnode. Check to see if the
2617 * vnode points back to the directory. If not, then let the
2618 * directory claim it (else it might end up orphaned). Vnodes
2619 * already claimed by another directory are deleted from this
2620 * directory: hardlinks to the same vnode are not allowed
2621 * from different directories.
2623 if (vnodeEssence->parent != dir->vnodeNumber) {
2624 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
2625 /* Vnode does not point back to this directory.
2626 * Orphaned dirs cannot claim a file (it may belong to
2627 * another non-orphaned dir).
2630 Log("dir vnode %u: %s/%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
2632 vnodeEssence->parent = dir->vnodeNumber;
2633 vnodeEssence->changed = 1;
2635 /* Vnode was claimed by another directory */
2638 Log("dir vnode %u: %s/%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
2639 } else if (vnodeNumber == 1) {
2640 Log("dir vnode %d: %s/%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
2642 Log("dir vnode %u: %s/%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
2647 assert(Delete(&dir->dirHandle, name) == 0);
2652 /* This directory claims the vnode */
2653 vnodeEssence->claimed = 1;
2655 vnodeEssence->count--;
/*
 * DistilVnodeEssence: open the vnode index inode `ino` of volume `rwVId`
 * and condense every allocated on-disk vnode of the given class into an
 * in-memory VnodeEssence entry in vnodeInfo[class].
 *
 *   rwVId - volume id used to initialise the index handle
 *   class - vnode class; selects vnodeInfo[class] and VnodeClassInfo[class]
 *   ino   - inode number of the vnode index file
 *   maxu  - in/out: raised to the largest uniquifier seen among the
 *           allocated vnodes
 *
 * Also accumulates vip->nAllocatedVnodes and vip->volumeBlockCount.
 *
 * NOTE(review): several calls with side effects (STREAM_SEEK, calloc) sit
 * inside assert(); that is only correct if assert() is never compiled out
 * (NDEBUG) in this build -- confirm.
 */
2660 DistilVnodeEssence(VolumeId rwVId, VnodeClass class, Inode ino, Unique * maxu)
2662 register struct VnodeInfo *vip = &vnodeInfo[class];
2663 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
2664 char buf[SIZEOF_LARGEDISKVNODE];
2665 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2667 StreamHandle_t *file;
2672 IH_INIT(vip->handle, fileSysDevice, rwVId, ino);
2673 fdP = IH_OPEN(vip->handle);
2674 assert(fdP != NULL);
2675 file = FDH_FDOPEN(fdP, "r+");
2676 assert(file != NULL);
2677 size = OS_SIZE(fdP->fd_fd);
/* One diskSize-sized slot at the start of the index is not counted as a
 * vnode (presumably the index header -- confirm); it is skipped by the
 * seek below. */
2679 vip->nVnodes = (size / vcp->diskSize) - 1;
2680 if (vip->nVnodes > 0) {
2681 assert((vip->nVnodes + 1) * vcp->diskSize == size);
2682 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
2683 assert((vip->vnodes = (struct VnodeEssence *)
2684 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
/* Only the large class gets the parallel inodes[] array; it is filled in
 * for directory vnodes in the loop below. */
2685 if (class == vLarge) {
2686 assert((vip->inodes = (Inode *)
2687 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
2696 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
2697 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
2698 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2699 nVnodes--, vnodeIndex++) {
/* Slots of type vNull are skipped, leaving their calloc-zeroed
 * VnodeEssence untouched. */
2700 if (vnode->type != vNull) {
2701 register struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
2702 afs_fsize_t vnodeLength;
2703 vip->nAllocatedVnodes++;
2704 vep->count = vnode->linkCount;
2705 VNDISK_GET_LEN(vnodeLength, vnode);
2706 vep->blockCount = nBlocks(vnodeLength);
2707 vip->volumeBlockCount += vep->blockCount;
2708 vep->parent = vnode->parent;
2709 vep->unique = vnode->uniquifier;
2710 if (*maxu < vnode->uniquifier)
2711 *maxu = vnode->uniquifier;
2712 vep->modeBits = vnode->modeBits;
2713 vep->InodeNumber = VNDISK_GET_INO(vnode);
2714 vep->type = vnode->type;
2715 vep->author = vnode->author;
2716 vep->owner = vnode->owner;
2717 vep->group = vnode->group;
2718 if (vnode->type == vDirectory) {
2719 assert(class == vLarge);
2720 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName: build the path name of directory vnode `vnode` (essence
 * `vp`) into `path` by first recursing on the parent chain and then
 * appending vp->name.
 *
 * The recursion is only attempted when the vnode has a parent, a recorded
 * name, and a parent essence obtainable from CheckVnodeNumber(); the
 * recursive result is used as a truth value.
 *
 * NOTE(review): the append uses strcat() with no visible bound on `path`;
 * the other branches of this function are not visible in this view.
 */
2729 GetDirName(VnodeId vnode, struct VnodeEssence *vp, char *path)
2731 struct VnodeEssence *parentvp;
2737 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(vp->parent))
2738 && GetDirName(vp->parent, parentvp, path)) {
2740 strcat(path, vp->name);
/*
 * IsVnodeOrphaned: return 1 if `vnode` is orphaned, 0 if it is connected
 * to the tree. A vnode with no essence or with the claimed flag clear is
 * orphaned; otherwise the answer is that of its parent (recursive walk up
 * the parent chain).
 *
 * NOTE(review): the recursion has no visible guard against a cycle of
 * claimed parents -- confirm this cannot occur.
 */
2746 /* To determine if a vnode is orhpaned or not, the vnode and all its parent
2747 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
2750 IsVnodeOrphaned(VnodeId vnode)
2752 struct VnodeEssence *vep;
2755 return (1); /* Vnode zero does not exist */
2757 return (0); /* The root dir vnode is always claimed */
2758 vep = CheckVnodeNumber(vnode); /* Get the vnode essence */
2759 if (!vep || !vep->claimed)
2760 return (1); /* Vnode is not claimed - it is orphaned */
/* Claimed itself: orphaned only if some ancestor is. */
2762 return (IsVnodeOrphaned(vep->parent));
/*
 * SalvageDir: salvage the directory held in large-vnode slot `i` of
 * `dirVnodeInfo`, then judge each of its entries with JudgeEntry().
 *
 *   name         - passed through unchanged to the recursive call
 *                  (SalvageVolume passes the volume name)
 *   rwVid        - read-write volume id, used when the old directory
 *                  inode is released with IH_DEC
 *   dirVnodeInfo - vnode summary for the large (directory) class
 *   alinkH       - link handle stored in dir.ds_linkH
 *   i            - index (bit number) of the directory vnode
 *   rootdir      - out: receives a copy of the DirSummary when the
 *                  directory is vnode 1, unique 1
 *   rootdirfound - passed through to the recursive call; its declaration
 *                  and update are not visible in this view
 *
 * A slot is processed at most once (the salvaged flag), and slots with a
 * zero inode number are ignored.
 *
 * dir, dirHandle and path are static; the recursive call for the parent
 * is made before any of them is written for this directory.
 */
2766 SalvageDir(char *name, VolumeId rwVid, struct VnodeInfo *dirVnodeInfo,
2767 IHandle_t * alinkH, int i, struct DirSummary *rootdir,
2770 static struct DirSummary dir;
2771 static struct DirHandle dirHandle;
2772 struct VnodeEssence *parent;
2773 static char path[MAXPATHLEN];
2776 if (dirVnodeInfo->vnodes[i].salvaged)
2777 return; /* already salvaged */
2780 dirVnodeInfo->vnodes[i].salvaged = 1;
2782 if (dirVnodeInfo->inodes[i] == 0)
2783 return; /* Not allocated to a directory */
/* Vnode 1 must have no parent; a non-zero parent is reset to 0 and the
 * vnode marked changed. */
2785 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
2786 if (dirVnodeInfo->vnodes[i].parent) {
2787 Log("Bad parent, vnode 1; %s...\n",
2788 (Testing ? "skipping" : "salvaging"));
2789 dirVnodeInfo->vnodes[i].parent = 0;
2790 dirVnodeInfo->vnodes[i].changed = 1;
/* Salvage the parent directory first if it has not been done yet. */
2793 parent = CheckVnodeNumber(dirVnodeInfo->vnodes[i].parent);
2794 if (parent && parent->salvaged == 0)
2795 SalvageDir(name, rwVid, dirVnodeInfo, alinkH,
2796 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
2797 rootdir, rootdirfound);
2800 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
2801 dir.unique = dirVnodeInfo->vnodes[i].unique;
2804 dir.parent = dirVnodeInfo->vnodes[i].parent;
2805 dir.haveDot = dir.haveDotDot = 0;
2806 dir.ds_linkH = alinkH;
2807 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, fileSysDevice,
2808 dirVnodeInfo->inodes[i]);
/* With RebuildDirs set (and not Testing) the directory is treated as bad
 * without calling DirOK(). */
2810 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
2813 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
2814 (Testing ? "skipping" : "salvaging"));
2817 CopyAndSalvage(&dir);
2821 dirHandle = dir.dirHandle;
2824 GetDirName(bitNumberToVnodeNumber(i, vLarge),
2825 &dirVnodeInfo->vnodes[i], path);
2828 /* If enumeration failed for random reasons, we will probably delete
2829 * too much stuff, so we guard against this instead.
2831 assert(EnumerateDir(&dirHandle, JudgeEntry, &dir) == 0);
2834 /* Delete the old directory if it was copied in order to salvage.
2835 * CopyOnWrite has written the new inode # to the disk, but we still
2836 * have the old one in our local structure here. Thus, we idec the
2840 if (dir.copied && !Testing) {
2841 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
2843 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
2846 /* Remember rootdir DirSummary _after_ it has been judged */
2847 if (dir.vnodeNumber == 1 && dir.unique == 1) {
2848 memcpy(rootdir, &dir, sizeof(struct DirSummary));
/*
 * SalvageVolume: salvage the directory structure and vnode indexes of one
 * read-write volume.
 *
 *   rwIsp  - inode summary of the volume; rwIsp->volSummary->header
 *            supplies the volume id and the inode numbers of the volume
 *            info file and of the large and small vnode indexes
 *   alinkH - link handle passed to SalvageDir() and to IH_DEC() when a
 *            vnode's inode is released
 *
 * Phases, in order:
 *   1. Read the volume header and assert its magic and that DESTROY_ME
 *      is not set.
 *   2. DistilVnodeEssence() for the large and small vnode classes.
 *   3. SalvageDir() on every large vnode slot.
 *   4. Scan all vnodes for allocated, unclaimed, non-root ones. For
 *      orphaned directories the parent's count is adjusted; with
 *      orphans == ORPH_ATTACH (and not Testing, not todelete) the vnode
 *      is re-parented to the root directory under a generated
 *      __ORPHANDIR__ / __ORPHANFILE__ name, trying up to 10 times.
 *   5. Re-read each vnode that is changed or has a non-zero count,
 *      apply parent, link count and mode bits, zero the ones being
 *      removed (releasing their inode), and write the vnode back.
 *   6. Free recorded names, update file/block counts, uniquifier and
 *      status flags in the volume header, write it, and log a summary.
 *
 * Note: VnodeEssence.count is subtracted from the on-disk linkCount in
 * phase 5, so "count--" raises and "count++" lowers the resulting link
 * count.
 */
2856 SalvageVolume(register struct InodeSummary *rwIsp, IHandle_t * alinkH)
2858 /* This routine, for now, will only be called for read-write volumes */
2860 int BlocksInVolume = 0, FilesInVolume = 0;
2861 register VnodeClass class;
2862 struct DirSummary rootdir, oldrootdir;
2863 struct VnodeInfo *dirVnodeInfo;
2864 struct VnodeDiskObject vnode;
2865 VolumeDiskData volHeader;
2867 int orphaned, rootdirfound = 0;
2868 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
2869 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
2870 struct VnodeEssence *vep;
2873 afs_sfsize_t nBytes;
2875 VnodeId LFVnode, ThisVnode;
2876 Unique LFUnique, ThisUnique;
2879 vid = rwIsp->volSummary->header.id;
2880 IH_INIT(h, fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
2881 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
2882 assert(nBytes == sizeof(volHeader));
2883 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
2884 assert(volHeader.destroyMe != DESTROY_ME);
2885 /* (should not have gotten this far with DESTROY_ME flag still set!) */
2887 DistilVnodeEssence(vid, vLarge, rwIsp->volSummary->header.largeVnodeIndex,
2889 DistilVnodeEssence(vid, vSmall, rwIsp->volSummary->header.smallVnodeIndex,
/* Salvage every directory (large vnode slot) in the volume. */
2892 dirVnodeInfo = &vnodeInfo[vLarge];
2893 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
2894 SalvageDir(volHeader.name, vid, dirVnodeInfo, alinkH, i, &rootdir,
2898 nt_sync(fileSysDevice);
2900 sync(); /* This used to be done lower level, for every dir */
2907 /* Parse each vnode looking for orphaned vnodes and
2908 * connect them to the tree as orphaned (if requested).
2910 oldrootdir = rootdir;
2911 for (class = 0; class < nVNODECLASSES; class++) {
2912 for (v = 0; v < vnodeInfo[class].nVnodes; v++) {
2913 vep = &(vnodeInfo[class].vnodes[v]);
2914 ThisVnode = bitNumberToVnodeNumber(v, class);
2915 ThisUnique = vep->unique;
2917 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
2918 continue; /* Ignore unused, claimed, and root vnodes */
2920 /* This vnode is orphaned. If it is a directory vnode, then the '..'
2921 * entry in this vnode had incremented the parent link count (In
2922 * JudgeEntry()). We need to go to the parent and decrement that
2923 * link count. But if the parent's unique is zero, then the parent
2924 * link count was not incremented in JudgeEntry().
2926 if (class == vLarge) { /* directory vnode */
2927 pv = vnodeIdToBitNumber(vep->parent);
2928 if (vnodeInfo[vLarge].vnodes[pv].unique != 0)
2929 vnodeInfo[vLarge].vnodes[pv].count++;
2933 continue; /* If no rootdir, can't attach orphaned files */
2935 /* Here we attach orphaned files and directories into the
2936 * root directory, LVVnode, making sure link counts stay correct.
2938 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
2939 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
2940 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
2942 /* Update this orphaned vnode's info. Its parent info and
2943 * link count (do for orphaned directories and files).
2945 vep->parent = LFVnode; /* Parent is the root dir */
2946 vep->unique = LFUnique;
2949 vep->count--; /* Inc link count (root dir will pt to it) */
2951 /* If this orphaned vnode is a directory, change '..'.
2952 * The name of the orphaned dir/file is unknown, so we
2953 * build a unique name. No need to CopyOnWrite the directory
2954 * since it is not connected to tree in BK or RO volume and
2955 * won't be visible there.
2957 if (class == vLarge) {
2961 /* Remove and recreate the ".." entry in this orphaned directory */
2962 SetSalvageDirHandle(&dh, vid, fileSysDevice,
2963 vnodeInfo[class].inodes[v]);
2965 pa.Unique = LFUnique;
2966 assert(Delete(&dh, "..") == 0);
2967 assert(Create(&dh, "..", &pa) == 0);
2969 /* The original parent's link count was decremented above.
2970 * Here we increment the new parent's link count.
2972 pv = vnodeIdToBitNumber(LFVnode);
2973 vnodeInfo[vLarge].vnodes[pv].count--;
2977 /* Go to the root dir and add this entry. The link count of the
2978 * root dir was incremented when ".." was created. Try 10 times.
2980 for (j = 0; j < 10; j++) {
2981 pa.Vnode = ThisVnode;
2982 pa.Unique = ThisUnique;
2984 (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
2986 vLarge) ? "__ORPHANDIR__" :
2987 "__ORPHANFILE__"), ThisVnode,
2990 CopyOnWrite(&rootdir);
2991 code = Create(&rootdir.dirHandle, npath, &pa);
2995 ThisUnique += 50; /* Try creating a different file */
2998 Log("Attaching orphaned %s to volume's root dir as %s\n",
2999 ((class == vLarge) ? "directory" : "file"), npath);
3001 } /* for each vnode in the class */
3002 } /* for each class of vnode */
3004 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
3006 if (!oldrootdir.copied && rootdir.copied) {
3008 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
3011 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
3014 DFlush(); /* Flush the changes */
3015 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
3016 Log("Cannot attach orphaned files and directories: Root directory not found\n");
3017 orphans = ORPH_IGNORE;
3020 /* Write out all changed vnodes. Orphaned files and directories
3021 * will get removed here also (if requested).
3023 for (class = 0; class < nVNODECLASSES; class++) {
3024 int nVnodes = vnodeInfo[class].nVnodes;
3025 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3026 struct VnodeEssence *vnodes = vnodeInfo[class].vnodes;
3027 FilesInVolume += vnodeInfo[class].nAllocatedVnodes;
3028 BlocksInVolume += vnodeInfo[class].volumeBlockCount;
3029 for (i = 0; i < nVnodes; i++) {
3030 register struct VnodeEssence *vnp = &vnodes[i];
3031 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
3033 /* If the vnode is good but is unclaimed (not listed in
3034 * any directory entries), then it is orphaned.
3037 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(vnodeNumber))) {
3038 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
3042 if (vnp->changed || vnp->count) {
3046 IH_IREAD(vnodeInfo[class].handle,
3047 vnodeIndexOffset(vcp, vnodeNumber),
3048 (char *)&vnode, sizeof(vnode));
3049 assert(nBytes == sizeof(vnode));
3051 vnode.parent = vnp->parent;
3052 oldCount = vnode.linkCount;
3053 vnode.linkCount = vnode.linkCount - vnp->count;
3056 orphaned = IsVnodeOrphaned(vnodeNumber);
3058 if (!vnp->todelete) {
3059 /* Orphans should have already been attached (if requested) */
3060 assert(orphans != ORPH_ATTACH);
3061 oblocks += vnp->blockCount;
3064 if (((orphans == ORPH_REMOVE) || vnp->todelete)
3066 BlocksInVolume -= vnp->blockCount;
3068 if (VNDISK_GET_INO(&vnode)) {
3070 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
3073 memset(&vnode, 0, sizeof(vnode));
3075 } else if (vnp->count) {
3077 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
3080 vnode.modeBits = vnp->modeBits;
3083 vnode.dataVersion++;
3086 IH_IWRITE(vnodeInfo[class].handle,
3087 vnodeIndexOffset(vcp, vnodeNumber),
3088 (char *)&vnode, sizeof(vnode));
3089 assert(nBytes == sizeof(vnode));
3095 if (!Showmode && ofiles) {
3096 Log("%s %d orphaned files and directories (approx. %u KB)\n",
3098 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
/* Free the names that were recorded on the vnode essences. */
3102 for (class = 0; class < nVNODECLASSES; class++) {
3103 register struct VnodeInfo *vip = &vnodeInfo[class];
3104 for (i = 0; i < vip->nVnodes; i++)
3105 if (vip->vnodes[i].name)
3106 free(vip->vnodes[i].name);
3113 /* Set correct resource utilization statistics */
3114 volHeader.filecount = FilesInVolume;
3115 volHeader.diskused = BlocksInVolume;
3117 /* Make sure the uniquifer is big enough: maxunique is the real maxUniquifier */
3118 if (volHeader.uniquifier < (maxunique + 1)) {
3120 Log("Volume uniquifier is too low; fixed\n");
3121 /* Plus 2,000 in case there are workstations out there with
3122 * cached vnodes that have since been deleted
3124 volHeader.uniquifier = (maxunique + 1 + 2000);
3127 /* Turn off the inUse bit; the volume's been salvaged! */
3128 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
3129 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
3130 volHeader.inService = 1; /* allow service again */
3131 volHeader.needsCallback = (VolumeChanged != 0);
3132 volHeader.dontSalvage = DONT_SALVAGE;
3135 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
3136 assert(nBytes == sizeof(volHeader));
3139 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
3140 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
3141 FilesInVolume, BlocksInVolume);
/* Release the index handles of both vnode classes. */
3143 IH_RELEASE(vnodeInfo[vSmall].handle);
3144 IH_RELEASE(vnodeInfo[vLarge].handle);
/*
 * ClearROInUseBit: reset the status flags in a volume's header.
 *
 * Reads the VolumeDiskData through summary->volumeInfoHandle, asserts
 * that the read is complete and the magic is VOLUMEINFOMAGIC, then clears
 * inUse and needsSalvaged, sets inService, sets dontSalvage to
 * DONT_SALVAGE, and writes the header back.
 *
 *   summary - volume summary whose volumeInfoHandle names the header file
 */
3150 ClearROInUseBit(struct VolumeSummary *summary)
3152 IHandle_t *h = summary->volumeInfoHandle;
3153 afs_sfsize_t nBytes;
3155 VolumeDiskData volHeader;
3157 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3158 assert(nBytes == sizeof(volHeader));
3159 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3160 volHeader.inUse = 0;
3161 volHeader.needsSalvaged = 0;
3162 volHeader.inService = 1;
3163 volHeader.dontSalvage = DONT_SALVAGE;
3165 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
3166 assert(nBytes == sizeof(volHeader));
3171 * Possibly delete the volume.
3173 * deleteMe - Always do so, only a partial volume.
/*
 * MaybeZapVolume: decide what to do with a volume that could not be
 * salvaged.
 *
 *   isp      - inode summary of the volume
 *   message  - prefix for the "salvage was unsuccessful" log line
 *   deleteMe - non-zero forces the delete path even for a non-read-only
 *              volume
 *
 * If the volume is read-only or deleteMe is set, and a header file name
 * is known, the reason is logged and the header file is unlinked (any
 * additional guard around the unlink is not visible in this view).
 * Otherwise, when `check` is zero, the failure is logged and Abort() is
 * called. `check` is presumably the last parameter; its declaration line
 * is not visible here.
 */
3176 MaybeZapVolume(register struct InodeSummary *isp, char *message, int deleteMe,
3179 if (readOnly(isp) || deleteMe) {
3180 if (isp->volSummary && isp->volSummary->fileName) {
3183 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
3185 Log("It will be deleted on this server (you may find it elsewhere)\n");
3188 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
3190 Log("it will be deleted instead. It should be recloned.\n");
3193 unlink(isp->volSummary->fileName);
3195 } else if (!check) {
3196 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
3198 Abort("Salvage of volume %u aborted\n", isp->volumeId);
/*
 * AskOffline: ask the fileserver, via FSYNC_VolOp(FSYNC_VOL_OFF,
 * FSYNC_SALVAGE), to take `volumeId` on `partition` offline. Up to three
 * attempts are made.
 *
 *   SYNC_OK          - success
 *   SYNC_DENIED      - logged, then Abort()
 *   SYNC_BAD_COMMAND - protocol mismatch; logged, then Abort()
 *   anything else    - logged and the client channel is shut down with
 *                      FSYNC_clientFinis() before retrying (the matching
 *                      re-initialisation is not visible in this view)
 *
 * If the last result is still not SYNC_OK after the loop, the salvage is
 * aborted.
 */
3204 AskOffline(VolumeId volumeId, char * partition)
3208 for (i = 0; i < 3; i++) {
3209 code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_OFF, FSYNC_SALVAGE, NULL);
3211 if (code == SYNC_OK) {
3213 } else if (code == SYNC_DENIED) {
3214 #ifdef DEMAND_ATTACH_ENABLE
3215 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
3217 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
3219 Abort("Salvage aborted\n");
3220 } else if (code == SYNC_BAD_COMMAND) {
3221 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
3223 #ifdef DEMAND_ATTACH_ENABLE
3224 Log("AskOffline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
3226 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
3228 Abort("Salvage aborted\n");
3231 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
3232 FSYNC_clientFinis();
3236 if (code != SYNC_OK) {
3237 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
3238 Abort("Salvage aborted\n");
/*
 * AskOnline: ask the fileserver, via FSYNC_VolOp(FSYNC_VOL_ON,
 * FSYNC_WHATEVER), to bring `volumeId` on `partition` back online. Up to
 * three attempts are made.
 *
 *   SYNC_OK          - success
 *   SYNC_DENIED      - logged; unlike AskOffline no Abort() is visible
 *   SYNC_BAD_COMMAND - protocol mismatch; logged
 *   anything else    - logged and the client channel is shut down with
 *                      FSYNC_clientFinis() before retrying (the matching
 *                      re-initialisation is not visible in this view)
 *
 * The retry message below used to say "take volume offline", copied from
 * AskOffline; it now describes the online request that actually failed.
 */
3243 AskOnline(VolumeId volumeId, char *partition)
3247 for (i = 0; i < 3; i++) {
3248 code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
3250 if (code == SYNC_OK) {
3252 } else if (code == SYNC_DENIED) {
3253 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, partition);
3254 } else if (code == SYNC_BAD_COMMAND) {
3255 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
3257 #ifdef DEMAND_ATTACH_ENABLE
3258 Log("AskOnline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
3260 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
3265 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
3266 FSYNC_clientFinis();
/*
 * CopyInode: copy the contents of inode1 into inode2 on `device`.
 *
 *   device   - device holding both inodes
 *   inode1   - source inode
 *   inode2   - destination inode
 *   rwvolume - volume id used to initialise both inode handles
 *
 * Data is moved in chunks of sizeof(buf) until FDH_READ returns a
 * non-positive count; both descriptors are then closed with
 * FDH_REALLYCLOSE. Failure to open the source is fatal (assert).
 *
 * NOTE(review): no check of destFdP is visible in this view, and the
 * FDH_WRITE call sits inside assert(), so the copy depends on assert()
 * never being compiled out -- confirm.
 */
3273 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
3275 /* Volume parameter is passed in case iopen is upgraded in future to
3276 * require a volume Id to be passed
3279 IHandle_t *srcH, *destH;
3280 FdHandle_t *srcFdP, *destFdP;
3283 IH_INIT(srcH, device, rwvolume, inode1);
3284 srcFdP = IH_OPEN(srcH);
3285 assert(srcFdP != NULL);
3286 IH_INIT(destH, device, rwvolume, inode2);
3287 destFdP = IH_OPEN(destH);
3289 while ((n = FDH_READ(srcFdP, buf, sizeof(buf))) > 0)
3290 assert(FDH_WRITE(destFdP, buf, n) == n);
3292 FDH_REALLYCLOSE(srcFdP);
3293 FDH_REALLYCLOSE(destFdP);
/*
 * PrintInodeList: debugging aid that logs every ViceInodeInfo record in
 * the file open on inodeFd.
 *
 * The whole file (size taken from afs_fstat) is read into one heap buffer
 * and each record's inode number, link count, byte count and four
 * parameters are logged.
 *
 * NOTE(review): the read() call sits inside assert(), and the release of
 * `buf` is not visible in this view -- confirm both.
 */
3300 PrintInodeList(void)
3302 register struct ViceInodeInfo *ip;
3303 struct ViceInodeInfo *buf;
3304 struct afs_stat status;
3305 register int nInodes;
3307 assert(afs_fstat(inodeFd, &status) == 0);
3308 buf = (struct ViceInodeInfo *)malloc(status.st_size);
3309 assert(buf != NULL);
3310 nInodes = status.st_size / sizeof(struct ViceInodeInfo);
3311 assert(read(inodeFd, buf, status.st_size) == status.st_size);
3312 for (ip = buf; nInodes--; ip++) {
3313 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
3314 PrintInode(NULL, ip->inodeNumber), ip->linkCount,
3315 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
3316 ip->u.param[2], ip->u.param[3]);
/*
 * PrintInodeSummary: debugging aid that logs one line for each of the
 * nVolumesInInodeFile entries of inodeSummary[]: volume id, RW volume id,
 * index, inode counts and maximum uniquifier.
 */
3322 PrintInodeSummary(void)
3325 struct InodeSummary *isp;
3327 for (i = 0; i < nVolumesInInodeFile; i++) {
3328 isp = &inodeSummary[i];
3329 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/*
 * PrintVolumeSummary: debugging aid that logs the header file name of
 * each of the nVolumes entries starting at volumeSummaryp.
 */
3334 PrintVolumeSummary(void)
3337 struct VolumeSummary *vsp;
3339 for (i = 0, vsp = volumeSummaryp; i < nVolumes; vsp++, i++) {
3340 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
/*
 * Body of the fork wrapper (its header line is not visible in this view).
 *
 * The NT build must never get here, hence the assert(0). In
 * AFS_DEMAND_ATTACH_FS builds, when `f` is 0 -- presumably the fork()
 * result in the child; confirm -- and the program is the salvageserver,
 * the child reconnects its FSSYNC client with VChildProcReconnectFS_r().
 * The SALVSYNC_BUILD_CLIENT branch body is not visible here.
 */
3350 assert(0); /* Fork is never executed in the NT code path */
3354 #ifdef AFS_DEMAND_ATTACH_FS
3355 if ((f == 0) && (programType == salvageServer)) {
3356 /* we are a salvageserver child */
3357 #ifdef FSSYNC_BUILD_CLIENT
3358 VChildProcReconnectFS_r();
3360 #ifdef SALVSYNC_BUILD_CLIENT
3364 #endif /* AFS_DEMAND_ATTACH_FS */
3365 #endif /* !AFS_NT40_ENV */
/*
 * Tail of the exit helper (its name and signature are not visible in this
 * view; `code` is the exit status it was given).
 *
 * In AFS_DEMAND_ATTACH_FS builds the salvageserver runs extra steps under
 * SALVSYNC_BUILD_CLIENT / FSSYNC_BUILD_CLIENT; those calls are not
 * visible here. When the caller is not main_thread, only that thread is
 * terminated, with `code` cast to the thread's exit value.
 */
3375 #ifdef AFS_DEMAND_ATTACH_FS
3376 if (programType == salvageServer) {
3377 #ifdef SALVSYNC_BUILD_CLIENT
3380 #ifdef FSSYNC_BUILD_CLIENT
3384 #endif /* AFS_DEMAND_ATTACH_FS */
3387 if (main_thread != pthread_self())
3388 pthread_exit((void *)code);
/*
 * Fragment of the child-reaping helper (header not visible in this view).
 * Blocks in wait() for a child, logs when the child named by `prog`
 * dumped core, and tests for termination by signal or a non-zero exit
 * status; the action taken in that case is not visible here.
 */
3401 pid = wait(&status);
3403 if (WCOREDUMP(status))
3404 Log("\"%s\" core dumped!\n", prog);
3405 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp: format `clock` as local time into a static 20-byte buffer,
 * either "mm/dd/YYYY HH:MM:SS" or "mm/dd/YYYY HH:MM".
 *
 *   clock     - time to format
 *   precision - selects between the two formats; the test itself is not
 *               visible in this view (Log() passes 1)
 *
 * Log() uses the result as a %s argument. Because the buffer is static
 * and localtime() is used, the result is overwritten by the next call.
 */
3411 TimeStamp(time_t clock, int precision)
3414 static char timestamp[20];
3415 lt = localtime(&clock);
3417 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M:%S", lt);
3419 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M", lt);
/*
 * CheckLogFile: set up the salvager log.
 *
 * The existing log at `log_path` is renamed to "<log_path>.old" and a new
 * log is opened in append mode into the global logFile. Per the existing
 * comment, stdout is used when the log still cannot be opened. The
 * conditions guarding these steps are not all visible in this view.
 *
 * NOTE(review): the ".old" name is built with strcpy/strcat into a
 * buffer of AFSDIR_PATH_MAX bytes with no visible length check.
 */
3424 CheckLogFile(char * log_path)
3426 char oldSlvgLog[AFSDIR_PATH_MAX];
3428 #ifndef AFS_NT40_ENV
3435 strcpy(oldSlvgLog, log_path);
3436 strcat(oldSlvgLog, ".old");
3438 renamefile(log_path, oldSlvgLog);
3439 logFile = afs_fopen(log_path, "a");
3441 if (!logFile) { /* still nothing, use stdout */
3445 #ifndef AFS_NAMEI_ENV
3446 AFS_DEBUG_IOPS_LOG(logFile);
3451 #ifndef AFS_NT40_ENV
/*
 * TimeStampLogFile: give the log at `log_path` a second, timestamped name
 * of the form "<log_path>.YYYY-MM-DD.HH:MM:SS" (local time) by creating a
 * hard link to it. A failing link() is deliberately ignored.
 */
3453 TimeStampLogFile(char * log_path)
3455 char stampSlvgLog[AFSDIR_PATH_MAX];
3460 lt = localtime(&now);
3461 (void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
3462 "%s.%04d-%02d-%02d.%02d:%02d:%02d",
3463 log_path, lt->tm_year + 1900,
3464 lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
3467 /* try to link the logfile to a timestamped filename */
3468 /* if it fails, oh well, nothing we can do */
3469 link(log_path, stampSlvgLog);
/*
 * Fragment of the log-display helper (header not visible in this view).
 * On non-NT builds it can report that the log cannot be shown because
 * syslog is in use (the test is not visible). Otherwise it opens
 * AFSDIR_SERVER_SLVGLOG_FILEPATH for reading, reports a failure to do so,
 * and reads the file line by line with fgets().
 */
3478 #ifndef AFS_NT40_ENV
3480 printf("Can't show log since using syslog.\n");
3489 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
3492 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
3495 while (fgets(line, sizeof(line), logFile))
/*
 * Log: printf-style logging routine used throughout the salvager.
 *
 * The message is formatted into a local buffer with afs_vsnprintf(). On
 * non-NT builds it may be sent to syslog at LOG_INFO (the selecting
 * condition is not visible in this view); otherwise it is written to
 * logFile prefixed with a seconds-precision TimeStamp() of the current
 * time.
 *
 *   format - printf-style format string, followed by its arguments
 */
3502 Log(const char *format, ...)
3508 va_start(args, format);
3509 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
3511 #ifndef AFS_NT40_ENV
3513 syslog(LOG_INFO, "%s", tmp);
3517 gettimeofday(&now, 0);
3518 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort: printf-style routine that reports a fatal condition.
 *
 * The message is formatted with afs_vsnprintf() and sent to syslog
 * (non-NT builds; the selecting condition is not visible in this view)
 * or written to logFile without a timestamp. What follows the write
 * (presumably process termination -- confirm) is not visible here.
 *
 *   format - printf-style format string, followed by its arguments
 */
3524 Abort(const char *format, ...)
3529 va_start(args, format);
3530 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
3532 #ifndef AFS_NT40_ENV
3534 syslog(LOG_INFO, "%s", tmp);
3538 fprintf(logFile, "%s", tmp);
/* Only the allocation of a string-copy helper is visible in this view:
 * room for `s` plus its terminating NUL. The NULL check and the copy
 * itself are not visible here. */
3553 p = (char *)malloc(strlen(s) + 1);
/*
 * RemoveTheForce: delete "<path>/FORCESALVAGE" if it exists, but only
 * when not in Testing mode and ForceSalvage is set.
 *
 *   path - directory in which the FORCESALVAGE file is looked for
 *
 * NOTE(review): the file name is assembled with strcpy/strcat into
 * `target`, whose declaration (and therefore size) is not visible in this
 * view; no length check is visible either.
 */
3559 /* Remove the FORCESALVAGE file */
3561 RemoveTheForce(char *path)
3564 struct afs_stat force; /* so we can use afs_stat to find it */
3565 strcpy(target,path);
3566 strcat(target,"/FORCESALVAGE");
3567 if (!Testing && ForceSalvage) {
3568 if (afs_stat(target,&force) == 0) unlink(target);
3572 #ifndef AFS_AIX32_ENV
3574 * UseTheForceLuke - see if we can use the force
/*
 * UseTheForceLuke (builds without AFS_AIX32_ENV): return non-zero when
 * "<path>/FORCESALVAGE" exists, i.e. when afs_stat() on it succeeds.
 *
 * NOTE(review): the name is assembled with strcpy/strcat into `target`,
 * whose declaration is not visible in this view; no length check is
 * visible either.
 */
3577 UseTheForceLuke(char *path)
3579 struct afs_stat force;
3581 strcpy(target,path);
3582 strcat(target,"/FORCESALVAGE");
3584 return (afs_stat(target, &force) == 0);
3588 * UseTheForceLuke - see if we can use the force
3591 * The VRMIX fsck will not muck with the filesystem it is supposedly
3592 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
3593 * muck directly with the root inode, which is within the normal
3595 * ListViceInodes() has a side effect of setting ForceSalvage if
3596 * it detects a need, based on root inode examination.
/*
 * UseTheForceLuke (AFS_AIX32_ENV variant): the visible part returns 0
 * without looking at `path`; lines between the header and the return are
 * not visible in this view.
 */
3599 UseTheForceLuke(char *path)
3602 return 0; /* sorry OB1 */
3607 /* NT support routines */
/* Path of the running executable; filled in on first use below. */
3609 static char execpathname[MAX_PATH];
/*
 * nt_SalvagePartition: start a child salvager process for one partition.
 *
 *   partName - partition name, copied into the job record
 *   jobn     - job number, stored in the job record
 *
 * The executable's own path is looked up once with GetModuleFileName()
 * and cached in execpathname. A childJob_t carrying SALVAGER_MAGIC, the
 * job number and the partition name is handed to the child through
 * spawnprocveb(), together with save_args.
 *
 * GetModuleFileName() returns the buffer size it was given when the path
 * had to be truncated, so the truncation test must compare against the
 * size passed (MAX_PATH - 1). The previous hard-coded 1023 did not match
 * that size, so a truncated path was never detected.
 *
 * NOTE(review): partName is copied into job.cj_part with strcpy(); the
 * size of that field is not visible in this view.
 */
3611 nt_SalvagePartition(char *partName, int jobn)
3616 if (!*execpathname) {
3617 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
3618 if (!n || n == MAX_PATH - 1)
3621 job.cj_magic = SALVAGER_MAGIC;
3622 job.cj_number = jobn;
3623 (void)strcpy(job.cj_part, partName);
3624 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage: child-side counterpart of
 * nt_SalvagePartition().
 *
 *   datap - job record received from the parent, treated as a childJob_t
 *   len   - its size in bytes
 *
 * The record is rejected when `len` is not sizeof(childJob_t) or the
 * magic is not SALVAGER_MAGIC (the values returned are not visible in
 * this view). A log file name of the form "<SalvageLog path>.<n>" is then
 * built and opened for writing as logFile; the second format argument is
 * not visible here (presumably the job number -- confirm).
 *
 * NOTE(review): the name is built with sprintf() into a buffer of
 * AFSDIR_PATH_MAX bytes with no visible length check.
 */
3629 nt_SetupPartitionSalvage(void *datap, int len)
3631 childJob_t *jobp = (childJob_t *) datap;
3632 char logname[AFSDIR_PATH_MAX];
3634 if (len != sizeof(childJob_t))
3636 if (jobp->cj_magic != SALVAGER_MAGIC)
3641 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
3643 logFile = afs_fopen(logname, "w");
3651 #endif /* AFS_NT40_ENV */