2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
91 #include <sys/param.h>
95 #endif /* ITIMER_REAL */
101 #include <sys/stat.h>
106 #include <WINNT/afsevent.h>
108 #if defined(AFS_AIX_ENV) || defined(AFS_SUN4_ENV)
109 #define WCOREDUMP(x) (x & 0200)
112 #include <afs/afsint.h>
113 #include <afs/assert.h>
114 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
115 #if defined(AFS_VFSINCL_ENV)
116 #include <sys/vnode.h>
118 #include <sys/fs/ufs_inode.h>
120 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
121 #include <ufs/ufs/dinode.h>
122 #include <ufs/ffs/fs.h>
124 #include <ufs/inode.h>
127 #else /* AFS_VFSINCL_ENV */
129 #include <ufs/inode.h>
130 #else /* AFS_OSF_ENV */
131 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
132 #include <sys/inode.h>
135 #endif /* AFS_VFSINCL_ENV */
136 #endif /* AFS_SGI_ENV */
139 #include <sys/lockf.h>
143 #include <checklist.h>
145 #if defined(AFS_SGI_ENV)
150 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
153 #include <sys/mnttab.h>
154 #include <sys/mntent.h>
159 #endif /* AFS_SGI_ENV */
160 #endif /* AFS_HPUX_ENV */
165 #include <afs/osi_inode.h>
169 #include <afs/afsutil.h>
170 #include <afs/fileutil.h>
171 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
179 #include <afs/afssyscalls.h>
183 #include "partition.h"
184 #include "daemon_com.h"
186 #include "salvsync.h"
187 #include "viceinode.h"
189 #include "volinodes.h" /* header magic number, etc. stuff */
190 #include "vol-salvage.h"
191 #include "vol_internal.h"
197 /*@+fcnmacros +macrofcndecl@*/
200 extern off64_t afs_lseek(int FD, off64_t O, int F);
201 #endif /*S_SPLINT_S */
202 #define afs_lseek(FD, O, F) lseek64(FD, (off64_t) (O), F)
203 #define afs_stat stat64
204 #define afs_fstat fstat64
205 #define afs_open open64
206 #define afs_fopen fopen64
207 #else /* !O_LARGEFILE */
209 extern off_t afs_lseek(int FD, off_t O, int F);
210 #endif /*S_SPLINT_S */
211 #define afs_lseek(FD, O, F) lseek(FD, (off_t) (O), F)
212 #define afs_stat stat
213 #define afs_fstat fstat
214 #define afs_open open
215 #define afs_fopen fopen
216 #endif /* !O_LARGEFILE */
217 /*@=fcnmacros =macrofcndecl@*/
220 extern void *calloc();
222 static char *TimeStamp(time_t clock, int precision);
225 int debug; /* -d flag */
226 extern int Testing; /* -n flag */
227 int ListInodeOption; /* -i flag */
228 int ShowRootFiles; /* -r flag */
229 int RebuildDirs; /* -sal flag */
230 int Parallel = 4; /* -para X flag */
231 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
232 int forceR = 0; /* -b flag */
233 int ShowLog = 0; /* -showlog flag */
234 int ShowSuid = 0; /* -showsuid flag */
235 int ShowMounts = 0; /* -showmounts flag */
236 int orphans = ORPH_IGNORE; /* -orphans option */
241 int useSyslog = 0; /* -syslog flag */
242 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
251 #define MAXPARALLEL 32
253 int OKToZap; /* -o flag */
254 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
255 * in the volume header */
257 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
259 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
261 Device fileSysDevice; /* The device number of the current
262 * partition being salvaged */
266 char *fileSysPath; /* The path of the mounted partition currently
267 * being salvaged, i.e. the directory
268 * containing the volume headers */
270 char *fileSysPathName; /* NT needs this to make name pretty in log. */
271 IHandle_t *VGLinkH; /* Link handle for current volume group. */
272 int VGLinkH_cnt; /* # of references to lnk handle. */
273 struct DiskPartition64 *fileSysPartition; /* Partition being salvaged */
275 char *fileSysDeviceName; /* The block device where the file system
276 * being salvaged was mounted */
277 char *filesysfulldev;
279 int VolumeChanged; /* Set by any routine which would change the volume in
280 * a way which would require callbacks to be broken if the
281 * volume was put back on line by an active file server */
283 VolumeDiskData VolInfo; /* A copy of the last good or salvaged volume header dealt with */
285 int nVolumesInInodeFile; /* Number of read-write volumes summarized */
286 int inodeFd; /* File descriptor for inode file */
289 struct VnodeInfo vnodeInfo[nVNODECLASSES];
292 struct VolumeSummary *volumeSummaryp; /* Holds all the volumes in a part */
293 int nVolumes; /* Number of volumes (read-write and read-only)
294 * in volume summary */
300 /* Forward declarations */
301 /*@printflike@*/ void Log(const char *format, ...);
302 /*@printflike@*/ void Abort(const char *format, ...);
303 static int IsVnodeOrphaned(VnodeId vnode);
305 /* Uniquifier stored in the Inode */
310 return (u & 0x3fffff);
312 #if defined(AFS_SGI_EXMAG)
313 return (u & SGI_UNIQMASK);
316 #endif /* AFS_SGI_EXMAG */
/*
 * BadError: classify an errno-style code.
 *
 * EPERM, ENXIO and ENOENT are singled out by the test below; every other
 * code falls through to the final "return 0", i.e. it is treated as
 * possibly transient (EMFILE is the example given).
 *
 * NOTE(review): the statement guarded by the `if` is not visible in this
 * copy of the file -- presumably it returns non-zero; confirm.
 */
321 BadError(register int aerror)
323 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
325 return 0; /* otherwise may be transient, e.g. EMFILE */
330 char *save_args[MAX_ARGS];
332 extern pthread_t main_thread;
333 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
336 /* Get the salvage lock if not already held. Hold until process exits. */
/*
 * ObtainSalvageLock: open (creating it if necessary) the file named by
 * AFSDIR_SERVER_SLVGLOCK_FILEPATH and lock it.
 *
 *  - CreateFile() variant: the file is opened with OPEN_ALWAYS; an
 *    INVALID_FD result is reported as "another salvager running".
 *  - afs_open() variant: opened O_CREAT | O_RDWR with mode 0666; a negative
 *    descriptor is reported as "can't open salvage lock file".  The lock is
 *    then taken with flock(LOCK_EX) under AFS_DARWIN_ENV, lockf(F_LOCK)
 *    otherwise.
 *
 * NOTE(review): LOCK_EX without LOCK_NB and F_LOCK are both blocking
 * requests, so a second salvager would normally wait here instead of
 * reaching the "another salvager running" message -- confirm that this is
 * the intended behaviour.
 * NOTE(review): what follows each message (presumably an exit) is not
 * visible in this copy of the file.
 */
338 ObtainSalvageLock(void)
344 (FD_t)CreateFile(AFSDIR_SERVER_SLVGLOCK_FILEPATH, 0, 0, NULL,
345 OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
346 if (salvageLock == INVALID_FD) {
348 "salvager: There appears to be another salvager running! Aborted.\n");
353 afs_open(AFSDIR_SERVER_SLVGLOCK_FILEPATH, O_CREAT | O_RDWR, 0666);
354 if (salvageLock < 0) {
356 "salvager: can't open salvage lock file %s, aborting\n",
357 AFSDIR_SERVER_SLVGLOCK_FILEPATH);
360 #ifdef AFS_DARWIN_ENV
361 if (flock(salvageLock, LOCK_EX) == -1) {
363 if (lockf(salvageLock, F_LOCK, 0) == -1) {
366 "salvager: There appears to be another salvager running! Aborted.\n");
373 #ifdef AFS_SGI_XFS_IOPS_ENV
/*
 * Check if the given partition is mounted.  For XFS, the root inode is not a
 * constant, so we check the hard way: by looking the mount point up in the
 * system mount table (MOUNTED).
 *
 *   part - mount point path; compared with strcmp() against the mnt_dir
 *          field of every entry returned by getmntent().
 *
 * Returns 1 if an entry with that mount directory exists, 0 otherwise.
 * Aborts through assert() if the mount table cannot be opened.
 */
int
IsPartitionMounted(char *part)
{
    FILE *mntfp;
    struct mntent *mntent;
    int found = 0;

    /* Keep the side effect out of the assert() expression so the open
     * still happens if assertions are ever compiled out. */
    mntfp = setmntent(MOUNTED, "r");
    assert(mntfp != NULL);

    while ((mntent = getmntent(mntfp)) != NULL) {
        if (strcmp(part, mntent->mnt_dir) == 0) {
            found = 1;
            break;
        }
    }
    endmntent(mntfp);

    /* The previous "mntent ? 1 : 1" answered 1 even when no entry matched. */
    return found;
}
393 /* Check if the given inode is the root of the filesystem. */
394 #ifndef AFS_SGI_XFS_IOPS_ENV
/*
 * IsRootInode: report whether `status` describes the root of a file system
 * by comparing its st_ino with ROOTINODE.
 *
 *   status - stat result for the directory being examined.
 *
 * The result is the value of the comparison (non-zero when the inode number
 * equals ROOTINODE).  The definition sits under
 * "#ifndef AFS_SGI_XFS_IOPS_ENV": a fixed root inode number is not assumed
 * for XFS.
 */
396 IsRootInode(struct afs_stat *status)
399 * The root inode is not a fixed value in XFS partitions. So we need to
400 * see if the partition is in the list of mounted partitions. This only
401 * affects the SalvageFileSys path, so we check there.
403 return (status->st_ino == ROOTINODE);
408 #ifndef AFS_NAMEI_ENV
409 /* We don't want to salvage big files filesystems, since we can't put volumes on
/*
 * CheckIfBigFilesFS: refuse to salvage a partition that is a "big files"
 * file system.
 *
 *   mountPoint - partition mount point (its use is not visible in the lines
 *                kept here; presumably it is the %s argument of the two
 *                Log() calls -- confirm).
 *   devName    - device name; "/dev/" is prepended when it does not already
 *                start with that prefix.
 *
 * The superblock is read with ReadSuper(); a read failure and a positive
 * IsBigFilesFileSystem() result are both logged as "not salvaging".
 *
 * NOTE(review): `name` is filled with sprintf()/strcpy() without a length
 * limit; its declaration is not visible here, so the bound cannot be
 * checked from this copy.
 * NOTE(review): the return statements are not visible in this copy.
 */
413 CheckIfBigFilesFS(char *mountPoint, char *devName)
415 struct superblock fs;
418 if (strncmp(devName, "/dev/", 5)) {
419 (void)sprintf(name, "/dev/%s", devName);
421 (void)strcpy(name, devName);
424 if (ReadSuper(&fs, name) < 0) {
425 Log("Unable to read superblock. Not salvaging partition %s.\n",
429 if (IsBigFilesFileSystem(&fs)) {
430 Log("Partition %s is a big files filesystem, not salvaging.\n",
440 #define HDSTR "\\Device\\Harddisk"
441 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
/*
 * SameDisk (QueryDosDevice variant): decide whether two partitions live on
 * the same physical disk.
 *
 * For each partition the device name is resolved with QueryDosDevice() and
 * the result is expected to start with HDSTR ("\Device\Harddisk"); the
 * number that follows the prefix is parsed with atoi() into d1 / d2.  When
 * the prefix does not match, a WARNING is logged.
 *
 * NOTE(review): `dowarn` is a function-static flag initialised to 1 --
 * presumably it limits the warning to one occurrence; the code that tests
 * and clears it is not visible in this copy.
 * NOTE(review): the return statements (including the final d1/d2
 * comparison) are not visible in this copy.
 */
443 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
448 static int dowarn = 1;
450 if (!QueryDosDevice(p1->devName, res, RES_LEN - 1))
452 if (strncmp(res, HDSTR, HDLEN)) {
455 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
456 res, HDSTR, p1->devName);
460 d1 = atoi(&res[HDLEN]);
462 if (!QueryDosDevice(p2->devName, res, RES_LEN - 1))
464 if (strncmp(res, HDSTR, HDLEN)) {
467 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
468 res, HDSTR, p2->devName);
472 d2 = atoi(&res[HDLEN]);
477 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
480 /* This assumes that two partitions with the same device number divided by
481 * PartsPerDisk are on the same disk.
/*
 * SalvageFileSysParallel: queue one partition for salvage, or -- when partP
 * is NULL -- wait for every queued salvage to finish and collect the
 * per-job log files.
 *
 * What the visible code does:
 *  - A non-NULL partP is wrapped in a malloc'ed, zeroed struct job that
 *    records the partition and the current jobcount as its job number.  A
 *    NULL partP while jobcount is still 0 logs that no partitions named
 *    VICE_PARTITION_PREFIX* were found.
 *  - When debug is set or Parallel == 1, SalvageFileSys() is called
 *    directly for the partition.
 *  - jobs[] holds one chain per disk (as decided by SameDisk); a partition
 *    whose disk already has a chain is linked in right behind the chain
 *    head through nextjob.
 *  - In the main loop, when numjobs has reached Parallel or all jobs are
 *    being drained, wait() collects a child, a core dump is logged if
 *    WCOREDUMP() says so, the finished job is freed, and the slot is
 *    refilled with the next partition on that disk, with thisjob, or with
 *    the last entry of jobs[] to keep the array compact.
 *  - A job that is started is logged, then run either through
 *    nt_SalvagePartition() or through SalvageFileSys1() after logging has
 *    been set up (openlog(), or a log file named from
 *    AFSDIR_SERVER_SLVGLOG_FILEPATH and the job number; the condition that
 *    chooses between the two is not visible in this copy).
 *  - On the draining call, when syslog is not in use, every "<log>.<i>"
 *    file for i < jobcount is read line by line with fgets() and then
 *    unlinked.
 *
 * NOTE(review): jobs[] has MAXPARALLEL slots while admission is limited by
 * Parallel; the check that Parallel never exceeds MAXPARALLEL is not in
 * this function's visible text -- confirm it is enforced where the option
 * is parsed.
 * NOTE(review): after wait() the pid search can end with j == numjobs when
 * the pid belongs to no job; any guard for that case is not visible in this
 * copy -- confirm before relying on jobs[j].
 */
484 SalvageFileSysParallel(struct DiskPartition64 *partP)
487 struct DiskPartition64 *partP;
488 int pid; /* Pid for this job */
489 int jobnumb; /* Log file job number */
490 struct job *nextjob; /* Next partition on disk to salvage */
492 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
493 struct job *thisjob = 0;
494 static int numjobs = 0;
495 static int jobcount = 0;
501 char logFileName[256];
505 /* We have a partition to salvage. Copy it into thisjob */
506 thisjob = (struct job *)malloc(sizeof(struct job));
508 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
511 memset(thisjob, 0, sizeof(struct job));
512 thisjob->partP = partP;
513 thisjob->jobnumb = jobcount;
515 } else if (jobcount == 0) {
516 /* We are asking to wait for all jobs (partp == 0), yet we never
519 Log("No file system partitions named %s* found; not salvaged\n",
520 VICE_PARTITION_PREFIX);
524 if (debug || Parallel == 1) {
526 SalvageFileSys(thisjob->partP, 0);
533 /* Check to see if thisjob is for a disk that we are already
534 * salvaging. If it is, link it in as the next job to do. The
535 * jobs array has 1 entry per disk being salvages. numjobs is
536 * the total number of disks currently being salvaged. In
537 * order to keep thejobs array compact, when a disk is
538 * completed, the hightest element in the jobs array is moved
539 * down to now open slot.
541 for (j = 0; j < numjobs; j++) {
542 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
543 /* On same disk, add it to this list and return */
544 thisjob->nextjob = jobs[j]->nextjob;
545 jobs[j]->nextjob = thisjob;
552 /* Loop until we start thisjob or until all existing jobs are finished */
553 while (thisjob || (!partP && (numjobs > 0))) {
554 startjob = -1; /* No new job to start */
556 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
557 /* Either the max jobs are running or we have to wait for all
558 * the jobs to finish. In either case, we wait for at least one
559 * job to finish. When it's done, clean up after it.
561 pid = wait(&wstatus);
563 for (j = 0; j < numjobs; j++) { /* Find which job it is */
564 if (pid == jobs[j]->pid)
568 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
569 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
572 numjobs--; /* job no longer running */
573 oldjob = jobs[j]; /* remember */
574 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
575 free(oldjob); /* free the old job */
577 /* If there is another partition on the disk to salvage, then
578 * say we will start it (startjob). If not, then put thisjob there
579 * and say we will start it.
581 if (jobs[j]) { /* Another partitions to salvage */
582 startjob = j; /* Will start it */
583 } else { /* There is not another partition to salvage */
585 jobs[j] = thisjob; /* Add thisjob */
587 startjob = j; /* Will start it */
589 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
590 startjob = -1; /* Don't start it - already running */
594 /* We don't have to wait for a job to complete */
596 jobs[numjobs] = thisjob; /* Add this job */
598 startjob = numjobs; /* Will start it */
602 /* Start up a new salvage job on a partition in job slot "startjob" */
603 if (startjob != -1) {
605 Log("Starting salvage of file system partition %s\n",
606 jobs[startjob]->partP->name);
608 /* For NT, we not only fork, but re-exec the salvager. Pass in the
609 * commands and pass the child job number via the data path.
612 nt_SalvagePartition(jobs[startjob]->partP->name,
613 jobs[startjob]->jobnumb);
614 jobs[startjob]->pid = pid;
619 jobs[startjob]->pid = pid;
625 for (fd = 0; fd < 16; fd++)
632 openlog("salvager", LOG_PID, useSyslogFacility);
636 (void)afs_snprintf(logFileName, sizeof logFileName,
638 AFSDIR_SERVER_SLVGLOG_FILEPATH,
639 jobs[startjob]->jobnumb);
640 logFile = afs_fopen(logFileName, "w");
645 SalvageFileSys1(jobs[startjob]->partP, 0);
650 } /* while ( thisjob || (!partP && numjobs > 0) ) */
652 /* If waited for all jobs to complete, now collect log files and return */
654 if (!useSyslog) /* if syslogging - no need to collect */
657 for (i = 0; i < jobcount; i++) {
658 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
659 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
660 if ((passLog = afs_fopen(logFileName, "r"))) {
661 while (fgets(buf, sizeof(buf), passLog)) {
666 (void)unlink(logFileName);
/*
 * SalvageFileSys: run SalvageFileSys1() for a partition, in a forked child
 * when forking is possible.
 *
 *   partP              - partition to salvage.
 *   singleVolumeNumber - passed straight through to SalvageFileSys1().
 *
 * SalvageFileSys1() is called in-process when canfork is 0 or debug is set,
 * otherwise in the child created by Fork().  The process that did not take
 * that branch calls Wait("SalvageFileSys").
 *
 * NOTE(review): the statement under "if (canfork && !debug)" is not
 * visible in this copy -- presumably the child exits there; confirm.
 */
675 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
677 if (!canfork || debug || Fork() == 0) {
678 SalvageFileSys1(partP, singleVolumeNumber);
679 if (canfork && !debug) {
684 Wait("SalvageFileSys");
/*
 * get_DevName: split a device path in place.
 *
 *   pbuffer - in:  a path such as "/dev/sda1";
 *             out: overwritten with the component after the last '/'
 *                  ("sda1").
 *   wpath   - out: receives everything before the last '/' ("/dev").  The
 *             caller must supply room for at least strlen(pbuffer) bytes.
 *
 * Returns pbuffer on success.  Returns NULL, leaving both buffers
 * untouched, when pbuffer contains no '/'.
 */
char *
get_DevName(char *pbuffer, char *wpath)
{
    char *slash;
    size_t dirlen;

    slash = strrchr(pbuffer, '/');
    if (slash == NULL)
        return NULL;

    /* Copy the directory part straight out of pbuffer; no fixed-size
     * scratch buffer is involved, so a long device name cannot overrun
     * one. */
    dirlen = (size_t)(slash - pbuffer);
    memcpy(wpath, pbuffer, dirlen);
    wpath[dirlen] = '\0';

    /* Source and destination overlap here, which strcpy() does not allow;
     * memmove() is defined for overlapping regions. */
    memmove(pbuffer, slash + 1, strlen(slash + 1) + 1);
    return pbuffer;
}
/*
 * SalvageFileSys1: salvage one partition, or only the volume group of
 * singleVolumeNumber when that argument is non-zero.
 *
 * Visible sequence:
 *  1. Record the partition in file-scope state: fileSysPartition,
 *     fileSysDevice, fileSysPathName, fileSysPath and, on the get_DevName()
 *     path, fileSysDeviceName and filesysfulldev.
 *  2. VLockPartition(), then settle ForceSalvage; UseTheForceLuke() is
 *     consulted on the path where neither a single volume nor an existing
 *     ForceSalvage already decided it.
 *  3. Single volume: connect to the file server (VConnectFS) unless running
 *     as salvageServer, then AskOffline() the volume.  Otherwise log the
 *     start of the partition salvage.
 *  4. Scan the partition directory for files whose names begin with
 *     "salvage.inodes." or "salvage.temp." and log their removal.
 *  5. Pick the inode list file name (under tmpdir if set, else fileSysPath),
 *     build it with GetInodeSummary(), reopen it and unlink it immediately
 *     while the descriptor stays open.
 *  6. GetVolumeSummary(), then walk inodeSummary[] in groups sharing one
 *     RWvolumeId: headers in volumeSummaryp that sort before the group are
 *     handed to DeleteExtraVolumeHeaderFile(), each inode summary is paired
 *     with the header whose id matches, and SalvageVolumeGroup() is called
 *     per group.
 *  7. Hand the remaining headers to DeleteExtraVolumeHeaderFile(), call
 *     RemoveTheForce() for whole-partition runs, AskOnline() the volume(s),
 *     log completion and close inodeFd.
 *
 * NOTE(review): the assert() around opendir() contains the assignment to
 * dirp, so the open itself depends on assertions being compiled in.
 * NOTE(review): tmpDevName/wpath are 100-byte statics filled with strcpy()
 * from partP->devName, and npath is built with strcpy()/strcat(); none of
 * these copies is length-checked in the visible lines.
 * NOTE(review): strncpy(inodeListPath, _tempnam(...), 255) does not itself
 * guarantee a terminating NUL, and the _tempnam() result is not kept for
 * release -- confirm on the platform that uses this path.
 */
707 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
710 char inodeListPath[256];
711 static char tmpDevName[100];
712 static char wpath[100];
713 struct VolumeSummary *vsp, *esp;
716 fileSysPartition = partP;
717 fileSysDevice = fileSysPartition->device;
718 fileSysPathName = VPartitionPath(fileSysPartition);
721 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
722 (void)sprintf(fileSysPath, "%s\\", fileSysPathName);
723 name = partP->devName;
725 fileSysPath = fileSysPathName;
726 strcpy(tmpDevName, partP->devName);
727 name = get_DevName(tmpDevName, wpath);
728 fileSysDeviceName = name;
729 filesysfulldev = wpath;
732 VLockPartition(partP->name);
733 if (singleVolumeNumber || ForceSalvage)
736 ForceSalvage = UseTheForceLuke(fileSysPath);
738 if (singleVolumeNumber) {
739 /* salvageserver already setup fssync conn for us */
740 if ((programType != salvageServer) && !VConnectFS()) {
741 Abort("Couldn't connect to file server\n");
743 AskOffline(singleVolumeNumber, partP->name);
746 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
747 partP->name, name, (Testing ? "(READONLY mode)" : ""));
749 Log("***Forced salvage of all volumes on this partition***\n");
754 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
761 assert((dirp = opendir(fileSysPath)) != NULL);
762 while ((dp = readdir(dirp))) {
763 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
764 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
766 Log("Removing old salvager temp files %s\n", dp->d_name);
767 strcpy(npath, fileSysPath);
769 strcat(npath, dp->d_name);
775 tdir = (tmpdir ? tmpdir : fileSysPath);
777 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
778 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
780 snprintf(inodeListPath, 255, "%s/salvage.inodes.%s.%d", tdir, name,
783 if (GetInodeSummary(inodeListPath, singleVolumeNumber) < 0) {
784 unlink(inodeListPath);
788 /* Using nt_unlink here since we're really using the delete on close
789 * semantics of unlink. In most places in the salvager, we really do
790 * mean to unlink the file at that point. Those places have been
791 * modified to actually do that so that the NT crt can be used there.
794 _open_osfhandle((intptr_t)nt_open(inodeListPath, O_RDWR, 0), O_RDWR);
795 nt_unlink(inodeListPath); /* NT's crt unlink won't if file is open. */
797 inodeFd = afs_open(inodeListPath, O_RDONLY);
798 unlink(inodeListPath);
801 Abort("Temporary file %s is missing...\n", inodeListPath);
802 if (ListInodeOption) {
806 /* enumerate volumes in the partition.
807 * figure out sets of read-only + rw volumes.
808 * salvage each set, read-only volumes first, then read-write.
809 * Fix up inodes on last volume in set (whether it is read-write
812 GetVolumeSummary(singleVolumeNumber);
814 for (i = j = 0, vsp = volumeSummaryp, esp = vsp + nVolumes;
815 i < nVolumesInInodeFile; i = j) {
816 VolumeId rwvid = inodeSummary[i].RWvolumeId;
818 j < nVolumesInInodeFile && inodeSummary[j].RWvolumeId == rwvid;
820 VolumeId vid = inodeSummary[j].volumeId;
821 struct VolumeSummary *tsp;
822 /* Scan volume list (from partition root directory) looking for the
823 * current rw volume number in the volume list from the inode scan.
824 * If there is one here that is not in the inode volume list,
826 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
828 DeleteExtraVolumeHeaderFile(vsp);
830 /* Now match up the volume summary info from the root directory with the
831 * entry in the volume list obtained from scanning inodes */
832 inodeSummary[j].volSummary = NULL;
833 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
834 if (tsp->header.id == vid) {
835 inodeSummary[j].volSummary = tsp;
841 /* Salvage the group of volumes (several read-only + 1 read/write)
842 * starting with the current read-only volume we're looking at.
844 SalvageVolumeGroup(&inodeSummary[i], j - i);
847 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
848 for (; vsp < esp; vsp++) {
850 DeleteExtraVolumeHeaderFile(vsp);
853 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
854 RemoveTheForce(fileSysPath);
856 if (!Testing && singleVolumeNumber) {
857 AskOnline(singleVolumeNumber, fileSysPartition->name);
859 /* Step through the volumeSummary list and set all volumes on-line.
860 * The volumes were taken off-line in GetVolumeSummary.
862 for (j = 0; j < nVolumes; j++) {
863 AskOnline(volumeSummaryp[j].header.id, fileSysPartition->name);
867 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
868 fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
871 close(inodeFd); /* SalvageVolumeGroup was the last which needed it. */
/*
 * DeleteExtraVolumeHeaderFile: deal with a volume header file that has no
 * matching inode data.
 *
 *   vsp - summary entry whose fileName (relative to fileSysPath) names the
 *         header file.
 *
 * The full path is built as "<fileSysPath>/<fileName>", the situation is
 * logged (worded "would have been deleted" when Testing is set), and a
 * failed unlink is logged together with errno.
 *
 * NOTE(review): `path` is filled with sprintf() without a length limit; its
 * declaration is not visible in this copy.
 * NOTE(review): the unlink call itself and the Testing guard around it are
 * not visible in this copy.
 */
875 DeleteExtraVolumeHeaderFile(register struct VolumeSummary *vsp)
878 sprintf(path, "%s/%s", fileSysPath, vsp->fileName);
881 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
884 Log("Unable to unlink %s (errno = %d)\n", path, errno);
/*
 * CompareInodes: qsort() comparator for struct ViceInodeInfo records (used
 * by GetInodeSummary to order the inode table).
 *
 * Ordering visible in the code:
 *  1. If either record is a special inode (vnodeNumber == INODESPECIAL),
 *     each side's read-write id is taken from u.special.parentId for a
 *     special inode and from u.vnode.volumeId otherwise.  Two special
 *     inodes of the same volume are ordered by u.special.type; further
 *     tie-breaks favour the record whose volumeId equals its read-write id
 *     and then the lower volumeId.
 *  2. Otherwise records are ordered by volumeId, then vnodeNumber.
 *  3. For equal vnode numbers the tests are deliberately reversed so the
 *     most desirable of several similar inodes sorts first: uniquifier is
 *     compared, then inodeDataVersion, each with a wrap-around heuristic
 *     (one value above 90% and the other below 10% of the counter range
 *     given in the inline comments).
 *
 * NOTE(review): most of the return statements are not visible in this copy,
 * so the sign returned by each branch cannot be confirmed from here.
 * NOTE(review): the special-type comparison returns -1 or 1 and never 0,
 * even for two records with equal type.
 */
891 CompareInodes(const void *_p1, const void *_p2)
893 register const struct ViceInodeInfo *p1 = _p1;
894 register const struct ViceInodeInfo *p2 = _p2;
895 if (p1->u.vnode.vnodeNumber == INODESPECIAL
896 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
897 VolumeId p1rwid, p2rwid;
899 (p1->u.vnode.vnodeNumber ==
900 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
902 (p2->u.vnode.vnodeNumber ==
903 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
908 if (p1->u.vnode.vnodeNumber == INODESPECIAL
909 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
910 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
911 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
912 if (p1->u.vnode.volumeId == p1rwid)
914 if (p2->u.vnode.volumeId == p2rwid)
916 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
918 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
919 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
920 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
922 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
924 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
926 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
928 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
930 /* The following tests are reversed, so that the most desirable
931 * of several similar inodes comes first */
932 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
934 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
935 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
939 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
940 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
945 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
947 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
948 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
952 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
953 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
958 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
960 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
961 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
965 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
966 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
971 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
973 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
974 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
978 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
979 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/*
 * CountVolumeInodes: summarise the run of inode records that belong to the
 * same volume as the first record.
 *
 *   ip        - first record of the run, in a table already sorted so that
 *               one volume's records are adjacent.
 *   maxInodes - number of records available starting at ip.
 *   summary   - out: volumeId, RWvolumeId, nInodes, nSpecialInodes and
 *               maxUniquifier are filled in.
 *
 * The scan stops when maxInodes is exhausted or the volumeId changes.  For
 * a special inode (vnodeNumber == INODESPECIAL) the read-write volume id is
 * taken from u.special.parentId; as the inline comment says, differing
 * parent ids among special inodes are not detected.  The largest
 * vnodeUniquifier seen is tracked in maxunique.
 *
 * NOTE(review): the initialisation of n, nSpecial and maxunique, the
 * counter increments and the return statement are not visible in this
 * copy.
 */
988 CountVolumeInodes(register struct ViceInodeInfo *ip, int maxInodes,
989 register struct InodeSummary *summary)
991 VolumeId volume = ip->u.vnode.volumeId;
992 VolumeId rwvolume = volume;
993 register int n, nSpecial;
994 register Unique maxunique;
997 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
999 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1001 rwvolume = ip->u.special.parentId;
1002 /* This isn't quite right, as there could (in error) be different
1003 * parent inodes in different special vnodes */
1005 if (maxunique < ip->u.vnode.vnodeUniquifier)
1006 maxunique = ip->u.vnode.vnodeUniquifier;
1010 summary->volumeId = volume;
1011 summary->RWvolumeId = rwvolume;
1012 summary->nInodes = n;
1013 summary->nSpecialInodes = nSpecial;
1014 summary->maxUniquifier = maxunique;
/*
 * OnlyOneVolume: inode filter used when a single volume is being salvaged
 * (GetInodeSummary passes it to ListViceInodes).
 *
 *   inodeinfo          - inode record under consideration.
 *   singleVolumeNumber - id of the volume being salvaged.
 *   rock               - not referenced in the visible lines.
 *
 * Returns non-zero when the record belongs to singleVolumeNumber: special
 * inodes (vnodeNumber == INODESPECIAL) are matched on u.special.parentId,
 * all others on u.vnode.volumeId.
 */
1018 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1020 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1021 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1022 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1027 * Collect list of inodes in file named by path. If a truly fatal error,
1028 * unlink the file and abort. For lesser errors, return -1. The file will
1029 * be unlinked by the caller.
/*
 * GetInodeSummary: build the sorted inode table in `path` and load the
 * per-volume summaries derived from it into inodeSummary.
 *
 *   path               - file that ListViceInodes() fills with
 *                        struct ViceInodeInfo records.
 *   singleVolumeNumber - when non-zero, OnlyOneVolume is passed to
 *                        ListViceInodes() as the inode filter.
 *
 * Visible flow:
 *  - ListViceInodes() writes the raw records.  A negative result is either
 *    logged as an I/O error on the temporary file or ends in Abort().
 *  - A forced salvage requested by ListViceInodes() (forceSal) is logged
 *    when ForceSalvage was not already set.
 *  - The table is opened read-write into inodeFd and fstat'ed.
 *  - A summary file "<tdir>/salvage.temp.<pid>" (or a _tempnam() name) is
 *    opened in "a+" mode.
 *  - In the forked child, or in-process when canfork is 0 or debug is set:
 *    the whole table is read into memory, sorted with CompareInodes,
 *    written back to the start of the file, and CountVolumeInodes() is run
 *    over it repeatedly, appending one struct InodeSummary per volume to
 *    the summary file, which is then flushed and fsync'ed.  With no inodes
 *    at all, the volume headers found by GetVolumeSummary() are handed to
 *    DeleteExtraVolumeHeaderFile() and "No vice inodes" is logged.
 *  - The waiting side calls Wait("Inode summary"); on -1 it cleans up and
 *    calls Exit(1).  It then reads the whole summary file into a malloc'ed
 *    inodeSummary array, sets nVolumesInInodeFile, and closes and unlinks
 *    the summary file.
 *
 * NOTE(review): summaryFileName is only 50 bytes.  strcpy() of the
 * _tempnam() result is unbounded, and the afs_snprintf() path can truncate
 * for a long -tmpdir.
 * NOTE(review): the final Log() uses "%d" for an (unsigned long) argument;
 * the conversion specifier does not match the argument type.
 * NOTE(review): read()/write() results are compared directly with the
 * unsigned st_size, so a short transfer is treated as a failure.
 * NOTE(review): the return statements are not visible in this copy.
 */
1032 GetInodeSummary(char *path, VolumeId singleVolumeNumber)
1034 struct afs_stat status;
1036 struct ViceInodeInfo *ip;
1037 struct InodeSummary summary;
1038 char summaryFileName[50];
1041 char *dev = fileSysPath;
1042 char *wpath = fileSysPath;
1044 char *dev = fileSysDeviceName;
1045 char *wpath = filesysfulldev;
1047 char *part = fileSysPath;
1050 /* This file used to come from vfsck; cobble it up ourselves now... */
1052 ListViceInodes(dev, fileSysPath, path,
1053 singleVolumeNumber ? OnlyOneVolume : 0,
1054 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1056 Log("*** I/O error %d when writing a tmp inode file %s; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, path, dev);
1060 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1062 if (forceSal && !ForceSalvage) {
1063 Log("***Forced salvage of all volumes on this partition***\n");
1066 inodeFd = afs_open(path, O_RDWR);
1067 if (inodeFd == -1 || afs_fstat(inodeFd, &status) == -1) {
1069 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1071 tdir = (tmpdir ? tmpdir : part);
1073 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1074 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp"));
1076 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1077 "%s/salvage.temp.%d", tdir, getpid());
1079 summaryFile = afs_fopen(summaryFileName, "a+");
1080 if (summaryFile == NULL) {
1083 Abort("Unable to create inode summary file\n");
1085 if (!canfork || debug || Fork() == 0) {
1087 unsigned long st_size=(unsigned long) status.st_size;
1088 nInodes = st_size / sizeof(struct ViceInodeInfo);
1090 fclose(summaryFile);
1092 unlink(summaryFileName);
1093 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1094 RemoveTheForce(fileSysPath);
1096 struct VolumeSummary *vsp;
1099 GetVolumeSummary(singleVolumeNumber);
1101 for (i = 0, vsp = volumeSummaryp; i < nVolumes; i++) {
1103 DeleteExtraVolumeHeaderFile(vsp);
1106 Log("%s vice inodes on %s; not salvaged\n",
1107 singleVolumeNumber ? "No applicable" : "No", dev);
1110 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1112 fclose(summaryFile);
1115 unlink(summaryFileName);
1117 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1120 if (read(inodeFd, ip, st_size) != st_size) {
1121 fclose(summaryFile);
1124 unlink(summaryFileName);
1125 Abort("Unable to read inode table; %s not salvaged\n", dev);
1127 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1128 if (afs_lseek(inodeFd, 0, SEEK_SET) == -1
1129 || write(inodeFd, ip, st_size) != st_size) {
1130 fclose(summaryFile);
1133 unlink(summaryFileName);
1134 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1138 CountVolumeInodes(ip, nInodes, &summary);
1139 if (fwrite(&summary, sizeof(summary), 1, summaryFile) != 1) {
1140 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1141 fclose(summaryFile);
1145 summary.index += (summary.nInodes);
1146 nInodes -= summary.nInodes;
1147 ip += summary.nInodes;
1149 /* Following fflush is not fclose, because if it was debug mode would not work */
1150 if (fflush(summaryFile) == EOF || fsync(fileno(summaryFile)) == -1) {
1151 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1152 fclose(summaryFile);
1156 if (canfork && !debug) {
1161 if (Wait("Inode summary") == -1) {
1162 fclose(summaryFile);
1165 unlink(summaryFileName);
1166 Exit(1); /* salvage of this partition aborted */
1169 assert(afs_fstat(fileno(summaryFile), &status) != -1);
1170 if (status.st_size != 0) {
1172 unsigned long st_status=(unsigned long)status.st_size;
1173 inodeSummary = (struct InodeSummary *)malloc(st_status);
1174 assert(inodeSummary != NULL);
1175 /* For GNU we need to do lseek to get the file pointer moved. */
1176 assert(afs_lseek(fileno(summaryFile), 0, SEEK_SET) == 0);
1177 ret = read(fileno(summaryFile), inodeSummary, st_status);
1178 assert(ret == st_status);
1180 nVolumesInInodeFile =(unsigned long)(status.st_size) / sizeof(struct InodeSummary);
1181 Log("%d nVolumesInInodeFile %d \n",nVolumesInInodeFile,(unsigned long)(status.st_size));
1182 fclose(summaryFile);
1184 unlink(summaryFileName);
1188 /* Comparison routine for volume sort.
1189 This is setup so that a read-write volume comes immediately before
1190 any read-only clones of that volume */
/*
 * CompareVolumes: qsort() comparator for struct VolumeSummary entries.
 *
 * Entries are ordered first by header.parent (ascending).  Within one
 * parent, an entry whose header.id equals header.parent is the read-write
 * volume and is tested for before the final comparison; two read-only
 * entries are ordered by header.id (ascending).
 *
 * NOTE(review): the return statements for the two read-write cases are not
 * visible in this copy; per the comment that precedes this function the
 * read-write volume is meant to sort first -- confirm.
 * NOTE(review): the last comparison yields -1 or 1, never 0.
 */
1192 CompareVolumes(const void *_p1, const void *_p2)
1194 register const struct VolumeSummary *p1 = _p1;
1195 register const struct VolumeSummary *p2 = _p2;
1196 if (p1->header.parent != p2->header.parent)
1197 return p1->header.parent < p2->header.parent ? -1 : 1;
1198 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1200 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1202 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
/*
 * GetVolumeSummary: read the volume header files in fileSysPath into the
 * volumeSummaryp array and sort it.
 *
 *   singleVolumeNumber - 0 to summarise every volume on the partition,
 *                        otherwise the one volume (group) being salvaged.
 *
 * Visible flow:
 *  - opendir(fileSysPath); Abort() if the directory cannot be read.
 *  - Whole-partition mode makes a first pass over every file whose name
 *    ends in VHDREXT, reading the disk header and checking its magic
 *    against VOLUMEHEADERMAGIC, and then sizes the array from nvols.
 *    Single-volume mode allocates a fixed 20 entries instead.
 *  - Second pass, per VHDREXT file:
 *      * unreadable or bad-magic header: in whole-partition mode it is
 *        logged as "not a legitimate volume header file" and a failed
 *        unlink is logged;
 *      * otherwise the header is converted with DiskToVolumeHeader().  If
 *        the requested single volume turns out to be a clone (its parent
 *        differs), the salvageserver build schedules the parent volume
 *        group through SALVSYNC_LinkVolume() and exits with
 *        SALSRV_EXIT_VOLGROUP_LINK; the other path logs that a read-only
 *        volume is not salvaged;
 *      * for volumes of interest the expected file name is formatted with
 *        VFORMAT, other members of the group are taken offline with
 *        AskOffline() in single-volume mode, a misnamed header file is
 *        logged as deleted, and a correctly named one has its name saved
 *        through ToString().
 *  - The array is finally sorted with qsort().
 *
 * NOTE(review): the re-open for the second pass uses opendir(".") rather
 * than fileSysPath ("No rewinddir for NT"); that only works if the current
 * directory is the partition directory -- confirm.
 * NOTE(review): single-volume mode allocates 20 entries; no bound check on
 * vsp is visible in this copy.
 * NOTE(review): `name` is built with sprintf() without a length limit, and
 * the close() of fd is not visible in this copy.
 * NOTE(review): the comparator argument of qsort() and the updates of
 * nvols/nVolumes are not visible in this copy.
 */
1206 GetVolumeSummary(VolumeId singleVolumeNumber)
1209 afs_int32 nvols = 0;
1210 struct VolumeSummary *vsp, vs;
1211 struct VolumeDiskHeader diskHeader;
1214 /* Get headers from volume directory */
1215 dirp = opendir(fileSysPath);
1217 Abort("Can't read directory %s; not salvaged\n", fileSysPath);
1218 if (!singleVolumeNumber) {
1219 while ((dp = readdir(dirp))) {
1220 char *p = dp->d_name;
1221 p = strrchr(dp->d_name, '.');
1222 if (p != NULL && strcmp(p, VHDREXT) == 0) {
1225 sprintf(name, "%s/%s", fileSysPath, dp->d_name);
1226 if ((fd = afs_open(name, O_RDONLY)) != -1
1227 && read(fd, (char *)&diskHeader, sizeof(diskHeader))
1228 == sizeof(diskHeader)
1229 && diskHeader.stamp.magic == VOLUMEHEADERMAGIC) {
1230 DiskToVolumeHeader(&vs.header, &diskHeader);
1238 dirp = opendir("."); /* No rewinddir for NT */
1245 (struct VolumeSummary *)malloc(nvols *
1246 sizeof(struct VolumeSummary));
1249 (struct VolumeSummary *)malloc(20 * sizeof(struct VolumeSummary));
1250 assert(volumeSummaryp != NULL);
1253 vsp = volumeSummaryp;
1254 while ((dp = readdir(dirp))) {
1255 char *p = dp->d_name;
1256 p = strrchr(dp->d_name, '.');
1257 if (p != NULL && strcmp(p, VHDREXT) == 0) {
1261 sprintf(name, "%s/%s", fileSysPath, dp->d_name);
1262 if ((fd = afs_open(name, O_RDONLY)) == -1
1263 || read(fd, &diskHeader, sizeof(diskHeader))
1264 != sizeof(diskHeader)
1265 || diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
1270 if (!singleVolumeNumber) {
1272 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1275 Log("Unable to unlink %s (errno = %d)\n", name, errno);
1280 char nameShouldBe[64];
1281 DiskToVolumeHeader(&vsp->header, &diskHeader);
1282 if (singleVolumeNumber && vsp->header.id == singleVolumeNumber
1283 && vsp->header.parent != singleVolumeNumber) {
1284 if (programType == salvageServer) {
1285 #ifdef SALVSYNC_BUILD_CLIENT
1286 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1287 vsp->header.id, vsp->header.parent);
1288 if (SALVSYNC_LinkVolume(vsp->header.parent,
1290 fileSysPartition->name,
1292 Log("schedule request failed\n");
1295 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1297 Log("%u is a read-only volume; not salvaged\n",
1298 singleVolumeNumber);
1302 if (!singleVolumeNumber
1303 || (vsp->header.id == singleVolumeNumber
1304 || vsp->header.parent == singleVolumeNumber)) {
1305 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1306 VFORMAT, afs_printable_uint32_lu(vsp->header.id));
1307 if (singleVolumeNumber
1308 && vsp->header.id != singleVolumeNumber)
1309 AskOffline(vsp->header.id, fileSysPartition->name);
1310 if (strcmp(nameShouldBe, dp->d_name)) {
1312 Log("Volume header file %s is incorrectly named; %sdeleted (it will be recreated later, if necessary)\n", name, (Testing ? "it would have been " : ""));
1315 Log("Unable to unlink %s (errno = %d)\n", name, errno);
1319 vsp->fileName = ToString(dp->d_name);
1329 qsort(volumeSummaryp, nVolumes, sizeof(struct VolumeSummary),
1333 /* Find the link table. This should be associated with the RW volume or, if
1334 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
/*
 * FindLinkHandle
 *
 * Search the special inodes of every volume in a volume group for the
 * link table inode.
 *
 * isp       - array of nVols per-volume inode summaries
 * nVols     - number of entries in isp
 * allInodes - base of the inode array; isp[i].index is the offset of
 *             volume i's first inode within it
 *
 * Returns the inodeNumber of the first special inode whose type is
 * VI_LINKTABLE.  The value returned when no such inode exists is not
 * visible in this listing -- TODO confirm against the full source.
 */
1337 FindLinkHandle(register struct InodeSummary *isp, int nVols,
1338 struct ViceInodeInfo *allInodes)
1341 struct ViceInodeInfo *ip;
1343 for (i = 0; i < nVols; i++) {
/* ip points at volume i's run of inodes; the first nSpecialInodes of
 * them are the ones examined as special inodes. */
1344 ip = allInodes + isp[i].index;
1345 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1346 if (ip[j].u.special.type == VI_LINKTABLE)
1347 return ip[j].inodeNumber;
/*
 * CreateLinkTable
 *
 * Set up the link table file for a volume group and install it as VGLinkH.
 *
 * isp - inode summary of the volume; isp->RWvolumeId names the group
 * ino - link table inode to use; when it is not a valid inode, IH_CREATE
 *       is called for a VI_LINKTABLE special inode (the assignment of
 *       its result is on a line not visible in this listing -- presumably
 *       it is stored back into ino)
 *
 * The file is truncated to sizeof(version) + sizeof(short) and a version
 * stamp carrying LINKTABLEMAGIC / LINKTABLEVERSION is written to it.  If
 * isp->volSummary is set, its header.linkTable is updated to ino.
 * Failure to open or truncate the file goes through Abort(); the inode
 * allocation and write failures presumably do too, but the first is
 * missing its call line and the second its comparison line.
 */
1354 CreateLinkTable(register struct InodeSummary *isp, Inode ino)
1356 struct versionStamp version;
1359 if (!VALID_INO(ino))
1361 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
1362 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1363 if (!VALID_INO(ino))
1365 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1366 isp->RWvolumeId, errno);
1367 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1368 fdP = IH_OPEN(VGLinkH);
1370 Abort("Can't open link table for volume %u (error = %d)\n",
1371 isp->RWvolumeId, errno);
1373 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1374 Abort("Can't truncate link table for volume %u (error = %d)\n",
1375 isp->RWvolumeId, errno);
1377 version.magic = LINKTABLEMAGIC;
1378 version.version = LINKTABLEVERSION;
/* NOTE(review): the Abort below guards the FDH_WRITE of the version stamp,
 * yet its message says "truncate"; it looks copied from the FDH_TRUNC
 * check above and probably should say "write". */
1380 if (FDH_WRITE(fdP, (char *)&version, sizeof(version))
1382 Abort("Can't truncate link table for volume %u (error = %d)\n",
1383 isp->RWvolumeId, errno);
1385 FDH_REALLYCLOSE(fdP);
1387 /* If the volume summary exits (i.e., the V*.vol header file exists),
1388 * then set this inode there as well.
1390 if (isp->volSummary)
1391 isp->volSummary->header.linkTable = ino;
/*
 * Body of a thread start routine whose signature line is not visible in
 * this listing; SalvageVolumeGroup passes nt_SVG and a pointer to its
 * parms to pthread_create, so this is presumably nt_SVG -- TODO confirm.
 * It unpacks the SVGParms_t passed as arg and runs DoSalvageVolumeGroup
 * on the inode summary array and count it carries.
 */
1400 SVGParms_t *parms = (SVGParms_t *) arg;
1401 DoSalvageVolumeGroup(parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * SalvageVolumeGroup
 *
 * Salvage one volume group by running nt_SVG on a separate, joinable
 * thread and waiting for it to finish.
 *
 * isp   - inode summaries of the volumes in the group
 * nVols - number of entries in isp
 *
 * VolInfo is cleared first.  isp and nVols are handed to the thread via
 * parms.  A failure of pthread_attr_init, pthread_attr_setdetachstate
 * or pthread_create is logged with the group's RWvolumeId; what happens
 * after the Log call is on lines not visible in this listing.
 * The trailing #endif suggests this threaded path is conditional on
 * AFS_NT40_ENV -- the matching #if is not visible; TODO confirm.
 */
1406 SalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1409 pthread_attr_t tattr;
1413 /* Initialize per volume global variables, even if later code does so */
1417 memset(&VolInfo, 0, sizeof(VolInfo));
/* parms must stay valid for the thread's lifetime; the join below keeps
 * this frame alive until the thread has finished. */
1419 parms.svgp_inodeSummaryp = isp;
1420 parms.svgp_count = nVols;
1421 code = pthread_attr_init(&tattr);
1423 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1427 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1429 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1432 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1434 Log("Failed to create thread to salvage volume group %u\n",
1438 (void)pthread_join(tid, NULL);
1440 #endif /* AFS_NT40_ENV */
/*
 * DoSalvageVolumeGroup
 *
 * Salvage all volumes of one volume group.
 *
 * isp   - inode summaries of the group's volumes; isp[0] is treated as the
 *         read/write volume when its volumeId equals its RWvolumeId and it
 *         has at least one special inode
 * nVols - number of entries in isp
 *
 * Visible steps, in order:
 *  - unless ForceSalvage is set, QuickCheck() is consulted first;
 *  - when canfork is set and debug is not, Fork() is called and the
 *    parent waits for the child;
 *  - the group's ViceInodeInfo records are read from inodeFd into a
 *    freshly allocated buffer;
 *  - under AFS_NAMEI_ENV the link table is located with FindLinkHandle and,
 *    when it is missing or cannot be opened, recreated with
 *    CreateLinkTable;
 *  - volumes are processed from index nVols - 1 down to salvageTo, each
 *    with a check pass (check == 1) followed by a real pass (check == 0),
 *    calling SalvageVolumeHeaderFile and SalvageVnodes;
 *  - remaining non-zero link counts are driven toward zero with IH_DEC and
 *    IH_INC;
 *  - SalvageVolume is run and VGLinkH is released.
 *
 * NOTE(review): this listing omits a number of original lines (braces,
 * return statements, some conditions), so control flow between the
 * visible statements cannot be fully determined from here.
 */
1443 DoSalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1445 struct ViceInodeInfo *inodes, *allInodes, *ip;
1446 int i, totalInodes, size, salvageTo;
1450 int dec_VGLinkH = 0;
1452 FdHandle_t *fdP = NULL;
1455 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1456 && isp->nSpecialInodes > 0);
1457 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1458 if (!ForceSalvage && QuickCheck(isp, nVols))
1461 if (ShowMounts && !haveRWvolume)
1463 if (canfork && !debug && Fork() != 0) {
1464 (void)Wait("Salvage volume group");
1467 for (i = 0, totalInodes = 0; i < nVols; i++)
1468 totalInodes += isp[i].nInodes;
1469 size = totalInodes * sizeof(struct ViceInodeInfo);
/* NOTE(review): the malloc result is used below without a visible NULL
 * check. */
1470 inodes = (struct ViceInodeInfo *)malloc(size);
1471 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1472 * for the partition, if all the inodes
1473 * had been read into memory */
1475 (inodeFd, isp->index * sizeof(struct ViceInodeInfo),
/* NOTE(review): the read() is performed inside assert(); if this is the
 * standard assert macro and NDEBUG is defined, the read would not happen
 * at all -- confirm which assert is in use. */
1477 assert(read(inodeFd, inodes, size) == size);
1479 /* Don't try to salvage a read write volume if there isn't one on this
1481 salvageTo = haveRWvolume ? 0 : 1;
1483 #ifdef AFS_NAMEI_ENV
1484 ino = FindLinkHandle(isp, nVols, allInodes);
1485 if (VALID_INO(ino)) {
1486 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1487 fdP = IH_OPEN(VGLinkH);
1489 if (!VALID_INO(ino) || fdP == NULL) {
1490 Log("%s link table for volume %u.\n",
1491 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1493 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1496 struct ViceInodeInfo *ip;
1497 CreateLinkTable(isp, ino);
1498 fdP = IH_OPEN(VGLinkH);
1499 /* Sync fake 1 link counts to the link table, now that it exists */
1501 for (i = 0; i < nVols; i++) {
1502 ip = allInodes + isp[i].index;
1503 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1505 nt_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1507 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1515 FDH_REALLYCLOSE(fdP);
1517 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1520 /* Salvage in reverse order--read/write volume last; this way any
1521 * Inodes not referenced by the time we salvage the read/write volume
1522 * can be picked up by the read/write volume */
1523 /* ACTUALLY, that's not done right now--the inodes just vanish */
1524 for (i = nVols - 1; i >= salvageTo; i--) {
1526 struct InodeSummary *lisp = &isp[i];
1527 #ifdef AFS_NAMEI_ENV
1528 /* If only the RO is present on this partition, the link table
1529 * shows up as a RW volume special file. Need to make sure the
1530 * salvager doesn't try to salvage the non-existent RW.
1532 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1533 /* If this only special inode is the link table, continue */
1534 if (inodes->u.special.type == VI_LINKTABLE) {
1541 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1542 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1543 /* Check inodes twice. The second time do things seriously. This
1544 * way the whole RO volume can be deleted, below, if anything goes wrong */
1545 for (check = 1; check >= 0; check--) {
1547 if (SalvageVolumeHeaderFile(lisp, allInodes, rw, check, &deleteMe)
1549 MaybeZapVolume(lisp, "Volume header", deleteMe, check);
1550 if (rw && deleteMe) {
1551 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1552 * volume won't be called */
1558 if (rw && check == 1)
1560 if (SalvageVnodes(isp, lisp, allInodes, check) == -1) {
1561 MaybeZapVolume(lisp, "Vnode index", 0, check);
1567 /* Fix actual inode counts */
1569 Log("totalInodes %d\n",totalInodes);
1570 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1571 static int TraceBadLinkCounts = 0;
1572 #ifdef AFS_NAMEI_ENV
1573 if (VGLinkH->ih_ino == ip->inodeNumber) {
1574 dec_VGLinkH = ip->linkCount - VGLinkH_cnt;
1575 VGLinkH_p1 = ip->u.param[0];
1576 continue; /* Deal with this last. */
1579 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1580 TraceBadLinkCounts--; /* Limit reports, per volume */
1581 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(NULL, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
1583 while (ip->linkCount > 0) {
1584 /* below used to assert, not break */
1586 if (IH_DEC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1587 Log("idec failed. inode %s errno %d\n",
1588 PrintInode(NULL, ip->inodeNumber), errno);
1594 while (ip->linkCount < 0) {
1595 /* these used to be asserts */
1597 if (IH_INC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1598 Log("iinc failed. inode %s errno %d\n",
1599 PrintInode(NULL, ip->inodeNumber), errno);
1606 #ifdef AFS_NAMEI_ENV
1607 while (dec_VGLinkH > 0) {
1608 if (IH_DEC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1609 Log("idec failed on link table, errno = %d\n", errno);
1613 while (dec_VGLinkH < 0) {
1614 if (IH_INC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1615 Log("iinc failed on link table, errno = %d\n", errno);
1622 /* Directory consistency checks on the rw volume */
1624 SalvageVolume(isp, VGLinkH);
1625 IH_RELEASE(VGLinkH);
1627 if (canfork && !debug) {
/*
 * QuickCheck
 *
 * Inspect the volume info header of each volume in the group to decide
 * whether a full salvage can be avoided.
 *
 * isp   - inode summaries of the group's volumes
 * nVols - number of entries in isp
 *
 * For each volume the VolumeDiskData is read from its volumeInfo inode.
 * The header passes when the read is complete, the magic is
 * VOLUMEINFOMAGIC, dontSalvage equals DONT_SALVAGE, and both needsSalvaged
 * and destroyMe are zero.  A passing header that is still marked inUse has
 * inUse cleared and inService set, and is written back.
 *
 * The return statements are not visible in this listing; the caller treats
 * a non-zero result as true in "!ForceSalvage && QuickCheck(...)" --
 * presumably meaning no salvage is needed; TODO confirm.
 */
1634 QuickCheck(register struct InodeSummary *isp, int nVols)
1636 /* Check headers BEFORE forking */
1640 for (i = 0; i < nVols; i++) {
1641 struct VolumeSummary *vs = isp[i].volSummary;
1642 VolumeDiskData volHeader;
1644 /* Don't salvage just because phantom rw volume is there... */
1645 /* (If a read-only volume exists, read/write inodes must also exist) */
1646 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
/* NOTE(review): vs is dereferenced here; any NULL check on it would be on
 * a line not visible in this listing. */
1650 IH_INIT(h, fileSysDevice, vs->header.parent, vs->header.volumeInfo);
1651 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
1652 == sizeof(volHeader)
1653 && volHeader.stamp.magic == VOLUMEINFOMAGIC
1654 && volHeader.dontSalvage == DONT_SALVAGE
1655 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
1656 if (volHeader.inUse != 0) {
/* Header is otherwise clean: clear the stale in-use mark and put the
 * volume back in service before writing the header back. */
1657 volHeader.inUse = 0;
1658 volHeader.inService = 1;
1660 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
1661 != sizeof(volHeader)) {
1677 /* SalvageVolumeHeaderFile
1679 * Salvage the top level V*.vol header file. Make sure the special files
1680 * exist and that there are no duplicates.
1682 * Calls SalvageHeader for each possible type of volume special file.
/*
 * Parameters and visible behavior of SalvageVolumeHeaderFile:
 *
 * isp      - inode summary of the volume being salvaged
 * inodes   - inode array indexed by isp->index + i for special inode i
 * RW       - non-zero when the volume is the read/write volume
 * check    - non-zero for the check-only pass; inode link counts are only
 *            decremented when it is zero
 * deleteMe - passed through to SalvageHeader
 *
 * A template header (tempHeader) is built with VOLUMEHEADERMAGIC,
 * VOLUMEHEADERVERSION, the volume id and its parent.  Adjacent special
 * inodes with the same type are reported as duplicates.  Each special
 * inode with a type in 1..MAXINODETYPE whose stuff[] entry is not obsolete
 * has its inode number stored through that entry's inode pointer.
 * SalvageHeader is then called for each of the MAXINODETYPE stuff[]
 * entries.  If the volume has no summary, one is allocated and the header
 * file is created; otherwise the existing header is compared against the
 * template and reopened for rewriting.  The header is converted with
 * VolumeHeaderToDisk and written, and the summary's volumeInfoHandle is
 * initialized.
 *
 * NOTE(review): the malloc of the new VolumeSummary is dereferenced on the
 * next visible line without a NULL check, and strcpy into headerName[64]
 * from volSummary->fileName is unbounded.
 */
1686 SalvageVolumeHeaderFile(register struct InodeSummary *isp,
1687 register struct ViceInodeInfo *inodes, int RW,
1688 int check, int *deleteMe)
1692 register struct ViceInodeInfo *ip;
1693 int allinodesobsolete = 1;
1694 struct VolumeDiskHeader diskHeader;
1698 memset(&tempHeader, 0, sizeof(tempHeader));
1699 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
1700 tempHeader.stamp.version = VOLUMEHEADERVERSION;
1701 tempHeader.id = isp->volumeId;
1702 tempHeader.parent = isp->RWvolumeId;
1703 /* Check for duplicates (inodes are sorted by type field) */
1704 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
1705 ip = &inodes[isp->index + i];
1706 if (ip->u.special.type == (ip + 1)->u.special.type) {
1708 Log("Duplicate special inodes in volume header; salvage of volume %u aborted\n", isp->volumeId);
1712 for (i = 0; i < isp->nSpecialInodes; i++) {
1713 ip = &inodes[isp->index + i];
1714 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
1716 Log("Rubbish header inode\n");
1719 Log("Rubbish header inode; deleted\n");
1720 } else if (!stuff[ip->u.special.type - 1].obsolete) {
/* stuff[] is indexed by special type minus one; its inode member points
 * at the slot that receives this inode number. */
1721 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
1722 if (!check && ip->u.special.type != VI_LINKTABLE)
1723 ip->linkCount--; /* Keep the inode around */
1724 allinodesobsolete = 0;
1728 if (allinodesobsolete) {
1735 VGLinkH_cnt++; /* one for every header. */
1737 if (!RW && !check && isp->volSummary) {
1738 ClearROInUseBit(isp->volSummary);
1742 for (i = 0; i < MAXINODETYPE; i++) {
1743 if (stuff[i].inodeType == VI_LINKTABLE) {
1744 /* Gross hack: SalvageHeader does a bcmp on the volume header.
1745 * And we may have recreated the link table earlier, so set the
1746 * RW header as well.
1748 if (VALID_INO(VGLinkH->ih_ino)) {
1749 *stuff[i].inode = VGLinkH->ih_ino;
1753 if (SalvageHeader(&stuff[i], isp, check, deleteMe) == -1 && check)
1757 if (isp->volSummary == NULL) {
1759 char headerName[64];
1760 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
1761 (void)afs_snprintf(path, sizeof path, "%s/%s", fileSysPath, headerName);
1763 Log("No header file for volume %u\n", isp->volumeId);
1767 Log("No header file for volume %u; %screating %s\n",
1768 isp->volumeId, (Testing ? "it would have been " : ""),
1770 headerFd = afs_open(path, O_RDWR | O_CREAT | O_TRUNC, 0644);
1771 assert(headerFd != -1);
1772 isp->volSummary = (struct VolumeSummary *)
1773 malloc(sizeof(struct VolumeSummary));
1774 isp->volSummary->fileName = ToString(headerName);
1777 char headerName[64];
1778 /* hack: these two fields are obsolete... */
1779 isp->volSummary->header.volumeAcl = 0;
1780 isp->volSummary->header.volumeMountTable = 0;
1783 (&isp->volSummary->header, &tempHeader,
1784 sizeof(struct VolumeHeader))) {
1785 /* We often remove the name before calling us, so we make a fake one up */
1786 if (isp->volSummary->fileName) {
1787 strcpy(headerName, isp->volSummary->fileName);
1789 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
1790 isp->volSummary->fileName = ToString(headerName);
1792 (void)afs_snprintf(path, sizeof path, "%s/%s", fileSysPath, headerName);
1794 Log("Header file %s is damaged or no longer valid%s\n", path,
1795 (check ? "" : "; repairing"));
1799 headerFd = afs_open(path, O_RDWR | O_TRUNC, 0644);
1800 assert(headerFd != -1);
1804 memcpy(&isp->volSummary->header, &tempHeader,
1805 sizeof(struct VolumeHeader));
1808 Log("It would have written a new header file for volume %u\n",
1811 VolumeHeaderToDisk(&diskHeader, &tempHeader);
1812 if (write(headerFd, &diskHeader, sizeof(struct VolumeDiskHeader))
1813 != sizeof(struct VolumeDiskHeader)) {
1814 Log("Couldn't rewrite volume header file!\n");
1821 IH_INIT(isp->volSummary->volumeInfoHandle, fileSysDevice, isp->RWvolumeId,
1822 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader
 *
 * Verify, and when necessary recreate, one special file of a volume
 * header.
 *
 * sp    - descriptor of the special file: inodeType, a pointer to the slot
 *         holding its inode number, description (used in messages), the
 *         expected stamp and the expected size
 * isp   - inode summary of the volume
 * check - non-zero for the check-only pass
 * (a further parameter follows on a line not visible in this listing; the
 *  caller passes deleteMe in that position)
 *
 * Visible behavior: a zero inode slot is reported as missing and a new
 * special inode is requested with IH_CREATE.  The file is opened, its
 * first sp->size bytes are read, and the magic is compared with
 * sp->stamp.magic; a mismatch is reported as corruption.  When recreate is
 * set and Testing is not, the file is truncated and rewritten: a
 * VI_VOLINFO file gets a zeroed VolumeDiskData with a placeholder name,
 * any other type gets just the version stamp.  For VI_VOLINFO the
 * resulting header is copied into VolInfo and a summary line is logged.
 */
1827 SalvageHeader(register struct stuff *sp, struct InodeSummary *isp, int check,
1831 VolumeDiskData volumeInfo;
1832 struct versionStamp fileHeader;
/* On non-namei builds a VI_LINKTABLE entry is special-cased here; the
 * statement guarded by this test is not visible in this listing. */
1841 #ifndef AFS_NAMEI_ENV
1842 if (sp->inodeType == VI_LINKTABLE)
1845 if (*(sp->inode) == 0) {
1847 Log("Missing inode in volume header (%s)\n", sp->description);
1851 Log("Missing inode in volume header (%s); %s\n", sp->description,
1852 (Testing ? "it would have recreated it" : "recreating"));
1855 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
1856 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
1857 if (!VALID_INO(*(sp->inode)))
1859 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
1860 sp->description, errno);
1865 IH_INIT(specH, fileSysDevice, isp->RWvolumeId, *(sp->inode));
1866 fdP = IH_OPEN(specH);
1867 if (OKToZap && (fdP == NULL) && BadError(errno)) {
1868 /* bail out early and destroy the volume */
1870 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
1877 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
1878 sp->description, errno);
/* A short read or a wrong magic number both count as corruption. */
1881 && (FDH_READ(fdP, (char *)&header, sp->size) != sp->size
1882 || header.fileHeader.magic != sp->stamp.magic)) {
1884 Log("Part of the header (%s) is corrupted\n", sp->description);
1885 FDH_REALLYCLOSE(fdP);
1889 Log("Part of the header (%s) is corrupted; recreating\n",
1893 if (sp->inodeType == VI_VOLINFO
1894 && header.volumeInfo.destroyMe == DESTROY_ME) {
1897 FDH_REALLYCLOSE(fdP);
1901 if (recreate && !Testing) {
1904 ("Internal error: recreating volume header (%s) in check mode\n",
1906 code = FDH_TRUNC(fdP, 0);
1908 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
1909 sp->description, errno);
1911 /* The following code should be moved into vutil.c */
1912 if (sp->inodeType == VI_VOLINFO) {
1914 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
1915 header.volumeInfo.stamp = sp->stamp;
1916 header.volumeInfo.id = isp->volumeId;
1917 header.volumeInfo.parentId = isp->RWvolumeId;
/* The rebuilt header starts from zeroes, so a placeholder name derived
 * from the volume id is written and announced in the log. */
1918 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
1919 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
1920 isp->volumeId, isp->volumeId);
1921 header.volumeInfo.inService = 0;
1922 header.volumeInfo.blessed = 0;
1923 /* The + 1000 is a hack in case there are any files out in venus caches */
1924 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
1925 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
1926 header.volumeInfo.needsCallback = 0;
1927 gettimeofday(&tp, 0);
1928 header.volumeInfo.creationDate = tp.tv_sec;
1929 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
1931 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
1932 sp->description, errno);
1935 FDH_WRITE(fdP, (char *)&header.volumeInfo,
1936 sizeof(header.volumeInfo));
1937 if (code != sizeof(header.volumeInfo)) {
1940 ("Unable to write volume header file (%s) (errno = %d)\n",
1941 sp->description, errno);
1942 Abort("Unable to write entire volume header file (%s)\n",
1946 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
1948 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
1949 sp->description, errno);
1951 code = FDH_WRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp));
1952 if (code != sizeof(sp->stamp)) {
1955 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
1956 sp->description, errno);
1958 ("Unable to write entire version stamp in volume header file (%s)\n",
1963 FDH_REALLYCLOSE(fdP);
1965 if (sp->inodeType == VI_VOLINFO) {
/* Keep a copy of the volume info header in VolInfo for later use. */
1966 VolInfo = header.volumeInfo;
1969 if (VolInfo.updateDate) {
/* NOTE(review): the declaration (and size) of update is not visible in
 * this listing; strcpy into it is unbounded. */
1970 strcpy(update, TimeStamp(VolInfo.updateDate, 0));
1972 Log("%s (%u) %supdated %s\n", VolInfo.name, VolInfo.id,
1973 (Testing ? "it would have been " : ""), update);
1975 strcpy(update, TimeStamp(VolInfo.creationDate, 0));
1977 Log("%s (%u) not updated (created %s)\n", VolInfo.name,
1978 VolInfo.id, update);
/*
 * SalvageVnodes
 *
 * Salvage the small and the large vnode index of one volume against the
 * non-special inodes of the read/write volume.
 *
 * rwIsp   - inode summary of the read/write volume; supplies the inode
 *           range that is matched against the vnodes
 * thisIsp - inode summary of the volume whose indexes are salvaged; its
 *           volSummary must be set, since its header fields are read
 * inodes  - base of the inode array that rwIsp->index refers to
 * check   - non-zero for the check-only pass
 *
 * Returns 0 when both SalvageIndex calls returned 0, otherwise -1.  In
 * check mode a -1 from the small index is tested before the large index is
 * processed; the statement guarded by that test is not visible in this
 * listing (presumably an early return).
 */
1988 SalvageVnodes(register struct InodeSummary *rwIsp,
1989 register struct InodeSummary *thisIsp,
1990 register struct ViceInodeInfo *inodes, int check)
1992 int ilarge, ismall, ioffset, RW, nInodes;
1993 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
/* The volume is treated as read/write exactly when it is the RW summary
 * itself. */
1996 RW = (rwIsp == thisIsp);
1997 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
1999 SalvageIndex(thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2000 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2001 if (check && ismall == -1)
2004 SalvageIndex(thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2005 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2006 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex
 *
 * Walk one vnode index file and reconcile every allocated vnode with the
 * inodes found on disk.
 *
 * ino        - inode of the vnode index file
 * class      - vnode class of the index; selects the VnodeClassInfo entry
 * RW         - non-zero when the volume is the read/write volume
 * ip         - first candidate inode; the loop advances it while the inode's
 *              vnodeNumber is below the current vnode, so the array is
 *              expected to be ordered by vnode number
 * nInodes    - number of candidate inodes starting at ip
 * volSummary - summary of the volume; supplies the volume id and parent
 * check      - non-zero for the check-only pass; the function asserts that
 *              no vnode was changed while check is set
 *
 * Visible behavior: the index file size must be an exact multiple of
 * vcp->diskSize; the first record is skipped by the initial seek.  A vnode
 * with inode number 0, or with a magic that does not match the class, is
 * zeroed.  When a matching inode exists, the vnode's uniquifier, data
 * version, inode number and length are brought in line with the inode and
 * the inode's linkCount is decremented.  When no inode matches, a vnode
 * that names an inode or is a directory is reported and zeroed.  Changed
 * vnodes are written back unless Testing is set, and VolumeChanged is set.
 *
 * The return value is not visible in this listing; SalvageVnodes compares
 * it with 0 and -1.
 */
2010 SalvageIndex(Inode ino, VnodeClass class, int RW,
2011 register struct ViceInodeInfo *ip, int nInodes,
2012 struct VolumeSummary *volSummary, int check)
2014 VolumeId volumeNumber;
2015 char buf[SIZEOF_LARGEDISKVNODE];
2016 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2018 StreamHandle_t *file;
2019 struct VnodeClassInfo *vcp;
2021 afs_fsize_t vnodeLength;
2022 int vnodeIndex, nVnodes;
2023 afs_ino_str_t stmp1, stmp2;
2027 volumeNumber = volSummary->header.id;
2028 IH_INIT(handle, fileSysDevice, volSummary->header.parent, ino);
2029 fdP = IH_OPEN(handle);
2030 assert(fdP != NULL);
2031 file = FDH_FDOPEN(fdP, "r+");
2032 assert(file != NULL);
2033 vcp = &VnodeClassInfo[class];
2034 size = OS_SIZE(fdP->fd_fd);
/* One record of the file is not a vnode, hence the - 1 and the seek past
 * the first vcp->diskSize bytes below. */
2036 nVnodes = (size / vcp->diskSize) - 1;
2038 assert((nVnodes + 1) * vcp->diskSize == size);
/* NOTE(review): the seek is performed inside assert(); if this is the
 * standard assert macro and NDEBUG is defined it would be skipped --
 * confirm which assert is in use. */
2039 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
2043 for (vnodeIndex = 0;
2044 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2045 nVnodes--, vnodeIndex++) {
2046 if (vnode->type != vNull) {
2047 int vnodeChanged = 0;
2048 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2049 /* Log programs that belong to root (potentially suid root);
2050 * don't bother for read-only or backup volumes */
2051 #ifdef notdef /* This is done elsewhere */
2052 if (ShowRootFiles && RW && vnode->owner == 0 && vnodeNumber != 1)
2053 Log("OWNER IS ROOT %s %u dir %u vnode %u author %u owner %u mode %o\n", VolInfo.name, volumeNumber, vnode->parent, vnodeNumber, vnode->author, vnode->owner, vnode->modeBits);
2055 if (VNDISK_GET_INO(vnode) == 0) {
2057 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2058 memset(vnode, 0, vcp->diskSize);
2062 if (vcp->magic != vnode->vnodeMagic) {
2063 /* bad magic #, probably partially created vnode */
2064 Log("Partially allocated vnode %d deleted.\n",
2066 memset(vnode, 0, vcp->diskSize);
2070 /* ****** Should do a bit more salvage here: e.g. make sure
2071 * vnode type matches what it should be given the index */
2072 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2073 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2074 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2075 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2082 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2083 /* The following doesn't work, because the version number
2084 * is not maintained correctly by the file server */
2085 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2086 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2088 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2094 /* For RW volume, look for vnode with matching inode number;
2095 * if no such match, take the first determined by our sort
2097 register struct ViceInodeInfo *lip = ip;
2098 register int lnInodes = nInodes;
2100 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2101 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2110 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2111 /* "Matching" inode */
2115 vu = vnode->uniquifier;
2116 iu = ip->u.vnode.vnodeUniquifier;
2117 vd = vnode->dataVersion;
2118 id = ip->u.vnode.inodeDataVersion;
2120 * Because of the possibility of the uniquifier overflows (> 4M)
2121 * we compare them modulo the low 22-bits; we shouldn't worry
2122 * about mismatching since they shouldn't to many old
2123 * uniquifiers of the same vnode...
2125 if (IUnique(vu) != IUnique(iu)) {
2127 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2130 vnode->uniquifier = iu;
2131 #ifdef AFS_3DISPARES
2132 vnode->dataVersion = (id >= vd ?
2135 1887437 ? vd : id) :
2138 1887437 ? id : vd));
2140 #if defined(AFS_SGI_EXMAG)
2141 vnode->dataVersion = (id >= vd ?
2144 15099494 ? vd : id) :
2147 15099494 ? id : vd));
2149 vnode->dataVersion = (id > vd ? id : vd);
2150 #endif /* AFS_SGI_EXMAG */
2151 #endif /* AFS_3DISPARES */
2154 /* don't bother checking for vd > id any more, since
2155 * partial file transfers always result in this state,
2156 * and you can't do much else anyway (you've already
2157 * found the best data you can) */
2158 #ifdef AFS_3DISPARES
2159 if (!vnodeIsDirectory(vnodeNumber)
2160 && ((vd < id && (id - vd) < 1887437)
2161 || ((vd > id && (vd - id) > 1887437)))) {
2163 #if defined(AFS_SGI_EXMAG)
2164 if (!vnodeIsDirectory(vnodeNumber)
2165 && ((vd < id && (id - vd) < 15099494)
2166 || ((vd > id && (vd - id) > 15099494)))) {
2168 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2169 #endif /* AFS_SGI_EXMAG */
2172 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2173 vnode->dataVersion = id;
2178 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2181 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2183 VNDISK_SET_INO(vnode, ip->inodeNumber);
2188 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2190 VNDISK_SET_INO(vnode, ip->inodeNumber);
2193 VNDISK_GET_LEN(vnodeLength, vnode);
2194 if (ip->byteCount != vnodeLength) {
2197 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2202 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2203 VNDISK_SET_LEN(vnode, ip->byteCount);
2207 ip->linkCount--; /* Keep the inode around */
2210 } else { /* no matching inode */
2211 if (VNDISK_GET_INO(vnode) != 0
2212 || vnode->type == vDirectory) {
2213 /* No matching inode--get rid of the vnode */
2215 if (VNDISK_GET_INO(vnode)) {
2217 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)));
2221 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2226 if (VNDISK_GET_INO(vnode)) {
2228 time_t serverModifyTime = vnode->serverModifyTime;
2229 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2233 time_t serverModifyTime = vnode->serverModifyTime;
2234 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2237 memset(vnode, 0, vcp->diskSize);
2240 /* Should not reach here becuase we checked for
2241 * (inodeNumber == 0) above. And where we zero the vnode,
2242 * we also goto vnodeDone.
2246 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2250 } /* VNDISK_GET_INO(vnode) != 0 */
2252 assert(!(vnodeChanged && check));
2253 if (vnodeChanged && !Testing) {
2255 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2256 (char *)vnode, vcp->diskSize)
2258 VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber
 *
 * Look up the in-memory summary of a vnode.
 *
 * vnodeNumber - vnode id; its class selects the vnodeInfo[] table and its
 *               bit number is the index within that table
 *
 * Returns a pointer to the vnode's VnodeEssence entry, or NULL when the
 * index is at or beyond the table's nVnodes.
 */
2269 struct VnodeEssence *
2270 CheckVnodeNumber(VnodeId vnodeNumber)
2273 struct VnodeInfo *vip;
2276 class = vnodeIdToClass(vnodeNumber);
2277 vip = &vnodeInfo[class];
2278 offset = vnodeIdToBitNumber(vnodeNumber);
/* NOTE(review): the declaration of offset is not visible in this listing;
 * if it is signed, a negative value would pass this upper-bound test. */
2279 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite
 *
 * Give a directory its own fresh inode before it is modified.
 *
 * dir - summary of the directory; nothing is copied when dir->copied is
 *       already set or when Testing is set (the statement guarded by that
 *       test is not visible in this listing -- presumably a return)
 *
 * Visible behavior: cached directory buffers are flushed, the directory's
 * vnode is read from the large vnode index, a new inode is created with a
 * bumped data version, the old inode's contents are copied into it, the
 * vnode is repointed at the new inode and written back, and the salvage
 * directory handle is switched to the new inode.  The old inode is kept
 * for now, as the comment at the end of the function explains.
 *
 * NOTE(review): CopyInode is called inside assert(); if this is the
 * standard assert macro and NDEBUG is defined the copy would be skipped --
 * confirm which assert is in use.
 */
2283 CopyOnWrite(register struct DirSummary *dir)
2285 /* Copy the directory unconditionally if we are going to change it:
2286 * not just if was cloned.
2288 struct VnodeDiskObject vnode;
2289 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2290 Inode oldinode, newinode;
2293 if (dir->copied || Testing)
2295 DFlush(); /* Well justified paranoia... */
2298 IH_IREAD(vnodeInfo[vLarge].handle,
2299 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2301 assert(code == sizeof(vnode));
2302 oldinode = VNDISK_GET_INO(&vnode);
2303 /* Increment the version number by a whole lot to avoid problems with
2304 * clients that were promised new version numbers--but the file server
2305 * crashed before the versions were written to disk.
2308 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2309 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2311 assert(VALID_INO(newinode));
2312 assert(CopyInode(fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2314 VNDISK_SET_INO(&vnode, newinode);
2316 IH_IWRITE(vnodeInfo[vLarge].handle,
2317 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2319 assert(code == sizeof(vnode));
2321 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2322 fileSysDevice, newinode);
2323 /* Don't delete the original inode right away, because the directory is
2324 * still being scanned.
2330 * This function should either successfully create a new dir, or give up
2331 * and leave things the way they were. In particular, if it fails to write
2332 * the new dir properly, it should return w/o changing the reference to the
/*
 * Parameters and visible behavior of CopyAndSalvage:
 *
 * dir - summary of the directory to rebuild; on success dir->dirHandle is
 *       replaced by the handle of the rebuilt directory
 *
 * The directory's vnode is read from the large vnode index and a new inode
 * is created with a bumped data version.  DirSalvage rebuilds the
 * directory into the new inode, using the parent's uniquifier from
 * CheckVnodeNumber when the parent is known and 1 otherwise.  If the
 * rebuild fails, or DirOK rejects the result, the new inode's link count
 * is dropped with IH_DEC.  Otherwise the vnode is updated with the new
 * inode and length and written back, the data is synced, the old
 * directory file is closed, the old inode's link count is dropped, and
 * dir->dirHandle is switched to the new directory.
 *
 * NOTE(review): in the failure path, code is reassigned from IH_DEC on the
 * line directly before it is logged as the "Directory salvage returned
 * code", so the logged value appears to be IH_DEC's result.  The message
 * "also failed to decrement link count on new inode" has no trailing
 * newline.  The IH_OPEN result passed to FDH_REALLYCLOSE is not checked
 * for NULL on the visible lines.
 */
2336 CopyAndSalvage(register struct DirSummary *dir)
2338 struct VnodeDiskObject vnode;
2339 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2340 Inode oldinode, newinode;
/* Default uniquifier for the ".." entry when the parent is unknown. */
2345 afs_int32 parentUnique = 1;
2346 struct VnodeEssence *vnodeEssence;
2351 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2353 IH_IREAD(vnodeInfo[vLarge].handle,
2354 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2356 assert(lcode == sizeof(vnode));
2357 oldinode = VNDISK_GET_INO(&vnode);
2358 /* Increment the version number by a whole lot to avoid problems with
2359 * clients that were promised new version numbers--but the file server
2360 * crashed before the versions were written to disk.
2363 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2364 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2366 assert(VALID_INO(newinode));
2367 SetSalvageDirHandle(&newdir, dir->rwVid, fileSysDevice, newinode);
2369 /* Assign . and .. vnode numbers from dir and vnode.parent.
2370 * The uniquifier for . is in the vnode.
2371 * The uniquifier for .. might be set to a bogus value of 1 and
2372 * the salvager will later clean it up.
2374 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(vnode.parent))) {
2375 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2378 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2380 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2385 /* didn't really build the new directory properly, let's just give up. */
2386 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2387 Log("Directory salvage returned code %d, continuing.\n", code);
2389 Log("also failed to decrement link count on new inode");
2393 Log("Checking the results of the directory salvage...\n");
2394 if (!DirOK(&newdir)) {
2395 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2396 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2401 VNDISK_SET_INO(&vnode, newinode);
2402 length = Length(&newdir);
2403 VNDISK_SET_LEN(&vnode, length);
2405 IH_IWRITE(vnodeInfo[vLarge].handle,
2406 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2408 assert(lcode == sizeof(vnode));
2411 nt_sync(fileSysDevice);
2413 sync(); /* this is slow, but hopefully rarely called. We don't have
2414 * an open FD on the file itself to fsync.
2418 vnodeInfo[vLarge].handle->ih_synced = 1;
2420 /* make sure old directory file is really closed */
2421 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2422 FDH_REALLYCLOSE(fdP);
2424 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2426 dir->dirHandle = newdir;
2430 JudgeEntry(void *dirVal, char *name, afs_int32 vnodeNumber,
2433 struct DirSummary *dir = (struct DirSummary *)dirVal;
2434 struct VnodeEssence *vnodeEssence;
2435 afs_int32 dirOrphaned, todelete;
2437 dirOrphaned = IsVnodeOrphaned(dir->vnodeNumber);
2439 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2440 if (vnodeEssence == NULL) {
2442 Log("dir vnode %u: invalid entry deleted: %s/%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2446 assert(Delete(&dir->dirHandle, name) == 0);
2451 #ifndef AFS_NAMEI_ENV
2452 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2453 * mount inode for the partition. If this inode were deleted, it would crash
2456 if (vnodeEssence->InodeNumber == 0) {
2457 Log("dir vnode %d: invalid entry: %s/%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2460 assert(Delete(&dir->dirHandle, name) == 0);
2467 if (!(vnodeNumber & 1) && !Showmode
2468 && !(vnodeEssence->count || vnodeEssence->unique
2469 || vnodeEssence->modeBits)) {
2470 Log("dir vnode %u: invalid entry: %s/%s (vnode %u, unique %u)%s\n",
2471 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
2472 vnodeNumber, unique,
2473 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
2478 assert(Delete(&dir->dirHandle, name) == 0);
2484 /* Check if the Uniquifiers match. If not, change the directory entry
2485 * so its unique matches the vnode unique. Delete if the unique is zero
2486 * or if the directory is orphaned.
2488 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
2489 if (!vnodeEssence->unique
2490 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
2491 /* This is an orphaned directory. Don't delete the . or ..
2492 * entry. Otherwise, it will get created in the next
2493 * salvage and deleted again here. So Just skip it.
2498 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
2501 Log("dir vnode %u: %s/%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
2505 fid.Vnode = vnodeNumber;
2506 fid.Unique = vnodeEssence->unique;
2508 assert(Delete(&dir->dirHandle, name) == 0);
2510 assert(Create(&dir->dirHandle, name, &fid) == 0);
2513 return 0; /* no need to continue */
2516 if (strcmp(name, ".") == 0) {
2517 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
2520 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
2523 assert(Delete(&dir->dirHandle, ".") == 0);
2524 fid.Vnode = dir->vnodeNumber;
2525 fid.Unique = dir->unique;
2526 assert(Create(&dir->dirHandle, ".", &fid) == 0);
2529 vnodeNumber = fid.Vnode; /* Get the new Essence */
2530 unique = fid.Unique;
2531 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2534 } else if (strcmp(name, "..") == 0) {
2537 struct VnodeEssence *dotdot;
2538 pa.Vnode = dir->parent;
2539 dotdot = CheckVnodeNumber(pa.Vnode);
2540 assert(dotdot != NULL); /* XXX Should not be assert */
2541 pa.Unique = dotdot->unique;
2543 pa.Vnode = dir->vnodeNumber;
2544 pa.Unique = dir->unique;
2546 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
2548 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
2551 assert(Delete(&dir->dirHandle, "..") == 0);
2552 assert(Create(&dir->dirHandle, "..", &pa) == 0);
2555 vnodeNumber = pa.Vnode; /* Get the new Essence */
2557 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2559 dir->haveDotDot = 1;
2560 } else if (strncmp(name, ".__afs", 6) == 0) {
2562 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
2566 assert(Delete(&dir->dirHandle, name) == 0);
2568 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
2569 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
2572 if (ShowSuid && (vnodeEssence->modeBits & 06000))
2573 Log("FOUND suid/sgid file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
2574 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
2575 && !(vnodeEssence->modeBits & 0111)) {
2581 IH_INIT(ihP, fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
2582 vnodeEssence->InodeNumber);
2585 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
2589 size = FDH_SIZE(fdP);
2591 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, size, vnodeNumber);
2592 FDH_REALLYCLOSE(fdP);
2599 code = FDH_READ(fdP, buf, size);
2602 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
2603 Log("Volume %u (%s) mount point %s/%s to '%s' invalid, %s to symbolic link\n",
2604 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
2605 Testing ? "would convert" : "converted");
2606 vnodeEssence->modeBits |= 0111;
2607 vnodeEssence->changed = 1;
2608 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s/%s to '%s'\n",
2609 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
2610 dir->name ? dir->name : "??", name, buf);
2612 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
2613 dir->vname, vnodeNumber, size, code);
2615 FDH_REALLYCLOSE(fdP);
2618 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
2619 Log("FOUND root file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
2620 if (vnodeIdToClass(vnodeNumber) == vLarge
2621 && vnodeEssence->name == NULL) {
2623 if ((n = (char *)malloc(strlen(name) + 1)))
2625 vnodeEssence->name = n;
2628 /* The directory entry points to the vnode. Check to see if the
2629 * vnode points back to the directory. If not, then let the
2630 * directory claim it (else it might end up orphaned). Vnodes
2631 * already claimed by another directory are deleted from this
2632 * directory: hardlinks to the same vnode are not allowed
2633 * from different directories.
2635 if (vnodeEssence->parent != dir->vnodeNumber) {
2636 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
2637 /* Vnode does not point back to this directory.
2638 * Orphaned dirs cannot claim a file (it may belong to
2639 * another non-orphaned dir).
2642 Log("dir vnode %u: %s/%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
2644 vnodeEssence->parent = dir->vnodeNumber;
2645 vnodeEssence->changed = 1;
2647 /* Vnode was claimed by another directory */
2650 Log("dir vnode %u: %s/%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
2651 } else if (vnodeNumber == 1) {
2652 Log("dir vnode %d: %s/%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
2654 Log("dir vnode %u: %s/%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
2659 assert(Delete(&dir->dirHandle, name) == 0);
2664 /* This directory claims the vnode */
2665 vnodeEssence->claimed = 1;
2667 vnodeEssence->count--;
/*
 * DistilVnodeEssence: scan the vnode index `ino' of volume `rwVId' for the
 * given vnode class and record, for every vnode whose type is not vNull, the
 * fields the salvager works with (link count, block count, parent,
 * uniquifier, mode bits, inode number, type, author, owner, group) in
 * vnodeInfo[class].vnodes[].  For the large class the directory inode of
 * each directory vnode is also stored in vnodeInfo[class].inodes[].
 *
 * *maxu is raised to the largest uniquifier seen.  vnodeInfo[class].handle
 * is initialised here with IH_INIT.
 *
 * A vnode of type vDirectory found in a non-large index is cleared on disk.
 * `vnode' is a pointer into `buf', so the record is zeroed and written back
 * through that pointer using the on-disk record size (vcp->diskSize), the
 * same size used to read it.
 */
2672 DistilVnodeEssence(VolumeId rwVId, VnodeClass class, Inode ino, Unique * maxu)
2674 register struct VnodeInfo *vip = &vnodeInfo[class];
2675 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
2676 char buf[SIZEOF_LARGEDISKVNODE];
2677 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2679 StreamHandle_t *file;
2684 IH_INIT(vip->handle, fileSysDevice, rwVId, ino);
2685 fdP = IH_OPEN(vip->handle);
2686 assert(fdP != NULL);
2687 file = FDH_FDOPEN(fdP, "r+");
2688 assert(file != NULL);
2689 size = OS_SIZE(fdP->fd_fd);
/* The first diskSize bytes of the index are skipped by the seek below, hence the -1. */
2691 vip->nVnodes = (size / vcp->diskSize) - 1;
2692 if (vip->nVnodes > 0) {
/* The index must be an exact multiple of the record size. */
2693 assert((vip->nVnodes + 1) * vcp->diskSize == size);
2694 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
2695 assert((vip->vnodes = (struct VnodeEssence *)
2696 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
2697 if (class == vLarge) {
2698 assert((vip->inodes = (Inode *)
2699 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
2708 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
/* Read one record per iteration; stop early if a read comes up short. */
2709 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
2710 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2711 nVnodes--, vnodeIndex++) {
2712 if (vnode->type != vNull) {
2713 register struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
2714 afs_fsize_t vnodeLength;
2715 vip->nAllocatedVnodes++;
2716 vep->count = vnode->linkCount;
2717 VNDISK_GET_LEN(vnodeLength, vnode);
2718 vep->blockCount = nBlocks(vnodeLength);
2719 vip->volumeBlockCount += vep->blockCount;
2720 vep->parent = vnode->parent;
2721 vep->unique = vnode->uniquifier;
2722 if (*maxu < vnode->uniquifier)
2723 *maxu = vnode->uniquifier;
2724 vep->modeBits = vnode->modeBits;
2725 vep->InodeNumber = VNDISK_GET_INO(vnode);
2726 vep->type = vnode->type;
2727 vep->author = vnode->author;
2728 vep->owner = vnode->owner;
2729 vep->group = vnode->group;
2730 if (vnode->type == vDirectory) {
2731 if (class != vLarge) {
/* Directory vnode in a non-large index: uncount it and wipe the record on disk. */
2732 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2733 vip->nAllocatedVnodes--;
/* Zero the record itself (vnode points into buf), not just pointer-size bytes of it. */
2734 memset(vnode, 0, vcp->diskSize);
2735 IH_IWRITE(vnodeInfo[vSmall].handle,
2736 vnodeIndexOffset(vcp, vnodeNumber),
2737 (char *)vnode, vcp->diskSize);
2740 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName: build the path name of a directory vnode into `path'.
 * When the vnode has a parent and a remembered name, and the parent's
 * essence can be found, the parent's path is produced first by a recursive
 * call and vp->name is then appended with strcat.
 * NOTE(review): no length check on `path' is visible here; the caller is
 * presumably expected to pass a buffer large enough for the full path.
 */
2749 GetDirName(VnodeId vnode, struct VnodeEssence *vp, char *path)
2751 struct VnodeEssence *parentvp;
/* Recurse towards the root first so that components are appended in order. */
2757 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(vp->parent))
2758 && GetDirName(vp->parent, parentvp, path)) {
2760 strcat(path, vp->name);
2766 /* To determine if a vnode is orphaned or not, the vnode and all its parent
2767 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
/*
 * IsVnodeOrphaned: return 1 if `vnode' is orphaned, 0 if it is not.
 * A vnode whose essence cannot be found, or whose claimed flag is clear,
 * is orphaned.  A claimed vnode is orphaned exactly when its parent is,
 * so the check recurses up the parent chain.
 */
2770 IsVnodeOrphaned(VnodeId vnode)
2772 struct VnodeEssence *vep;
2775 return (1); /* Vnode zero does not exist */
2777 return (0); /* The root dir vnode is always claimed */
2778 vep = CheckVnodeNumber(vnode); /* Get the vnode essence */
2779 if (!vep || !vep->claimed)
2780 return (1); /* Vnode is not claimed - it is orphaned */
/* Claimed here: the answer is whatever holds for the parent. */
2782 return (IsVnodeOrphaned(vep->parent));
/*
 * SalvageDir: salvage the directory whose large-vnode bit number is `i'.
 *
 * The vnode is marked salvaged on entry, so each directory is processed at
 * most once.  If the parent directory has not been salvaged yet it is
 * salvaged first by a recursive call.  The directory is then checked with
 * DirOK (or treated as bad outright when RebuildDirs is set and Testing is
 * not), and every entry is passed to JudgeEntry through EnumerateDir.
 * The summary of the root directory (vnode 1, unique 1) is copied to
 * *rootdir once it has been judged.
 *
 * `dir', `dirHandle' and `path' are static; they are only filled in after
 * the recursive call on the parent, so the recursion does not clobber a
 * summary that is still in use.
 */
2786 SalvageDir(char *name, VolumeId rwVid, struct VnodeInfo *dirVnodeInfo,
2787 IHandle_t * alinkH, int i, struct DirSummary *rootdir,
2790 static struct DirSummary dir;
2791 static struct DirHandle dirHandle;
2792 struct VnodeEssence *parent;
2793 static char path[MAXPATHLEN];
2796 if (dirVnodeInfo->vnodes[i].salvaged)
2797 return; /* already salvaged */
/* Mark before doing any work so the recursive call below cannot revisit this vnode. */
2800 dirVnodeInfo->vnodes[i].salvaged = 1;
2802 if (dirVnodeInfo->inodes[i] == 0)
2803 return; /* Not allocated to a directory */
/* Vnode 1 is the root directory: it must not have a parent. */
2805 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
2806 if (dirVnodeInfo->vnodes[i].parent) {
2807 Log("Bad parent, vnode 1; %s...\n",
2808 (Testing ? "skipping" : "salvaging"));
2809 dirVnodeInfo->vnodes[i].parent = 0;
2810 dirVnodeInfo->vnodes[i].changed = 1;
/* Make sure the parent directory has been salvaged before this one. */
2813 parent = CheckVnodeNumber(dirVnodeInfo->vnodes[i].parent);
2814 if (parent && parent->salvaged == 0)
2815 SalvageDir(name, rwVid, dirVnodeInfo, alinkH,
2816 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
2817 rootdir, rootdirfound);
/* Fill in the static summary for this directory. */
2820 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
2821 dir.unique = dirVnodeInfo->vnodes[i].unique;
2824 dir.parent = dirVnodeInfo->vnodes[i].parent;
2825 dir.haveDot = dir.haveDotDot = 0;
2826 dir.ds_linkH = alinkH;
2827 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, fileSysDevice,
2828 dirVnodeInfo->inodes[i]);
/* A forced rebuild (RebuildDirs and not Testing) counts as "bad" without calling DirOK. */
2830 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
2833 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
2834 (Testing ? "skipping" : "salvaging"));
2837 CopyAndSalvage(&dir);
/* Keep a copy of the handle as it is now; the copy is what gets enumerated and idec'ed below. */
2841 dirHandle = dir.dirHandle;
2844 GetDirName(bitNumberToVnodeNumber(i, vLarge),
2845 &dirVnodeInfo->vnodes[i], path);
2848 /* If enumeration failed for random reasons, we will probably delete
2849 * too much stuff, so we guard against this instead.
2851 assert(EnumerateDir(&dirHandle, JudgeEntry, &dir) == 0);
2854 /* Delete the old directory if it was copied in order to salvage.
2855 * CopyOnWrite has written the new inode # to the disk, but we still
2856 * have the old one in our local structure here. Thus, we idec the
2860 if (dir.copied && !Testing) {
2861 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
2863 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
2866 /* Remember rootdir DirSummary _after_ it has been judged */
2867 if (dir.vnodeNumber == 1 && dir.unique == 1) {
2868 memcpy(rootdir, &dir, sizeof(struct DirSummary));
/*
 * SalvageVolume: salvage the read-write volume described by rwIsp.
 *
 * Visible steps, in order:
 *   - read and sanity-check the volume header;
 *   - distil the large and small vnode indexes (DistilVnodeEssence);
 *   - salvage every directory vnode (SalvageDir);
 *   - walk all vnodes looking for orphans and, when orphans == ORPH_ATTACH,
 *     attach them to the root directory under generated
 *     __ORPHANDIR__/__ORPHANFILE__ names;
 *   - write back every vnode that changed or whose link count is off,
 *     removing orphans when orphans == ORPH_REMOVE or todelete is set;
 *   - free the remembered directory names, fix up the header statistics,
 *     uniquifier and state flags, and write the header back.
 *
 * alinkH is the link-table handle handed to SalvageDir and IH_DEC.
 */
2876 SalvageVolume(register struct InodeSummary *rwIsp, IHandle_t * alinkH)
2878 /* This routine, for now, will only be called for read-write volumes */
2880 int BlocksInVolume = 0, FilesInVolume = 0;
2881 register VnodeClass class;
2882 struct DirSummary rootdir, oldrootdir;
2883 struct VnodeInfo *dirVnodeInfo;
2884 struct VnodeDiskObject vnode;
2885 VolumeDiskData volHeader;
2887 int orphaned, rootdirfound = 0;
2888 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
2889 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
2890 struct VnodeEssence *vep;
2893 afs_sfsize_t nBytes;
2895 VnodeId LFVnode, ThisVnode;
2896 Unique LFUnique, ThisUnique;
/* Read the volume header and refuse to continue if it is not a sane, live header. */
2899 vid = rwIsp->volSummary->header.id;
2900 IH_INIT(h, fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
2901 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
2902 assert(nBytes == sizeof(volHeader));
2903 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
2904 assert(volHeader.destroyMe != DESTROY_ME);
2905 /* (should not have gotten this far with DESTROY_ME flag still set!) */
/* Collect the per-vnode essentials from both index files. */
2907 DistilVnodeEssence(vid, vLarge, rwIsp->volSummary->header.largeVnodeIndex,
2909 DistilVnodeEssence(vid, vSmall, rwIsp->volSummary->header.smallVnodeIndex,
/* Salvage every directory (large) vnode. */
2912 dirVnodeInfo = &vnodeInfo[vLarge];
2913 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
2914 SalvageDir(volHeader.name, vid, dirVnodeInfo, alinkH, i, &rootdir,
2918 nt_sync(fileSysDevice);
2920 sync(); /* This used to be done lower level, for every dir */
2927 /* Parse each vnode looking for orphaned vnodes and
2928 * connect them to the tree as orphaned (if requested).
2930 oldrootdir = rootdir;
2931 for (class = 0; class < nVNODECLASSES; class++) {
2932 for (v = 0; v < vnodeInfo[class].nVnodes; v++) {
2933 vep = &(vnodeInfo[class].vnodes[v]);
2934 ThisVnode = bitNumberToVnodeNumber(v, class);
2935 ThisUnique = vep->unique;
2937 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
2938 continue; /* Ignore unused, claimed, and root vnodes */
2940 /* This vnode is orphaned. If it is a directory vnode, then the '..'
2941 * entry in this vnode had incremented the parent link count (In
2942 * JudgeEntry()). We need to go to the parent and decrement that
2943 * link count. But if the parent's unique is zero, then the parent
2944 * link count was not incremented in JudgeEntry().
2946 if (class == vLarge) { /* directory vnode */
/* count is subtracted from the on-disk linkCount when the vnode is written
 * back further down, so raising count here lowers the parent's link count. */
2947 pv = vnodeIdToBitNumber(vep->parent);
2948 if (vnodeInfo[vLarge].vnodes[pv].unique != 0)
2949 vnodeInfo[vLarge].vnodes[pv].count++;
2953 continue; /* If no rootdir, can't attach orphaned files */
2955 /* Here we attach orphaned files and directories into the
2956 * root directory, LFVnode, making sure link counts stay correct.
2958 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
2959 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
2960 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
2962 /* Update this orphaned vnode's info. Its parent info and
2963 * link count (do for orphaned directories and files).
2965 vep->parent = LFVnode; /* Parent is the root dir */
2966 vep->unique = LFUnique;
2969 vep->count--; /* Inc link count (root dir will pt to it) */
2971 /* If this orphaned vnode is a directory, change '..'.
2972 * The name of the orphaned dir/file is unknown, so we
2973 * build a unique name. No need to CopyOnWrite the directory
2974 * since it is not connected to tree in BK or RO volume and
2975 * won't be visible there.
2977 if (class == vLarge) {
2981 /* Remove and recreate the ".." entry in this orphaned directory */
2982 SetSalvageDirHandle(&dh, vid, fileSysDevice,
2983 vnodeInfo[class].inodes[v]);
2985 pa.Unique = LFUnique;
2986 assert(Delete(&dh, "..") == 0);
2987 assert(Create(&dh, "..", &pa) == 0);
2989 /* The original parent's link count was decremented above.
2990 * Here we increment the new parent's link count.
2992 pv = vnodeIdToBitNumber(LFVnode);
2993 vnodeInfo[vLarge].vnodes[pv].count--;
2997 /* Go to the root dir and add this entry. The link count of the
2998 * root dir was incremented when ".." was created. Try 10 times.
3000 for (j = 0; j < 10; j++) {
3001 pa.Vnode = ThisVnode;
3002 pa.Unique = ThisUnique;
3004 (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
3006 vLarge) ? "__ORPHANDIR__" :
3007 "__ORPHANFILE__"), ThisVnode,
3010 CopyOnWrite(&rootdir);
3011 code = Create(&rootdir.dirHandle, npath, &pa);
3015 ThisUnique += 50; /* Try creating a different file */
3018 Log("Attaching orphaned %s to volume's root dir as %s\n",
3019 ((class == vLarge) ? "directory" : "file"), npath);
3021 } /* for each vnode in the class */
3022 } /* for each class of vnode */
3024 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
3026 if (!oldrootdir.copied && rootdir.copied) {
3028 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
3031 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
3034 DFlush(); /* Flush the changes */
/* Without a root directory there is nowhere to attach orphans; fall back to ignoring them. */
3035 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
3036 Log("Cannot attach orphaned files and directories: Root directory not found\n");
3037 orphans = ORPH_IGNORE;
3040 /* Write out all changed vnodes. Orphaned files and directories
3041 * will get removed here also (if requested).
3043 for (class = 0; class < nVNODECLASSES; class++) {
3044 int nVnodes = vnodeInfo[class].nVnodes;
3045 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3046 struct VnodeEssence *vnodes = vnodeInfo[class].vnodes;
3047 FilesInVolume += vnodeInfo[class].nAllocatedVnodes;
3048 BlocksInVolume += vnodeInfo[class].volumeBlockCount;
3049 for (i = 0; i < nVnodes; i++) {
3050 register struct VnodeEssence *vnp = &vnodes[i];
3051 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
3053 /* If the vnode is good but is unclaimed (not listed in
3054 * any directory entries), then it is orphaned.
3057 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(vnodeNumber))) {
3058 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
/* Only vnodes that changed, or whose link count is still off, are rewritten. */
3062 if (vnp->changed || vnp->count) {
3066 IH_IREAD(vnodeInfo[class].handle,
3067 vnodeIndexOffset(vcp, vnodeNumber),
3068 (char *)&vnode, sizeof(vnode));
3069 assert(nBytes == sizeof(vnode));
3071 vnode.parent = vnp->parent;
3072 oldCount = vnode.linkCount;
/* vnp->count holds the remaining discrepancy; subtracting it yields the corrected link count. */
3073 vnode.linkCount = vnode.linkCount - vnp->count;
3076 orphaned = IsVnodeOrphaned(vnodeNumber);
3078 if (!vnp->todelete) {
3079 /* Orphans should have already been attached (if requested) */
3080 assert(orphans != ORPH_ATTACH);
3081 oblocks += vnp->blockCount;
3084 if (((orphans == ORPH_REMOVE) || vnp->todelete)
3086 BlocksInVolume -= vnp->blockCount;
3088 if (VNDISK_GET_INO(&vnode)) {
3090 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
3093 memset(&vnode, 0, sizeof(vnode));
3095 } else if (vnp->count) {
3097 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
3100 vnode.modeBits = vnp->modeBits;
3103 vnode.dataVersion++;
3106 IH_IWRITE(vnodeInfo[class].handle,
3107 vnodeIndexOffset(vcp, vnodeNumber),
3108 (char *)&vnode, sizeof(vnode));
3109 assert(nBytes == sizeof(vnode));
3115 if (!Showmode && ofiles) {
3116 Log("%s %d orphaned files and directories (approx. %u KB)\n",
3118 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
/* Release the names that were remembered for vnodes during the directory pass. */
3122 for (class = 0; class < nVNODECLASSES; class++) {
3123 register struct VnodeInfo *vip = &vnodeInfo[class];
3124 for (i = 0; i < vip->nVnodes; i++)
3125 if (vip->vnodes[i].name)
3126 free(vip->vnodes[i].name);
3133 /* Set correct resource utilization statistics */
3134 volHeader.filecount = FilesInVolume;
3135 volHeader.diskused = BlocksInVolume;
3137 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
3138 if (volHeader.uniquifier < (maxunique + 1)) {
3140 Log("Volume uniquifier is too low; fixed\n");
3141 /* Plus 2,000 in case there are workstations out there with
3142 * cached vnodes that have since been deleted
3144 volHeader.uniquifier = (maxunique + 1 + 2000);
3147 /* Turn off the inUse bit; the volume's been salvaged! */
3148 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
3149 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
3150 volHeader.inService = 1; /* allow service again */
3151 volHeader.needsCallback = (VolumeChanged != 0);
3152 volHeader.dontSalvage = DONT_SALVAGE;
/* Write the updated header back; a short write is fatal. */
3155 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
3156 assert(nBytes == sizeof(volHeader));
3159 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
3160 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
3161 FilesInVolume, BlocksInVolume);
/* Drop the index handles that DistilVnodeEssence initialised. */
3163 IH_RELEASE(vnodeInfo[vSmall].handle);
3164 IH_RELEASE(vnodeInfo[vLarge].handle);
/*
 * ClearROInUseBit: mark the volume described by `summary' as usable again.
 * Reads the volume header through summary->volumeInfoHandle, clears inUse
 * and needsSalvaged, sets inService and dontSalvage, and writes the header
 * back.  Both the read and the write are asserted to transfer the whole
 * header, and the header magic is asserted before anything is changed.
 */
3170 ClearROInUseBit(struct VolumeSummary *summary)
3172 IHandle_t *h = summary->volumeInfoHandle;
3173 afs_sfsize_t nBytes;
3175 VolumeDiskData volHeader;
3177 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3178 assert(nBytes == sizeof(volHeader));
3179 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
/* Same state flags SalvageVolume sets on a salvaged read-write header. */
3180 volHeader.inUse = 0;
3181 volHeader.needsSalvaged = 0;
3182 volHeader.inService = 1;
3183 volHeader.dontSalvage = DONT_SALVAGE;
3185 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
3186 assert(nBytes == sizeof(volHeader));
3191 * Possibly delete the volume.
3193 * deleteMe - Always do so, only a partial volume.
/*
 * MaybeZapVolume: decide what to do with a volume that could not be salvaged.
 *
 * For a read-only volume, or whenever deleteMe is set, the reason is logged
 * and the volume header file <fileSysPath>/<fileName> is removed (the log
 * message on failure refers to unlink).  Nothing is done in that branch if
 * the inode summary has no volume summary or no header file name.
 * For any other volume, unless `check' is set, the failure is logged with
 * `message' as prefix and the salvage is aborted through Abort().
 */
3196 MaybeZapVolume(register struct InodeSummary *isp, char *message, int deleteMe,
3199 if (readOnly(isp) || deleteMe) {
3200 if (isp->volSummary && isp->volSummary->fileName) {
3203 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
3205 Log("It will be deleted on this server (you may find it elsewhere)\n");
3208 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
3210 Log("it will be deleted instead. It should be recloned.\n");
/* NOTE(review): unbounded sprintf; the declaration of `path' is not visible here -- confirm its size. */
3214 sprintf(path, "%s/%s", fileSysPath, isp->volSummary->fileName);
3216 Log("Unable to unlink %s (errno = %d)\n", path, errno);
3220 } else if (!check) {
3221 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
3223 Abort("Salvage of volume %u aborted\n", isp->volumeId);
/*
 * AskOffline: ask the fileserver to take `volumeId' on `partition' offline
 * so that it can be salvaged.
 *
 * The request (FSYNC_VolOp with FSYNC_VOL_OFF / FSYNC_SALVAGE) is tried up
 * to three times.  SYNC_DENIED and SYNC_BAD_COMMAND abort the salvage at
 * once; any other failure is logged and FSYNC_clientFinis() is called before
 * the next attempt.  If the request has still not succeeded after the
 * loop, the salvage is aborted.
 *
 * With AFS_DEMAND_ATTACH_FS the volume header is additionally read from
 * disk and rewritten with inUse = programType.
 */
3229 AskOffline(VolumeId volumeId, char * partition)
3233 for (i = 0; i < 3; i++) {
3234 code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_OFF, FSYNC_SALVAGE, NULL);
3236 if (code == SYNC_OK) {
3238 } else if (code == SYNC_DENIED) {
3239 #ifdef DEMAND_ATTACH_ENABLE
3240 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
3242 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
3244 Abort("Salvage aborted\n");
3245 } else if (code == SYNC_BAD_COMMAND) {
3246 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
3248 #ifdef DEMAND_ATTACH_ENABLE
3249 Log("AskOffline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
3251 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
3253 Abort("Salvage aborted\n");
3256 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
3257 FSYNC_clientFinis();
/* All attempts used up without SYNC_OK: give up. */
3261 if (code != SYNC_OK) {
3262 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
3263 Abort("Salvage aborted\n");
3266 #ifdef AFS_DEMAND_ATTACH_FS
3267 /* set inUse = programType in the volume header. We do this in case
3268 * the fileserver restarts/crashes while we are salvaging.
3269 * Otherwise, the fileserver could attach the volume again on
3270 * startup while we are salvaging, which would be very bad, or
3271 * schedule another salvage while we are salvaging, which would be
3276 char name[VMAXPATHLEN];
3277 struct VolumeHeader header;
3278 struct VolumeDiskHeader diskHeader;
3279 struct VolumeDiskData volHeader;
3281 afs_snprintf(name, sizeof(name), "%s/" VFORMAT, fileSysPathName,
3282 afs_printable_uint32_lu(volumeId));
3284 fd = afs_open(name, O_RDONLY);
3288 if (read(fd, &diskHeader, sizeof(diskHeader)) != sizeof(diskHeader) ||
3289 diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
3296 DiskToVolumeHeader(&header, &diskHeader);
3298 IH_INIT(h, fileSysDevice, header.parent, header.volumeInfo);
3299 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
3300 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
3306 volHeader.inUse = programType;
3308 /* If we can't re-write the header, bail out and error. We don't
3309 * assert when reading the header, since it's possible the
3310 * header isn't really there (when there's no data associated
3311 * with the volume; we just delete the vol header file in that
3312 * case). But if it's there enough that we can read it, but
3313 * somehow we cannot write to it to signify we're salvaging it,
3314 * we've got a big problem and we cannot continue. */
3315 assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
3319 #endif /* AFS_DEMAND_ATTACH_FS */
/*
 * AskOnline: ask the fileserver to bring `volumeId' on `partition' back
 * online after salvaging.
 *
 * The request (FSYNC_VolOp with FSYNC_VOL_ON / FSYNC_WHATEVER) is tried up
 * to three times.  A denial or a protocol mismatch is only logged here; any
 * other failure is logged and FSYNC_clientFinis() is called before the next
 * attempt.
 *
 * The generic failure message used to be a copy of the AskOffline text
 * ("take volume offline"); it now names the operation that actually failed.
 */
3323 AskOnline(VolumeId volumeId, char *partition)
3327 for (i = 0; i < 3; i++) {
3328 code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
3330 if (code == SYNC_OK) {
3332 } else if (code == SYNC_DENIED) {
3333 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, partition);
3334 } else if (code == SYNC_BAD_COMMAND) {
3335 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
3337 #ifdef DEMAND_ATTACH_ENABLE
3338 Log("AskOnline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
3340 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
3345 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
3346 FSYNC_clientFinis();
/*
 * CopyInode: copy the contents of inode1 into inode2 on `device'.
 * Both inodes are opened through handles initialised for volume `rwvolume'.
 * Data is moved in buf-sized chunks with FDH_READ/FDH_WRITE, each write is
 * asserted to be complete, and both descriptors are then closed with
 * FDH_REALLYCLOSE.
 * NOTE(review): in the lines visible here only the source descriptor is
 * asserted to be non-NULL -- confirm that destFdP is checked as well.
 */
3353 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
3355 /* Volume parameter is passed in case iopen is upgraded in future to
3356 * require a volume Id to be passed
3359 IHandle_t *srcH, *destH;
3360 FdHandle_t *srcFdP, *destFdP;
3363 IH_INIT(srcH, device, rwvolume, inode1);
3364 srcFdP = IH_OPEN(srcH);
3365 assert(srcFdP != NULL);
3366 IH_INIT(destH, device, rwvolume, inode2);
3367 destFdP = IH_OPEN(destH);
3369 while ((n = FDH_READ(srcFdP, buf, sizeof(buf))) > 0)
3370 assert(FDH_WRITE(destFdP, buf, n) == n);
3372 FDH_REALLYCLOSE(srcFdP);
3373 FDH_REALLYCLOSE(destFdP);
/*
 * PrintInodeList: debugging aid that dumps the inode file.
 * The whole file behind inodeFd is read into a heap buffer sized from
 * afs_fstat(), and one log line is written per ViceInodeInfo record
 * (inode, link count, byte count and the four parameter words).
 * The stat, the allocation and the read are all asserted to succeed.
 */
3380 PrintInodeList(void)
3382 register struct ViceInodeInfo *ip;
3383 struct ViceInodeInfo *buf;
3384 struct afs_stat status;
3385 register int nInodes;
3387 assert(afs_fstat(inodeFd, &status) == 0);
3388 buf = (struct ViceInodeInfo *)malloc(status.st_size);
3389 assert(buf != NULL);
/* Number of whole records in the file. */
3390 nInodes = status.st_size / sizeof(struct ViceInodeInfo);
3391 assert(read(inodeFd, buf, status.st_size) == status.st_size);
3392 for (ip = buf; nInodes--; ip++) {
3393 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
3394 PrintInode(NULL, ip->inodeNumber), ip->linkCount,
3395 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
3396 ip->u.param[2], ip->u.param[3]);
/*
 * PrintInodeSummary: debugging aid that logs one line for each of the
 * nVolumesInInodeFile entries of inodeSummary[] (volume ids, index, inode
 * counts and maximum uniquifier).
 */
3402 PrintInodeSummary(void)
3405 struct InodeSummary *isp;
3407 for (i = 0; i < nVolumesInInodeFile; i++) {
3408 isp = &inodeSummary[i];
3409 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/*
 * PrintVolumeSummary: debugging aid that logs the header file name of each
 * of the nVolumes entries starting at volumeSummaryp.  Only fileName is
 * actually printed; the remaining words in the format string are literal.
 */
3414 PrintVolumeSummary(void)
3417 struct VolumeSummary *vsp;
3419 for (i = 0, vsp = volumeSummaryp; i < nVolumes; vsp++, i++) {
3420 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
3430 assert(0); /* Fork is never executed in the NT code path */
3434 #ifdef AFS_DEMAND_ATTACH_FS
3435 if ((f == 0) && (programType == salvageServer)) {
3436 /* we are a salvageserver child */
3437 #ifdef FSSYNC_BUILD_CLIENT
3438 VChildProcReconnectFS_r();
3440 #ifdef SALVSYNC_BUILD_CLIENT
3444 #endif /* AFS_DEMAND_ATTACH_FS */
3445 #endif /* !AFS_NT40_ENV */
3455 #ifdef AFS_DEMAND_ATTACH_FS
3456 if (programType == salvageServer) {
3457 #ifdef SALVSYNC_BUILD_CLIENT
3460 #ifdef FSSYNC_BUILD_CLIENT
3464 #endif /* AFS_DEMAND_ATTACH_FS */
3467 if (main_thread != pthread_self())
3468 pthread_exit((void *)code);
3481 pid = wait(&status);
3483 if (WCOREDUMP(status))
3484 Log("\"%s\" core dumped!\n", prog);
3485 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp: format `clock' as local time, either as "MM/DD/YYYY HH:MM:SS"
 * or as "MM/DD/YYYY HH:MM" (presumably selected by `precision'; the test
 * itself is not visible here).
 * The text is written to a 20-byte static buffer, which is exactly large
 * enough for the longer form plus the terminating NUL.  Because the buffer
 * is static, each call overwrites the result of the previous one.
 */
3491 TimeStamp(time_t clock, int precision)
3494 static char timestamp[20];
3495 lt = localtime(&clock);
3497 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M:%S", lt);
3499 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M", lt);
/*
 * CheckLogFile: set up the salvager log file at `log_path'.
 * Visible steps: build "<log_path>.old", rename the current log to that
 * name, and open log_path for appending into logFile.  If that fails, a
 * fallback is used (stdout, according to the existing comment).
 * NOTE(review): the ".old" name is built with strcpy/strcat into a buffer
 * of AFSDIR_PATH_MAX bytes without a length check -- confirm that callers
 * only pass paths short enough to leave room for the suffix.
 */
3504 CheckLogFile(char * log_path)
3506 char oldSlvgLog[AFSDIR_PATH_MAX];
3508 #ifndef AFS_NT40_ENV
3515 strcpy(oldSlvgLog, log_path);
3516 strcat(oldSlvgLog, ".old");
3518 renamefile(log_path, oldSlvgLog);
3519 logFile = afs_fopen(log_path, "a");
3521 if (!logFile) { /* still nothing, use stdout */
3525 #ifndef AFS_NAMEI_ENV
3526 AFS_DEBUG_IOPS_LOG(logFile);
3531 #ifndef AFS_NT40_ENV
/*
 * TimeStampLogFile: give the log at `log_path' an additional, timestamped
 * name.  The new name is "<log_path>.YYYY-MM-DD.HH:MM:SS" built from the
 * local time in `now', and it is created as a hard link to the log file.
 * Failure of link() is deliberately ignored.
 */
3533 TimeStampLogFile(char * log_path)
3535 char stampSlvgLog[AFSDIR_PATH_MAX];
3540 lt = localtime(&now);
/* struct tm counts years from 1900 and months from 0, hence the offsets. */
3541 (void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
3542 "%s.%04d-%02d-%02d.%02d:%02d:%02d",
3543 log_path, lt->tm_year + 1900,
3544 lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
3547 /* try to link the logfile to a timestamped filename */
3548 /* if it fails, oh well, nothing we can do */
3549 link(log_path, stampSlvgLog);
3558 #ifndef AFS_NT40_ENV
3560 printf("Can't show log since using syslog.\n");
3569 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
3572 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
3575 while (fgets(line, sizeof(line), logFile))
/*
 * Log: printf-style logging routine of the salvager.
 * The message is formatted with afs_vsnprintf into a local buffer (longer
 * messages are truncated to the buffer size) and then emitted either to
 * syslog at LOG_INFO (non-NT builds only) or to logFile, where it is
 * prefixed with the current time including seconds.
 */
3582 Log(const char *format, ...)
3588 va_start(args, format);
3589 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
3591 #ifndef AFS_NT40_ENV
/* The message is passed as an argument, never as the format string. */
3593 syslog(LOG_INFO, "%s", tmp);
3597 gettimeofday(&now, 0);
3598 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort: printf-style routine used to report a fatal salvage error.
 * The message is formatted with afs_vsnprintf into a local buffer and
 * emitted to syslog at LOG_INFO (non-NT builds only) or to logFile; unlike
 * Log(), the logFile line carries no timestamp.
 * NOTE(review): the statements that end the salvage after the message has
 * been written are not visible in this excerpt.
 */
3604 Abort(const char *format, ...)
3609 va_start(args, format);
3610 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
3612 #ifndef AFS_NT40_ENV
3614 syslog(LOG_INFO, "%s", tmp);
3618 fprintf(logFile, "%s", tmp);
3633 p = (char *)malloc(strlen(s) + 1);
3639 /* Remove the FORCESALVAGE file */
/*
 * RemoveTheForce: remove the marker file "<path>/FORCESALVAGE".
 * The file is only unlinked when Testing is off and ForceSalvage is set,
 * and only if afs_stat() finds it.
 * NOTE(review): the name is built with strcpy/strcat; the declaration of
 * `target' is not visible here -- confirm it is large enough for `path'
 * plus the suffix.
 */
3641 RemoveTheForce(char *path)
3644 struct afs_stat force; /* so we can use afs_stat to find it */
3645 strcpy(target,path);
3646 strcat(target,"/FORCESALVAGE");
3647 if (!Testing && ForceSalvage) {
3648 if (afs_stat(target,&force) == 0) unlink(target);
3652 #ifndef AFS_AIX32_ENV
3654 * UseTheForceLuke - see if we can use the force
/*
 * UseTheForceLuke (builds without AFS_AIX32_ENV): report whether the marker
 * file "<path>/FORCESALVAGE" exists.  Returns non-zero when afs_stat() on
 * that name succeeds and 0 otherwise.
 * NOTE(review): `target' is filled with strcpy/strcat and its declaration is
 * not visible here -- confirm its size.
 */
3657 UseTheForceLuke(char *path)
3659 struct afs_stat force;
3661 strcpy(target,path);
3662 strcat(target,"/FORCESALVAGE");
3664 return (afs_stat(target, &force) == 0);
3668 * UseTheForceLuke - see if we can use the force
3671 * The VRMIX fsck will not muck with the filesystem it is supposedly
3672 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
3673 * muck directly with the root inode, which is within the normal
3675 * ListViceInodes() has a side effect of setting ForceSalvage if
3676 * it detects a need, based on root inode examination.
/*
 * UseTheForceLuke (alternative build variant): no marker file is consulted
 * in the visible code; the function simply returns 0.
 */
3679 UseTheForceLuke(char *path)
3682 return 0; /* sorry OB1 */
3687 /* NT support routines */
3689 static char execpathname[MAX_PATH];
/*
 * nt_SalvagePartition: start a child salvager process for partition
 * `partName' (NT builds).
 *
 * The path of the running executable is looked up once with
 * GetModuleFileName and cached in execpathname.  A childJob_t carrying the
 * magic number, the job number `jobn' and the partition name is passed to
 * the child through spawnprocveb together with the saved arguments.
 *
 * The lookup is given room for MAX_PATH - 1 characters, so a result of
 * exactly that length means the path was truncated.  The check used to
 * compare against 1023, which did not match the size actually passed.
 * NOTE(review): partName is copied into job.cj_part with an unbounded
 * strcpy -- confirm the field is large enough for every partition name.
 */
3691 nt_SalvagePartition(char *partName, int jobn)
3696 if (!*execpathname) {
3697 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
/* Zero means failure; a completely filled buffer means the path was cut short. */
3698 if (!n || n == MAX_PATH - 1)
3701 job.cj_magic = SALVAGER_MAGIC;
3702 job.cj_number = jobn;
3703 (void)strcpy(job.cj_part, partName);
3704 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage: child-side counterpart of nt_SalvagePartition.
 * `datap'/`len' describe the job blob handed over by the parent.  The blob
 * is checked for the expected size (sizeof(childJob_t)) and magic number
 * (SALVAGER_MAGIC), and a log file named "<salvage log path>.<number>" is
 * then opened for writing into logFile.
 * NOTE(review): the argument supplying <number> and the values returned on
 * the failure paths are not visible in this excerpt.
 */
3709 nt_SetupPartitionSalvage(void *datap, int len)
3711 childJob_t *jobp = (childJob_t *) datap;
3712 char logname[AFSDIR_PATH_MAX];
/* Reject anything that is not a job blob written by nt_SalvagePartition. */
3714 if (len != sizeof(childJob_t))
3716 if (jobp->cj_magic != SALVAGER_MAGIC)
3721 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
3723 logFile = afs_fopen(logname, "w");
3731 #endif /* AFS_NT40_ENV */