2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
93 #include <sys/param.h>
97 #endif /* ITIMER_REAL */
103 #include <sys/stat.h>
108 #include <WINNT/afsevent.h>
110 #if defined(AFS_AIX_ENV) || defined(AFS_SUN4_ENV)
111 #define WCOREDUMP(x) (x & 0200)
114 #include <afs/afsint.h>
115 #include <afs/assert.h>
116 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
117 #if defined(AFS_VFSINCL_ENV)
118 #include <sys/vnode.h>
120 #include <sys/fs/ufs_inode.h>
122 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
123 #include <ufs/ufs/dinode.h>
124 #include <ufs/ffs/fs.h>
126 #include <ufs/inode.h>
129 #else /* AFS_VFSINCL_ENV */
131 #include <ufs/inode.h>
132 #else /* AFS_OSF_ENV */
133 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
134 #include <sys/inode.h>
137 #endif /* AFS_VFSINCL_ENV */
138 #endif /* AFS_SGI_ENV */
141 #include <sys/lockf.h>
145 #include <checklist.h>
147 #if defined(AFS_SGI_ENV)
152 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
155 #include <sys/mnttab.h>
156 #include <sys/mntent.h>
161 #endif /* AFS_SGI_ENV */
162 #endif /* AFS_HPUX_ENV */
167 #include <afs/osi_inode.h>
170 #include <afs/afsutil.h>
171 #include <afs/fileutil.h>
172 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
180 #include <afs/afssyscalls.h>
184 #include "partition.h"
185 #include "daemon_com.h"
187 #include "salvsync.h"
188 #include "viceinode.h"
190 #include "volinodes.h" /* header magic number, etc. stuff */
191 #include "vol-salvage.h"
196 /*@+fcnmacros +macrofcndecl@*/
199 extern off64_t afs_lseek(int FD, off64_t O, int F);
200 #endif /*S_SPLINT_S */
201 #define afs_lseek(FD, O, F) lseek64(FD, (off64_t) (O), F)
202 #define afs_stat stat64
203 #define afs_fstat fstat64
204 #define afs_open open64
205 #define afs_fopen fopen64
206 #else /* !O_LARGEFILE */
208 extern off_t afs_lseek(int FD, off_t O, int F);
209 #endif /*S_SPLINT_S */
210 #define afs_lseek(FD, O, F) lseek(FD, (off_t) (O), F)
211 #define afs_stat stat
212 #define afs_fstat fstat
213 #define afs_open open
214 #define afs_fopen fopen
215 #endif /* !O_LARGEFILE */
216 /*@=fcnmacros =macrofcndecl@*/
219 extern void *calloc();
221 static char *TimeStamp(time_t clock, int precision);
224 int debug; /* -d flag */
225 extern int Testing; /* -n flag */
226 int ListInodeOption; /* -i flag */
227 int ShowRootFiles; /* -r flag */
228 int RebuildDirs; /* -sal flag */
229 int Parallel = 4; /* -para X flag */
230 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
231 int forceR = 0; /* -b flag */
232 int ShowLog = 0; /* -showlog flag */
233 int ShowSuid = 0; /* -showsuid flag */
234 int ShowMounts = 0; /* -showmounts flag */
235 int orphans = ORPH_IGNORE; /* -orphans option */
240 int useSyslog = 0; /* -syslog flag */
241 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
250 #define MAXPARALLEL 32
252 int OKToZap; /* -o flag */
253 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
254 * in the volume header */
256 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
258 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
260 Device fileSysDevice; /* The device number of the current
261 * partition being salvaged */
265 char *fileSysPath; /* The path of the mounted partition currently
266 * being salvaged, i.e. the directory
267 * containing the volume headers */
269 char *fileSysPathName; /* NT needs this to make name pretty in log. */
270 IHandle_t *VGLinkH; /* Link handle for current volume group. */
271 int VGLinkH_cnt; /* # of references to lnk handle. */
272 struct DiskPartition64 *fileSysPartition; /* Partition being salvaged */
274 char *fileSysDeviceName; /* The block device where the file system
275 * being salvaged was mounted */
276 char *filesysfulldev;
278 int VolumeChanged; /* Set by any routine which would change the volume in
279 * a way which would require callbacks to be broken if the
280 * volume was put back on line by an active file server */
282 VolumeDiskData VolInfo; /* A copy of the last good or salvaged volume header dealt with */
284 int nVolumesInInodeFile; /* Number of read-write volumes summarized */
285 int inodeFd; /* File descriptor for inode file */
288 struct VnodeInfo vnodeInfo[nVNODECLASSES];
291 struct VolumeSummary *volumeSummaryp; /* Holds all the volumes in a part */
292 int nVolumes; /* Number of volumes (read-write and read-only)
293 * in volume summary */
299 /* Forward declarations */
300 /*@printflike@*/ void Log(const char *format, ...);
301 /*@printflike@*/ void Abort(const char *format, ...);
302 static int IsVnodeOrphaned(VnodeId vnode);
304 /* Uniquifier stored in the Inode */
309 return (u & 0x3fffff);
311 #if defined(AFS_SGI_EXMAG)
312 return (u & SGI_UNIQMASK);
315 #endif /* AFS_SGI_EXMAG */
/*
 * Classify an errno value.  EPERM, ENXIO and ENOENT are singled out by the
 * test below; every other value yields 0 because it may be transient
 * (e.g. EMFILE).
 * NOTE(review): presumably a non-zero value is returned for the three listed
 * errors -- confirm against the complete body.
 */
320 BadError(register int aerror)
322 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
324 return 0; /* otherwise may be transient, e.g. EMFILE */
329 char *save_args[MAX_ARGS];
331 extern pthread_t main_thread;
332 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
/*
 * Acquire the salvager lock file named by AFSDIR_SERVER_SLVGLOCK_FILEPATH;
 * the resulting handle is the value tested in salvageLock.
 *
 * Two variants are visible.  One opens the file with CreateFile() and treats
 * INVALID_HANDLE_VALUE as "another salvager is running".  The other opens it
 * with afs_open(O_CREAT | O_RDWR, 0666), reports a failed open, and then
 * takes a lock with flock(LOCK_EX) (guarded by AFS_DARWIN_ENV) or
 * lockf(F_LOCK); a failed lock is again reported as another salvager
 * running.
 */
335 /* Get the salvage lock if not already held. Hold until process exits. */
337 ObtainSalvageLock(void)
343 (int)CreateFile(AFSDIR_SERVER_SLVGLOCK_FILEPATH, 0, 0, NULL,
344 OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
345 if (salvageLock == (int)INVALID_HANDLE_VALUE) {
347 "salvager: There appears to be another salvager running! Aborted.\n");
352 afs_open(AFSDIR_SERVER_SLVGLOCK_FILEPATH, O_CREAT | O_RDWR, 0666);
353 if (salvageLock < 0) {
355 "salvager: can't open salvage lock file %s, aborting\n",
356 AFSDIR_SERVER_SLVGLOCK_FILEPATH);
359 #ifdef AFS_DARWIN_ENV
360 if (flock(salvageLock, LOCK_EX) == -1) {
362 if (lockf(salvageLock, F_LOCK, 0) == -1) {
365 "salvager: There appears to be another salvager running! Aborted.\n");
372 #ifdef AFS_SGI_XFS_IOPS_ENV
/*
 * Check if the given partition is mounted.  For XFS, the root inode is not a
 * constant, so we check the hard way: the mounted-filesystem table (MOUNTED)
 * is scanned for an entry whose mount directory equals `part`.
 *
 * part    - mount directory to look for (compared with strcmp).
 * Returns 1 if an entry with that mount directory exists, 0 otherwise.
 * Asserts that the mount table can be opened.
 */
int
IsPartitionMounted(char *part)
{
    FILE *mntfp;
    struct mntent *mntent;
    int found = 0;

    /* Keep the call out of assert() so it is still made when assertions
     * are compiled out. */
    mntfp = setmntent(MOUNTED, "r");
    assert(mntfp != NULL);

    while ((mntent = getmntent(mntfp)) != NULL) {
	if (!strcmp(part, mntent->mnt_dir)) {
	    found = 1;
	    break;
	}
    }
    endmntent(mntfp);

    /* The original expression "mntent ? 1 : 1" reported every path as
     * mounted; report the actual result of the scan. */
    return found;
}
392 /* Check if the given inode is the root of the filesystem. */
/*
 * Compares status->st_ino with ROOTINODE and returns the result of that
 * comparison.  Only compiled when AFS_SGI_XFS_IOPS_ENV is not defined; the
 * text inside the body explains that XFS has no fixed root inode number, so
 * that configuration checks the list of mounted partitions in the
 * SalvageFileSys path instead.
 */
393 #ifndef AFS_SGI_XFS_IOPS_ENV
395 IsRootInode(struct afs_stat *status)
398 * The root inode is not a fixed value in XFS partitions. So we need to
399 * see if the partition is in the list of mounted partitions. This only
400 * affects the SalvageFileSys path, so we check there.
402 return (status->st_ino == ROOTINODE);
/*
 * Decide whether a partition lives on a "big files" filesystem, which is
 * logged as not being salvaged.
 *
 * devName is turned into a device path in `name` (prefixed with "/dev/"
 * unless it already starts with that), the superblock is read with
 * ReadSuper(), and IsBigFilesFileSystem() is consulted.  An unreadable
 * superblock is logged as well.  Compiled only when AFS_NAMEI_ENV is not
 * defined.
 * NOTE(review): sprintf()/strcpy() into `name` are unbounded -- confirm the
 * buffer is large enough for any devName.
 */
407 #ifndef AFS_NAMEI_ENV
408 /* We don't want to salvage big files filesystems, since we can't put volumes on
412 CheckIfBigFilesFS(char *mountPoint, char *devName)
414 struct superblock fs;
417 if (strncmp(devName, "/dev/", 5)) {
418 (void)sprintf(name, "/dev/%s", devName);
420 (void)strcpy(name, devName);
423 if (ReadSuper(&fs, name) < 0) {
424 Log("Unable to read superblock. Not salvaging partition %s.\n",
428 if (IsBigFilesFileSystem(&fs)) {
429 Log("Partition %s is a big files filesystem, not salvaging.\n",
439 #define HDSTR "\\Device\\Harddisk"
440 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
/*
 * SameDisk, QueryDosDevice() variant: resolve the device names of p1 and p2
 * with QueryDosDevice(), check that each result starts with HDSTR, and parse
 * the number that follows the prefix into d1 and d2 with atoi().  A result
 * with a different prefix produces the WARNING log below (dowarn is a static
 * flag initialised to 1).
 * NOTE(review): presumably the function reports whether d1 equals d2 --
 * confirm against the complete body.
 */
442 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
447 static int dowarn = 1;
449 if (!QueryDosDevice(p1->devName, res, RES_LEN - 1))
451 if (strncmp(res, HDSTR, HDLEN)) {
454 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
455 res, HDSTR, p1->devName);
459 d1 = atoi(&res[HDLEN]);
461 if (!QueryDosDevice(p2->devName, res, RES_LEN - 1))
463 if (strncmp(res, HDSTR, HDLEN)) {
466 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
467 res, HDSTR, p2->devName);
471 d2 = atoi(&res[HDLEN]);
/* SameDisk, device-number variant: true when the device numbers of P1 and P2
 * give the same quotient when divided by PartsPerDisk. */
476 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
/*
 * Queue a partition for salvage and run the queued salvages in parallel.
 *
 * A non-NULL partP is wrapped in a newly allocated struct job numbered with
 * jobcount.  A NULL partP means "wait for every outstanding job"; if no job
 * was ever queued, the "No file system partitions named ..." message is
 * logged.  With debug set or Parallel == 1 the partition is salvaged
 * directly through SalvageFileSys().  Otherwise jobs[] holds one chain of
 * jobs per disk (partitions on the same disk, per SameDisk(), are chained
 * through nextjob), at most Parallel chains run at once, and wait() is used
 * to reap a finished job before the next one starts.  A started job writes
 * its log to a per-job file and calls SalvageFileSys1().  After waiting for
 * all jobs, the per-job log files named "<AFSDIR_SERVER_SLVGLOG_FILEPATH>.<i>"
 * are read back line by line and unlinked, unless syslog is in use.
 */
479 /* This assumes that two partitions with the same device number divided by
480 * PartsPerDisk are on the same disk.
483 SalvageFileSysParallel(struct DiskPartition64 *partP)
486 struct DiskPartition64 *partP;
487 int pid; /* Pid for this job */
488 int jobnumb; /* Log file job number */
489 struct job *nextjob; /* Next partition on disk to salvage */
491 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
492 struct job *thisjob = 0;
493 static int numjobs = 0;
494 static int jobcount = 0;
500 char logFileName[256];
504 /* We have a partition to salvage. Copy it into thisjob */
505 thisjob = (struct job *)malloc(sizeof(struct job));
507 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
510 memset(thisjob, 0, sizeof(struct job));
511 thisjob->partP = partP;
512 thisjob->jobnumb = jobcount;
514 } else if (jobcount == 0) {
515 /* We are asking to wait for all jobs (partp == 0), yet we never
518 Log("No file system partitions named %s* found; not salvaged\n",
519 VICE_PARTITION_PREFIX);
523 if (debug || Parallel == 1) {
525 SalvageFileSys(thisjob->partP, 0);
532 /* Check to see if thisjob is for a disk that we are already
533 * salvaging. If it is, link it in as the next job to do. The
534 * jobs array has 1 entry per disk being salvaged. numjobs is
535 * the total number of disks currently being salvaged. In
536 * order to keep the jobs array compact, when a disk is
537 * completed, the highest element in the jobs array is moved
538 * down to the now open slot.
540 for (j = 0; j < numjobs; j++) {
541 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
542 /* On same disk, add it to this list and return */
543 thisjob->nextjob = jobs[j]->nextjob;
544 jobs[j]->nextjob = thisjob;
551 /* Loop until we start thisjob or until all existing jobs are finished */
552 while (thisjob || (!partP && (numjobs > 0))) {
553 startjob = -1; /* No new job to start */
555 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
556 /* Either the max jobs are running or we have to wait for all
557 * the jobs to finish. In either case, we wait for at least one
558 * job to finish. When it's done, clean up after it.
560 pid = wait(&wstatus);
562 for (j = 0; j < numjobs; j++) { /* Find which job it is */
563 if (pid == jobs[j]->pid)
567 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
568 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
571 numjobs--; /* job no longer running */
572 oldjob = jobs[j]; /* remember */
573 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
574 free(oldjob); /* free the old job */
576 /* If there is another partition on the disk to salvage, then
577 * say we will start it (startjob). If not, then put thisjob there
578 * and say we will start it.
580 if (jobs[j]) { /* Another partition to salvage */
581 startjob = j; /* Will start it */
582 } else { /* There is not another partition to salvage */
584 jobs[j] = thisjob; /* Add thisjob */
586 startjob = j; /* Will start it */
588 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
589 startjob = -1; /* Don't start it - already running */
593 /* We don't have to wait for a job to complete */
595 jobs[numjobs] = thisjob; /* Add this job */
597 startjob = numjobs; /* Will start it */
601 /* Start up a new salvage job on a partition in job slot "startjob" */
602 if (startjob != -1) {
604 Log("Starting salvage of file system partition %s\n",
605 jobs[startjob]->partP->name);
607 /* For NT, we not only fork, but re-exec the salvager. Pass in the
608 * commands and pass the child job number via the data path.
611 nt_SalvagePartition(jobs[startjob]->partP->name,
612 jobs[startjob]->jobnumb);
613 jobs[startjob]->pid = pid;
618 jobs[startjob]->pid = pid;
624 for (fd = 0; fd < 16; fd++)
631 openlog("salvager", LOG_PID, useSyslogFacility);
635 (void)afs_snprintf(logFileName, sizeof logFileName,
637 AFSDIR_SERVER_SLVGLOG_FILEPATH,
638 jobs[startjob]->jobnumb);
639 logFile = afs_fopen(logFileName, "w");
644 SalvageFileSys1(jobs[startjob]->partP, 0);
649 } /* while ( thisjob || (!partP && numjobs > 0) ) */
651 /* If waited for all jobs to complete, now collect log files and return */
653 if (!useSyslog) /* if syslogging - no need to collect */
656 for (i = 0; i < jobcount; i++) {
657 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
658 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
659 if ((passLog = afs_fopen(logFileName, "r"))) {
660 while (fgets(buf, sizeof(buf), passLog)) {
665 (void)unlink(logFileName);
/*
 * Salvage one partition, or a single volume on it when singleVolumeNumber is
 * non-zero.  SalvageFileSys1() is called directly when forking is disabled
 * (canfork is 0) or debug is set, and otherwise in the process for which
 * Fork() returns 0; Wait("SalvageFileSys") is used to collect that child.
 */
674 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
676 if (!canfork || debug || Fork() == 0) {
677 SalvageFileSys1(partP, singleVolumeNumber);
678 if (canfork && !debug) {
683 Wait("SalvageFileSys");
/*
 * Reduce the device path in pbuffer to its last component, in place.
 *
 * pbuffer is first copied into the 128-byte local pbuf and the last '/' in
 * that copy is located; then the last '/' in pbuffer itself is located and
 * the text after it is copied to the start of pbuffer.
 * NOTE(review): how wpath is filled is not visible here -- confirm.
 * NOTE(review): strcpy(pbuffer, ptr + 1) copies between overlapping regions
 * of the same buffer, which strcpy() does not define; strcpy(pbuf, pbuffer)
 * is unbounded.
 */
687 get_DevName(char *pbuffer, char *wpath)
689 char pbuf[128], *ptr;
690 strcpy(pbuf, pbuffer);
691 ptr = (char *)strrchr(pbuf, '/');
697 ptr = (char *)strrchr(pbuffer, '/');
699 strcpy(pbuffer, ptr + 1);
/*
 * Salvage one partition, or the volume group of singleVolumeNumber on it.
 *
 * Steps visible in this body:
 *  - record the partition in the fileSys* globals and lock it with
 *    VLockPartition();
 *  - settle ForceSalvage (UseTheForceLuke() is consulted) and, for a single
 *    volume, connect to the file server (skipped when programType is
 *    salvageServer) and take the volume offline with AskOffline();
 *  - remove leftover files whose names start with "salvage.inodes." or
 *    "salvage.temp.";
 *  - build the inode list with GetInodeSummary(), then open it as inodeFd
 *    and unlink it right away;
 *  - read the volume headers with GetVolumeSummary() and walk inodeSummary
 *    in groups that share an RWvolumeId, passing header files with no
 *    matching inodes to DeleteExtraVolumeHeaderFile() and each group to
 *    SalvageVolumeGroup();
 *  - for whole-partition runs remove the FORCESALVAGE marker with
 *    RemoveTheForce(); bring volumes back online with AskOnline(); log
 *    completion and close inodeFd.
 */
706 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
709 char inodeListPath[256];
710 static char tmpDevName[100];
711 static char wpath[100];
712 struct VolumeSummary *vsp, *esp;
715 fileSysPartition = partP;
716 fileSysDevice = fileSysPartition->device;
717 fileSysPathName = VPartitionPath(fileSysPartition);
720 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
721 (void)sprintf(fileSysPath, "%s\\", fileSysPathName);
722 name = partP->devName;
724 fileSysPath = fileSysPathName;
725 strcpy(tmpDevName, partP->devName);
726 name = get_DevName(tmpDevName, wpath);
727 fileSysDeviceName = name;
728 filesysfulldev = wpath;
731 VLockPartition(partP->name);
732 if (singleVolumeNumber || ForceSalvage)
735 ForceSalvage = UseTheForceLuke(fileSysPath);
737 if (singleVolumeNumber) {
738 /* salvageserver already setup fssync conn for us */
739 if ((programType != salvageServer) && !VConnectFS()) {
740 Abort("Couldn't connect to file server\n");
742 AskOffline(singleVolumeNumber, partP->name);
745 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
746 partP->name, name, (Testing ? "(READONLY mode)" : ""));
748 Log("***Forced salvage of all volumes on this partition***\n");
753 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
760 assert((dirp = opendir(fileSysPath)) != NULL);
761 while ((dp = readdir(dirp))) {
762 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
763 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
765 Log("Removing old salvager temp files %s\n", dp->d_name);
766 strcpy(npath, fileSysPath);
768 strcat(npath, dp->d_name);
774 tdir = (tmpdir ? tmpdir : fileSysPath);
776 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
777 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
779 snprintf(inodeListPath, 255, "%s/salvage.inodes.%s.%d", tdir, name,
782 if (GetInodeSummary(inodeListPath, singleVolumeNumber) < 0) {
783 unlink(inodeListPath);
787 /* Using nt_unlink here since we're really using the delete on close
788 * semantics of unlink. In most places in the salvager, we really do
789 * mean to unlink the file at that point. Those places have been
790 * modified to actually do that so that the NT crt can be used there.
793 _open_osfhandle((long)nt_open(inodeListPath, O_RDWR, 0), O_RDWR);
794 nt_unlink(inodeListPath); /* NT's crt unlink won't if file is open. */
796 inodeFd = afs_open(inodeListPath, O_RDONLY);
797 unlink(inodeListPath);
800 Abort("Temporary file %s is missing...\n", inodeListPath);
801 if (ListInodeOption) {
805 /* enumerate volumes in the partition.
806 * figure out sets of read-only + rw volumes.
807 * salvage each set, read-only volumes first, then read-write.
808 * Fix up inodes on last volume in set (whether it is read-write
811 GetVolumeSummary(singleVolumeNumber);
813 for (i = j = 0, vsp = volumeSummaryp, esp = vsp + nVolumes;
814 i < nVolumesInInodeFile; i = j) {
815 VolumeId rwvid = inodeSummary[i].RWvolumeId;
817 j < nVolumesInInodeFile && inodeSummary[j].RWvolumeId == rwvid;
819 VolumeId vid = inodeSummary[j].volumeId;
820 struct VolumeSummary *tsp;
821 /* Scan volume list (from partition root directory) looking for the
822 * current rw volume number in the volume list from the inode scan.
823 * If there is one here that is not in the inode volume list,
825 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
827 DeleteExtraVolumeHeaderFile(vsp);
829 /* Now match up the volume summary info from the root directory with the
830 * entry in the volume list obtained from scanning inodes */
831 inodeSummary[j].volSummary = NULL;
832 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
833 if (tsp->header.id == vid) {
834 inodeSummary[j].volSummary = tsp;
840 /* Salvage the group of volumes (several read-only + 1 read/write)
841 * starting with the current read-only volume we're looking at.
843 SalvageVolumeGroup(&inodeSummary[i], j - i);
846 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
847 for (; vsp < esp; vsp++) {
849 DeleteExtraVolumeHeaderFile(vsp);
852 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
853 RemoveTheForce(fileSysPath);
855 if (!Testing && singleVolumeNumber) {
856 AskOnline(singleVolumeNumber, fileSysPartition->name);
858 /* Step through the volumeSummary list and set all volumes on-line.
859 * The volumes were taken off-line in GetVolumeSummary.
861 for (j = 0; j < nVolumes; j++) {
862 AskOnline(volumeSummaryp[j].header.id, fileSysPartition->name);
866 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
867 fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
870 close(inodeFd); /* SalvageVolumeGroup was the last which needed it. */
/*
 * Handle a volume header file that has no inodes behind it: log the fact
 * (the message says "would have been deleted" when Testing is set) and
 * unlink vsp->fileName.
 * NOTE(review): presumably the unlink is skipped when Testing is set, as the
 * message implies -- confirm against the complete body.
 */
874 DeleteExtraVolumeHeaderFile(register struct VolumeSummary *vsp)
877 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", vsp->fileName, (Testing ? "would have been " : ""));
879 unlink(vsp->fileName);
/*
 * qsort() comparison routine for struct ViceInodeInfo records (it is the
 * comparator GetInodeSummary passes to qsort()).
 *
 * When either record is an INODESPECIAL inode, a read-write volume id is
 * derived for each side (u.special.parentId for special inodes,
 * u.vnode.volumeId otherwise) and, together with the special type, drives
 * the ordering.  Ordinary inodes are ordered by volumeId, then vnodeNumber.
 * Remaining ties are decided on vnodeUniquifier and then inodeDataVersion,
 * with the tests reversed so that the most desirable of several similar
 * inodes sorts first.
 * NOTE(review): the paired "90% of" / "10% of" threshold checks look like
 * handling for counters that have wrapped around, with different limits for
 * different builds -- confirm.
 */
883 CompareInodes(const void *_p1, const void *_p2)
885 register const struct ViceInodeInfo *p1 = _p1;
886 register const struct ViceInodeInfo *p2 = _p2;
887 if (p1->u.vnode.vnodeNumber == INODESPECIAL
888 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
889 VolumeId p1rwid, p2rwid;
891 (p1->u.vnode.vnodeNumber ==
892 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
894 (p2->u.vnode.vnodeNumber ==
895 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
900 if (p1->u.vnode.vnodeNumber == INODESPECIAL
901 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
902 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
903 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
904 if (p1->u.vnode.volumeId == p1rwid)
906 if (p2->u.vnode.volumeId == p2rwid)
908 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
910 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
911 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
912 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
914 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
916 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
918 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
920 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
922 /* The following tests are reversed, so that the most desirable
923 * of several similar inodes comes first */
924 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
926 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
927 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
931 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
932 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
937 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
939 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
940 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
944 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
945 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
950 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
952 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
953 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
957 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
958 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
963 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
965 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
966 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
970 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
971 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/*
 * Summarize the run of inodes that belong to one volume.
 *
 * Starting at ip, consumes at most maxInodes entries while their
 * u.vnode.volumeId matches that of the first entry.  For INODESPECIAL
 * entries the read-write volume id is taken from u.special.parentId;
 * maxunique is raised whenever a larger vnodeUniquifier is seen at the
 * visible test.  The results are stored in *summary (volumeId, RWvolumeId,
 * nInodes, nSpecialInodes, maxUniquifier).
 * NOTE(review): `volume` and `rwvolume` are plain int while being compared
 * with volumeId fields, and `register n, nSpecial;` relies on implicit int.
 */
980 CountVolumeInodes(register struct ViceInodeInfo *ip, int maxInodes,
981 register struct InodeSummary *summary)
983 int volume = ip->u.vnode.volumeId;
984 int rwvolume = volume;
985 register n, nSpecial;
986 register Unique maxunique;
989 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
991 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
993 rwvolume = ip->u.special.parentId;
994 /* This isn't quite right, as there could (in error) be different
995 * parent inodes in different special vnodes */
997 if (maxunique < ip->u.vnode.vnodeUniquifier)
998 maxunique = ip->u.vnode.vnodeUniquifier;
1002 summary->volumeId = volume;
1003 summary->RWvolumeId = rwvolume;
1004 summary->nInodes = n;
1005 summary->nSpecialInodes = nSpecial;
1006 summary->maxUniquifier = maxunique;
/*
 * Inode filter used when only one volume is being salvaged (GetInodeSummary
 * hands it to ListViceInodes when singleVolumeNumber is non-zero).
 *
 * Returns non-zero when the inode belongs to singleVolumeNumber: special
 * inodes are matched on u.special.parentId, all others on u.vnode.volumeId.
 * rock is not used in the visible lines.
 */
1010 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, int singleVolumeNumber, void *rock)
1012 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1013 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1014 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1019 * Collect list of inodes in file named by path. If a truly fatal error,
1020 * unlink the file and abort. For lesser errors, return -1. The file will
1021 * be unlinked by the caller.
/*
 * Build the per-volume inode summary for the partition being salvaged.
 *
 * path names the inode list file written by ListViceInodes(); when
 * singleVolumeNumber is non-zero, OnlyOneVolume restricts the listing to
 * that volume.  The list is read into memory, sorted with CompareInodes and
 * written back to the same file; CountVolumeInodes() then yields one
 * InodeSummary record per volume, each written to a temporary summary file
 * ("salvage.temp...").  That work is done directly when canfork is 0 or
 * debug is set, and otherwise in the process for which Fork() returns 0,
 * which is collected with Wait("Inode summary").  Afterwards the summary
 * file is read back into inodeSummary, nVolumesInInodeFile is computed from
 * its size, and the file is closed and unlinked.
 * NOTE(review): the Log() call near the end passes an unsigned long for a
 * %d conversion -- the format and the argument type do not match.
 */
1024 GetInodeSummary(char *path, VolumeId singleVolumeNumber)
1026 struct afs_stat status;
1028 struct ViceInodeInfo *ip;
1029 struct InodeSummary summary;
1030 char summaryFileName[50];
1033 char *dev = fileSysPath;
1034 char *wpath = fileSysPath;
1036 char *dev = fileSysDeviceName;
1037 char *wpath = filesysfulldev;
1039 char *part = fileSysPath;
1042 /* This file used to come from vfsck; cobble it up ourselves now... */
1044 ListViceInodes(dev, fileSysPath, path,
1045 singleVolumeNumber ? OnlyOneVolume : 0,
1046 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1048 Log("*** I/O error %d when writing a tmp inode file %s; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, path, dev);
1052 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1054 if (forceSal && !ForceSalvage) {
1055 Log("***Forced salvage of all volumes on this partition***\n");
1058 inodeFd = afs_open(path, O_RDWR);
1059 if (inodeFd == -1 || afs_fstat(inodeFd, &status) == -1) {
1061 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1063 tdir = (tmpdir ? tmpdir : part);
1065 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1066 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp"));
1068 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1069 "%s/salvage.temp.%d", tdir, getpid());
1071 summaryFile = afs_fopen(summaryFileName, "a+");
1072 if (summaryFile == NULL) {
1075 Abort("Unable to create inode summary file\n");
1077 if (!canfork || debug || Fork() == 0) {
1079 unsigned long st_size=(unsigned long) status.st_size;
1080 nInodes = st_size / sizeof(struct ViceInodeInfo);
1082 fclose(summaryFile);
1084 unlink(summaryFileName);
1085 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1086 RemoveTheForce(fileSysPath);
1088 struct VolumeSummary *vsp;
1091 GetVolumeSummary(singleVolumeNumber);
1093 for (i = 0, vsp = volumeSummaryp; i < nVolumes; i++) {
1095 DeleteExtraVolumeHeaderFile(vsp);
1098 Log("%s vice inodes on %s; not salvaged\n",
1099 singleVolumeNumber ? "No applicable" : "No", dev);
1102 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1104 fclose(summaryFile);
1107 unlink(summaryFileName);
1109 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1112 if (read(inodeFd, ip, st_size) != st_size) {
1113 fclose(summaryFile);
1116 unlink(summaryFileName);
1117 Abort("Unable to read inode table; %s not salvaged\n", dev);
1119 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1120 if (afs_lseek(inodeFd, 0, SEEK_SET) == -1
1121 || write(inodeFd, ip, st_size) != st_size) {
1122 fclose(summaryFile);
1125 unlink(summaryFileName);
1126 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1130 CountVolumeInodes(ip, nInodes, &summary);
1131 if (fwrite(&summary, sizeof(summary), 1, summaryFile) != 1) {
1132 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1133 fclose(summaryFile);
1137 summary.index += (summary.nInodes);
1138 nInodes -= summary.nInodes;
1139 ip += summary.nInodes;
1141 /* Following fflush is not fclose, because if it was debug mode would not work */
1142 if (fflush(summaryFile) == EOF || fsync(fileno(summaryFile)) == -1) {
1143 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1144 fclose(summaryFile);
1148 if (canfork && !debug) {
1153 if (Wait("Inode summary") == -1) {
1154 fclose(summaryFile);
1157 unlink(summaryFileName);
1158 Exit(1); /* salvage of this partition aborted */
1161 assert(afs_fstat(fileno(summaryFile), &status) != -1);
1162 if (status.st_size != 0) {
1164 unsigned long st_status=(unsigned long)status.st_size;
1165 inodeSummary = (struct InodeSummary *)malloc(st_status);
1166 assert(inodeSummary != NULL);
1167 /* For GNU we need to do lseek to get the file pointer moved. */
1168 assert(afs_lseek(fileno(summaryFile), 0, SEEK_SET) == 0);
1169 ret = read(fileno(summaryFile), inodeSummary, st_status);
1170 assert(ret == st_status);
1172 nVolumesInInodeFile =(unsigned long)(status.st_size) / sizeof(struct InodeSummary);
1173 Log("%d nVolumesInInodeFile %d \n",nVolumesInInodeFile,(unsigned long)(status.st_size));
1174 fclose(summaryFile);
1176 unlink(summaryFileName);
/*
 * qsort() comparison routine for struct VolumeSummary records.  Records are
 * ordered by header.parent first.  Within one parent, a record whose
 * header.id equals header.parent is treated as the read-write volume and
 * handled by the two tests below; two read-only volumes are ordered by
 * header.id.
 */
1180 /* Comparison routine for volume sort.
1181 This is setup so that a read-write volume comes immediately before
1182 any read-only clones of that volume */
1184 CompareVolumes(const void *_p1, const void *_p2)
1186 register const struct VolumeSummary *p1 = _p1;
1187 register const struct VolumeSummary *p2 = _p2;
1188 if (p1->header.parent != p2->header.parent)
1189 return p1->header.parent < p2->header.parent ? -1 : 1;
1190 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1192 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1194 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
/*
 * Read the volume header files of the partition into volumeSummaryp.
 *
 * Changes directory to fileSysPath and scans it for names ending in VHDREXT.
 * For a whole-partition salvage a first pass over the directory precedes an
 * allocation of nvols entries; otherwise a fixed 20-entry array is
 * allocated.  In the main pass, files that cannot be read as a
 * VolumeDiskHeader carrying VOLUMEHEADERMAGIC are reported as not being
 * legitimate header files (whole-partition runs only).  When
 * singleVolumeNumber names a volume whose parent is a different id, the
 * request is either handed to SALVSYNC_LinkVolume() for the parent (when
 * programType is salvageServer) or reported as a read-only volume that is
 * not salvaged.  Other members of the requested volume group are taken
 * offline with AskOffline().  Headers whose file name differs from the
 * VFORMAT name for their id are reported as incorrectly named.  The array
 * is finally sorted with qsort().
 * NOTE(review): presumably nvols counts the valid headers found in the
 * first pass -- confirm against the complete body.
 */
1198 GetVolumeSummary(VolumeId singleVolumeNumber)
1201 afs_int32 nvols = 0;
1202 struct VolumeSummary *vsp, vs;
1203 struct VolumeDiskHeader diskHeader;
1206 /* Get headers from volume directory */
1207 if (chdir(fileSysPath) == -1 || (dirp = opendir(".")) == NULL)
1208 Abort("Can't read directory %s; not salvaged\n", fileSysPath);
1209 if (!singleVolumeNumber) {
1210 while ((dp = readdir(dirp))) {
1211 char *p = dp->d_name;
1212 p = strrchr(dp->d_name, '.');
1213 if (p != NULL && strcmp(p, VHDREXT) == 0) {
1215 if ((fd = afs_open(dp->d_name, O_RDONLY)) != -1
1216 && read(fd, (char *)&diskHeader, sizeof(diskHeader))
1217 == sizeof(diskHeader)
1218 && diskHeader.stamp.magic == VOLUMEHEADERMAGIC) {
1219 DiskToVolumeHeader(&vs.header, &diskHeader);
1227 dirp = opendir("."); /* No rewinddir for NT */
1234 (struct VolumeSummary *)malloc(nvols *
1235 sizeof(struct VolumeSummary));
1238 (struct VolumeSummary *)malloc(20 * sizeof(struct VolumeSummary));
1239 assert(volumeSummaryp != NULL);
1242 vsp = volumeSummaryp;
1243 while ((dp = readdir(dirp))) {
1244 char *p = dp->d_name;
1245 p = strrchr(dp->d_name, '.');
1246 if (p != NULL && strcmp(p, VHDREXT) == 0) {
1249 if ((fd = afs_open(dp->d_name, O_RDONLY)) == -1
1250 || read(fd, &diskHeader, sizeof(diskHeader))
1251 != sizeof(diskHeader)
1252 || diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
1257 if (!singleVolumeNumber) {
1259 Log("%s/%s is not a legitimate volume header file; %sdeleted\n", fileSysPathName, dp->d_name, (Testing ? "it would have been " : ""));
1264 char nameShouldBe[64];
1265 DiskToVolumeHeader(&vsp->header, &diskHeader);
1266 if (singleVolumeNumber && vsp->header.id == singleVolumeNumber
1267 && vsp->header.parent != singleVolumeNumber) {
1268 if (programType == salvageServer) {
1269 #ifdef SALVSYNC_BUILD_CLIENT
1270 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1271 vsp->header.id, vsp->header.parent);
1272 if (SALVSYNC_LinkVolume(vsp->header.parent,
1274 fileSysPartition->name,
1276 Log("schedule request failed\n");
1279 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1281 Log("%u is a read-only volume; not salvaged\n",
1282 singleVolumeNumber);
1286 if (!singleVolumeNumber
1287 || (vsp->header.id == singleVolumeNumber
1288 || vsp->header.parent == singleVolumeNumber)) {
1289 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1290 VFORMAT, vsp->header.id);
1291 if (singleVolumeNumber
1292 && vsp->header.id != singleVolumeNumber)
1293 AskOffline(vsp->header.id, fileSysPartition->name);
1294 if (strcmp(nameShouldBe, dp->d_name)) {
1296 Log("Volume header file %s is incorrectly named; %sdeleted (it will be recreated later, if necessary)\n", dp->d_name, (Testing ? "it would have been " : ""));
1300 vsp->fileName = ToString(dp->d_name);
1310 qsort(volumeSummaryp, nVolumes, sizeof(struct VolumeSummary),
/*
 * FindLinkHandle
 *
 * Parameters:
 *   isp        array of nVols inode summaries for one volume group
 *   nVols      number of entries in isp
 *   allInodes  inode array that each isp[i].index is an offset into
 *
 * Walks the first nSpecialInodes entries of every volume's inode run and
 * returns the inodeNumber of the first one whose special type is
 * VI_LINKTABLE.
 * NOTE(review): the value returned when no link table inode exists is not
 * visible in this excerpt -- confirm against the full source.
 */
1314 /* Find the link table. This should be associated with the RW volume or, if
1315 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
1318 FindLinkHandle(register struct InodeSummary *isp, int nVols,
1319 struct ViceInodeInfo *allInodes)
1322 struct ViceInodeInfo *ip;
1324 for (i = 0; i < nVols; i++) {
1325 ip = allInodes + isp[i].index;
1326 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1327 if (ip[j].u.special.type == VI_LINKTABLE)
1328 return ip[j].inodeNumber;
/*
 * CreateLinkTable: (re)build the link table file of a volume group and
 * point the global VGLinkH handle at it.
 *
 * Parameters:
 *   isp  inode summary the table is created for; volumeId and RWvolumeId
 *        are handed to IH_CREATE, and when isp->volSummary is set its
 *        header.linkTable is updated to the inode used
 *   ino  link table inode to use; when it fails VALID_INO a new special
 *        inode of type VI_LINKTABLE is requested through IH_CREATE
 *
 * The file is sized with FDH_TRUNC to sizeof(version) + sizeof(short) and
 * a version stamp carrying LINKTABLEMAGIC / LINKTABLEVERSION is written to
 * it.  Open, truncate and write failures go through Abort().
 */
1335 CreateLinkTable(register struct InodeSummary *isp, Inode ino)
1337 struct versionStamp version;
1340 if (!VALID_INO(ino))
1342 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
1343 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1344 if (!VALID_INO(ino))
1346 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1347 isp->RWvolumeId, errno);
1348 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1349 fdP = IH_OPEN(VGLinkH);
1351 Abort("Can't open link table for volume %u (error = %d)\n",
1352 isp->RWvolumeId, errno);
1354 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1355 Abort("Can't truncate link table for volume %u (error = %d)\n",
1356 isp->RWvolumeId, errno);
1358 version.magic = LINKTABLEMAGIC;
1359 version.version = LINKTABLEVERSION;
/* NOTE(review): the Abort text below says "truncate" although this is the
 * write of the version stamp -- looks like a copy/paste slip; confirm
 * before changing the message. */
1361 if (FDH_WRITE(fdP, (char *)&version, sizeof(version))
1363 Abort("Can't truncate link table for volume %u (error = %d)\n",
1364 isp->RWvolumeId, errno);
1366 FDH_REALLYCLOSE(fdP);
1368 /* If the volume summary exits (i.e., the V*.vol header file exists),
1369 * then set this inode there as well.
1371 if (isp->volSummary)
1372 isp->volSummary->header.linkTable = ino;
/* Unpack the opaque argument as an SVGParms_t and run
 * DoSalvageVolumeGroup() on the summary pointer and count it carries.
 * NOTE(review): the function header is outside this excerpt; the name
 * nt_SVG is taken from the pthread_create() call that passes this routine
 * as the thread start function -- confirm. */
1381 SVGParms_t *parms = (SVGParms_t *) arg;
1382 DoSalvageVolumeGroup(parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * SalvageVolumeGroup: entry point for salvaging one volume group.
 *
 * Parameters:
 *   isp    inode summaries of the group, forwarded unchanged to the worker
 *   nVols  number of summaries, forwarded unchanged to the worker
 *
 * Clears the global VolInfo, stores isp / nVols in a parameter block and
 * runs nt_SVG on a thread created PTHREAD_CREATE_JOINABLE, then waits for
 * it with pthread_join().  A failure of pthread_attr_init(),
 * pthread_attr_setdetachstate() or pthread_create() is logged.
 * NOTE(review): the closing #endif names AFS_NT40_ENV, so the threaded
 * path is presumably specific to that build and another path exists
 * otherwise; the opening conditional is outside this excerpt -- confirm.
 */
1387 SalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1390 pthread_attr_t tattr;
1394 /* Initialize per volume global variables, even if later code does so */
1398 memset(&VolInfo, 0, sizeof(VolInfo));
1400 parms.svgp_inodeSummaryp = isp;
1401 parms.svgp_count = nVols;
1402 code = pthread_attr_init(&tattr);
1404 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1408 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1410 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1413 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1415 Log("Failed to create thread to salvage volume group %u\n",
1419 (void)pthread_join(tid, NULL);
1421 #endif /* AFS_NT40_ENV */
/*
 * DoSalvageVolumeGroup: salvage every volume of one volume group.
 *
 * Parameters:
 *   isp    array of nVols inode summaries; the first entry counts as the
 *          read/write volume when its volumeId equals its RWvolumeId and
 *          it has at least one special inode
 *   nVols  number of entries in isp
 *
 * Steps visible in this excerpt, in order:
 *   - unless ForceSalvage is set, QuickCheck() is consulted first (subject
 *     to the ShowMounts test);
 *   - when canfork is set and debug is not, the process forks and the
 *     parent only waits in Wait();
 *   - the inode records of all volumes in the group are read from inodeFd
 *     into a single malloc'd buffer;
 *   - under AFS_NAMEI_ENV the link table is looked up with
 *     FindLinkHandle() and rebuilt with CreateLinkTable() when it is
 *     missing or cannot be opened;
 *   - volumes are processed from the last summary down to salvageTo, each
 *     with a check pass (check == 1) followed by a repair pass
 *     (check == 0) through SalvageVolumeHeaderFile() and SalvageVnodes(),
 *     with MaybeZapVolume() called on failure;
 *   - inode link counts left non-zero are corrected with IH_DEC / IH_INC;
 *   - SalvageVolume() is called for the directory consistency checks and
 *     VGLinkH is released.
 *
 * NOTE(review): the read() of the inode records sits inside assert(); if
 * assert here is the standard macro the read disappears under NDEBUG --
 * confirm this file is never built that way.
 */
1424 DoSalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1426 struct ViceInodeInfo *inodes, *allInodes, *ip;
1427 int i, totalInodes, size, salvageTo;
1431 int dec_VGLinkH = 0;
1433 FdHandle_t *fdP = NULL;
1436 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1437 && isp->nSpecialInodes > 0);
1438 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1439 if (!ForceSalvage && QuickCheck(isp, nVols))
1442 if (ShowMounts && !haveRWvolume)
1444 if (canfork && !debug && Fork() != 0) {
1445 (void)Wait("Salvage volume group");
/* Add up the inode counts of every volume in the group so that all of
 * their records can be held in one buffer. */
1448 for (i = 0, totalInodes = 0; i < nVols; i++)
1449 totalInodes += isp[i].nInodes;
1450 size = totalInodes * sizeof(struct ViceInodeInfo);
1451 inodes = (struct ViceInodeInfo *)malloc(size);
1452 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1453 * for the partition, if all the inodes
1454 * had been read into memory */
1456 (inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1458 assert(read(inodeFd, inodes, size) == size);
1460 /* Don't try to salvage a read write volume if there isn't one on this
1462 salvageTo = haveRWvolume ? 0 : 1;
1464 #ifdef AFS_NAMEI_ENV
1465 ino = FindLinkHandle(isp, nVols, allInodes);
1466 if (VALID_INO(ino)) {
1467 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1468 fdP = IH_OPEN(VGLinkH);
1470 if (!VALID_INO(ino) || fdP == NULL) {
1471 Log("%s link table for volume %u.\n",
1472 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1474 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1477 struct ViceInodeInfo *ip;
1478 CreateLinkTable(isp, ino);
1479 fdP = IH_OPEN(VGLinkH);
1480 /* Sync fake 1 link counts to the link table, now that it exists */
1482 for (i = 0; i < nVols; i++) {
1483 ip = allInodes + isp[i].index;
1484 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1486 nt_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1488 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1496 FDH_REALLYCLOSE(fdP);
1498 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1501 /* Salvage in reverse order--read/write volume last; this way any
1502 * Inodes not referenced by the time we salvage the read/write volume
1503 * can be picked up by the read/write volume */
1504 /* ACTUALLY, that's not done right now--the inodes just vanish */
1505 for (i = nVols - 1; i >= salvageTo; i--) {
1507 struct InodeSummary *lisp = &isp[i];
1508 #ifdef AFS_NAMEI_ENV
1509 /* If only the RO is present on this partition, the link table
1510 * shows up as a RW volume special file. Need to make sure the
1511 * salvager doesn't try to salvage the non-existent RW.
1513 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1514 /* If this only special inode is the link table, continue */
1515 if (inodes->u.special.type == VI_LINKTABLE) {
1522 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1523 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1524 /* Check inodes twice. The second time do things seriously. This
1525 * way the whole RO volume can be deleted, below, if anything goes wrong */
1526 for (check = 1; check >= 0; check--) {
1528 if (SalvageVolumeHeaderFile(lisp, allInodes, rw, check, &deleteMe)
1530 MaybeZapVolume(lisp, "Volume header", deleteMe, check);
1531 if (rw && deleteMe) {
1532 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1533 * volume won't be called */
1539 if (rw && check == 1)
1541 if (SalvageVnodes(isp, lisp, allInodes, check) == -1) {
1542 MaybeZapVolume(lisp, "Vnode index", 0, check);
1548 /* Fix actual inode counts */
1550 Log("totalInodes %d\n",totalInodes);
/* Positive residual counts are handled with IH_DEC and negative ones with
 * IH_INC; the link table's own inode is skipped here and handled by the
 * dec_VGLinkH loops that follow this one. */
1551 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1552 static int TraceBadLinkCounts = 0;
1553 #ifdef AFS_NAMEI_ENV
1554 if (VGLinkH->ih_ino == ip->inodeNumber) {
1555 dec_VGLinkH = ip->linkCount - VGLinkH_cnt;
1556 VGLinkH_p1 = ip->u.param[0];
1557 continue; /* Deal with this last. */
1560 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1561 TraceBadLinkCounts--; /* Limit reports, per volume */
1562 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(NULL, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
1564 while (ip->linkCount > 0) {
1565 /* below used to assert, not break */
1567 if (IH_DEC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1568 Log("idec failed. inode %s errno %d\n",
1569 PrintInode(NULL, ip->inodeNumber), errno);
1575 while (ip->linkCount < 0) {
1576 /* these used to be asserts */
1578 if (IH_INC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1579 Log("iinc failed. inode %s errno %d\n",
1580 PrintInode(NULL, ip->inodeNumber), errno);
1587 #ifdef AFS_NAMEI_ENV
1588 while (dec_VGLinkH > 0) {
1589 if (IH_DEC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1590 Log("idec failed on link table, errno = %d\n", errno);
1594 while (dec_VGLinkH < 0) {
1595 if (IH_INC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1596 Log("iinc failed on link table, errno = %d\n", errno);
1603 /* Directory consistency checks on the rw volume */
1605 SalvageVolume(isp, VGLinkH);
1606 IH_RELEASE(VGLinkH);
1608 if (canfork && !debug) {
/*
 * QuickCheck: look at the volume info header of each volume in the group
 * to see whether it is already marked as not needing a salvage.
 *
 * Parameters:
 *   isp    array of nVols inode summaries for the group
 *   nVols  number of entries in isp
 *
 * A header passes when it reads back in full, carries VOLUMEINFOMAGIC, has
 * dontSalvage == DONT_SALVAGE and has neither needsSalvaged nor destroyMe
 * set.  A passing header that still has inUse set is rewritten with inUse
 * cleared and inService set to 1.
 * NOTE(review): the return statements are outside this excerpt; the caller
 * tests the result for non-zero -- confirm the exact convention.
 */
1615 QuickCheck(register struct InodeSummary *isp, int nVols)
1617 /* Check headers BEFORE forking */
1621 for (i = 0; i < nVols; i++) {
1622 struct VolumeSummary *vs = isp[i].volSummary;
1623 VolumeDiskData volHeader;
1625 /* Don't salvage just because phantom rw volume is there... */
1626 /* (If a read-only volume exists, read/write inodes must also exist) */
1627 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
1631 IH_INIT(h, fileSysDevice, vs->header.parent, vs->header.volumeInfo);
1632 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
1633 == sizeof(volHeader)
1634 && volHeader.stamp.magic == VOLUMEINFOMAGIC
1635 && volHeader.dontSalvage == DONT_SALVAGE
1636 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
1637 if (volHeader.inUse != 0) {
1638 volHeader.inUse = 0;
1639 volHeader.inService = 1;
1641 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
1642 != sizeof(volHeader)) {
/*
 * Parameter and behavior notes for SalvageVolumeHeaderFile.
 *
 * Parameters:
 *   isp       inode summary of the volume whose header file is salvaged
 *   inodes    inode array, indexed here as inodes[isp->index + i] for the
 *             volume's special inodes
 *   RW        caller's read/write flag; its visible use is that
 *             ClearROInUseBit() is called only when it is zero (and check
 *             is zero and a volume summary exists)
 *   check     non-zero on the check pass; special-inode link counts are
 *             decremented only when it is zero
 *   deleteMe  forwarded to SalvageHeader()
 *
 * A fresh header (tempHeader) is filled in from the special inodes found
 * and compared with the one in isp->volSummary; when there is no summary
 * the header file is created, and when the two differ it is reopened with
 * O_TRUNC.  tempHeader is then copied into the summary and written out
 * through VolumeHeaderToDisk() / write().
 *
 * NOTE(review): the return statements are outside this excerpt -- confirm
 * the return convention against the full source.
 * NOTE(review): the VolumeSummary obtained from malloc() is dereferenced on
 * the following line without a NULL check.
 */
1658 /* SalvageVolumeHeaderFile
1660 * Salvage the top level V*.vol header file. Make sure the special files
1661 * exist and that there are no duplicates.
1663 * Calls SalvageHeader for each possible type of volume special file.
1667 SalvageVolumeHeaderFile(register struct InodeSummary *isp,
1668 register struct ViceInodeInfo *inodes, int RW,
1669 int check, int *deleteMe)
1673 register struct ViceInodeInfo *ip;
1674 int allinodesobsolete = 1;
1675 struct VolumeDiskHeader diskHeader;
1679 memset(&tempHeader, 0, sizeof(tempHeader));
1680 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
1681 tempHeader.stamp.version = VOLUMEHEADERVERSION;
1682 tempHeader.id = isp->volumeId;
1683 tempHeader.parent = isp->RWvolumeId;
1684 /* Check for duplicates (inodes are sorted by type field) */
1685 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
1686 ip = &inodes[isp->index + i];
1687 if (ip->u.special.type == (ip + 1)->u.special.type) {
1689 Log("Duplicate special inodes in volume header; salvage of volume %u aborted\n", isp->volumeId);
1693 for (i = 0; i < isp->nSpecialInodes; i++) {
1694 ip = &inodes[isp->index + i];
1695 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
1697 Log("Rubbish header inode\n");
1700 Log("Rubbish header inode; deleted\n");
1701 } else if (!stuff[ip->u.special.type - 1].obsolete) {
1702 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
1703 if (!check && ip->u.special.type != VI_LINKTABLE)
1704 ip->linkCount--; /* Keep the inode around */
1705 allinodesobsolete = 0;
1709 if (allinodesobsolete) {
1716 VGLinkH_cnt++; /* one for every header. */
1718 if (!RW && !check && isp->volSummary) {
1719 ClearROInUseBit(isp->volSummary);
1723 for (i = 0; i < MAXINODETYPE; i++) {
1724 if (stuff[i].inodeType == VI_LINKTABLE) {
1725 /* Gross hack: SalvageHeader does a bcmp on the volume header.
1726 * And we may have recreated the link table earlier, so set the
1727 * RW header as well.
1729 if (VALID_INO(VGLinkH->ih_ino)) {
1730 *stuff[i].inode = VGLinkH->ih_ino;
1734 if (SalvageHeader(&stuff[i], isp, check, deleteMe) == -1 && check)
1738 if (isp->volSummary == NULL) {
1740 (void)afs_snprintf(name, sizeof name, VFORMAT, isp->volumeId);
1742 Log("No header file for volume %u\n", isp->volumeId);
1746 Log("No header file for volume %u; %screating %s/%s\n",
1747 isp->volumeId, (Testing ? "it would have been " : ""),
1748 fileSysPathName, name);
1749 headerFd = afs_open(name, O_RDWR | O_CREAT | O_TRUNC, 0644);
1750 assert(headerFd != -1);
1751 isp->volSummary = (struct VolumeSummary *)
1752 malloc(sizeof(struct VolumeSummary));
1753 isp->volSummary->fileName = ToString(name);
1756 /* hack: these two fields are obsolete... */
1757 isp->volSummary->header.volumeAcl = 0;
1758 isp->volSummary->header.volumeMountTable = 0;
1761 (&isp->volSummary->header, &tempHeader,
1762 sizeof(struct VolumeHeader))) {
1763 /* We often remove the name before calling us, so we make a fake one up */
1764 if (isp->volSummary->fileName) {
1765 strcpy(name, isp->volSummary->fileName);
1767 (void)afs_snprintf(name, sizeof name, VFORMAT, isp->volumeId);
1768 isp->volSummary->fileName = ToString(name);
1771 Log("Header file %s is damaged or no longer valid%s\n", name,
1772 (check ? "" : "; repairing"));
1776 headerFd = afs_open(name, O_RDWR | O_TRUNC, 0644);
1777 assert(headerFd != -1);
1781 memcpy(&isp->volSummary->header, &tempHeader,
1782 sizeof(struct VolumeHeader));
1785 Log("It would have written a new header file for volume %u\n",
1788 VolumeHeaderToDisk(&diskHeader, &tempHeader);
1789 if (write(headerFd, &diskHeader, sizeof(struct VolumeDiskHeader))
1790 != sizeof(struct VolumeDiskHeader)) {
1791 Log("Couldn't rewrite volume header file!\n");
1798 IH_INIT(isp->volSummary->volumeInfoHandle, fileSysDevice, isp->RWvolumeId,
1799 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader: check, and when needed recreate, one volume special file.
 *
 * Parameters:
 *   sp     descriptor of the special file; the fields used here are
 *          inodeType, inode (pointer to the inode number), description,
 *          size and stamp
 *   isp    inode summary of the volume the special file belongs to
 *   check  non-zero on the check pass
 *   (the remaining parameter list is outside this excerpt)
 *
 * Visible behavior:
 *   - without AFS_NAMEI_ENV, the VI_LINKTABLE entry is special-cased at
 *     the top;
 *   - a zero inode number is reported as missing and a new special inode
 *     is requested with IH_CREATE;
 *   - the file is opened and its header read; a short read or a magic
 *     mismatch is reported as corruption;
 *   - when recreating (and not Testing) the file is truncated and
 *     rewritten: a VI_VOLINFO file gets a zeroed volume info named
 *     "bogus.<id>" with inService and blessed cleared, other files get
 *     just their version stamp;
 *   - for VI_VOLINFO the header is copied to the global VolInfo and the
 *     update (or creation) time is logged.
 *
 * NOTE(review): the return statements are outside this excerpt; a caller
 * compares the result with -1 -- confirm the full convention.
 */
1804 SalvageHeader(register struct stuff *sp, struct InodeSummary *isp, int check,
1808 VolumeDiskData volumeInfo;
1809 struct versionStamp fileHeader;
1818 #ifndef AFS_NAMEI_ENV
1819 if (sp->inodeType == VI_LINKTABLE)
1822 if (*(sp->inode) == 0) {
1824 Log("Missing inode in volume header (%s)\n", sp->description);
1828 Log("Missing inode in volume header (%s); %s\n", sp->description,
1829 (Testing ? "it would have recreated it" : "recreating"));
1832 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
1833 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
1834 if (!VALID_INO(*(sp->inode)))
1836 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
1837 sp->description, errno);
1842 IH_INIT(specH, fileSysDevice, isp->RWvolumeId, *(sp->inode));
1843 fdP = IH_OPEN(specH);
1844 if (OKToZap && (fdP == NULL) && BadError(errno)) {
1845 /* bail out early and destroy the volume */
1847 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
1854 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
1855 sp->description, errno);
1858 && (FDH_READ(fdP, (char *)&header, sp->size) != sp->size
1859 || header.fileHeader.magic != sp->stamp.magic)) {
1861 Log("Part of the header (%s) is corrupted\n", sp->description);
1862 FDH_REALLYCLOSE(fdP);
1866 Log("Part of the header (%s) is corrupted; recreating\n",
1870 if (sp->inodeType == VI_VOLINFO
1871 && header.volumeInfo.destroyMe == DESTROY_ME) {
1874 FDH_REALLYCLOSE(fdP);
1878 if (recreate && !Testing) {
1881 ("Internal error: recreating volume header (%s) in check mode\n",
1883 code = FDH_TRUNC(fdP, 0);
1885 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
1886 sp->description, errno);
1888 /* The following code should be moved into vutil.c */
1889 if (sp->inodeType == VI_VOLINFO) {
1891 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
1892 header.volumeInfo.stamp = sp->stamp;
1893 header.volumeInfo.id = isp->volumeId;
1894 header.volumeInfo.parentId = isp->RWvolumeId;
1895 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
1896 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
1897 isp->volumeId, isp->volumeId);
1898 header.volumeInfo.inService = 0;
1899 header.volumeInfo.blessed = 0;
1900 /* The + 1000 is a hack in case there are any files out in venus caches */
1901 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
1902 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
1903 header.volumeInfo.needsCallback = 0;
1904 gettimeofday(&tp, 0);
1905 header.volumeInfo.creationDate = tp.tv_sec;
1906 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
1908 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
1909 sp->description, errno);
1912 FDH_WRITE(fdP, (char *)&header.volumeInfo,
1913 sizeof(header.volumeInfo));
1914 if (code != sizeof(header.volumeInfo)) {
1917 ("Unable to write volume header file (%s) (errno = %d)\n",
1918 sp->description, errno);
1919 Abort("Unable to write entire volume header file (%s)\n",
1923 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
1925 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
1926 sp->description, errno);
1928 code = FDH_WRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp));
1929 if (code != sizeof(sp->stamp)) {
1932 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
1933 sp->description, errno);
1935 ("Unable to write entire version stamp in volume header file (%s)\n",
1940 FDH_REALLYCLOSE(fdP);
/* Keep a copy of the volume info in the global VolInfo and log when the
 * volume was last updated, or created if it has no update date. */
1942 if (sp->inodeType == VI_VOLINFO) {
1943 VolInfo = header.volumeInfo;
1946 if (VolInfo.updateDate) {
1947 strcpy(update, TimeStamp(VolInfo.updateDate, 0));
1949 Log("%s (%u) %supdated %s\n", VolInfo.name, VolInfo.id,
1950 (Testing ? "it would have been " : ""), update);
1952 strcpy(update, TimeStamp(VolInfo.creationDate, 0));
1954 Log("%s (%u) not updated (created %s)\n", VolInfo.name,
1955 VolInfo.id, update);
/*
 * SalvageVnodes: salvage the small and large vnode indexes of one volume.
 *
 * Parameters:
 *   rwIsp    inode summary whose non-special inodes are matched against
 *            the vnodes (its index and nSpecialInodes locate the first
 *            one, its nInodes gives the count)
 *   thisIsp  inode summary of the volume being salvaged; its volSummary
 *            supplies the smallVnodeIndex / largeVnodeIndex inodes
 *   inodes   inode array the offsets above are relative to
 *   check    non-zero on the check pass; a failing small index then stops
 *            before the large index is examined
 *
 * SalvageIndex() is told the volume is read/write when rwIsp and thisIsp
 * are the same summary.
 *
 * Returns 0 when both SalvageIndex() calls returned 0, otherwise -1.
 */
1965 SalvageVnodes(register struct InodeSummary *rwIsp,
1966 register struct InodeSummary *thisIsp,
1967 register struct ViceInodeInfo *inodes, int check)
1969 int ilarge, ismall, ioffset, RW, nInodes;
1970 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
1973 RW = (rwIsp == thisIsp);
1974 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
1976 SalvageIndex(thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
1977 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
1978 if (check && ismall == -1)
1981 SalvageIndex(thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
1982 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
1983 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex: reconcile one vnode index file with the inodes found on
 * disk for the volume.
 *
 * Parameters:
 *   ino         inode of the vnode index file
 *   class       vnode class of the index (selects VnodeClassInfo[class])
 *   RW          caller's read/write flag
 *   ip          first candidate inode record; advanced as vnodes are
 *               matched
 *   nInodes     number of inode records available at ip
 *   volSummary  summary of the volume; header.id and header.parent are
 *               used here
 *   check       non-zero on the check pass; the function asserts that no
 *               vnode is marked changed while it is set
 *
 * The index is opened, its size is asserted to be a whole number of
 * vcp->diskSize records, and reading starts one record into the file.
 * For every vnode whose type is not vNull:
 *   - a vnode with inode number 0 or with the wrong vnodeMagic is zeroed;
 *   - inode records with a lower vnode number are stepped over;
 *   - with a matching inode record the uniquifier, data version, inode
 *     number and length of the vnode are compared with the inode's and
 *     mismatches are logged and corrected, and the inode's linkCount is
 *     decremented so the inode is kept;
 *   - without a matching record the vnode is reported and zeroed.
 * A changed vnode is written back at vnodeIndexOffset() when not Testing,
 * and VolumeChanged is set.
 *
 * NOTE(review): open failures and index size mismatches are handled with
 * assert() only.  The return statements and several branch conditions are
 * outside this excerpt -- confirm details against the full source.
 */
1987 SalvageIndex(Inode ino, VnodeClass class, int RW,
1988 register struct ViceInodeInfo *ip, int nInodes,
1989 struct VolumeSummary *volSummary, int check)
1991 VolumeId volumeNumber;
1992 char buf[SIZEOF_LARGEDISKVNODE];
1993 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
1995 StreamHandle_t *file;
1996 struct VnodeClassInfo *vcp;
1998 afs_fsize_t vnodeLength;
1999 int vnodeIndex, nVnodes;
2000 afs_ino_str_t stmp1, stmp2;
2004 volumeNumber = volSummary->header.id;
2005 IH_INIT(handle, fileSysDevice, volSummary->header.parent, ino);
2006 fdP = IH_OPEN(handle);
2007 assert(fdP != NULL);
2008 file = FDH_FDOPEN(fdP, "r+");
2009 assert(file != NULL);
2010 vcp = &VnodeClassInfo[class];
2011 size = OS_SIZE(fdP->fd_fd);
2013 nVnodes = (size / vcp->diskSize) - 1;
2015 assert((nVnodes + 1) * vcp->diskSize == size);
2016 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
2020 for (vnodeIndex = 0;
2021 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2022 nVnodes--, vnodeIndex++) {
2023 if (vnode->type != vNull) {
2024 int vnodeChanged = 0;
2025 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2026 /* Log programs that belong to root (potentially suid root);
2027 * don't bother for read-only or backup volumes */
2028 #ifdef notdef /* This is done elsewhere */
2029 if (ShowRootFiles && RW && vnode->owner == 0 && vnodeNumber != 1)
2030 Log("OWNER IS ROOT %s %u dir %u vnode %u author %u owner %u mode %o\n", VolInfo.name, volumeNumber, vnode->parent, vnodeNumber, vnode->author, vnode->owner, vnode->modeBits);
2032 if (VNDISK_GET_INO(vnode) == 0) {
2034 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2035 memset(vnode, 0, vcp->diskSize);
2039 if (vcp->magic != vnode->vnodeMagic) {
2040 /* bad magic #, probably partially created vnode */
2041 Log("Partially allocated vnode %d deleted.\n",
2043 memset(vnode, 0, vcp->diskSize);
2047 /* ****** Should do a bit more salvage here: e.g. make sure
2048 * vnode type matches what it should be given the index */
2049 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2050 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2051 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2052 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2059 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2060 /* The following doesn't work, because the version number
2061 * is not maintained correctly by the file server */
2062 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2063 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2065 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2071 /* For RW volume, look for vnode with matching inode number;
2072 * if no such match, take the first determined by our sort
2074 register struct ViceInodeInfo *lip = ip;
2075 register int lnInodes = nInodes;
2077 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2078 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2087 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2088 /* "Matching" inode */
2092 vu = vnode->uniquifier;
2093 iu = ip->u.vnode.vnodeUniquifier;
2094 vd = vnode->dataVersion;
2095 id = ip->u.vnode.inodeDataVersion;
2097 * Because of the possibility of the uniquifier overflows (> 4M)
2098 * we compare them modulo the low 22-bits; we shouldn't worry
2099 * about mismatching since they shouldn't to many old
2100 * uniquifiers of the same vnode...
2102 if (IUnique(vu) != IUnique(iu)) {
2104 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2107 vnode->uniquifier = iu;
2108 #ifdef AFS_3DISPARES
2109 vnode->dataVersion = (id >= vd ?
2112 1887437 ? vd : id) :
2115 1887437 ? id : vd));
2117 #if defined(AFS_SGI_EXMAG)
2118 vnode->dataVersion = (id >= vd ?
2121 15099494 ? vd : id) :
2124 15099494 ? id : vd));
2126 vnode->dataVersion = (id > vd ? id : vd);
2127 #endif /* AFS_SGI_EXMAG */
2128 #endif /* AFS_3DISPARES */
2131 /* don't bother checking for vd > id any more, since
2132 * partial file transfers always result in this state,
2133 * and you can't do much else anyway (you've already
2134 * found the best data you can) */
2135 #ifdef AFS_3DISPARES
2136 if (!vnodeIsDirectory(vnodeNumber)
2137 && ((vd < id && (id - vd) < 1887437)
2138 || ((vd > id && (vd - id) > 1887437)))) {
2140 #if defined(AFS_SGI_EXMAG)
2141 if (!vnodeIsDirectory(vnodeNumber)
2142 && ((vd < id && (id - vd) < 15099494)
2143 || ((vd > id && (vd - id) > 15099494)))) {
2145 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2146 #endif /* AFS_SGI_EXMAG */
2149 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2150 vnode->dataVersion = id;
2155 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2158 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2160 VNDISK_SET_INO(vnode, ip->inodeNumber);
2165 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2167 VNDISK_SET_INO(vnode, ip->inodeNumber);
2170 VNDISK_GET_LEN(vnodeLength, vnode);
2171 if (ip->byteCount != vnodeLength) {
2174 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2179 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2180 VNDISK_SET_LEN(vnode, ip->byteCount);
2184 ip->linkCount--; /* Keep the inode around */
2187 } else { /* no matching inode */
2188 if (VNDISK_GET_INO(vnode) != 0
2189 || vnode->type == vDirectory) {
2190 /* No matching inode--get rid of the vnode */
2192 if (VNDISK_GET_INO(vnode)) {
2194 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)));
2198 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2203 if (VNDISK_GET_INO(vnode)) {
2205 time_t serverModifyTime = vnode->serverModifyTime;
2206 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2210 time_t serverModifyTime = vnode->serverModifyTime;
2211 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2214 memset(vnode, 0, vcp->diskSize);
2217 /* Should not reach here because we checked for
2218 * (inodeNumber == 0) above. And where we zero the vnode,
2219 * we also goto vnodeDone.
2223 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2227 } /* VNDISK_GET_INO(vnode) != 0 */
2229 assert(!(vnodeChanged && check));
2230 if (vnodeChanged && !Testing) {
2232 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2233 (char *)vnode, vcp->diskSize)
2235 VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber: map a vnode number to its in-memory VnodeEssence.
 *
 * The vnode class and bit number are derived from vnodeNumber with
 * vnodeIdToClass() and vnodeIdToBitNumber(); the bit number indexes
 * vnodeInfo[class].vnodes.
 *
 * Returns a pointer to that entry, or NULL when the bit number is not
 * below vnodeInfo[class].nVnodes.
 */
2246 struct VnodeEssence *
2247 CheckVnodeNumber(VnodeId vnodeNumber)
2250 struct VnodeInfo *vip;
2253 class = vnodeIdToClass(vnodeNumber);
2254 vip = &vnodeInfo[class];
2255 offset = vnodeIdToBitNumber(vnodeNumber);
2256 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite: give a directory a private copy of its data inode before
 * it is modified.
 *
 * Parameters:
 *   dir  directory summary; copied, vnodeNumber, rwVid, ds_linkH and
 *        dirHandle are used here
 *
 * Nothing is copied when dir->copied is already set or when Testing.
 * Otherwise the directory's vnode is read from the large vnode index, a
 * new inode is created with IH_CREATE (the vnode's dataVersion is bumped
 * in the same call), the old inode's contents are copied with CopyInode(),
 * the vnode is pointed at the new inode and written back, and the
 * directory handle is switched to the new inode.
 *
 * NOTE(review): the index read / write sizes, the new inode and the copy
 * are all validated with assert() only.  Return statements are outside
 * this excerpt.
 */
2260 CopyOnWrite(register struct DirSummary *dir)
2262 /* Copy the directory unconditionally if we are going to change it:
2263 * not just if was cloned.
2265 struct VnodeDiskObject vnode;
2266 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2267 Inode oldinode, newinode;
2270 if (dir->copied || Testing)
2272 DFlush(); /* Well justified paranoia... */
2275 IH_IREAD(vnodeInfo[vLarge].handle,
2276 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2278 assert(code == sizeof(vnode));
2279 oldinode = VNDISK_GET_INO(&vnode);
2280 /* Increment the version number by a whole lot to avoid problems with
2281 * clients that were promised new version numbers--but the file server
2282 * crashed before the versions were written to disk.
2285 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2286 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2288 assert(VALID_INO(newinode));
2289 assert(CopyInode(fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2291 VNDISK_SET_INO(&vnode, newinode);
2293 IH_IWRITE(vnodeInfo[vLarge].handle,
2294 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2296 assert(code == sizeof(vnode));
2298 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2299 fileSysDevice, newinode);
2300 /* Don't delete the original inode right away, because the directory is
2301 * still being scanned.
2307 * This function should either successfully create a new dir, or give up
2308 * and leave things the way they were. In particular, if it fails to write
2309 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage: rebuild a damaged directory into a new inode and switch
 * the directory over to it.
 *
 * Parameters:
 *   dir  directory summary; vnodeNumber, rwVid, ds_linkH and dirHandle are
 *        used here, and dirHandle is replaced on success
 *
 * Visible steps:
 *   - the directory's vnode is read from the large vnode index and a new
 *     inode is created with IH_CREATE (bumping dataVersion in the call);
 *   - the uniquifier for ".." comes from the parent's VnodeEssence when
 *     CheckVnodeNumber() finds one with a non-zero unique, else it is 1;
 *   - DirSalvage() copies the old directory into the new one; when that
 *     fails, or when DirOK() rejects the result, the new inode is
 *     released with IH_DEC and the old directory is kept;
 *   - otherwise the vnode is updated with the new inode and length and
 *     written back, the change is synced, the old directory file is
 *     closed, the old inode is released with IH_DEC and dir->dirHandle
 *     becomes the new handle.
 *
 * NOTE(review): return statements are outside this excerpt.
 */
2313 CopyAndSalvage(register struct DirSummary *dir)
2315 struct VnodeDiskObject vnode;
2316 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2317 Inode oldinode, newinode;
2322 afs_int32 parentUnique = 1;
2323 struct VnodeEssence *vnodeEssence;
2328 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2330 IH_IREAD(vnodeInfo[vLarge].handle,
2331 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2333 assert(lcode == sizeof(vnode));
2334 oldinode = VNDISK_GET_INO(&vnode);
2335 /* Increment the version number by a whole lot to avoid problems with
2336 * clients that were promised new version numbers--but the file server
2337 * crashed before the versions were written to disk.
2340 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2341 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2343 assert(VALID_INO(newinode));
2344 SetSalvageDirHandle(&newdir, dir->rwVid, fileSysDevice, newinode);
2346 /* Assign . and .. vnode numbers from dir and vnode.parent.
2347 * The uniquifier for . is in the vnode.
2348 * The uniquifier for .. might be set to a bogus value of 1 and
2349 * the salvager will later clean it up.
2351 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(vnode.parent))) {
2352 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2355 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2357 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2362 /* didn't really build the new directory properly, let's just give up. */
2363 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2364 Log("Directory salvage returned code %d, continuing.\n", code);
2366 Log("also failed to decrement link count on new inode");
2370 Log("Checking the results of the directory salvage...\n");
2371 if (!DirOK(&newdir)) {
2372 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2373 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2378 VNDISK_SET_INO(&vnode, newinode);
2379 length = Length(&newdir);
2380 VNDISK_SET_LEN(&vnode, length);
2382 IH_IWRITE(vnodeInfo[vLarge].handle,
2383 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2385 assert(lcode == sizeof(vnode));
2388 nt_sync(fileSysDevice);
2390 sync(); /* this is slow, but hopefully rarely called. We don't have
2391 * an open FD on the file itself to fsync.
2395 vnodeInfo[vLarge].handle->ih_synced = 1;
2397 /* make sure old directory file is really closed */
2398 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2399 FDH_REALLYCLOSE(fdP);
/* The vnode now refers to the rebuilt directory, so the reference to the
 * old directory inode is dropped. */
2401 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2403 dir->dirHandle = newdir;
2407 JudgeEntry(struct DirSummary *dir, char *name, VnodeId vnodeNumber,
2410 struct VnodeEssence *vnodeEssence;
2411 afs_int32 dirOrphaned, todelete;
2413 dirOrphaned = IsVnodeOrphaned(dir->vnodeNumber);
2415 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2416 if (vnodeEssence == NULL) {
2418 Log("dir vnode %u: invalid entry deleted: %s/%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2422 assert(Delete(&dir->dirHandle, name) == 0);
2427 #ifndef AFS_NAMEI_ENV
2428 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2429 * mount inode for the partition. If this inode were deleted, it would crash
2432 if (vnodeEssence->InodeNumber == 0) {
2433 Log("dir vnode %d: invalid entry: %s/%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2436 assert(Delete(&dir->dirHandle, name) == 0);
2443 if (!(vnodeNumber & 1) && !Showmode
2444 && !(vnodeEssence->count || vnodeEssence->unique
2445 || vnodeEssence->modeBits)) {
2446 Log("dir vnode %u: invalid entry: %s/%s (vnode %u, unique %u)%s\n",
2447 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
2448 vnodeNumber, unique,
2449 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
2454 assert(Delete(&dir->dirHandle, name) == 0);
2460 /* Check if the Uniquifiers match. If not, change the directory entry
2461 * so its unique matches the vnode unique. Delete if the unique is zero
2462 * or if the directory is orphaned.
2464 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
2465 if (!vnodeEssence->unique
2466 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
2467 /* This is an orphaned directory. Don't delete the . or ..
2468 * entry. Otherwise, it will get created in the next
2469 * salvage and deleted again here. So Just skip it.
2474 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
2477 Log("dir vnode %u: %s/%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
2481 fid.Vnode = vnodeNumber;
2482 fid.Unique = vnodeEssence->unique;
2484 assert(Delete(&dir->dirHandle, name) == 0);
2486 assert(Create(&dir->dirHandle, name, &fid) == 0);
2489 return; /* no need to continue */
2492 if (strcmp(name, ".") == 0) {
2493 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
2496 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
2499 assert(Delete(&dir->dirHandle, ".") == 0);
2500 fid.Vnode = dir->vnodeNumber;
2501 fid.Unique = dir->unique;
2502 assert(Create(&dir->dirHandle, ".", &fid) == 0);
2505 vnodeNumber = fid.Vnode; /* Get the new Essence */
2506 unique = fid.Unique;
2507 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2510 } else if (strcmp(name, "..") == 0) {
2513 struct VnodeEssence *dotdot;
2514 pa.Vnode = dir->parent;
2515 dotdot = CheckVnodeNumber(pa.Vnode);
2516 assert(dotdot != NULL); /* XXX Should not be assert */
2517 pa.Unique = dotdot->unique;
2519 pa.Vnode = dir->vnodeNumber;
2520 pa.Unique = dir->unique;
2522 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
2524 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
2527 assert(Delete(&dir->dirHandle, "..") == 0);
2528 assert(Create(&dir->dirHandle, "..", &pa) == 0);
2531 vnodeNumber = pa.Vnode; /* Get the new Essence */
2533 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2535 dir->haveDotDot = 1;
2536 } else if (strncmp(name, ".__afs", 6) == 0) {
2538 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
2542 assert(Delete(&dir->dirHandle, name) == 0);
2544 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
2545 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
2548 if (ShowSuid && (vnodeEssence->modeBits & 06000))
2549 Log("FOUND suid/sgid file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
2550 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
2551 && !(vnodeEssence->modeBits & 0111)) {
2557 IH_INIT(ihP, fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
2558 vnodeEssence->InodeNumber);
2561 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
2565 size = FDH_SIZE(fdP);
2567 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, size, vnodeNumber);
2568 FDH_REALLYCLOSE(fdP);
2575 code = FDH_READ(fdP, buf, size);
2578 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
2579 Log("Volume %u (%s) mount point %s/%s to '%s' invalid, %s to symbolic link\n",
2580 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
2581 Testing ? "would convert" : "converted");
2582 vnodeEssence->modeBits |= 0111;
2583 vnodeEssence->changed = 1;
2584 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s/%s to '%s'\n",
2585 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
2586 dir->name ? dir->name : "??", name, buf);
2588 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
2589 dir->vname, vnodeNumber, size, code);
2591 FDH_REALLYCLOSE(fdP);
2594 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
2595 Log("FOUND root file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
2596 if (vnodeIdToClass(vnodeNumber) == vLarge
2597 && vnodeEssence->name == NULL) {
2599 if ((n = (char *)malloc(strlen(name) + 1)))
2601 vnodeEssence->name = n;
2604 /* The directory entry points to the vnode. Check to see if the
2605 * vnode points back to the directory. If not, then let the
2606 * directory claim it (else it might end up orphaned). Vnodes
2607 * already claimed by another directory are deleted from this
2608 * directory: hardlinks to the same vnode are not allowed
2609 * from different directories.
2611 if (vnodeEssence->parent != dir->vnodeNumber) {
2612 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
2613 /* Vnode does not point back to this directory.
2614 * Orphaned dirs cannot claim a file (it may belong to
2615 * another non-orphaned dir).
2618 Log("dir vnode %u: %s/%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
2620 vnodeEssence->parent = dir->vnodeNumber;
2621 vnodeEssence->changed = 1;
2623 /* Vnode was claimed by another directory */
2626 Log("dir vnode %u: %s/%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
2627 } else if (vnodeNumber == 1) {
2628 Log("dir vnode %d: %s/%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
2630 Log("dir vnode %u: %s/%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
2635 assert(Delete(&dir->dirHandle, name) == 0);
2640 /* This directory claims the vnode */
2641 vnodeEssence->claimed = 1;
2643 vnodeEssence->count--;
// DistilVnodeEssence: open the vnode index identified by ino for volume rwVId
// and summarise each on-disk vnode of the given class into vnodeInfo[class].
//   rwVId - volume id handed to IH_INIT for the index handle
//   class - vnode class; selects vnodeInfo[class] and VnodeClassInfo[class]
//   ino   - inode of the vnode index file to read
//   maxu  - in/out: raised to the largest uniquifier found among the vnodes
// NOTE(review): this listing omits short source lines (return type, braces,
// some declarations and statements); only the visible statements are described.
2647 DistilVnodeEssence(VolumeId rwVId, VnodeClass class, Inode ino, Unique * maxu)
2649 register struct VnodeInfo *vip = &vnodeInfo[class];
2650 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
// buf is sized for the large on-disk vnode and reused as the read buffer for
// whichever class is being read.
2651 char buf[SIZEOF_LARGEDISKVNODE];
2652 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2654 StreamHandle_t *file;
// Open the index through an inode handle, then wrap the descriptor in a stream.
// The handle is stored in vip->handle and stays initialised after this returns.
2659 IH_INIT(vip->handle, fileSysDevice, rwVId, ino);
2660 fdP = IH_OPEN(vip->handle);
2661 assert(fdP != NULL);
2662 file = FDH_FDOPEN(fdP, "r+");
2663 assert(file != NULL);
2664 size = OS_SIZE(fdP->fd_fd);
// The record count excludes one diskSize-sized slot, and the seek below skips
// the same number of bytes before reading. Presumably that slot is the index
// header - TODO confirm.
2666 vip->nVnodes = (size / vcp->diskSize) - 1;
2667 if (vip->nVnodes > 0) {
// NOTE(review): the seek and both calloc assignments are performed inside
// assert(); they would be skipped entirely if assert were compiled out.
2668 assert((vip->nVnodes + 1) * vcp->diskSize == size);
2669 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
2670 assert((vip->vnodes = (struct VnodeEssence *)
2671 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
// Only the large class gets the parallel inodes[] array.
2672 if (class == vLarge) {
2673 assert((vip->inodes = (Inode *)
2674 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
// Walk the records in file order; the loop also ends early if a read does not
// return exactly one record.
2683 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
2684 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
2685 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2686 nVnodes--, vnodeIndex++) {
// Slots of type vNull are not copied, so their calloc-zeroed essence is kept.
2687 if (vnode->type != vNull) {
2688 register struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
2689 afs_fsize_t vnodeLength;
2690 vip->nAllocatedVnodes++;
// count starts as the on-disk link count.
2691 vep->count = vnode->linkCount;
2692 VNDISK_GET_LEN(vnodeLength, vnode);
2693 vep->blockCount = nBlocks(vnodeLength);
2694 vip->volumeBlockCount += vep->blockCount;
2695 vep->parent = vnode->parent;
2696 vep->unique = vnode->uniquifier;
// Track the largest uniquifier seen so far for the caller.
2697 if (*maxu < vnode->uniquifier)
2698 *maxu = vnode->uniquifier;
2699 vep->modeBits = vnode->modeBits;
2700 vep->InodeNumber = VNDISK_GET_INO(vnode);
2701 vep->type = vnode->type;
2702 vep->author = vnode->author;
2703 vep->owner = vnode->owner;
2704 vep->group = vnode->group;
// Directories must live in the large class because only that class has the
// inodes[] array filled here.
2705 if (vnode->type == vDirectory) {
2706 assert(class == vLarge);
2707 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
// GetDirName: build the path of a directory vnode into path by resolving the
// parent chain first (recursive call) and then appending the name recorded in
// the vnode essence.
//   vnode - vnode number of the directory (not referenced in the visible lines)
//   vp    - essence of that vnode; supplies parent and name
//   path  - caller buffer that receives the text via strcat
// NOTE(review): no buffer length is passed, so the strcat below is unbounded;
// the caller visible in this file (SalvageDir) passes a MAXPATHLEN buffer.
// NOTE(review): the base case and the return statements are not visible in
// this listing; the recursive result is used as a truth value below.
2716 GetDirName(VnodeId vnode, struct VnodeEssence *vp, char *path)
2718 struct VnodeEssence *parentvp;
// Recurse only when there is a parent, a recorded name, and the parent
// essence can be looked up.
2724 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(vp->parent))
2725 && GetDirName(vp->parent, parentvp, path)) {
2727 strcat(path, vp->name);
2733 /* To determine if a vnode is orphaned or not, the vnode and all its parent
2734 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
// IsVnodeOrphaned: return 1 when the vnode, or any ancestor reached through
// the parent links, is not claimed; return 0 once the walk reaches the root.
//   vnode - vnode number to test
// NOTE(review): the conditions guarding the first two returns are not visible
// in this listing; the inline comments indicate vnode zero and the root vnode.
// NOTE(review): the walk is recursive with no visible depth limit, so a cycle
// of claimed vnodes in the parent links would not terminate - confirm that
// callers cannot produce one.
2737 IsVnodeOrphaned(VnodeId vnode)
2739 struct VnodeEssence *vep;
2742 return (1); /* Vnode zero does not exist */
2744 return (0); /* The root dir vnode is always claimed */
2745 vep = CheckVnodeNumber(vnode); /* Get the vnode essence */
// A vnode with no essence is treated the same as an unclaimed one.
2746 if (!vep || !vep->claimed)
2747 return (1); /* Vnode is not claimed - it is orphaned */
// This vnode is claimed; the answer now depends on its parent.
2749 return (IsVnodeOrphaned(vep->parent));
// SalvageDir: salvage the directory stored in large-vnode slot i, after first
// salvaging its parent directory.
//   name         - volume name (forwarded on the recursive call)
//   rwVid        - read-write volume id, used for IH_DEC of a replaced inode
//   dirVnodeInfo - large-vnode info holding vnodes[] and inodes[]
//   alinkH       - handle stored in dir.ds_linkH and forwarded on recursion
//   i            - slot index within dirVnodeInfo
//   rootdir      - out: receives a copy of the DirSummary for vnode 1, unique 1
// The parameter list continues on a line that is not visible in this listing
// (rootdirfound is forwarded on the recursive call).
// NOTE(review): dir, dirHandle and path are static, so they are shared by all
// levels of the recursion. The recursive call happens before any of them is
// filled in for this slot, which is why the sharing does not clobber live data
// in the visible code.
2753 SalvageDir(char *name, VolumeId rwVid, struct VnodeInfo *dirVnodeInfo,
2754 IHandle_t * alinkH, int i, struct DirSummary *rootdir,
2757 static struct DirSummary dir;
2758 static struct DirHandle dirHandle;
2759 struct VnodeEssence *parent;
2760 static char path[MAXPATHLEN];
2763 if (dirVnodeInfo->vnodes[i].salvaged)
2764 return; /* already salvaged */
// Mark before recursing so a parent chain that leads back here returns early.
2767 dirVnodeInfo->vnodes[i].salvaged = 1;
2769 if (dirVnodeInfo->inodes[i] == 0)
2770 return; /* Not allocated to a directory */
// Vnode 1 must have no parent: a non-zero parent is cleared and flagged.
2772 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
2773 if (dirVnodeInfo->vnodes[i].parent) {
2774 Log("Bad parent, vnode 1; %s...\n",
2775 (Testing ? "skipping" : "salvaging"));
2776 dirVnodeInfo->vnodes[i].parent = 0;
2777 dirVnodeInfo->vnodes[i].changed = 1;
// For the remaining case visible here, salvage the parent directory first.
2780 parent = CheckVnodeNumber(dirVnodeInfo->vnodes[i].parent);
2781 if (parent && parent->salvaged == 0)
2782 SalvageDir(name, rwVid, dirVnodeInfo, alinkH,
2783 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
2784 rootdir, rootdirfound);
// Fill in the shared DirSummary for this slot.
2787 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
2788 dir.unique = dirVnodeInfo->vnodes[i].unique;
2791 dir.parent = dirVnodeInfo->vnodes[i].parent;
2792 dir.haveDot = dir.haveDotDot = 0;
2793 dir.ds_linkH = alinkH;
2794 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, fileSysDevice,
2795 dirVnodeInfo->inodes[i]);
// When RebuildDirs is set and this is not a test run, DirOK is not consulted
// and the directory is treated as bad.
2797 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
2800 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
2801 (Testing ? "skipping" : "salvaging"));
2804 CopyAndSalvage(&dir);
// Keep a copy of the handle as it stands now; it is used further down to
// release the old inode if the directory gets copied.
2808 dirHandle = dir.dirHandle;
2811 GetDirName(bitNumberToVnodeNumber(i, vLarge),
2812 &dirVnodeInfo->vnodes[i], path);
2815 /* If enumeration failed for random reasons, we will probably delete
2816 * too much stuff, so we guard against this instead.
// NOTE(review): EnumerateDir is called inside assert(); it would not run at
// all if assert were compiled out.
2818 assert(EnumerateDir(&dirHandle, JudgeEntry, &dir) == 0);
2821 /* Delete the old directory if it was copied in order to salvage.
2822 * CopyOnWrite has written the new inode # to the disk, but we still
2823 * have the old one in our local structure here. Thus, we idec the
2827 if (dir.copied && !Testing) {
// dirHandle still refers to the pre-copy inode; dir.dirHandle has the new one.
2828 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
2830 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
2833 /* Remember rootdir DirSummary _after_ it has been judged */
2834 if (dir.vnodeNumber == 1 && dir.unique == 1) {
2835 memcpy(rootdir, &dir, sizeof(struct DirSummary));
// SalvageVolume: salvage one read-write volume. Visible phases, in order:
//   1. read and sanity-check the volume header
//   2. distil both vnode indexes into vnodeInfo[]
//   3. run SalvageDir over every large-vnode slot
//   4. find unclaimed vnodes and, when orphans == ORPH_ATTACH, link them into
//      the root directory
//   5. rewrite every vnode whose essence changed or whose link count is off
//   6. update usage statistics and flags in the volume header and write it back
//   rwIsp  - inode summary of the read-write volume; supplies the header ids
//   alinkH - handle passed through to SalvageDir and used for IH_DEC
// NOTE(review): this listing omits short source lines (return type, braces,
// several declarations and statements); only visible statements are described.
2843 SalvageVolume(register struct InodeSummary *rwIsp, IHandle_t * alinkH)
2845 /* This routine, for now, will only be called for read-write volumes */
2847 int BlocksInVolume = 0, FilesInVolume = 0;
2848 register VnodeClass class;
2849 struct DirSummary rootdir, oldrootdir;
2850 struct VnodeInfo *dirVnodeInfo;
2851 struct VnodeDiskObject vnode;
2852 VolumeDiskData volHeader;
2854 int orphaned, rootdirfound = 0;
2855 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
2856 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
2857 struct VnodeEssence *vep;
2860 afs_sfsize_t nBytes;
2862 VnodeId LFVnode, ThisVnode;
2863 Unique LFUnique, ThisUnique;
// Phase 1: load the volume header and refuse to continue on a short read, a
// bad magic number, or a volume still marked for destruction.
2866 vid = rwIsp->volSummary->header.id;
2867 IH_INIT(h, fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
2868 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
2869 assert(nBytes == sizeof(volHeader));
2870 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
2871 assert(volHeader.destroyMe != DESTROY_ME);
2872 /* (should not have gotten this far with DESTROY_ME flag still set!) */
// Phase 2: summarise the large and small vnode indexes.
2874 DistilVnodeEssence(vid, vLarge, rwIsp->volSummary->header.largeVnodeIndex,
2876 DistilVnodeEssence(vid, vSmall, rwIsp->volSummary->header.smallVnodeIndex,
// Phase 3: salvage each directory slot; SalvageDir skips slots it has already
// handled or that hold no directory.
2879 dirVnodeInfo = &vnodeInfo[vLarge];
2880 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
2881 SalvageDir(volHeader.name, vid, dirVnodeInfo, alinkH, i, &rootdir,
// The two sync calls below are presumably alternatives selected by a
// platform conditional that is not visible here - TODO confirm.
2885 nt_sync(fileSysDevice);
2887 sync(); /* This used to be done lower level, for every dir */
// Phase 4: orphan handling.
// Sign convention: the write-back pass computes linkCount = linkCount - count,
// so count++ lowers and count-- raises the link count that is finally written.
2894 /* Parse each vnode looking for orphaned vnodes and
2895 * connect them to the tree as orphaned (if requested).
2897 oldrootdir = rootdir;
2898 for (class = 0; class < nVNODECLASSES; class++) {
2899 for (v = 0; v < vnodeInfo[class].nVnodes; v++) {
2900 vep = &(vnodeInfo[class].vnodes[v]);
2901 ThisVnode = bitNumberToVnodeNumber(v, class);
// ThisUnique is captured before vep->unique is overwritten further down.
2902 ThisUnique = vep->unique;
2904 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
2905 continue; /* Ignore unused, claimed, and root vnodes */
2907 /* This vnode is orphaned. If it is a directory vnode, then the '..'
2908 * entry in this vnode had incremented the parent link count (In
2909 * JudgeEntry()). We need to go to the parent and decrement that
2910 * link count. But if the parent's unique is zero, then the parent
2911 * link count was not incremented in JudgeEntry().
2913 if (class == vLarge) { /* directory vnode */
2914 pv = vnodeIdToBitNumber(vep->parent);
// count++ here is the decrement of the parent link count described above.
2915 if (vnodeInfo[vLarge].vnodes[pv].unique != 0)
2916 vnodeInfo[vLarge].vnodes[pv].count++;
2920 continue; /* If no rootdir, can't attach orphaned files */
2922 /* Here we attach orphaned files and directories into the
2923 * root directory, LVVnode, making sure link counts stay correct.
2925 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
2926 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
2927 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
2929 /* Update this orphaned vnode's info. Its parent info and
2930 * link count (do for orphaned directories and files).
2932 vep->parent = LFVnode; /* Parent is the root dir */
// NOTE(review): this replaces the uniquifier recorded for the orphan with the
// one from the root directory, while the directory entry created below uses
// ThisUnique captured earlier - confirm this is intended.
2933 vep->unique = LFUnique;
// count-- raises the link count that will be written (see sign convention).
2936 vep->count--; /* Inc link count (root dir will pt to it) */
2938 /* If this orphaned vnode is a directory, change '..'.
2939 * The name of the orphaned dir/file is unknown, so we
2940 * build a unique name. No need to CopyOnWrite the directory
2941 * since it is not connected to tree in BK or RO volume and
2942 * won't be visible there.
2944 if (class == vLarge) {
2948 /* Remove and recreate the ".." entry in this orphaned directory */
2949 SetSalvageDirHandle(&dh, vid, fileSysDevice,
2950 vnodeInfo[class].inodes[v]);
2952 pa.Unique = LFUnique;
// NOTE(review): Delete and Create run inside assert(); they would be skipped
// if assert were compiled out.
2953 assert(Delete(&dh, "..") == 0);
2954 assert(Create(&dh, "..", &pa) == 0);
2956 /* The original parent's link count was decremented above.
2957 * Here we increment the new parent's link count.
2959 pv = vnodeIdToBitNumber(LFVnode);
2960 vnodeInfo[vLarge].vnodes[pv].count--;
2964 /* Go to the root dir and add this entry. The link count of the
2965 * root dir was incremented when ".." was created. Try 10 times.
// Each retry bumps ThisUnique by 50 before trying again; presumably the
// generated name embeds it so the next attempt uses a different name.
2967 for (j = 0; j < 10; j++) {
2968 pa.Vnode = ThisVnode;
2969 pa.Unique = ThisUnique;
2971 (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
2973 vLarge) ? "__ORPHANDIR__" :
2974 "__ORPHANFILE__"), ThisVnode,
2977 CopyOnWrite(&rootdir);
2978 code = Create(&rootdir.dirHandle, npath, &pa);
2982 ThisUnique += 50; /* Try creating a different file */
2985 Log("Attaching orphaned %s to volume's root dir as %s\n",
2986 ((class == vLarge) ? "directory" : "file"), npath);
2988 } /* for each vnode in the class */
2989 } /* for each class of vnode */
2991 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
// oldrootdir is the snapshot taken before the orphan pass, so this detects a
// copy that happened during that pass.
2993 if (!oldrootdir.copied && rootdir.copied) {
2995 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
2998 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
3001 DFlush(); /* Flush the changes */
// Without a root directory there is nothing to attach to, so the orphan policy
// is downgraded for the write-back pass.
3002 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
3003 Log("Cannot attach orphaned files and directories: Root directory not found\n");
3004 orphans = ORPH_IGNORE;
// Phase 5: write-back.
3007 /* Write out all changed vnodes. Orphaned files and directories
3008 * will get removed here also (if requested).
3010 for (class = 0; class < nVNODECLASSES; class++) {
3011 int nVnodes = vnodeInfo[class].nVnodes;
3012 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3013 struct VnodeEssence *vnodes = vnodeInfo[class].vnodes;
3014 FilesInVolume += vnodeInfo[class].nAllocatedVnodes;
3015 BlocksInVolume += vnodeInfo[class].volumeBlockCount;
3016 for (i = 0; i < nVnodes; i++) {
3017 register struct VnodeEssence *vnp = &vnodes[i];
3018 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
3020 /* If the vnode is good but is unclaimed (not listed in
3021 * any directory entries), then it is orphaned.
3024 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(vnodeNumber))) {
3025 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
// A non-zero count means the stored link count disagrees with what the
// directory scan found.
3029 if (vnp->changed || vnp->count) {
3033 IH_IREAD(vnodeInfo[class].handle,
3034 vnodeIndexOffset(vcp, vnodeNumber),
3035 (char *)&vnode, sizeof(vnode));
3036 assert(nBytes == sizeof(vnode));
3038 vnode.parent = vnp->parent;
3039 oldCount = vnode.linkCount;
// count is subtracted, which is why earlier code uses count-- to add a link.
3040 vnode.linkCount = vnode.linkCount - vnp->count;
3043 orphaned = IsVnodeOrphaned(vnodeNumber);
3045 if (!vnp->todelete) {
3046 /* Orphans should have already been attached (if requested) */
3047 assert(orphans != ORPH_ATTACH);
3048 oblocks += vnp->blockCount;
3051 if (((orphans == ORPH_REMOVE) || vnp->todelete)
3053 BlocksInVolume -= vnp->blockCount;
// Release the data inode, then blank the whole vnode record.
3055 if (VNDISK_GET_INO(&vnode)) {
3057 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
3060 memset(&vnode, 0, sizeof(vnode));
3062 } else if (vnp->count) {
3064 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
3067 vnode.modeBits = vnp->modeBits;
3070 vnode.dataVersion++;
3073 IH_IWRITE(vnodeInfo[class].handle,
3074 vnodeIndexOffset(vcp, vnodeNumber),
3075 (char *)&vnode, sizeof(vnode));
3076 assert(nBytes == sizeof(vnode));
3082 if (!Showmode && ofiles) {
3083 Log("%s %d orphaned files and directories (approx. %u KB)\n",
3085 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
// Free the names recorded for vnodes; the arrays themselves are not freed in
// the visible lines.
3089 for (class = 0; class < nVNODECLASSES; class++) {
3090 register struct VnodeInfo *vip = &vnodeInfo[class];
3091 for (i = 0; i < vip->nVnodes; i++)
3092 if (vip->vnodes[i].name)
3093 free(vip->vnodes[i].name);
// Phase 6: volume header update.
3100 /* Set correct resource utilization statistics */
3101 volHeader.filecount = FilesInVolume;
3102 volHeader.diskused = BlocksInVolume;
3104 /* Make sure the uniquifer is big enough: maxunique is the real maxUniquifier */
3105 if (volHeader.uniquifier < (maxunique + 1)) {
3107 Log("Volume uniquifier is too low; fixed\n");
3108 /* Plus 2,000 in case there are workstations out there with
3109 * cached vnodes that have since been deleted
3111 volHeader.uniquifier = (maxunique + 1 + 2000);
3114 /* Turn off the inUse bit; the volume's been salvaged! */
3115 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
3116 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
3117 volHeader.inService = 1; /* allow service again */
3118 volHeader.needsCallback = (VolumeChanged != 0);
3119 volHeader.dontSalvage = DONT_SALVAGE;
3122 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
3123 assert(nBytes == sizeof(volHeader));
3126 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
3127 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
3128 FilesInVolume, BlocksInVolume);
// Drop the index handles that DistilVnodeEssence initialised.
3130 IH_RELEASE(vnodeInfo[vSmall].handle);
3131 IH_RELEASE(vnodeInfo[vLarge].handle);
// ClearROInUseBit: read the volume header through the handle stored in the
// summary, clear inUse and needsSalvaged, set inService and dontSalvage, and
// write the header back.
//   summary - volume summary whose volumeInfoHandle addresses the header
// A short read or write, or a bad magic number, trips an assert.
// NOTE(review): any guard between the field updates and the write (line 3151
// is not visible in this listing) cannot be described from here.
3137 ClearROInUseBit(struct VolumeSummary *summary)
3139 IHandle_t *h = summary->volumeInfoHandle;
3140 afs_sfsize_t nBytes;
3142 VolumeDiskData volHeader;
3144 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3145 assert(nBytes == sizeof(volHeader));
3146 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
// Same flag values that SalvageVolume writes after a successful salvage,
// except that needsCallback is left untouched here.
3147 volHeader.inUse = 0;
3148 volHeader.needsSalvaged = 0;
3149 volHeader.inService = 1;
3150 volHeader.dontSalvage = DONT_SALVAGE;
3152 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
3153 assert(nBytes == sizeof(volHeader));
3158 * Possibly delete the volume.
3160 * deleteMe - Always do so, only a partial volume.
// MaybeZapVolume: decide what to do with a volume that could not be salvaged.
//   isp      - inode summary of the volume
//   message  - prefix used in the failure log line for read-write volumes
//   deleteMe - non-zero forces deletion regardless of volume type
// The parameter list continues on a line not visible in this listing; a
// variable named check is tested below.
// Read-only volumes, and any volume with deleteMe set, have their header file
// unlinked when a file name is recorded. Otherwise, unless check is set, the
// salvage is aborted.
3163 MaybeZapVolume(register struct InodeSummary *isp, char *message, int deleteMe,
3166 if (readOnly(isp) || deleteMe) {
// Nothing is removed when no header file name is known for the volume.
3167 if (isp->volSummary && isp->volSummary->fileName) {
// The conditions choosing between the two message pairs below are not
// visible; the first pair is worded for the partial-volume case.
3170 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
3172 Log("It will be deleted on this server (you may find it elsewhere)\n");
3175 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
3177 Log("it will be deleted instead. It should be recloned.\n");
// NOTE(review): the unlink result is not checked in the visible code.
3180 unlink(isp->volSummary->fileName);
3182 } else if (!check) {
3183 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
3185 Abort("Salvage of volume %u aborted\n", isp->volumeId);
// AskOffline: ask the file server, over the FSYNC channel, to take a volume
// offline so it can be salvaged. Up to three attempts are made.
//   volumeId  - volume to take offline
//   partition - partition name passed to FSYNC_VolOp
// A denial or a protocol mismatch aborts the salvage at once; any other
// failure is logged and retried, and a failure that persists after the loop
// also aborts.
3191 AskOffline(VolumeId volumeId, char * partition)
3195 for (i = 0; i < 3; i++) {
3196 code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_OFF, FSYNC_SALVAGE, NULL);
3198 if (code == SYNC_OK) {
3200 } else if (code == SYNC_DENIED) {
// The two messages differ only in may be / is; the else and endif lines of
// the conditional are not visible in this listing.
3201 #ifdef DEMAND_ATTACH_ENABLE
3202 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
3204 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
3206 Abort("Salvage aborted\n");
3207 } else if (code == SYNC_BAD_COMMAND) {
3208 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
3210 #ifdef DEMAND_ATTACH_ENABLE
3211 Log("AskOffline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
3213 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
3215 Abort("Salvage aborted\n");
// Retry path: the client side of the sync channel is torn down here; the
// matching re-initialisation is presumably on a line not visible - TODO confirm.
3218 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
3219 FSYNC_clientFinis();
// Reached after the loop: anything other than SYNC_OK is fatal.
3223 if (code != SYNC_OK) {
3224 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
3225 Abort("Salvage aborted\n");
// AskOnline: ask the file server, over the FSYNC channel, to bring a volume
// back online. Up to three attempts are made.
//   volumeId  - volume to bring online
//   partition - partition name passed to FSYNC_VolOp
// Unlike AskOffline, no visible path here calls Abort: failures are only
// logged.
3230 AskOnline(VolumeId volumeId, char *partition)
3234 for (i = 0; i < 3; i++) {
3235 code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
3237 if (code == SYNC_OK) {
3239 } else if (code == SYNC_DENIED) {
3240 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, partition);
3241 } else if (code == SYNC_BAD_COMMAND) {
3242 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
3244 #ifdef DEMAND_ATTACH_ENABLE
3245 Log("AskOnline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
3247 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
// NOTE(review): the retry message below says offline although this function
// requests FSYNC_VOL_ON; it looks copied from AskOffline. It is a runtime
// string, so it is left unchanged here.
3252 Log("AskOnline: request for fileserver to take volume offline failed; trying again...\n");
3253 FSYNC_clientFinis();
// CopyInode: copy the full contents of inode1 into inode2 on the same device.
//   device   - device both inodes live on
//   inode1   - source inode
//   inode2   - destination inode
//   rwvolume - volume id passed to IH_INIT for both handles
// Data is moved in sizeof(buf) chunks until FDH_READ returns zero or less.
// NOTE(review): srcFdP is asserted non-NULL but no equivalent check on destFdP
// is visible in this listing (line 3275 is missing) - confirm.
// NOTE(review): the write happens inside assert(); it would not run if assert
// were compiled out.
3260 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
3262 /* Volume parameter is passed in case iopen is upgraded in future to
3263 * require a volume Id to be passed
3266 IHandle_t *srcH, *destH;
3267 FdHandle_t *srcFdP, *destFdP;
3270 IH_INIT(srcH, device, rwvolume, inode1);
3271 srcFdP = IH_OPEN(srcH);
3272 assert(srcFdP != NULL);
3273 IH_INIT(destH, device, rwvolume, inode2);
3274 destFdP = IH_OPEN(destH);
// A short write is treated as fatal.
3276 while ((n = FDH_READ(srcFdP, buf, sizeof(buf))) > 0)
3277 assert(FDH_WRITE(destFdP, buf, n) == n);
3279 FDH_REALLYCLOSE(srcFdP);
3280 FDH_REALLYCLOSE(destFdP);
// PrintInodeList: debugging aid that logs one line per ViceInodeInfo record
// read from the already-open descriptor inodeFd.
// The whole file is read in one call into a buffer sized from fstat.
// NOTE(review): fstat and read are invoked inside assert(); they would not run
// if assert were compiled out.
// NOTE(review): no release of buf is visible in this listing - confirm that it
// is freed on a line that was omitted.
3287 PrintInodeList(void)
3289 register struct ViceInodeInfo *ip;
3290 struct ViceInodeInfo *buf;
3291 struct afs_stat status;
3294 assert(afs_fstat(inodeFd, &status) == 0);
3295 buf = (struct ViceInodeInfo *)malloc(status.st_size);
3296 assert(buf != NULL);
// Any trailing partial record is ignored by the integer division.
3297 nInodes = status.st_size / sizeof(struct ViceInodeInfo);
3298 assert(read(inodeFd, buf, status.st_size) == status.st_size);
3299 for (ip = buf; nInodes--; ip++) {
3300 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
3301 PrintInode(NULL, ip->inodeNumber), ip->linkCount,
3302 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
3303 ip->u.param[2], ip->u.param[3]);
// PrintInodeSummary: debugging aid that logs one line for each of the
// nVolumesInInodeFile entries of the global inodeSummary array.
// The trailing word volSummary in the format has no matching argument; it is
// printed literally.
3309 PrintInodeSummary(void)
3312 struct InodeSummary *isp;
3314 for (i = 0; i < nVolumesInInodeFile; i++) {
3315 isp = &inodeSummary[i];
3316 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
// PrintVolumeSummary: debugging aid that logs the header file name of each of
// the nVolumes entries starting at volumeSummaryp.
// Only fileName is printed; the words header and wouldNeedCallback in the
// format have no matching arguments and are printed literally.
3321 PrintVolumeSummary(void)
3324 struct VolumeSummary *vsp;
3326 for (i = 0, vsp = volumeSummaryp; i < nVolumes; vsp++, i++) {
3327 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
// Fragment of the process-forking wrapper. Its header line and the fork call
// itself are not visible in this listing; the name Fork comes from the
// comment on the first line below.
// On the NT path the function is not expected to run at all.
3337 assert(0); /* Fork is never executed in the NT code path */
// Demand-attach builds only: when f is zero and the program is a
// salvageserver, the file server sync connection is re-established through
// VChildProcReconnectFS_r. The statement under SALVSYNC_BUILD_CLIENT is not
// visible.
3341 #ifdef AFS_DEMAND_ATTACH_FS
3342 if ((f == 0) && (programType == salvageServer)) {
3343 /* we are a salvageserver child */
3344 #ifdef FSSYNC_BUILD_CLIENT
3345 VChildProcReconnectFS_r();
3347 #ifdef SALVSYNC_BUILD_CLIENT
3351 #endif /* AFS_DEMAND_ATTACH_FS */
3352 #endif /* !AFS_NT40_ENV */
// Fragment of the process-exit wrapper. NOTE(review): its header line is not
// visible in this listing; the name Exit is assumed from the upstream module.
// Demand-attach builds only: a salvageserver performs sync-client cleanup
// here; the statements under the two inner conditionals are not visible.
3363 #ifdef AFS_DEMAND_ATTACH_FS
3364 if (programType == salvageServer) {
3365 #ifdef SALVSYNC_BUILD_CLIENT
3368 #ifdef FSSYNC_BUILD_CLIENT
3372 #endif /* AFS_DEMAND_ATTACH_FS */
// A thread other than the recorded main thread ends only itself.
// NOTE(review): code is cast straight to a pointer; if it is an int, going
// through intptr_t would avoid a size-mismatch warning - confirm its type.
3375 if (main_thread != pthread_self())
3376 pthread_exit((void *)code);
// Fragment of the child-reaping wrapper. NOTE(review): its header line is not
// visible in this listing; the name Wait is assumed from the upstream module.
// Blocks until any child terminates, then inspects how it ended.
3389 pid = wait(&status);
// prog is only used to label the core-dump message.
3391 if (WCOREDUMP(status))
3392 Log("\"%s\" core dumped!\n", prog);
// Death by signal or a non-zero exit status both take the branch below; its
// body is not visible here.
3393 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp - format a time value as local time for log output.
 *
 * clock     - the time to format
 * precision - non-zero selects "MM/DD/YYYY HH:MM:SS"; zero drops the seconds
 *
 * Returns a pointer to a static buffer that is overwritten by the next call,
 * so the result must be consumed (or copied) before calling again and the
 * function is not reentrant.
 *
 * If the time cannot be converted or does not fit the buffer, an empty
 * string is returned instead of passing a NULL struct tm to strftime or
 * handing back a buffer with indeterminate contents.
 */
char *
TimeStamp(time_t clock, int precision)
{
    /* 19 characters for the long form plus the terminating NUL */
    static char timestamp[20];
    const char *format =
	precision ? "%m/%d/%Y %H:%M:%S" : "%m/%d/%Y %H:%M";
    struct tm *lt = localtime(&clock);

    /* localtime may fail; strftime returns 0 when the result does not fit */
    if (lt == NULL
	|| strftime(timestamp, sizeof timestamp, format, lt) == 0)
	timestamp[0] = '\0';

    return timestamp;
}
// CheckLogFile: set up the global logFile for the salvager log at log_path.
//   log_path - path of the log file to open
// Visible steps: build the name log_path followed by the suffix .old, rename
// the existing log to that name, and open log_path for append. Per the inline
// comment, stdout is used when the open fails.
// NOTE(review): the strcpy and strcat into oldSlvgLog are unbounded; a
// log_path within four characters of AFSDIR_PATH_MAX overflows the buffer.
// TimeStampLogFile in this file builds a similar name with afs_snprintf and
// sizeof.
// NOTE(review): the statements guarded by the first conditional, and the
// condition around the rename, are not visible in this listing.
3412 CheckLogFile(char * log_path)
3414 char oldSlvgLog[AFSDIR_PATH_MAX];
3416 #ifndef AFS_NT40_ENV
3423 strcpy(oldSlvgLog, log_path);
3424 strcat(oldSlvgLog, ".old");
// The rename result is not checked in the visible code.
3426 renamefile(log_path, oldSlvgLog);
3427 logFile = afs_fopen(log_path, "a");
3429 if (!logFile) { /* still nothing, use stdout */
3433 #ifndef AFS_NAMEI_ENV
3434 AFS_DEBUG_IOPS_LOG(logFile);
3441 TimeStampLogFile(char * log_path)
3443 char stampSlvgLog[AFSDIR_PATH_MAX];
3448 lt = localtime(&now);
3449 (void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
3450 "%s.%04d-%02d-%02d.%02d:%02d:%02d",
3451 log_path, lt->tm_year + 1900,
3452 lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
3455 /* try to link the logfile to a timestamped filename */
3456 /* if it fails, oh well, nothing we can do */
3457 link(log_path, stampSlvgLog);
// Fragment of the routine that echoes the salvager log. NOTE(review): its
// header line is not visible in this listing; the name showlog is assumed
// from the upstream module.
// On non-NT builds the first message is printed when logging went to syslog;
// the guarding condition is not visible.
3466 #ifndef AFS_NT40_ENV
3468 printf("Can't show log since using syslog.\n");
// Otherwise the log file is reopened for reading and consumed line by line;
// the loop body is not visible here.
3477 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
3480 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
3483 while (fgets(line, sizeof(line), logFile))
// Log: printf-style logging used throughout the salvager.
//   format - printf-style format string followed by its arguments
// The message is first rendered into tmp with a size-bounded afs_vsnprintf, so
// over-long messages are truncated rather than overflowing.
// On non-NT builds the text can go to syslog at LOG_INFO; the condition that
// selects syslog is not visible in this listing. Otherwise it is written to
// logFile prefixed with a seconds-precision timestamp.
3490 Log(const char *format, ...)
3496 va_start(args, format);
3497 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
3499 #ifndef AFS_NT40_ENV
// The fixed format keeps message text from being interpreted by syslog.
3501 syslog(LOG_INFO, "%s", tmp);
3505 gettimeofday(&now, 0);
3506 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
// Abort: printf-style fatal-error reporter.
//   format - printf-style format string followed by its arguments
// The message is rendered into tmp with a size-bounded afs_vsnprintf and then
// sent to syslog (non-NT builds, condition not visible) or written to logFile
// without a timestamp.
// NOTE(review): the statements that actually terminate the process are not
// visible in this listing.
3512 Abort(const char *format, ...)
3517 va_start(args, format);
3518 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
3520 #ifndef AFS_NT40_ENV
3522 syslog(LOG_INFO, "%s", tmp);
3526 fprintf(logFile, "%s", tmp);
// Fragment of a string-duplicating helper. NOTE(review): only this allocation
// is visible; the header, the copy and the return are on omitted lines, and
// the name ToString is assumed from the upstream module.
// Allocates room for the characters of s plus the terminating NUL.
3541 p = (char *)malloc(strlen(s) + 1);
3548 /* Remove the FORCESALVAGE file */
// RemoveTheForce: when not in Testing mode and ForceSalvage is set, change
// into path and unlink the file FORCESALVAGE there.
//   path - directory expected to contain the FORCESALVAGE file
// Side effect: the working directory of the process is changed.
// Nothing is done if the chdir fails, and the unlink result is not checked in
// the visible code.
3550 RemoveTheForce(char *path)
3552 if (!Testing && ForceSalvage) {
3553 if (chdir(path) == 0)
3554 unlink("FORCESALVAGE");
// UseTheForceLuke: report whether a forced salvage was requested for the
// partition at path. Two variants are present; the first is compiled when
// AFS_AIX32_ENV is not defined. The else and endif lines of the conditional
// are not visible in this listing.
//   path - partition directory to examine
3558 #ifndef AFS_AIX32_ENV
3560 * UseTheForceLuke - see if we can use the force
// First variant: returns non-zero when a file named FORCESALVAGE can be
// stat-ed inside path.
// Side effect: the working directory of the process is changed.
// NOTE(review): chdir runs inside assert(); it would not run if assert were
// compiled out.
3563 UseTheForceLuke(char *path)
3565 struct afs_stat force;
3567 assert(chdir(path) != -1);
3569 return (afs_stat("FORCESALVAGE", &force) == 0);
3573 * UseTheForceLuke - see if we can use the force
3576 * The VRMIX fsck will not muck with the filesystem it is supposedly
3577 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
3578 * muck directly with the root inode, which is within the normal
3580 * ListViceInodes() has a side effect of setting ForceSalvage if
3581 * it detects a need, based on root inode examination.
// Second variant: always reports that no force file is present.
3584 UseTheForceLuke(char *path)
3587 return 0; /* sorry OB1 */
3592 /* NT support routines */
// Path of the running executable; filled in on first use by
// nt_SalvagePartition and reused afterwards.
3594 static char execpathname[MAX_PATH];
// nt_SalvagePartition: start a child salvager process for one partition.
//   partName - partition name copied into the job descriptor
//   jobn     - job number stored in the job descriptor
// The child is spawned from this same executable with the saved argument
// vector, and the job descriptor is passed to it as a data block. The return
// statements are not visible in this listing.
3596 nt_SalvagePartition(char *partName, int jobn)
3601 if (!*execpathname) {
// NOTE(review): the call limits the result to MAX_PATH - 1 characters but the
// check below compares against the literal 1023, so it only matches if
// MAX_PATH happens to be 1024 - confirm and compare with MAX_PATH - 1.
3602 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
3603 if (!n || n == 1023)
3606 job.cj_magic = SALVAGER_MAGIC;
3607 job.cj_number = jobn;
// NOTE(review): unbounded copy; the size of cj_part is not visible here.
3608 (void)strcpy(job.cj_part, partName);
3609 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
// nt_SetupPartitionSalvage: child-side counterpart of nt_SalvagePartition.
//   datap - data block received from the parent, interpreted as a childJob_t
//   len   - size of that block in bytes
// The block is rejected when its size or magic number does not match. A log
// file named after the salvager log path plus a numeric suffix is then opened
// for writing. The return statements and the suffix argument are not visible
// in this listing.
3614 nt_SetupPartitionSalvage(void *datap, int len)
3616 childJob_t *jobp = (childJob_t *) datap;
3617 char logname[AFSDIR_PATH_MAX];
3619 if (len != sizeof(childJob_t))
3621 if (jobp->cj_magic != SALVAGER_MAGIC)
// NOTE(review): sprintf into logname is unbounded; other code in this file
// uses afs_snprintf with sizeof for path building.
3626 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
3628 logFile = afs_fopen(logname, "w");
3636 #endif /* AFS_NT40_ENV */