2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
93 #include <sys/param.h>
97 #endif /* ITIMER_REAL */
103 #include <sys/stat.h>
108 #include <WINNT/afsevent.h>
110 #if defined(AFS_AIX_ENV) || defined(AFS_SUN4_ENV)
111 #define WCOREDUMP(x) (x & 0200)
114 #include <afs/afsint.h>
115 #include <afs/assert.h>
116 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
117 #if defined(AFS_VFSINCL_ENV)
118 #include <sys/vnode.h>
120 #include <sys/fs/ufs_inode.h>
122 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
123 #include <ufs/ufs/dinode.h>
124 #include <ufs/ffs/fs.h>
126 #include <ufs/inode.h>
129 #else /* AFS_VFSINCL_ENV */
131 #include <ufs/inode.h>
132 #else /* AFS_OSF_ENV */
133 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
134 #include <sys/inode.h>
137 #endif /* AFS_VFSINCL_ENV */
138 #endif /* AFS_SGI_ENV */
141 #include <sys/lockf.h>
145 #include <checklist.h>
147 #if defined(AFS_SGI_ENV)
152 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
155 #include <sys/mnttab.h>
156 #include <sys/mntent.h>
161 #endif /* AFS_SGI_ENV */
162 #endif /* AFS_HPUX_ENV */
167 #include <afs/osi_inode.h>
170 #include <afs/afsutil.h>
171 #include <afs/fileutil.h>
172 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
180 #include <afs/afssyscalls.h>
184 #include "partition.h"
185 #include "daemon_com.h"
187 #include "salvsync.h"
188 #include "viceinode.h"
190 #include "volinodes.h" /* header magic number, etc. stuff */
191 #include "vol-salvage.h"
196 /*@+fcnmacros +macrofcndecl@*/
199 extern off64_t afs_lseek(int FD, off64_t O, int F);
200 #endif /*S_SPLINT_S */
201 #define afs_lseek(FD, O, F) lseek64(FD, (off64_t) (O), F)
202 #define afs_stat stat64
203 #define afs_fstat fstat64
204 #define afs_open open64
205 #define afs_fopen fopen64
206 #else /* !O_LARGEFILE */
208 extern off_t afs_lseek(int FD, off_t O, int F);
209 #endif /*S_SPLINT_S */
210 #define afs_lseek(FD, O, F) lseek(FD, (off_t) (O), F)
211 #define afs_stat stat
212 #define afs_fstat fstat
213 #define afs_open open
214 #define afs_fopen fopen
215 #endif /* !O_LARGEFILE */
216 /*@=fcnmacros =macrofcndecl@*/
219 extern void *calloc();
221 static char *TimeStamp(time_t clock, int precision);
224 int debug; /* -d flag */
225 extern int Testing; /* -n flag */
226 int ListInodeOption; /* -i flag */
227 int ShowRootFiles; /* -r flag */
228 int RebuildDirs; /* -sal flag */
229 int Parallel = 4; /* -para X flag */
230 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
231 int forceR = 0; /* -b flag */
232 int ShowLog = 0; /* -showlog flag */
233 int ShowSuid = 0; /* -showsuid flag */
234 int ShowMounts = 0; /* -showmounts flag */
235 int orphans = ORPH_IGNORE; /* -orphans option */
240 int useSyslog = 0; /* -syslog flag */
241 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
250 #define MAXPARALLEL 32
252 int OKToZap; /* -o flag */
253 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
254 * in the volume header */
256 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
258 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
260 Device fileSysDevice; /* The device number of the current
261 * partition being salvaged */
265 char *fileSysPath; /* The path of the mounted partition currently
266 * being salvaged, i.e. the directory
267 * containing the volume headers */
269 char *fileSysPathName; /* NT needs this to make name pretty in log. */
270 IHandle_t *VGLinkH; /* Link handle for current volume group. */
271 int VGLinkH_cnt; /* # of references to lnk handle. */
272 struct DiskPartition64 *fileSysPartition; /* Partition being salvaged */
274 char *fileSysDeviceName; /* The block device where the file system
275 * being salvaged was mounted */
276 char *filesysfulldev;
278 int VolumeChanged; /* Set by any routine which would change the volume in
279 * a way which would require callbacks to be broken if the
280 * volume was put back on line by an active file server */
282 VolumeDiskData VolInfo; /* A copy of the last good or salvaged volume header dealt with */
284 int nVolumesInInodeFile; /* Number of read-write volumes summarized */
285 int inodeFd; /* File descriptor for inode file */
288 struct VnodeInfo vnodeInfo[nVNODECLASSES];
291 struct VolumeSummary *volumeSummaryp; /* Holds all the volumes in a part */
292 int nVolumes; /* Number of volumes (read-write and read-only)
293 * in volume summary */
299 /* Forward declarations */
300 /*@printflike@*/ void Log(const char *format, ...);
301 /*@printflike@*/ void Abort(const char *format, ...);
302 static int IsVnodeOrphaned(VnodeId vnode);
304 /* Uniquifier stored in the Inode */
309 return (u & 0x3fffff);
311 #if defined(AFS_SGI_EXMAG)
312 return (u & SGI_UNIQMASK);
315 #endif /* AFS_SGI_EXMAG */
320 BadError(register int aerror)
322 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
324 return 0; /* otherwise may be transient, e.g. EMFILE */
329 char *save_args[MAX_ARGS];
331 extern pthread_t main_thread;
332 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
335 /* Get the salvage lock if not already held. Hold until process exits. */
337 ObtainSalvageLock(void)
343 (int)CreateFile(AFSDIR_SERVER_SLVGLOCK_FILEPATH, 0, 0, NULL,
344 OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
345 if (salvageLock == (int)INVALID_HANDLE_VALUE) {
347 "salvager: There appears to be another salvager running! Aborted.\n");
352 afs_open(AFSDIR_SERVER_SLVGLOCK_FILEPATH, O_CREAT | O_RDWR, 0666);
353 if (salvageLock < 0) {
355 "salvager: can't open salvage lock file %s, aborting\n",
356 AFSDIR_SERVER_SLVGLOCK_FILEPATH);
359 #ifdef AFS_DARWIN_ENV
360 if (flock(salvageLock, LOCK_EX) == -1) {
362 if (lockf(salvageLock, F_LOCK, 0) == -1) {
365 "salvager: There appears to be another salvager running! Aborted.\n");
372 #ifdef AFS_SGI_XFS_IOPS_ENV
373 /* Check if the given partition is mounted. For XFS, the root inode is not a
374 * constant. So we check the hard way.
377 IsPartitionMounted(char *part)
380 struct mntent *mntent;
382 assert(mntfp = setmntent(MOUNTED, "r"));
383 while (mntent = getmntent(mntfp)) {
384 if (!strcmp(part, mntent->mnt_dir))
389 return mntent ? 1 : 1;
392 /* Check if the given inode is the root of the filesystem. */
393 #ifndef AFS_SGI_XFS_IOPS_ENV
395 IsRootInode(struct afs_stat *status)
398 * The root inode is not a fixed value in XFS partitions. So we need to
399 * see if the partition is in the list of mounted partitions. This only
400 * affects the SalvageFileSys path, so we check there.
402 return (status->st_ino == ROOTINODE);
407 #ifndef AFS_NAMEI_ENV
408 /* We don't want to salvage big files filesystems, since we can't put volumes on
412 CheckIfBigFilesFS(char *mountPoint, char *devName)
414 struct superblock fs;
417 if (strncmp(devName, "/dev/", 5)) {
418 (void)sprintf(name, "/dev/%s", devName);
420 (void)strcpy(name, devName);
423 if (ReadSuper(&fs, name) < 0) {
424 Log("Unable to read superblock. Not salvaging partition %s.\n",
428 if (IsBigFilesFileSystem(&fs)) {
429 Log("Partition %s is a big files filesystem, not salvaging.\n",
439 #define HDSTR "\\Device\\Harddisk"
440 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
442 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
447 static int dowarn = 1;
449 if (!QueryDosDevice(p1->devName, res, RES_LEN - 1))
451 if (strncmp(res, HDSTR, HDLEN)) {
454 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
455 res, HDSTR, p1->devName);
459 d1 = atoi(&res[HDLEN]);
461 if (!QueryDosDevice(p2->devName, res, RES_LEN - 1))
463 if (strncmp(res, HDSTR, HDLEN)) {
466 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
467 res, HDSTR, p2->devName);
471 d2 = atoi(&res[HDLEN]);
476 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
479 /* This assumes that two partitions with the same device number divided by
480 * PartsPerDisk are on the same disk.
483 SalvageFileSysParallel(struct DiskPartition64 *partP)
486 struct DiskPartition64 *partP;
487 int pid; /* Pid for this job */
488 int jobnumb; /* Log file job number */
489 struct job *nextjob; /* Next partition on disk to salvage */
491 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
492 struct job *thisjob = 0;
493 static int numjobs = 0;
494 static int jobcount = 0;
500 char logFileName[256];
504 /* We have a partition to salvage. Copy it into thisjob */
505 thisjob = (struct job *)malloc(sizeof(struct job));
507 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
510 memset(thisjob, 0, sizeof(struct job));
511 thisjob->partP = partP;
512 thisjob->jobnumb = jobcount;
514 } else if (jobcount == 0) {
515 /* We are asking to wait for all jobs (partp == 0), yet we never
518 Log("No file system partitions named %s* found; not salvaged\n",
519 VICE_PARTITION_PREFIX);
523 if (debug || Parallel == 1) {
525 SalvageFileSys(thisjob->partP, 0);
532 /* Check to see if thisjob is for a disk that we are already
533 * salvaging. If it is, link it in as the next job to do. The
534 * jobs array has 1 entry per disk being salvaged. numjobs is
535 * the total number of disks currently being salvaged. In
536 * order to keep the jobs array compact, when a disk is
537 * completed, the highest element in the jobs array is moved
538 * down to the now open slot.
540 for (j = 0; j < numjobs; j++) {
541 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
542 /* On same disk, add it to this list and return */
543 thisjob->nextjob = jobs[j]->nextjob;
544 jobs[j]->nextjob = thisjob;
551 /* Loop until we start thisjob or until all existing jobs are finished */
552 while (thisjob || (!partP && (numjobs > 0))) {
553 startjob = -1; /* No new job to start */
555 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
556 /* Either the max jobs are running or we have to wait for all
557 * the jobs to finish. In either case, we wait for at least one
558 * job to finish. When it's done, clean up after it.
560 pid = wait(&wstatus);
562 for (j = 0; j < numjobs; j++) { /* Find which job it is */
563 if (pid == jobs[j]->pid)
567 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
568 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
571 numjobs--; /* job no longer running */
572 oldjob = jobs[j]; /* remember */
573 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
574 free(oldjob); /* free the old job */
576 /* If there is another partition on the disk to salvage, then
577 * say we will start it (startjob). If not, then put thisjob there
578 * and say we will start it.
580 if (jobs[j]) { /* Another partitions to salvage */
581 startjob = j; /* Will start it */
582 } else { /* There is not another partition to salvage */
584 jobs[j] = thisjob; /* Add thisjob */
586 startjob = j; /* Will start it */
588 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
589 startjob = -1; /* Don't start it - already running */
593 /* We don't have to wait for a job to complete */
595 jobs[numjobs] = thisjob; /* Add this job */
597 startjob = numjobs; /* Will start it */
601 /* Start up a new salvage job on a partition in job slot "startjob" */
602 if (startjob != -1) {
604 Log("Starting salvage of file system partition %s\n",
605 jobs[startjob]->partP->name);
607 /* For NT, we not only fork, but re-exec the salvager. Pass in the
608 * commands and pass the child job number via the data path.
611 nt_SalvagePartition(jobs[startjob]->partP->name,
612 jobs[startjob]->jobnumb);
613 jobs[startjob]->pid = pid;
618 jobs[startjob]->pid = pid;
624 for (fd = 0; fd < 16; fd++)
631 openlog("salvager", LOG_PID, useSyslogFacility);
635 (void)afs_snprintf(logFileName, sizeof logFileName,
637 AFSDIR_SERVER_SLVGLOG_FILEPATH,
638 jobs[startjob]->jobnumb);
639 logFile = afs_fopen(logFileName, "w");
644 SalvageFileSys1(jobs[startjob]->partP, 0);
649 } /* while ( thisjob || (!partP && numjobs > 0) ) */
651 /* If waited for all jobs to complete, now collect log files and return */
653 if (!useSyslog) /* if syslogging - no need to collect */
656 for (i = 0; i < jobcount; i++) {
657 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
658 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
659 if ((passLog = afs_fopen(logFileName, "r"))) {
660 while (fgets(buf, sizeof(buf), passLog)) {
665 (void)unlink(logFileName);
674 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
676 if (!canfork || debug || Fork() == 0) {
677 SalvageFileSys1(partP, singleVolumeNumber);
678 if (canfork && !debug) {
683 Wait("SalvageFileSys");
687 get_DevName(char *pbuffer, char *wpath)
689 char pbuf[128], *ptr;
690 strcpy(pbuf, pbuffer);
691 ptr = (char *)strrchr(pbuf, '/');
697 ptr = (char *)strrchr(pbuffer, '/');
699 strcpy(pbuffer, ptr + 1);
706 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
709 char inodeListPath[256];
710 static char tmpDevName[100];
711 static char wpath[100];
712 struct VolumeSummary *vsp, *esp;
715 fileSysPartition = partP;
716 fileSysDevice = fileSysPartition->device;
717 fileSysPathName = VPartitionPath(fileSysPartition);
720 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
721 (void)sprintf(fileSysPath, "%s\\", fileSysPathName);
722 name = partP->devName;
724 fileSysPath = fileSysPathName;
725 strcpy(tmpDevName, partP->devName);
726 name = get_DevName(tmpDevName, wpath);
727 fileSysDeviceName = name;
728 filesysfulldev = wpath;
731 VLockPartition(partP->name);
732 if (singleVolumeNumber || ForceSalvage)
735 ForceSalvage = UseTheForceLuke(fileSysPath);
737 if (singleVolumeNumber) {
738 /* salvageserver already setup fssync conn for us */
739 if ((programType != salvageServer) && !VConnectFS()) {
740 Abort("Couldn't connect to file server\n");
742 AskOffline(singleVolumeNumber, partP->name);
745 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
746 partP->name, name, (Testing ? "(READONLY mode)" : ""));
748 Log("***Forced salvage of all volumes on this partition***\n");
753 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
760 assert((dirp = opendir(fileSysPath)) != NULL);
761 while ((dp = readdir(dirp))) {
762 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
763 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
765 Log("Removing old salvager temp files %s\n", dp->d_name);
766 strcpy(npath, fileSysPath);
768 strcat(npath, dp->d_name);
774 tdir = (tmpdir ? tmpdir : fileSysPath);
776 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
777 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
779 snprintf(inodeListPath, 255, "%s/salvage.inodes.%s.%d", tdir, name,
782 if (GetInodeSummary(inodeListPath, singleVolumeNumber) < 0) {
783 unlink(inodeListPath);
787 /* Using nt_unlink here since we're really using the delete on close
788 * semantics of unlink. In most places in the salvager, we really do
789 * mean to unlink the file at that point. Those places have been
790 * modified to actually do that so that the NT crt can be used there.
793 _open_osfhandle((long)nt_open(inodeListPath, O_RDWR, 0), O_RDWR);
794 nt_unlink(inodeListPath); /* NT's crt unlink won't if file is open. */
796 inodeFd = afs_open(inodeListPath, O_RDONLY);
797 unlink(inodeListPath);
800 Abort("Temporary file %s is missing...\n", inodeListPath);
801 if (ListInodeOption) {
805 /* enumerate volumes in the partition.
806 * figure out sets of read-only + rw volumes.
807 * salvage each set, read-only volumes first, then read-write.
808 * Fix up inodes on last volume in set (whether it is read-write
811 GetVolumeSummary(singleVolumeNumber);
813 for (i = j = 0, vsp = volumeSummaryp, esp = vsp + nVolumes;
814 i < nVolumesInInodeFile; i = j) {
815 VolumeId rwvid = inodeSummary[i].RWvolumeId;
817 j < nVolumesInInodeFile && inodeSummary[j].RWvolumeId == rwvid;
819 VolumeId vid = inodeSummary[j].volumeId;
820 struct VolumeSummary *tsp;
821 /* Scan volume list (from partition root directory) looking for the
822 * current rw volume number in the volume list from the inode scan.
823 * If there is one here that is not in the inode volume list,
825 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
827 DeleteExtraVolumeHeaderFile(vsp);
829 /* Now match up the volume summary info from the root directory with the
830 * entry in the volume list obtained from scanning inodes */
831 inodeSummary[j].volSummary = NULL;
832 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
833 if (tsp->header.id == vid) {
834 inodeSummary[j].volSummary = tsp;
840 /* Salvage the group of volumes (several read-only + 1 read/write)
841 * starting with the current read-only volume we're looking at.
843 SalvageVolumeGroup(&inodeSummary[i], j - i);
846 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
847 for (; vsp < esp; vsp++) {
849 DeleteExtraVolumeHeaderFile(vsp);
852 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
853 RemoveTheForce(fileSysPath);
855 if (!Testing && singleVolumeNumber) {
856 AskOnline(singleVolumeNumber, fileSysPartition->name);
858 /* Step through the volumeSummary list and set all volumes on-line.
859 * The volumes were taken off-line in GetVolumeSummary.
861 for (j = 0; j < nVolumes; j++) {
862 AskOnline(volumeSummaryp[j].header.id, fileSysPartition->name);
866 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
867 fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
870 close(inodeFd); /* SalvageVolumeGroup was the last which needed it. */
874 DeleteExtraVolumeHeaderFile(register struct VolumeSummary *vsp)
877 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", vsp->fileName, (Testing ? "would have been " : ""));
879 unlink(vsp->fileName);
883 CompareInodes(const void *_p1, const void *_p2)
885 register const struct ViceInodeInfo *p1 = _p1;
886 register const struct ViceInodeInfo *p2 = _p2;
887 if (p1->u.vnode.vnodeNumber == INODESPECIAL
888 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
889 VolumeId p1rwid, p2rwid;
891 (p1->u.vnode.vnodeNumber ==
892 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
894 (p2->u.vnode.vnodeNumber ==
895 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
900 if (p1->u.vnode.vnodeNumber == INODESPECIAL
901 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
902 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
903 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
904 if (p1->u.vnode.volumeId == p1rwid)
906 if (p2->u.vnode.volumeId == p2rwid)
908 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
910 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
911 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
912 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
914 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
916 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
918 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
920 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
922 /* The following tests are reversed, so that the most desirable
923 * of several similar inodes comes first */
924 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
926 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
927 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
931 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
932 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
937 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
939 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
940 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
944 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
945 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
950 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
952 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
953 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
957 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
958 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
963 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
965 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
966 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
970 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
971 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
980 CountVolumeInodes(register struct ViceInodeInfo *ip, int maxInodes,
981 register struct InodeSummary *summary)
983 int volume = ip->u.vnode.volumeId;
984 int rwvolume = volume;
985 register n, nSpecial;
986 register Unique maxunique;
989 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
991 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
993 rwvolume = ip->u.special.parentId;
994 /* This isn't quite right, as there could (in error) be different
995 * parent inodes in different special vnodes */
997 if (maxunique < ip->u.vnode.vnodeUniquifier)
998 maxunique = ip->u.vnode.vnodeUniquifier;
1002 summary->volumeId = volume;
1003 summary->RWvolumeId = rwvolume;
1004 summary->nInodes = n;
1005 summary->nSpecialInodes = nSpecial;
1006 summary->maxUniquifier = maxunique;
1010 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, int singleVolumeNumber, void *rock)
1012 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1013 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1014 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1019 * Collect list of inodes in file named by path. If a truly fatal error,
1020 * unlink the file and abort. For lesser errors, return -1. The file will
1021 * be unlinked by the caller.
1024 GetInodeSummary(char *path, VolumeId singleVolumeNumber)
1026 struct afs_stat status;
1028 struct ViceInodeInfo *ip;
1029 struct InodeSummary summary;
1030 char summaryFileName[50];
1033 char *dev = fileSysPath;
1034 char *wpath = fileSysPath;
1036 char *dev = fileSysDeviceName;
1037 char *wpath = filesysfulldev;
1039 char *part = fileSysPath;
1042 /* This file used to come from vfsck; cobble it up ourselves now... */
1044 ListViceInodes(dev, fileSysPath, path,
1045 singleVolumeNumber ? OnlyOneVolume : 0,
1046 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1048 Log("*** I/O error %d when writing a tmp inode file %s; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, path, dev);
1052 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1054 if (forceSal && !ForceSalvage) {
1055 Log("***Forced salvage of all volumes on this partition***\n");
1058 inodeFd = afs_open(path, O_RDWR);
1059 if (inodeFd == -1 || afs_fstat(inodeFd, &status) == -1) {
1061 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1063 tdir = (tmpdir ? tmpdir : part);
1065 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1066 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp"));
1068 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1069 "%s/salvage.temp.%d", tdir, getpid());
1071 summaryFile = afs_fopen(summaryFileName, "a+");
1072 if (summaryFile == NULL) {
1075 Abort("Unable to create inode summary file\n");
1077 if (!canfork || debug || Fork() == 0) {
1079 unsigned long st_size=(unsigned long) status.st_size;
1080 nInodes = st_size / sizeof(struct ViceInodeInfo);
1082 fclose(summaryFile);
1084 unlink(summaryFileName);
1085 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1086 RemoveTheForce(fileSysPath);
1088 struct VolumeSummary *vsp;
1091 GetVolumeSummary(singleVolumeNumber);
1093 for (i = 0, vsp = volumeSummaryp; i < nVolumes; i++) {
1095 DeleteExtraVolumeHeaderFile(vsp);
1098 Log("%s vice inodes on %s; not salvaged\n",
1099 singleVolumeNumber ? "No applicable" : "No", dev);
1102 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1104 fclose(summaryFile);
1107 unlink(summaryFileName);
1109 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1112 if (read(inodeFd, ip, st_size) != st_size) {
1113 fclose(summaryFile);
1116 unlink(summaryFileName);
1117 Abort("Unable to read inode table; %s not salvaged\n", dev);
1119 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1120 if (afs_lseek(inodeFd, 0, SEEK_SET) == -1
1121 || write(inodeFd, ip, st_size) != st_size) {
1122 fclose(summaryFile);
1125 unlink(summaryFileName);
1126 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1130 CountVolumeInodes(ip, nInodes, &summary);
1131 if (fwrite(&summary, sizeof(summary), 1, summaryFile) != 1) {
1132 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1133 fclose(summaryFile);
1137 summary.index += (summary.nInodes);
1138 nInodes -= summary.nInodes;
1139 ip += summary.nInodes;
1141 /* Following fflush is not fclose, because if it was debug mode would not work */
1142 if (fflush(summaryFile) == EOF || fsync(fileno(summaryFile)) == -1) {
1143 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1144 fclose(summaryFile);
1148 if (canfork && !debug) {
1153 if (Wait("Inode summary") == -1) {
1154 fclose(summaryFile);
1157 unlink(summaryFileName);
1158 Exit(1); /* salvage of this partition aborted */
1161 assert(afs_fstat(fileno(summaryFile), &status) != -1);
1162 if (status.st_size != 0) {
1164 unsigned long st_status=(unsigned long)status.st_size;
1165 inodeSummary = (struct InodeSummary *)malloc(st_status);
1166 assert(inodeSummary != NULL);
1167 /* For GNU we need to do lseek to get the file pointer moved. */
1168 assert(afs_lseek(fileno(summaryFile), 0, SEEK_SET) == 0);
1169 ret = read(fileno(summaryFile), inodeSummary, st_status);
1170 assert(ret == st_status);
1172 nVolumesInInodeFile =(unsigned long)(status.st_size) / sizeof(struct InodeSummary);
1173 Log("%d nVolumesInInodeFile %d \n",nVolumesInInodeFile,(unsigned long)(status.st_size));
1174 fclose(summaryFile);
1176 unlink(summaryFileName);
1180 /* Comparison routine for volume sort.
1181 This is set up so that a read-write volume comes immediately before
1182 any read-only clones of that volume */
1184 CompareVolumes(const void *_p1, const void *_p2)
1186 register const struct VolumeSummary *p1 = _p1;
1187 register const struct VolumeSummary *p2 = _p2;
1188 if (p1->header.parent != p2->header.parent)
1189 return p1->header.parent < p2->header.parent ? -1 : 1;
1190 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1192 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1194 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1198 GetVolumeSummary(VolumeId singleVolumeNumber)
1201 afs_int32 nvols = 0;
1202 struct VolumeSummary *vsp, vs;
1203 struct VolumeDiskHeader diskHeader;
1206 /* Get headers from volume directory */
1207 dirp = opendir(fileSysPath);
1209 Abort("Can't read directory %s; not salvaged\n", fileSysPath);
1210 if (!singleVolumeNumber) {
1211 while ((dp = readdir(dirp))) {
1212 char *p = dp->d_name;
1213 p = strrchr(dp->d_name, '.');
1214 if (p != NULL && strcmp(p, VHDREXT) == 0) {
1216 if ((fd = afs_open(dp->d_name, O_RDONLY)) != -1
1217 && read(fd, (char *)&diskHeader, sizeof(diskHeader))
1218 == sizeof(diskHeader)
1219 && diskHeader.stamp.magic == VOLUMEHEADERMAGIC) {
1220 DiskToVolumeHeader(&vs.header, &diskHeader);
1228 dirp = opendir("."); /* No rewinddir for NT */
1235 (struct VolumeSummary *)malloc(nvols *
1236 sizeof(struct VolumeSummary));
1239 (struct VolumeSummary *)malloc(20 * sizeof(struct VolumeSummary));
1240 assert(volumeSummaryp != NULL);
1243 vsp = volumeSummaryp;
1244 while ((dp = readdir(dirp))) {
1245 char *p = dp->d_name;
1246 p = strrchr(dp->d_name, '.');
1247 if (p != NULL && strcmp(p, VHDREXT) == 0) {
1250 if ((fd = afs_open(dp->d_name, O_RDONLY)) == -1
1251 || read(fd, &diskHeader, sizeof(diskHeader))
1252 != sizeof(diskHeader)
1253 || diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
1258 if (!singleVolumeNumber) {
1260 Log("%s/%s is not a legitimate volume header file; %sdeleted\n", fileSysPathName, dp->d_name, (Testing ? "it would have been " : ""));
1265 char nameShouldBe[64];
1266 DiskToVolumeHeader(&vsp->header, &diskHeader);
1267 if (singleVolumeNumber && vsp->header.id == singleVolumeNumber
1268 && vsp->header.parent != singleVolumeNumber) {
1269 if (programType == salvageServer) {
1270 #ifdef SALVSYNC_BUILD_CLIENT
1271 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1272 vsp->header.id, vsp->header.parent);
1273 if (SALVSYNC_LinkVolume(vsp->header.parent,
1275 fileSysPartition->name,
1277 Log("schedule request failed\n");
1280 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1282 Log("%u is a read-only volume; not salvaged\n",
1283 singleVolumeNumber);
1287 if (!singleVolumeNumber
1288 || (vsp->header.id == singleVolumeNumber
1289 || vsp->header.parent == singleVolumeNumber)) {
1290 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1291 VFORMAT, vsp->header.id);
1292 if (singleVolumeNumber
1293 && vsp->header.id != singleVolumeNumber)
1294 AskOffline(vsp->header.id, fileSysPartition->name);
1295 if (strcmp(nameShouldBe, dp->d_name)) {
1297 Log("Volume header file %s is incorrectly named; %sdeleted (it will be recreated later, if necessary)\n", dp->d_name, (Testing ? "it would have been " : ""));
1301 vsp->fileName = ToString(dp->d_name);
1311 qsort(volumeSummaryp, nVolumes, sizeof(struct VolumeSummary),
1315 /* Find the link table. This should be associated with the RW volume or, at
1316 * an RO-only site, with the RO volume. For now, be cautious and hunt carefully.
/* FindLinkHandle
 *
 * Search the special inodes of a volume group for the link table.
 *
 * isp       - first of nVols consecutive inode summaries to examine
 * nVols     - number of summaries in isp[]
 * allInodes - base that isp[i].index is applied to in order to reach the
 *             inode records of volume i
 *
 * For each volume the first isp[i].nSpecialInodes records are inspected and
 * the inode number of the first record whose special type is VI_LINKTABLE
 * is returned.
 * NOTE(review): confirm the value returned when no link table inode is found.
 */
1319 FindLinkHandle(register struct InodeSummary *isp, int nVols,
1320 struct ViceInodeInfo *allInodes)
1323 struct ViceInodeInfo *ip;
1325 for (i = 0; i < nVols; i++) {
1326 ip = allInodes + isp[i].index;
/* Only the leading nSpecialInodes records of this volume are candidates. */
1327 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1328 if (ip[j].u.special.type == VI_LINKTABLE)
1329 return ip[j].inodeNumber;
/* CreateLinkTable
 *
 * Create (if needed) and initialise the link table for a volume group.
 *
 * isp - inode summary of the volume the link table is created for; its
 *       RWvolumeId identifies the volume group
 * ino - link table inode to use; when it is not VALID_INO a new special
 *       inode of type VI_LINKTABLE is requested through IH_CREATE
 *
 * The global handle VGLinkH is initialised for the inode, the file is
 * truncated to a version stamp plus one short, and a stamp carrying
 * LINKTABLEMAGIC / LINKTABLEVERSION is written at its start.  Allocation,
 * open, truncate and write failures are reported with a message naming the
 * volume and errno.  If a volume summary is attached to isp, its header is
 * updated to point at the link table inode.
 */
1336 CreateLinkTable(register struct InodeSummary *isp, Inode ino)
1338 struct versionStamp version;
1341 if (!VALID_INO(ino))
1343 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
1344 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1345 if (!VALID_INO(ino))
1347 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1348 isp->RWvolumeId, errno);
1349 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1350 fdP = IH_OPEN(VGLinkH);
1352 Abort("Can't open link table for volume %u (error = %d)\n",
1353 isp->RWvolumeId, errno);
1355 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1356 Abort("Can't truncate link table for volume %u (error = %d)\n",
1357 isp->RWvolumeId, errno);
1359 version.magic = LINKTABLEMAGIC;
1360 version.version = LINKTABLEVERSION;
/* A failure here is a write failure, so say so instead of "truncate". */
1362 if (FDH_WRITE(fdP, (char *)&version, sizeof(version))
1364 Abort("Can't write link table for volume %u (error = %d)\n",
1365 isp->RWvolumeId, errno);
1367 FDH_REALLYCLOSE(fdP);
1369 /* If the volume summary exists (i.e., the V*.vol header file exists),
1370 * then set this inode there as well.
1372 if (isp->volSummary)
1373 isp->volSummary->header.linkTable = ino;
/* Thread entry body: arg is the SVGParms_t filled in by the caller; unpack
 * it and run DoSalvageVolumeGroup() on the inode summary array and count it
 * carries.
 * NOTE(review): presumably this is the nt_SVG start routine handed to
 * pthread_create() by SalvageVolumeGroup - confirm.
 */
1382 SVGParms_t *parms = (SVGParms_t *) arg;
1383 DoSalvageVolumeGroup(parms->svgp_inodeSummaryp, parms->svgp_count);
/* SalvageVolumeGroup
 *
 * Entry point for salvaging one volume group.
 *
 * isp   - first inode summary of the group
 * nVols - number of summaries in the group
 *
 * The per-volume global VolInfo is zeroed first.  In the code visible here
 * (which ends at the AFS_NT40_ENV #endif) the work is handed to a joinable
 * pthread running nt_SVG with isp/nVols packed into parms, and the caller
 * blocks in pthread_join() until it finishes.  Each pthread setup failure
 * is logged with the RW volume id of the group.
 */
1388 SalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1391 pthread_attr_t tattr;
1395 /* Initialize per volume global variables, even if later code does so */
1399 memset(&VolInfo, 0, sizeof(VolInfo));
/* Package the arguments for the worker thread. */
1401 parms.svgp_inodeSummaryp = isp;
1402 parms.svgp_count = nVols;
1403 code = pthread_attr_init(&tattr);
1405 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1409 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1411 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1414 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1416 Log("Failed to create thread to salvage volume group %u\n",
/* Wait for the salvage thread to finish before returning. */
1420 (void)pthread_join(tid, NULL);
1422 #endif /* AFS_NT40_ENV */
/* DoSalvageVolumeGroup
 *
 * Salvage one volume group: the nVols inode summaries starting at isp.
 * haveRWvolume is set when isp->volumeId == isp->RWvolumeId and that entry
 * has special inodes; without it, index 0 is left out of the per-volume
 * loop (salvageTo becomes 1).
 *
 * Steps visible in this function:
 *  - unless ForceSalvage is set, QuickCheck() is consulted first;
 *  - when canfork && !debug, Fork() is called and the side that sees a
 *    non-zero result calls Wait();
 *  - the group's inode records (the sum of isp[i].nInodes) are read from
 *    inodeFd into a malloc'ed array;
 *  - under AFS_NAMEI_ENV the link table is located with FindLinkHandle(),
 *    or recreated with CreateLinkTable() and the non-special inodes are
 *    given a link count of 1 in it;
 *  - volumes are processed from index nVols - 1 down to salvageTo, each
 *    with a pass at check == 1 followed by a pass at check == 0 through
 *    SalvageVolumeHeaderFile() and SalvageVnodes(), with MaybeZapVolume()
 *    called when one of them reports failure;
 *  - inodes whose linkCount is still positive or negative are passed to
 *    IH_DEC or IH_INC respectively, the link table inode being handled last;
 *  - SalvageVolume() runs the directory checks and VGLinkH is released.
 *
 * NOTE(review): read() is invoked inside assert() below; confirm the
 * project's assert is never compiled out, otherwise the inode array would
 * not be filled.
 */
1425 DoSalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1427 struct ViceInodeInfo *inodes, *allInodes, *ip;
1428 int i, totalInodes, size, salvageTo;
1432 int dec_VGLinkH = 0;
1434 FdHandle_t *fdP = NULL;
1437 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1438 && isp->nSpecialInodes > 0);
1439 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1440 if (!ForceSalvage && QuickCheck(isp, nVols))
1443 if (ShowMounts && !haveRWvolume)
1445 if (canfork && !debug && Fork() != 0) {
1446 (void)Wait("Salvage volume group");
1449 for (i = 0, totalInodes = 0; i < nVols; i++)
1450 totalInodes += isp[i].nInodes;
1451 size = totalInodes * sizeof(struct ViceInodeInfo);
1452 inodes = (struct ViceInodeInfo *)malloc(size);
1453 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1454 * for the partition, if all the inodes
1455 * had been read into memory */
1457 (inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1459 assert(read(inodeFd, inodes, size) == size);
1461 /* Don't try to salvage a read write volume if there isn't one on this
1463 salvageTo = haveRWvolume ? 0 : 1;
1465 #ifdef AFS_NAMEI_ENV
1466 ino = FindLinkHandle(isp, nVols, allInodes);
1467 if (VALID_INO(ino)) {
1468 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1469 fdP = IH_OPEN(VGLinkH);
1471 if (!VALID_INO(ino) || fdP == NULL) {
1472 Log("%s link table for volume %u.\n",
1473 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1475 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1478 struct ViceInodeInfo *ip;
1479 CreateLinkTable(isp, ino);
1480 fdP = IH_OPEN(VGLinkH);
1481 /* Sync fake 1 link counts to the link table, now that it exists */
1483 for (i = 0; i < nVols; i++) {
1484 ip = allInodes + isp[i].index;
1485 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1487 nt_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1489 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1497 FDH_REALLYCLOSE(fdP);
1499 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1502 /* Salvage in reverse order--read/write volume last; this way any
1503 * Inodes not referenced by the time we salvage the read/write volume
1504 * can be picked up by the read/write volume */
1505 /* ACTUALLY, that's not done right now--the inodes just vanish */
1506 for (i = nVols - 1; i >= salvageTo; i--) {
1508 struct InodeSummary *lisp = &isp[i];
1509 #ifdef AFS_NAMEI_ENV
1510 /* If only the RO is present on this partition, the link table
1511 * shows up as a RW volume special file. Need to make sure the
1512 * salvager doesn't try to salvage the non-existent RW.
1514 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1515 /* If this only special inode is the link table, continue */
1516 if (inodes->u.special.type == VI_LINKTABLE) {
1523 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1524 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1525 /* Check inodes twice. The second time do things seriously. This
1526 * way the whole RO volume can be deleted, below, if anything goes wrong */
1527 for (check = 1; check >= 0; check--) {
1529 if (SalvageVolumeHeaderFile(lisp, allInodes, rw, check, &deleteMe)
1531 MaybeZapVolume(lisp, "Volume header", deleteMe, check);
1532 if (rw && deleteMe) {
1533 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1534 * volume won't be called */
1540 if (rw && check == 1)
1542 if (SalvageVnodes(isp, lisp, allInodes, check) == -1) {
1543 MaybeZapVolume(lisp, "Vnode index", 0, check);
1549 /* Fix actual inode counts */
1551 Log("totalInodes %d\n",totalInodes);
1552 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1553 static int TraceBadLinkCounts = 0;
1554 #ifdef AFS_NAMEI_ENV
1555 if (VGLinkH->ih_ino == ip->inodeNumber) {
1556 dec_VGLinkH = ip->linkCount - VGLinkH_cnt;
1557 VGLinkH_p1 = ip->u.param[0];
1558 continue; /* Deal with this last. */
1561 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1562 TraceBadLinkCounts--; /* Limit reports, per volume */
1563 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(NULL, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
1565 while (ip->linkCount > 0) {
1566 /* below used to assert, not break */
1568 if (IH_DEC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1569 Log("idec failed. inode %s errno %d\n",
1570 PrintInode(NULL, ip->inodeNumber), errno);
1576 while (ip->linkCount < 0) {
1577 /* these used to be asserts */
1579 if (IH_INC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1580 Log("iinc failed. inode %s errno %d\n",
1581 PrintInode(NULL, ip->inodeNumber), errno);
1588 #ifdef AFS_NAMEI_ENV
1589 while (dec_VGLinkH > 0) {
1590 if (IH_DEC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1591 Log("idec failed on link table, errno = %d\n", errno);
1595 while (dec_VGLinkH < 0) {
1596 if (IH_INC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1597 Log("iinc failed on link table, errno = %d\n", errno);
1604 /* Directory consistency checks on the rw volume */
1606 SalvageVolume(isp, VGLinkH);
1607 IH_RELEASE(VGLinkH);
1609 if (canfork && !debug) {
/* QuickCheck
 *
 * Decide from the on-disk volume info headers alone whether a volume group
 * needs a real salvage.
 *
 * isp   - first inode summary of the group
 * nVols - number of summaries in the group
 *
 * For each volume the VolumeDiskData is read through the volumeInfo inode
 * named in its volume summary.  A header is accepted when the read returns
 * the full structure, the magic is VOLUMEINFOMAGIC, dontSalvage equals
 * DONT_SALVAGE, and neither needsSalvaged nor destroyMe is set.  An accepted
 * header that is still marked inUse is rewritten with inUse cleared and
 * inService set.
 * NOTE(review): the return statements are not shown here; the caller skips
 * the salvage when this returns non-zero - confirm the exact values.
 */
1616 QuickCheck(register struct InodeSummary *isp, int nVols)
1618 /* Check headers BEFORE forking */
1622 for (i = 0; i < nVols; i++) {
1623 struct VolumeSummary *vs = isp[i].volSummary;
1624 VolumeDiskData volHeader;
1626 /* Don't salvage just because phantom rw volume is there... */
1627 /* (If a read-only volume exists, read/write inodes must also exist) */
1628 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
1632 IH_INIT(h, fileSysDevice, vs->header.parent, vs->header.volumeInfo);
1633 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
1634 == sizeof(volHeader)
1635 && volHeader.stamp.magic == VOLUMEINFOMAGIC
1636 && volHeader.dontSalvage == DONT_SALVAGE
1637 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
/* Header is clean; if it was left marked in use, clear that on disk. */
1638 if (volHeader.inUse != 0) {
1639 volHeader.inUse = 0;
1640 volHeader.inService = 1;
1642 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
1643 != sizeof(volHeader)) {
1659 /* SalvageVolumeHeaderFile
1661 * Salvage the top level V*.vol header file. Make sure the special files
1662 * exist and that there are no duplicates.
1664 * Calls SalvageHeader for each possible type of volume special file.
/* Parameters and behaviour of SalvageVolumeHeaderFile:
 *
 * isp      - inode summary of the volume whose V*.vol header is checked
 * inodes   - inode records; this volume's special inodes are the
 *            nSpecialInodes entries starting at inodes[isp->index]
 * RW       - non-zero when the volume is the read/write volume
 * check    - non-zero for the report-only pass; link counts are only
 *            adjusted and the RO in-use bit only cleared when it is zero
 * deleteMe - passed through to SalvageHeader()
 *
 * A fresh header (tempHeader) is built from the special inodes found,
 * duplicate special inode types abort the salvage of the volume, and each
 * special file is then checked by SalvageHeader().  If the volume has no
 * summary, a header file is created and a new VolumeSummary allocated; if
 * the existing header differs from tempHeader it is rewritten.  Finally
 * the volumeInfo handle of the summary is initialised.
 */
1668 SalvageVolumeHeaderFile(register struct InodeSummary *isp,
1669 register struct ViceInodeInfo *inodes, int RW,
1670 int check, int *deleteMe)
1674 register struct ViceInodeInfo *ip;
1675 int allinodesobsolete = 1;
1676 struct VolumeDiskHeader diskHeader;
1680 memset(&tempHeader, 0, sizeof(tempHeader));
1681 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
1682 tempHeader.stamp.version = VOLUMEHEADERVERSION;
1683 tempHeader.id = isp->volumeId;
1684 tempHeader.parent = isp->RWvolumeId;
1685 /* Check for duplicates (inodes are sorted by type field) */
1686 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
1687 ip = &inodes[isp->index + i];
1688 if (ip->u.special.type == (ip + 1)->u.special.type) {
1690 Log("Duplicate special inodes in volume header; salvage of volume %u aborted\n", isp->volumeId);
1694 for (i = 0; i < isp->nSpecialInodes; i++) {
1695 ip = &inodes[isp->index + i];
1696 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
1698 Log("Rubbish header inode\n");
1701 Log("Rubbish header inode; deleted\n");
1702 } else if (!stuff[ip->u.special.type - 1].obsolete) {
1703 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
1704 if (!check && ip->u.special.type != VI_LINKTABLE)
1705 ip->linkCount--; /* Keep the inode around */
1706 allinodesobsolete = 0;
1710 if (allinodesobsolete) {
1717 VGLinkH_cnt++; /* one for every header. */
1719 if (!RW && !check && isp->volSummary) {
1720 ClearROInUseBit(isp->volSummary);
1724 for (i = 0; i < MAXINODETYPE; i++) {
1725 if (stuff[i].inodeType == VI_LINKTABLE) {
1726 /* Gross hack: SalvageHeader does a bcmp on the volume header.
1727 * And we may have recreated the link table earlier, so set the
1728 * RW header as well.
1730 if (VALID_INO(VGLinkH->ih_ino)) {
1731 *stuff[i].inode = VGLinkH->ih_ino;
1735 if (SalvageHeader(&stuff[i], isp, check, deleteMe) == -1 && check)
1739 if (isp->volSummary == NULL) {
1741 (void)afs_snprintf(name, sizeof name, VFORMAT, isp->volumeId);
1743 Log("No header file for volume %u\n", isp->volumeId);
1747 Log("No header file for volume %u; %screating %s/%s\n",
1748 isp->volumeId, (Testing ? "it would have been " : ""),
1749 fileSysPathName, name);
1750 headerFd = afs_open(name, O_RDWR | O_CREAT | O_TRUNC, 0644);
1751 assert(headerFd != -1);
1752 isp->volSummary = (struct VolumeSummary *)
1753 malloc(sizeof(struct VolumeSummary));
/* The summary is dereferenced immediately below; fail loudly on OOM. */
assert(isp->volSummary != NULL);
1754 isp->volSummary->fileName = ToString(name);
1757 /* hack: these two fields are obsolete... */
1758 isp->volSummary->header.volumeAcl = 0;
1759 isp->volSummary->header.volumeMountTable = 0;
1762 (&isp->volSummary->header, &tempHeader,
1763 sizeof(struct VolumeHeader))) {
1764 /* We often remove the name before calling us, so we make a fake one up */
1765 if (isp->volSummary->fileName) {
1766 strcpy(name, isp->volSummary->fileName);
1768 (void)afs_snprintf(name, sizeof name, VFORMAT, isp->volumeId);
1769 isp->volSummary->fileName = ToString(name);
1772 Log("Header file %s is damaged or no longer valid%s\n", name,
1773 (check ? "" : "; repairing"));
1777 headerFd = afs_open(name, O_RDWR | O_TRUNC, 0644);
1778 assert(headerFd != -1);
1782 memcpy(&isp->volSummary->header, &tempHeader,
1783 sizeof(struct VolumeHeader));
1786 Log("It would have written a new header file for volume %u\n",
1789 VolumeHeaderToDisk(&diskHeader, &tempHeader);
1790 if (write(headerFd, &diskHeader, sizeof(struct VolumeDiskHeader))
1791 != sizeof(struct VolumeDiskHeader)) {
1792 Log("Couldn't rewrite volume header file!\n");
1799 IH_INIT(isp->volSummary->volumeInfoHandle, fileSysDevice, isp->RWvolumeId,
1800 isp->volSummary->header.volumeInfo);
/* SalvageHeader
 *
 * Check, and if necessary recreate, one special file of a volume header.
 *
 * sp    - descriptor of the special file: inodeType, description, expected
 *         stamp and size, and a pointer (sp->inode) to the header field
 *         holding its inode number
 * isp   - inode summary of the volume being salvaged
 * check - non-zero for the report-only pass
 *
 * Visible behaviour:
 *  - when built without AFS_NAMEI_ENV the VI_LINKTABLE type gets special
 *    treatment up front;
 *  - a zero *sp->inode is reported as missing and a new special inode is
 *    requested through IH_CREATE;
 *  - the inode is opened; with OKToZap set and a BadError() errno the volume
 *    is slated for destruction, otherwise an open failure is fatal;
 *  - sp->size bytes are read and the magic compared with sp->stamp.magic; a
 *    mismatch is reported as corruption;
 *  - a VI_VOLINFO header whose destroyMe equals DESTROY_ME is handled
 *    separately;
 *  - when recreating (and not Testing) the file is truncated and either a
 *    fresh VolumeDiskData named "bogus.<volumeId>" (for VI_VOLINFO) or just
 *    the version stamp is written at offset 0;
 *  - for VI_VOLINFO the header is copied into the global VolInfo and its
 *    update or creation time is logged.
 * NOTE(review): the return statements are not shown here; the caller tests
 * the result against -1.
 */
1805 SalvageHeader(register struct stuff *sp, struct InodeSummary *isp, int check,
1809 VolumeDiskData volumeInfo;
1810 struct versionStamp fileHeader;
1819 #ifndef AFS_NAMEI_ENV
1820 if (sp->inodeType == VI_LINKTABLE)
1823 if (*(sp->inode) == 0) {
1825 Log("Missing inode in volume header (%s)\n", sp->description);
1829 Log("Missing inode in volume header (%s); %s\n", sp->description,
1830 (Testing ? "it would have recreated it" : "recreating"));
1833 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
1834 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
1835 if (!VALID_INO(*(sp->inode)))
1837 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
1838 sp->description, errno);
1843 IH_INIT(specH, fileSysDevice, isp->RWvolumeId, *(sp->inode));
1844 fdP = IH_OPEN(specH);
1845 if (OKToZap && (fdP == NULL) && BadError(errno)) {
1846 /* bail out early and destroy the volume */
1848 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
1855 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
1856 sp->description, errno);
1859 && (FDH_READ(fdP, (char *)&header, sp->size) != sp->size
1860 || header.fileHeader.magic != sp->stamp.magic)) {
1862 Log("Part of the header (%s) is corrupted\n", sp->description);
1863 FDH_REALLYCLOSE(fdP);
1867 Log("Part of the header (%s) is corrupted; recreating\n",
1871 if (sp->inodeType == VI_VOLINFO
1872 && header.volumeInfo.destroyMe == DESTROY_ME) {
1875 FDH_REALLYCLOSE(fdP);
1879 if (recreate && !Testing) {
1882 ("Internal error: recreating volume header (%s) in check mode\n",
1884 code = FDH_TRUNC(fdP, 0);
1886 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
1887 sp->description, errno);
1889 /* The following code should be moved into vutil.c */
1890 if (sp->inodeType == VI_VOLINFO) {
/* Rebuild a minimal volume info record: out of service, not blessed. */
1892 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
1893 header.volumeInfo.stamp = sp->stamp;
1894 header.volumeInfo.id = isp->volumeId;
1895 header.volumeInfo.parentId = isp->RWvolumeId;
1896 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
1897 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
1898 isp->volumeId, isp->volumeId);
1899 header.volumeInfo.inService = 0;
1900 header.volumeInfo.blessed = 0;
1901 /* The + 1000 is a hack in case there are any files out in venus caches */
1902 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
1903 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
1904 header.volumeInfo.needsCallback = 0;
1905 gettimeofday(&tp, 0);
1906 header.volumeInfo.creationDate = tp.tv_sec;
1907 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
1909 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
1910 sp->description, errno);
1913 FDH_WRITE(fdP, (char *)&header.volumeInfo,
1914 sizeof(header.volumeInfo));
1915 if (code != sizeof(header.volumeInfo)) {
1918 ("Unable to write volume header file (%s) (errno = %d)\n",
1919 sp->description, errno);
1920 Abort("Unable to write entire volume header file (%s)\n",
1924 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
1926 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
1927 sp->description, errno);
1929 code = FDH_WRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp));
1930 if (code != sizeof(sp->stamp)) {
1933 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
1934 sp->description, errno);
1936 ("Unable to write entire version stamp in volume header file (%s)\n",
1941 FDH_REALLYCLOSE(fdP);
1943 if (sp->inodeType == VI_VOLINFO) {
/* Publish the volume info for the rest of the salvage and log its age. */
1944 VolInfo = header.volumeInfo;
1947 if (VolInfo.updateDate) {
1948 strcpy(update, TimeStamp(VolInfo.updateDate, 0));
1950 Log("%s (%u) %supdated %s\n", VolInfo.name, VolInfo.id,
1951 (Testing ? "it would have been " : ""), update);
1953 strcpy(update, TimeStamp(VolInfo.creationDate, 0));
1955 Log("%s (%u) not updated (created %s)\n", VolInfo.name,
1956 VolInfo.id, update);
/* SalvageVnodes
 *
 * Salvage both vnode indexes (small and large) of one volume against the
 * non-special inodes of the read/write volume.
 *
 * rwIsp   - inode summary of the read/write volume; supplies the inode range
 * thisIsp - inode summary of the volume whose indexes are salvaged; RW is
 *           true when it is the same summary as rwIsp
 * inodes  - inode records indexed by rwIsp->index
 * check   - non-zero for the report-only pass
 *
 * The inode range starts right after the RW volume's special inodes and
 * covers rwIsp->nInodes - rwIsp->nSpecialInodes records.  Returns 0 when
 * both SalvageIndex() results are 0, otherwise -1; in check mode a small
 * index result of -1 is acted on before the large index is attempted.
 */
1966 SalvageVnodes(register struct InodeSummary *rwIsp,
1967 register struct InodeSummary *thisIsp,
1968 register struct ViceInodeInfo *inodes, int check)
1970 int ilarge, ismall, ioffset, RW, nInodes;
1971 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
1974 RW = (rwIsp == thisIsp);
1975 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
1977 SalvageIndex(thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
1978 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
1979 if (check && ismall == -1)
1982 SalvageIndex(thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
1983 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
1984 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/* SalvageIndex
 *
 * Walk one vnode index file and reconcile every vnode with the inode
 * records found on disk.
 *
 * ino        - inode of the vnode index file
 * class      - vnode class of the index; selects the VnodeClassInfo entry
 * RW         - non-zero when the index belongs to the read/write volume
 * ip         - inode records to match against (advanced by vnode number)
 * nInodes    - number of records reachable through ip
 * volSummary - summary of the volume owning the index
 * check      - non-zero for the report-only pass; the function asserts that
 *              no vnode was changed while check is set
 *
 * The index is opened through an inode handle and a stream.  Its size must
 * be an exact multiple of vcp->diskSize; the first slot is skipped and the
 * remaining slots are read one vnode at a time.  For each vnode whose type
 * is not vNull:
 *  - a vnode with inode number 0 or a magic different from vcp->magic is
 *    zeroed;
 *  - inode records with a smaller vnode number are skipped, then a record
 *    with the same vnode number is selected, preferring one whose inode
 *    number equals the one stored in the vnode;
 *  - with a matching record, the uniquifier, data version, inode number and
 *    length of the vnode are brought in line with the record and the
 *    record's linkCount is decremented so the inode is kept;
 *  - without one, a vnode that names an inode, or that is a directory, is
 *    reported and zeroed.
 * Changed vnodes are written back unless Testing is set, and VolumeChanged
 * is set to 1.
 * NOTE(review): the return statements are not shown here; SalvageVnodes
 * treats 0 as success and -1 as failure.
 * NOTE(review): STREAM_SEEK is invoked inside assert(); confirm the
 * project's assert is never compiled out.
 */
1988 SalvageIndex(Inode ino, VnodeClass class, int RW,
1989 register struct ViceInodeInfo *ip, int nInodes,
1990 struct VolumeSummary *volSummary, int check)
1992 VolumeId volumeNumber;
1993 char buf[SIZEOF_LARGEDISKVNODE];
1994 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
1996 StreamHandle_t *file;
1997 struct VnodeClassInfo *vcp;
1999 afs_fsize_t vnodeLength;
2000 int vnodeIndex, nVnodes;
2001 afs_ino_str_t stmp1, stmp2;
2005 volumeNumber = volSummary->header.id;
2006 IH_INIT(handle, fileSysDevice, volSummary->header.parent, ino);
2007 fdP = IH_OPEN(handle);
2008 assert(fdP != NULL);
2009 file = FDH_FDOPEN(fdP, "r+");
2010 assert(file != NULL);
2011 vcp = &VnodeClassInfo[class];
2012 size = OS_SIZE(fdP->fd_fd);
2014 nVnodes = (size / vcp->diskSize) - 1;
2016 assert((nVnodes + 1) * vcp->diskSize == size);
2017 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
2021 for (vnodeIndex = 0;
2022 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2023 nVnodes--, vnodeIndex++) {
2024 if (vnode->type != vNull) {
2025 int vnodeChanged = 0;
2026 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2027 /* Log programs that belong to root (potentially suid root);
2028 * don't bother for read-only or backup volumes */
2029 #ifdef notdef /* This is done elsewhere */
2030 if (ShowRootFiles && RW && vnode->owner == 0 && vnodeNumber != 1)
2031 Log("OWNER IS ROOT %s %u dir %u vnode %u author %u owner %u mode %o\n", VolInfo.name, volumeNumber, vnode->parent, vnodeNumber, vnode->author, vnode->owner, vnode->modeBits);
2033 if (VNDISK_GET_INO(vnode) == 0) {
2035 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2036 memset(vnode, 0, vcp->diskSize);
2040 if (vcp->magic != vnode->vnodeMagic) {
2041 /* bad magic #, probably partially created vnode */
2042 Log("Partially allocated vnode %d deleted.\n",
2044 memset(vnode, 0, vcp->diskSize);
2048 /* ****** Should do a bit more salvage here: e.g. make sure
2049 * vnode type matches what it should be given the index */
2050 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2051 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2052 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2053 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2060 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2061 /* The following doesn't work, because the version number
2062 * is not maintained correctly by the file server */
2063 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2064 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2066 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2072 /* For RW volume, look for vnode with matching inode number;
2073 * if no such match, take the first determined by our sort
2075 register struct ViceInodeInfo *lip = ip;
2076 register int lnInodes = nInodes;
2078 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2079 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2088 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2089 /* "Matching" inode */
2093 vu = vnode->uniquifier;
2094 iu = ip->u.vnode.vnodeUniquifier;
2095 vd = vnode->dataVersion;
2096 id = ip->u.vnode.inodeDataVersion;
2098 * Because of the possibility of the uniquifier overflows (> 4M)
2099 * we compare them modulo the low 22-bits; we shouldn't worry
2100 * about mismatching since they shouldn't to many old
2101 * uniquifiers of the same vnode...
2103 if (IUnique(vu) != IUnique(iu)) {
2105 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2108 vnode->uniquifier = iu;
2109 #ifdef AFS_3DISPARES
2110 vnode->dataVersion = (id >= vd ?
2113 1887437 ? vd : id) :
2116 1887437 ? id : vd));
2118 #if defined(AFS_SGI_EXMAG)
2119 vnode->dataVersion = (id >= vd ?
2122 15099494 ? vd : id) :
2125 15099494 ? id : vd));
2127 vnode->dataVersion = (id > vd ? id : vd);
2128 #endif /* AFS_SGI_EXMAG */
2129 #endif /* AFS_3DISPARES */
2132 /* don't bother checking for vd > id any more, since
2133 * partial file transfers always result in this state,
2134 * and you can't do much else anyway (you've already
2135 * found the best data you can) */
2136 #ifdef AFS_3DISPARES
2137 if (!vnodeIsDirectory(vnodeNumber)
2138 && ((vd < id && (id - vd) < 1887437)
2139 || ((vd > id && (vd - id) > 1887437)))) {
2141 #if defined(AFS_SGI_EXMAG)
2142 if (!vnodeIsDirectory(vnodeNumber)
2143 && ((vd < id && (id - vd) < 15099494)
2144 || ((vd > id && (vd - id) > 15099494)))) {
2146 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2147 #endif /* AFS_SGI_EXMAG */
2150 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2151 vnode->dataVersion = id;
2156 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2159 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2161 VNDISK_SET_INO(vnode, ip->inodeNumber);
2166 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2168 VNDISK_SET_INO(vnode, ip->inodeNumber);
2171 VNDISK_GET_LEN(vnodeLength, vnode);
2172 if (ip->byteCount != vnodeLength) {
2175 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2180 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2181 VNDISK_SET_LEN(vnode, ip->byteCount);
2185 ip->linkCount--; /* Keep the inode around */
2188 } else { /* no matching inode */
2189 if (VNDISK_GET_INO(vnode) != 0
2190 || vnode->type == vDirectory) {
2191 /* No matching inode--get rid of the vnode */
2193 if (VNDISK_GET_INO(vnode)) {
2195 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)));
2199 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2204 if (VNDISK_GET_INO(vnode)) {
2206 time_t serverModifyTime = vnode->serverModifyTime;
2207 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2211 time_t serverModifyTime = vnode->serverModifyTime;
2212 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2215 memset(vnode, 0, vcp->diskSize);
2218 /* Should not reach here becuase we checked for
2219 * (inodeNumber == 0) above. And where we zero the vnode,
2220 * we also goto vnodeDone.
2224 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2228 } /* VNDISK_GET_INO(vnode) != 0 */
2230 assert(!(vnodeChanged && check));
2231 if (vnodeChanged && !Testing) {
2233 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2234 (char *)vnode, vcp->diskSize)
2236 VolumeChanged = 1; /* For break call back */
/* CheckVnodeNumber
 *
 * Map a vnode number to its in-memory VnodeEssence record.
 *
 * vnodeNumber - vnode id to look up
 *
 * The vnode class selects the entry of vnodeInfo[] and the bit number
 * derived from the id is used as the index into that entry's vnodes array.
 * Returns NULL when the index is not below vip->nVnodes, otherwise a
 * pointer to the record.
 */
2247 struct VnodeEssence *
2248 CheckVnodeNumber(VnodeId vnodeNumber)
2251 struct VnodeInfo *vip;
2254 class = vnodeIdToClass(vnodeNumber);
2255 vip = &vnodeInfo[class];
2256 offset = vnodeIdToBitNumber(vnodeNumber);
2257 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/* CopyOnWrite
 *
 * Give a directory a private copy of its data inode before it is modified.
 *
 * dir - summary of the directory about to be changed
 *
 * When dir->copied is already set, or Testing is on, the checks at the top
 * take an early path.  Otherwise the directory buffers are flushed, the
 * directory's vnode is read from the large vnode index, a new inode is
 * created for it (with the data version bumped as part of the IH_CREATE
 * arguments), the old inode's contents are copied across with CopyInode(),
 * the vnode is rewritten to point at the new inode, and the salvage
 * directory handle is switched to the new inode.  The old inode is not
 * removed here because the directory is still being scanned.
 * NOTE(review): CopyInode() is invoked inside assert(); confirm the
 * project's assert is never compiled out, otherwise the copy would be
 * skipped.
 */
2261 CopyOnWrite(register struct DirSummary *dir)
2263 /* Copy the directory unconditionally if we are going to change it:
2264 * not just if was cloned.
2266 struct VnodeDiskObject vnode;
2267 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2268 Inode oldinode, newinode;
2271 if (dir->copied || Testing)
2273 DFlush(); /* Well justified paranoia... */
2276 IH_IREAD(vnodeInfo[vLarge].handle,
2277 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2279 assert(code == sizeof(vnode));
2280 oldinode = VNDISK_GET_INO(&vnode);
2281 /* Increment the version number by a whole lot to avoid problems with
2282 * clients that were promised new version numbers--but the file server
2283 * crashed before the versions were written to disk.
2286 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2287 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2289 assert(VALID_INO(newinode));
2290 assert(CopyInode(fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2292 VNDISK_SET_INO(&vnode, newinode);
2294 IH_IWRITE(vnodeInfo[vLarge].handle,
2295 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2297 assert(code == sizeof(vnode));
2299 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2300 fileSysDevice, newinode);
2301 /* Don't delete the original inode right away, because the directory is
2302 * still being scanned.
2308 * This function should either successfully create a new dir, or give up
2309 * and leave things the way they were. In particular, if it fails to write
2310 * the new dir properly, it should return w/o changing the reference to the
/* CopyAndSalvage
 *
 * Rebuild a damaged directory into a brand-new inode and switch the
 * directory over to it.
 *
 * dir - summary of the directory to salvage
 *
 * The directory's vnode is read from the large vnode index and a new inode
 * is created for it.  DirSalvage() copies the recoverable entries from the
 * old directory into the new one, using the directory's own vnode number
 * for "." and vnode.parent (or the directory itself when there is no
 * parent) for "..", with the parent's uniquifier taken from its
 * VnodeEssence when available and 1 otherwise.  If that fails, or DirOK()
 * rejects the result, the link count on the new inode is dropped again.
 * On success the vnode is rewritten with the new inode number and length,
 * the data is synced, the old directory file is really closed, the old
 * inode's link count is decremented and dir->dirHandle is replaced by the
 * new directory handle.
 */
2314 CopyAndSalvage(register struct DirSummary *dir)
2316 struct VnodeDiskObject vnode;
2317 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2318 Inode oldinode, newinode;
2323 afs_int32 parentUnique = 1;
2324 struct VnodeEssence *vnodeEssence;
2329 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2331 IH_IREAD(vnodeInfo[vLarge].handle,
2332 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2334 assert(lcode == sizeof(vnode));
2335 oldinode = VNDISK_GET_INO(&vnode);
2336 /* Increment the version number by a whole lot to avoid problems with
2337 * clients that were promised new version numbers--but the file server
2338 * crashed before the versions were written to disk.
2341 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2342 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2344 assert(VALID_INO(newinode));
2345 SetSalvageDirHandle(&newdir, dir->rwVid, fileSysDevice, newinode);
2347 /* Assign . and .. vnode numbers from dir and vnode.parent.
2348 * The uniquifier for . is in the vnode.
2349 * The uniquifier for .. might be set to a bogus value of 1 and
2350 * the salvager will later clean it up.
2352 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(vnode.parent))) {
2353 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2356 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2358 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2363 /* didn't really build the new directory properly, let's just give up. */
/* Log the salvage result before code is reused for the IH_DEC result. */
2365 Log("Directory salvage returned code %d, continuing.\n", code);
2364 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2367 Log("also failed to decrement link count on new inode\n");
2371 Log("Checking the results of the directory salvage...\n");
2372 if (!DirOK(&newdir)) {
2373 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2374 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2379 VNDISK_SET_INO(&vnode, newinode);
2380 length = Length(&newdir);
2381 VNDISK_SET_LEN(&vnode, length);
2383 IH_IWRITE(vnodeInfo[vLarge].handle,
2384 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2386 assert(lcode == sizeof(vnode));
2389 nt_sync(fileSysDevice);
2391 sync(); /* this is slow, but hopefully rarely called. We don't have
2392 * an open FD on the file itself to fsync.
2396 vnodeInfo[vLarge].handle->ih_synced = 1;
2398 /* make sure old directory file is really closed */
2399 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2400 FDH_REALLYCLOSE(fdP);
2402 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2404 dir->dirHandle = newdir;
2408 JudgeEntry(struct DirSummary *dir, char *name, VnodeId vnodeNumber,
2411 struct VnodeEssence *vnodeEssence;
2412 afs_int32 dirOrphaned, todelete;
2414 dirOrphaned = IsVnodeOrphaned(dir->vnodeNumber);
2416 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2417 if (vnodeEssence == NULL) {
2419 Log("dir vnode %u: invalid entry deleted: %s/%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2423 assert(Delete(&dir->dirHandle, name) == 0);
2428 #ifndef AFS_NAMEI_ENV
2429 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2430 * mount inode for the partition. If this inode were deleted, it would crash
2433 if (vnodeEssence->InodeNumber == 0) {
2434 Log("dir vnode %d: invalid entry: %s/%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2437 assert(Delete(&dir->dirHandle, name) == 0);
2444 if (!(vnodeNumber & 1) && !Showmode
2445 && !(vnodeEssence->count || vnodeEssence->unique
2446 || vnodeEssence->modeBits)) {
2447 Log("dir vnode %u: invalid entry: %s/%s (vnode %u, unique %u)%s\n",
2448 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
2449 vnodeNumber, unique,
2450 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
2455 assert(Delete(&dir->dirHandle, name) == 0);
2461 /* Check if the Uniquifiers match. If not, change the directory entry
2462 * so its unique matches the vnode unique. Delete if the unique is zero
2463 * or if the directory is orphaned.
2465 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
2466 if (!vnodeEssence->unique
2467 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
2468 /* This is an orphaned directory. Don't delete the . or ..
2469 * entry. Otherwise, it will get created in the next
2470 * salvage and deleted again here. So Just skip it.
2475 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
2478 Log("dir vnode %u: %s/%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
2482 fid.Vnode = vnodeNumber;
2483 fid.Unique = vnodeEssence->unique;
2485 assert(Delete(&dir->dirHandle, name) == 0);
2487 assert(Create(&dir->dirHandle, name, &fid) == 0);
2490 return 0; /* no need to continue */
2493 if (strcmp(name, ".") == 0) {
2494 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
2497 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
2500 assert(Delete(&dir->dirHandle, ".") == 0);
2501 fid.Vnode = dir->vnodeNumber;
2502 fid.Unique = dir->unique;
2503 assert(Create(&dir->dirHandle, ".", &fid) == 0);
2506 vnodeNumber = fid.Vnode; /* Get the new Essence */
2507 unique = fid.Unique;
2508 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2511 } else if (strcmp(name, "..") == 0) {
2514 struct VnodeEssence *dotdot;
2515 pa.Vnode = dir->parent;
2516 dotdot = CheckVnodeNumber(pa.Vnode);
2517 assert(dotdot != NULL); /* XXX Should not be assert */
2518 pa.Unique = dotdot->unique;
2520 pa.Vnode = dir->vnodeNumber;
2521 pa.Unique = dir->unique;
2523 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
2525 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
2528 assert(Delete(&dir->dirHandle, "..") == 0);
2529 assert(Create(&dir->dirHandle, "..", &pa) == 0);
2532 vnodeNumber = pa.Vnode; /* Get the new Essence */
2534 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2536 dir->haveDotDot = 1;
2537 } else if (strncmp(name, ".__afs", 6) == 0) {
2539 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
2543 assert(Delete(&dir->dirHandle, name) == 0);
2545 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
2546 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
2549 if (ShowSuid && (vnodeEssence->modeBits & 06000))
2550 Log("FOUND suid/sgid file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
2551 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
2552 && !(vnodeEssence->modeBits & 0111)) {
2558 IH_INIT(ihP, fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
2559 vnodeEssence->InodeNumber);
2562 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
2566 size = FDH_SIZE(fdP);
2568 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, size, vnodeNumber);
2569 FDH_REALLYCLOSE(fdP);
2576 code = FDH_READ(fdP, buf, size);
2579 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
2580 Log("Volume %u (%s) mount point %s/%s to '%s' invalid, %s to symbolic link\n",
2581 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
2582 Testing ? "would convert" : "converted");
2583 vnodeEssence->modeBits |= 0111;
2584 vnodeEssence->changed = 1;
2585 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s/%s to '%s'\n",
2586 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
2587 dir->name ? dir->name : "??", name, buf);
2589 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
2590 dir->vname, vnodeNumber, size, code);
2592 FDH_REALLYCLOSE(fdP);
2595 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
2596 Log("FOUND root file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
2597 if (vnodeIdToClass(vnodeNumber) == vLarge
2598 && vnodeEssence->name == NULL) {
2600 if ((n = (char *)malloc(strlen(name) + 1)))
2602 vnodeEssence->name = n;
2605 /* The directory entry points to the vnode. Check to see if the
2606 * vnode points back to the directory. If not, then let the
2607 * directory claim it (else it might end up orphaned). Vnodes
2608 * already claimed by another directory are deleted from this
2609 * directory: hardlinks to the same vnode are not allowed
2610 * from different directories.
2612 if (vnodeEssence->parent != dir->vnodeNumber) {
2613 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
2614 /* Vnode does not point back to this directory.
2615 * Orphaned dirs cannot claim a file (it may belong to
2616 * another non-orphaned dir).
2619 Log("dir vnode %u: %s/%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
2621 vnodeEssence->parent = dir->vnodeNumber;
2622 vnodeEssence->changed = 1;
2624 /* Vnode was claimed by another directory */
2627 Log("dir vnode %u: %s/%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
2628 } else if (vnodeNumber == 1) {
2629 Log("dir vnode %d: %s/%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
2631 Log("dir vnode %u: %s/%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
2636 assert(Delete(&dir->dirHandle, name) == 0);
2641 /* This directory claims the vnode */
2642 vnodeEssence->claimed = 1;
2644 vnodeEssence->count--;
/*
 * DistilVnodeEssence
 *
 * Scan the vnode index file of one vnode class and build the in-memory
 * summary, vnodeInfo[class], that the rest of the salvage works from.
 *
 *   rwVId - volume id used to initialise the inode handle for the index file
 *   class - vnode class; selects vnodeInfo[class] and VnodeClassInfo[class]
 *   ino   - inode of the vnode index file to read
 *   maxu  - in/out: raised to the largest uniquifier found among the vnodes
 *
 * The slot count is (file size / diskSize) - 1 and reading starts at offset
 * diskSize, so the first diskSize bytes of the file are never distilled
 * (presumably they hold the index header -- confirm against the file layout).
 * The handle stored in vip->handle stays initialised after this returns.
 *
 * NOTE(review): calls with side effects (STREAM_SEEK, calloc) are wrapped in
 * assert(); confirm assert can never be compiled out in this build, since
 * the seek and the allocations would disappear with it.
 */
2649 DistilVnodeEssence(VolumeId rwVId, VnodeClass class, Inode ino, Unique * maxu)
2651 register struct VnodeInfo *vip = &vnodeInfo[class];
2652 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
2653 char buf[SIZEOF_LARGEDISKVNODE];
2654 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2656 StreamHandle_t *file;
/* Open the index file, both as an fd handle and as a stream on top of it. */
2661 IH_INIT(vip->handle, fileSysDevice, rwVId, ino);
2662 fdP = IH_OPEN(vip->handle);
2663 assert(fdP != NULL);
2664 file = FDH_FDOPEN(fdP, "r+");
2665 assert(file != NULL);
2666 size = OS_SIZE(fdP->fd_fd);
/* One slot per diskSize bytes, not counting the first diskSize bytes. */
2668 vip->nVnodes = (size / vcp->diskSize) - 1;
2669 if (vip->nVnodes > 0) {
/* The file must be an exact multiple of the on-disk vnode size. */
2670 assert((vip->nVnodes + 1) * vcp->diskSize == size);
2671 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
/* Zero-filled essence array, one element per slot. */
2672 assert((vip->vnodes = (struct VnodeEssence *)
2673 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
/* Only the large class gets a parallel array of inode numbers. */
2674 if (class == vLarge) {
2675 assert((vip->inodes = (Inode *)
2676 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
/* Walk the slots in order; the loop also ends if a read comes up short. */
2685 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
2686 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
2687 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2688 nVnodes--, vnodeIndex++) {
/* Slots of type vNull are skipped and keep their zeroed essence. */
2689 if (vnode->type != vNull) {
2690 register struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
2691 afs_fsize_t vnodeLength;
2692 vip->nAllocatedVnodes++;
/* count starts out as the link count recorded on disk. */
2693 vep->count = vnode->linkCount;
2694 VNDISK_GET_LEN(vnodeLength, vnode);
2695 vep->blockCount = nBlocks(vnodeLength);
2696 vip->volumeBlockCount += vep->blockCount;
2697 vep->parent = vnode->parent;
2698 vep->unique = vnode->uniquifier;
/* Keep the caller's running maximum uniquifier up to date. */
2699 if (*maxu < vnode->uniquifier)
2700 *maxu = vnode->uniquifier;
2701 vep->modeBits = vnode->modeBits;
2702 vep->InodeNumber = VNDISK_GET_INO(vnode);
2703 vep->type = vnode->type;
2704 vep->author = vnode->author;
2705 vep->owner = vnode->owner;
2706 vep->group = vnode->group;
/* Directories are only expected in the large class; remember their inode. */
2707 if (vnode->type == vDirectory) {
2708 assert(class == vLarge);
2709 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName
 *
 * Build the name of a directory into path by recursing up the parent chain:
 * the parent's name is produced first, then vp->name is appended.
 *
 *   vnode - vnode number that vp describes
 *   vp    - essence of the vnode whose name is wanted
 *   path  - caller-supplied buffer that receives the result
 *
 * The recursion is only taken while the vnode has a parent, has a recorded
 * name, and CheckVnodeNumber() can find the parent's essence.
 *
 * NOTE(review): the component is appended with strcat() and no length check
 * is visible here -- confirm the caller's buffer is large enough.
 */
2718 GetDirName(VnodeId vnode, struct VnodeEssence *vp, char *path)
2720 struct VnodeEssence *parentvp;
/* Resolve the parent's path first; only then add this component. */
2726 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(vp->parent))
2727 && GetDirName(vp->parent, parentvp, path)) {
2729 strcat(path, vp->name);
2735 /* To determine if a vnode is orphaned or not, the vnode and all its parent
2736 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
/*
 * IsVnodeOrphaned
 *
 * Returns 1 when the vnode is orphaned and 0 when it is attached to the
 * tree.  A vnode is orphaned when it has no essence, is not claimed, or
 * when any vnode on its parent chain is orphaned; the parent chain is
 * followed recursively.
 *
 * NOTE(review): no guard against a loop in the parent chain is visible
 * here -- confirm that claimed vnodes can never form a cycle.
 */
2739 IsVnodeOrphaned(VnodeId vnode)
2741 struct VnodeEssence *vep;
2744 return (1); /* Vnode zero does not exist */
2746 return (0); /* The root dir vnode is always claimed */
2747 vep = CheckVnodeNumber(vnode); /* Get the vnode essence */
2748 if (!vep || !vep->claimed)
2749 return (1); /* Vnode is not claimed - it is orphaned */
/* This vnode is claimed; the answer now depends on its ancestors. */
2751 return (IsVnodeOrphaned(vep->parent));
/*
 * SalvageDir
 *
 * Salvage the directory held in slot i of dirVnodeInfo.  The parent
 * directory is salvaged first (recursively), so a directory is only judged
 * after its parent has been.
 *
 *   name         - passed through unchanged to the recursive call
 *   rwVid        - volume id handed to IH_DEC when an old inode is released
 *   dirVnodeInfo - vnode summary holding the directory essences and inodes
 *   alinkH       - handle stored in the DirSummary as ds_linkH, used by IH_DEC
 *   i            - slot (bit number) of the directory within dirVnodeInfo
 *   rootdir      - receives a copy of the DirSummary of the directory with
 *                  vnode 1 / unique 1 after it has been judged
 *   rootdirfound - forwarded to the recursive call; presumably set when the
 *                  root directory is found (confirm)
 *
 * NOTE(review): dir, dirHandle and path are static although the function
 * calls itself.  The recursive call is made before any of them is filled in
 * for this directory, which appears to be what keeps that safe.
 */
2755 SalvageDir(char *name, VolumeId rwVid, struct VnodeInfo *dirVnodeInfo,
2756 IHandle_t * alinkH, int i, struct DirSummary *rootdir,
2759 static struct DirSummary dir;
2760 static struct DirHandle dirHandle;
2761 struct VnodeEssence *parent;
2762 static char path[MAXPATHLEN];
/* Visit each slot once; it is marked before recursing into the parent. */
2765 if (dirVnodeInfo->vnodes[i].salvaged)
2766 return; /* already salvaged */
2769 dirVnodeInfo->vnodes[i].salvaged = 1;
2771 if (dirVnodeInfo->inodes[i] == 0)
2772 return; /* Not allocated to a directory */
/* Vnode 1 must not have a parent; clear any parent it claims to have. */
2774 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
2775 if (dirVnodeInfo->vnodes[i].parent) {
2776 Log("Bad parent, vnode 1; %s...\n",
2777 (Testing ? "skipping" : "salvaging"));
2778 dirVnodeInfo->vnodes[i].parent = 0;
2779 dirVnodeInfo->vnodes[i].changed = 1;
/* Make sure the parent directory has been salvaged before this one. */
2782 parent = CheckVnodeNumber(dirVnodeInfo->vnodes[i].parent);
2783 if (parent && parent->salvaged == 0)
2784 SalvageDir(name, rwVid, dirVnodeInfo, alinkH,
2785 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
2786 rootdir, rootdirfound);
/* Fill in the summary that DirOK, CopyAndSalvage and JudgeEntry work on. */
2789 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
2790 dir.unique = dirVnodeInfo->vnodes[i].unique;
2793 dir.parent = dirVnodeInfo->vnodes[i].parent;
2794 dir.haveDot = dir.haveDotDot = 0;
2795 dir.ds_linkH = alinkH;
2796 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, fileSysDevice,
2797 dirVnodeInfo->inodes[i]);
/* With RebuildDirs set (and not Testing) the check is skipped and the
 * directory is treated as bad. */
2799 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
2802 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
2803 (Testing ? "skipping" : "salvaging"));
2806 CopyAndSalvage(&dir);
/* Copy of the handle used for the enumeration and the IH_DEC further down. */
2810 dirHandle = dir.dirHandle;
2813 GetDirName(bitNumberToVnodeNumber(i, vLarge),
2814 &dirVnodeInfo->vnodes[i], path);
2817 /* If enumeration failed for random reasons, we will probably delete
2818 * too much stuff, so we guard against this instead.
2820 assert(EnumerateDir(&dirHandle, JudgeEntry, &dir) == 0);
2823 /* Delete the old directory if it was copied in order to salvage.
2824 * CopyOnWrite has written the new inode # to the disk, but we still
2825 * have the old one in our local structure here. Thus, we idec the
2829 if (dir.copied && !Testing) {
2830 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
2832 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
2835 /* Remember rootdir DirSummary _after_ it has been judged */
2836 if (dir.vnodeNumber == 1 && dir.unique == 1) {
2837 memcpy(rootdir, &dir, sizeof(struct DirSummary));
/*
 * SalvageVolume
 *
 * Salvage the vnode and directory structure of one volume.
 *
 *   rwIsp  - inode summary of the volume; supplies the volume id and the
 *            inodes of the volume info and vnode index files
 *   alinkH - handle passed on to SalvageDir and used with IH_DEC when a
 *            vnode's inode is released
 *
 * Order of work:
 *   1. Read the volume header and sanity-check it.
 *   2. DistilVnodeEssence() for the large and the small vnode class.
 *   3. SalvageDir() on every large-vnode slot.
 *   4. Find unclaimed vnodes and, when orphans == ORPH_ATTACH, enter them in
 *      the root directory as __ORPHANDIR__ / __ORPHANFILE__ entries.
 *   5. Rewrite every vnode that changed or whose link count is off,
 *      removing orphans when that was requested.
 *   6. Free the saved names, correct the header's file count, block count
 *      and uniquifier, reset the in-use/salvage flags, write the header.
 *
 * About VnodeEssence.count: DistilVnodeEssence() starts it at the on-disk
 * link count and JudgeEntry() decrements it for each directory entry that
 * claims the vnode, so what remains is a surplus.  The corrected value
 * written back is linkCount - count; therefore count-- in this function
 * raises the final link count and count++ lowers it.
 */
2845 SalvageVolume(register struct InodeSummary *rwIsp, IHandle_t * alinkH)
2847 /* This routine, for now, will only be called for read-write volumes */
2849 int BlocksInVolume = 0, FilesInVolume = 0;
2850 register VnodeClass class;
2851 struct DirSummary rootdir, oldrootdir;
2852 struct VnodeInfo *dirVnodeInfo;
2853 struct VnodeDiskObject vnode;
2854 VolumeDiskData volHeader;
2856 int orphaned, rootdirfound = 0;
2857 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
2858 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
2859 struct VnodeEssence *vep;
2862 afs_sfsize_t nBytes;
2864 VnodeId LFVnode, ThisVnode;
2865 Unique LFUnique, ThisUnique;
/* Load the volume header; a short read or a bad magic number is fatal. */
2868 vid = rwIsp->volSummary->header.id;
2869 IH_INIT(h, fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
2870 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
2871 assert(nBytes == sizeof(volHeader));
2872 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
2873 assert(volHeader.destroyMe != DESTROY_ME);
2874 /* (should not have gotten this far with DESTROY_ME flag still set!) */
/* Summarise both vnode index files. */
2876 DistilVnodeEssence(vid, vLarge, rwIsp->volSummary->header.largeVnodeIndex,
2878 DistilVnodeEssence(vid, vSmall, rwIsp->volSummary->header.smallVnodeIndex,
/* Judge every directory; SalvageDir returns at once for finished slots. */
2881 dirVnodeInfo = &vnodeInfo[vLarge];
2882 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
2883 SalvageDir(volHeader.name, vid, dirVnodeInfo, alinkH, i, &rootdir,
2887 nt_sync(fileSysDevice);
2889 sync(); /* This used to be done lower level, for every dir */
2896 /* Parse each vnode looking for orphaned vnodes and
2897 * connect them to the tree as orphaned (if requested).
2899 oldrootdir = rootdir;
2900 for (class = 0; class < nVNODECLASSES; class++) {
2901 for (v = 0; v < vnodeInfo[class].nVnodes; v++) {
2902 vep = &(vnodeInfo[class].vnodes[v]);
2903 ThisVnode = bitNumberToVnodeNumber(v, class);
2904 ThisUnique = vep->unique;
2906 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
2907 continue; /* Ignore unused, claimed, and root vnodes */
2909 /* This vnode is orphaned. If it is a directory vnode, then the '..'
2910 * entry in this vnode had incremented the parent link count (In
2911 * JudgeEntry()). We need to go to the parent and decrement that
2912 * link count. But if the parent's unique is zero, then the parent
2913 * link count was not incremented in JudgeEntry().
2915 if (class == vLarge) { /* directory vnode */
2916 pv = vnodeIdToBitNumber(vep->parent);
2917 if (vnodeInfo[vLarge].vnodes[pv].unique != 0)
2918 vnodeInfo[vLarge].vnodes[pv].count++;
2922 continue; /* If no rootdir, can't attach orphaned files */
2924 /* Here we attach orphaned files and directories into the
2925 * root directory, LFVnode, making sure link counts stay correct.
2927 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
2928 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
2929 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
2931 /* Update this orphaned vnode's info. Its parent info and
2932 * link count (do for orphaned directories and files).
2934 vep->parent = LFVnode; /* Parent is the root dir */
2935 vep->unique = LFUnique;
2938 vep->count--; /* Inc link count (root dir will pt to it) */
2940 /* If this orphaned vnode is a directory, change '..'.
2941 * The name of the orphaned dir/file is unknown, so we
2942 * build a unique name. No need to CopyOnWrite the directory
2943 * since it is not connected to tree in BK or RO volume and
2944 * won't be visible there.
2946 if (class == vLarge) {
2950 /* Remove and recreate the ".." entry in this orphaned directory */
2951 SetSalvageDirHandle(&dh, vid, fileSysDevice,
2952 vnodeInfo[class].inodes[v]);
2954 pa.Unique = LFUnique;
2955 assert(Delete(&dh, "..") == 0);
2956 assert(Create(&dh, "..", &pa) == 0);
2958 /* The original parent's link count was decremented above.
2959 * Here we increment the new parent's link count.
2961 pv = vnodeIdToBitNumber(LFVnode);
2962 vnodeInfo[vLarge].vnodes[pv].count--;
2966 /* Go to the root dir and add this entry. The link count of the
2967 * root dir was incremented when ".." was created. Try 10 times.
2969 for (j = 0; j < 10; j++) {
2970 pa.Vnode = ThisVnode;
2971 pa.Unique = ThisUnique;
2973 (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
2975 vLarge) ? "__ORPHANDIR__" :
2976 "__ORPHANFILE__"), ThisVnode,
2979 CopyOnWrite(&rootdir);
2980 code = Create(&rootdir.dirHandle, npath, &pa);
2984 ThisUnique += 50; /* Try creating a different file */
2987 Log("Attaching orphaned %s to volume's root dir as %s\n",
2988 ((class == vLarge) ? "directory" : "file"), npath);
2990 } /* for each vnode in the class */
2991 } /* for each class of vnode */
2993 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
2995 if (!oldrootdir.copied && rootdir.copied) {
2997 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
3000 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
3003 DFlush(); /* Flush the changes */
3004 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
3005 Log("Cannot attach orphaned files and directories: Root directory not found\n");
3006 orphans = ORPH_IGNORE;
3009 /* Write out all changed vnodes. Orphaned files and directories
3010 * will get removed here also (if requested).
3012 for (class = 0; class < nVNODECLASSES; class++) {
3013 int nVnodes = vnodeInfo[class].nVnodes;
3014 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3015 struct VnodeEssence *vnodes = vnodeInfo[class].vnodes;
3016 FilesInVolume += vnodeInfo[class].nAllocatedVnodes;
3017 BlocksInVolume += vnodeInfo[class].volumeBlockCount;
3018 for (i = 0; i < nVnodes; i++) {
3019 register struct VnodeEssence *vnp = &vnodes[i];
3020 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
3022 /* If the vnode is good but is unclaimed (not listed in
3023 * any directory entries), then it is orphaned.
3026 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(vnodeNumber))) {
3027 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
/* Only vnodes that were modified, or that still carry a link-count
 * surplus, are read back and rewritten. */
3031 if (vnp->changed || vnp->count) {
3035 IH_IREAD(vnodeInfo[class].handle,
3036 vnodeIndexOffset(vcp, vnodeNumber),
3037 (char *)&vnode, sizeof(vnode));
3038 assert(nBytes == sizeof(vnode));
3040 vnode.parent = vnp->parent;
3041 oldCount = vnode.linkCount;
/* Subtract the surplus to obtain the corrected link count. */
3042 vnode.linkCount = vnode.linkCount - vnp->count;
3045 orphaned = IsVnodeOrphaned(vnodeNumber);
3047 if (!vnp->todelete) {
3048 /* Orphans should have already been attached (if requested) */
3049 assert(orphans != ORPH_ATTACH);
3050 oblocks += vnp->blockCount;
3053 if (((orphans == ORPH_REMOVE) || vnp->todelete)
3055 BlocksInVolume -= vnp->blockCount;
3057 if (VNDISK_GET_INO(&vnode)) {
3059 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
3062 memset(&vnode, 0, sizeof(vnode));
3064 } else if (vnp->count) {
3066 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
3069 vnode.modeBits = vnp->modeBits;
3072 vnode.dataVersion++;
3075 IH_IWRITE(vnodeInfo[class].handle,
3076 vnodeIndexOffset(vcp, vnodeNumber),
3077 (char *)&vnode, sizeof(vnode));
3078 assert(nBytes == sizeof(vnode));
3084 if (!Showmode && ofiles) {
3085 Log("%s %d orphaned files and directories (approx. %u KB)\n",
3087 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
/* Release the names that JudgeEntry() saved for the vnodes. */
3091 for (class = 0; class < nVNODECLASSES; class++) {
3092 register struct VnodeInfo *vip = &vnodeInfo[class];
3093 for (i = 0; i < vip->nVnodes; i++)
3094 if (vip->vnodes[i].name)
3095 free(vip->vnodes[i].name);
3102 /* Set correct resource utilization statistics */
3103 volHeader.filecount = FilesInVolume;
3104 volHeader.diskused = BlocksInVolume;
3106 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
3107 if (volHeader.uniquifier < (maxunique + 1)) {
3109 Log("Volume uniquifier is too low; fixed\n");
3110 /* Plus 2,000 in case there are workstations out there with
3111 * cached vnodes that have since been deleted
3113 volHeader.uniquifier = (maxunique + 1 + 2000);
3116 /* Turn off the inUse bit; the volume's been salvaged! */
3117 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
3118 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
3119 volHeader.inService = 1; /* allow service again */
/* needsCallback is set exactly when VolumeChanged is non-zero. */
3120 volHeader.needsCallback = (VolumeChanged != 0);
3121 volHeader.dontSalvage = DONT_SALVAGE;
3124 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
3125 assert(nBytes == sizeof(volHeader));
3128 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
3129 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
3130 FilesInVolume, BlocksInVolume);
/* Drop the index-file handles that DistilVnodeEssence() initialised. */
3132 IH_RELEASE(vnodeInfo[vSmall].handle);
3133 IH_RELEASE(vnodeInfo[vLarge].handle);
/*
 * ClearROInUseBit
 *
 * Read the volume header through summary->volumeInfoHandle, mark the volume
 * as not in use, not needing salvage and in service, set dontSalvage to
 * DONT_SALVAGE, and write the header back to offset 0.
 *
 * A short read or write, or a header whose magic is not VOLUMEINFOMAGIC,
 * trips an assertion.
 */
3139 ClearROInUseBit(struct VolumeSummary *summary)
3141 IHandle_t *h = summary->volumeInfoHandle;
3142 afs_sfsize_t nBytes;
3144 VolumeDiskData volHeader;
/* Fetch the current header and verify it before changing anything. */
3146 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3147 assert(nBytes == sizeof(volHeader));
3148 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
/* Same flag settings that SalvageVolume() applies after a salvage. */
3149 volHeader.inUse = 0;
3150 volHeader.needsSalvaged = 0;
3151 volHeader.inService = 1;
3152 volHeader.dontSalvage = DONT_SALVAGE;
3154 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
3155 assert(nBytes == sizeof(volHeader));
3160 * Possibly delete the volume.
3162 * deleteMe - Always delete the volume; it is only a partial volume.
/*
 * MaybeZapVolume
 *
 *   isp      - inode summary of the volume under consideration
 *   message  - prefix for the "salvage was unsuccessful" log line
 *   deleteMe - non-zero to delete the volume regardless of its type
 *
 * For a read-only volume, or whenever deleteMe is set, the reason is logged
 * and the volume header file named by isp->volSummary->fileName is unlinked
 * (only if that file name is known).  For any other volume, when check is
 * zero, the failure is logged and the salvage is aborted.
 *
 * NOTE(review): which of the two explanations is logged is presumably
 * selected by deleteMe -- confirm.  The "partial volume" message also has
 * an unbalanced '(' and a misspelling; both are runtime text and are left
 * untouched here.
 */
3165 MaybeZapVolume(register struct InodeSummary *isp, char *message, int deleteMe,
3168 if (readOnly(isp) || deleteMe) {
/* Without a header file name there is nothing to unlink. */
3169 if (isp->volSummary && isp->volSummary->fileName) {
3172 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
3174 Log("It will be deleted on this server (you may find it elsewhere)\n");
3177 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
3179 Log("it will be deleted instead. It should be recloned.\n");
3182 unlink(isp->volSummary->fileName);
/* A read-write volume is never deleted here; give up on it instead. */
3184 } else if (!check) {
3185 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
3187 Abort("Salvage of volume %u aborted\n", isp->volumeId);
/*
 * AskOffline
 *
 * Ask the file server, through FSYNC_VolOp(FSYNC_VOL_OFF, FSYNC_SALVAGE),
 * to take the volume offline.
 *
 *   volumeId  - volume to take offline
 *   partition - partition name passed to FSYNC_VolOp
 *
 * The request is made up to three times.  SYNC_DENIED and SYNC_BAD_COMMAND
 * abort the salvage at once; other failures are logged and retried after
 * FSYNC_clientFinis().  If the last result is still not SYNC_OK the salvage
 * is aborted.
 *
 * NOTE(review): the conditionals here test DEMAND_ATTACH_ENABLE while other
 * parts of this file test AFS_DEMAND_ATTACH_FS -- confirm both are intended.
 */
3193 AskOffline(VolumeId volumeId, char * partition)
3197 for (i = 0; i < 3; i++) {
3198 code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_OFF, FSYNC_SALVAGE, NULL);
3200 if (code == SYNC_OK) {
/* The file server refused: retrying cannot help, so give up. */
3202 } else if (code == SYNC_DENIED) {
3203 #ifdef DEMAND_ATTACH_ENABLE
3204 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
3206 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
3208 Abort("Salvage aborted\n");
/* The two sides disagree about the protocol; also fatal. */
3209 } else if (code == SYNC_BAD_COMMAND) {
3210 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
3212 #ifdef DEMAND_ATTACH_ENABLE
3213 Log("AskOffline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
3215 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
3217 Abort("Salvage aborted\n");
3220 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
3221 FSYNC_clientFinis();
/* All attempts used up without success. */
3225 if (code != SYNC_OK) {
3226 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
3227 Abort("Salvage aborted\n");
/*
 * AskOnline
 *
 * Ask the file server, through FSYNC_VolOp(FSYNC_VOL_ON, FSYNC_WHATEVER),
 * to bring the volume back online.
 *
 *   volumeId  - volume to bring online
 *   partition - partition name passed to FSYNC_VolOp
 *
 * The request is made up to three times.  Unlike AskOffline(), no failure
 * in the code shown here aborts the salvage; failures are only logged.
 *
 * NOTE(review): the retry message below says "take volume offline" although
 * this is the online request -- it looks copied from AskOffline().  It is
 * runtime text, so it is left unchanged here.
 */
3232 AskOnline(VolumeId volumeId, char *partition)
3236 for (i = 0; i < 3; i++) {
3237 code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
3239 if (code == SYNC_OK) {
3241 } else if (code == SYNC_DENIED) {
3242 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, partition);
3243 } else if (code == SYNC_BAD_COMMAND) {
3244 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
3246 #ifdef DEMAND_ATTACH_ENABLE
3247 Log("AskOnline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
3249 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
3254 Log("AskOnline: request for fileserver to take volume offline failed; trying again...\n");
3255 FSYNC_clientFinis();
/*
 * CopyInode
 *
 * Copy the contents of inode1 into inode2 on the given device, reading and
 * writing through buf until FDH_READ stops returning data, then close both
 * descriptors with FDH_REALLYCLOSE.
 *
 *   device   - device holding both inodes
 *   inode1   - source inode
 *   inode2   - destination inode
 *   rwvolume - volume id used to initialise both inode handles
 *
 * A source that cannot be opened, or a write that transfers fewer bytes
 * than were read, trips an assertion.
 *
 * NOTE(review): no check of destFdP is visible between IH_OPEN and the
 * first write, and FDH_WRITE is called inside assert() -- confirm assert
 * can never be compiled out in this build.
 */
3262 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
3264 /* Volume parameter is passed in case iopen is upgraded in future to
3265 * require a volume Id to be passed
3268 IHandle_t *srcH, *destH;
3269 FdHandle_t *srcFdP, *destFdP;
3272 IH_INIT(srcH, device, rwvolume, inode1);
3273 srcFdP = IH_OPEN(srcH);
3274 assert(srcFdP != NULL);
3275 IH_INIT(destH, device, rwvolume, inode2);
3276 destFdP = IH_OPEN(destH);
3278 while ((n = FDH_READ(srcFdP, buf, sizeof(buf))) > 0)
3279 assert(FDH_WRITE(destFdP, buf, n) == n);
3281 FDH_REALLYCLOSE(srcFdP);
3282 FDH_REALLYCLOSE(destFdP);
/*
 * PrintInodeList
 *
 * Debugging aid: read the whole file behind inodeFd into memory and log
 * one line per struct ViceInodeInfo record (inode, link count, byte count
 * and the four parameter words).
 *
 * NOTE(review): read() is called inside assert(), and the read starts at
 * the descriptor's current offset -- confirm the offset is at the start of
 * the file when this is called.
 */
3289 PrintInodeList(void)
3291 register struct ViceInodeInfo *ip;
3292 struct ViceInodeInfo *buf;
3293 struct afs_stat status;
/* Size the buffer from the file's length and slurp it in one read. */
3296 assert(afs_fstat(inodeFd, &status) == 0);
3297 buf = (struct ViceInodeInfo *)malloc(status.st_size);
3298 assert(buf != NULL);
3299 nInodes = status.st_size / sizeof(struct ViceInodeInfo);
3300 assert(read(inodeFd, buf, status.st_size) == status.st_size);
/* nInodes counts down while ip walks the records. */
3301 for (ip = buf; nInodes--; ip++) {
3302 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
3303 PrintInode(NULL, ip->inodeNumber), ip->linkCount,
3304 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
3305 ip->u.param[2], ip->u.param[3]);
/*
 * PrintInodeSummary
 *
 * Debugging aid: log one line for each of the nVolumesInInodeFile entries
 * of the inodeSummary array.  The trailing "volSummary" in the format is
 * printed literally; no value is supplied for it.
 */
3311 PrintInodeSummary(void)
3314 struct InodeSummary *isp;
3316 for (i = 0; i < nVolumesInInodeFile; i++) {
3317 isp = &inodeSummary[i];
3318 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/*
 * PrintVolumeSummary
 *
 * Debugging aid: log the header file name of each of the nVolumes entries
 * starting at volumeSummaryp.  Only fileName is actually printed; "header"
 * and "wouldNeedCallback" appear in the output as literal words.
 */
3323 PrintVolumeSummary(void)
3326 struct VolumeSummary *vsp;
3328 for (i = 0, vsp = volumeSummaryp; i < nVolumes; vsp++, i++) {
3329 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
/*
 * Fork wrapper.  The NT build must never reach the fork path, hence the
 * unconditional assertion.  Under AFS_DEMAND_ATTACH_FS, a child process
 * (f == 0) of the salvageserver calls VChildProcReconnectFS_r() when the
 * FSSYNC client is built in (presumably to obtain its own connection to
 * the file server rather than share the parent's -- confirm).
 */
3339 assert(0); /* Fork is never executed in the NT code path */
3343 #ifdef AFS_DEMAND_ATTACH_FS
3344 if ((f == 0) && (programType == salvageServer)) {
3345 /* we are a salvageserver child */
3346 #ifdef FSSYNC_BUILD_CLIENT
3347 VChildProcReconnectFS_r();
3349 #ifdef SALVSYNC_BUILD_CLIENT
3353 #endif /* AFS_DEMAND_ATTACH_FS */
3354 #endif /* !AFS_NT40_ENV */
/*
 * Exit path.  Under AFS_DEMAND_ATTACH_FS the salvageserver has extra work
 * to do for the SALVSYNC and FSSYNC clients, each guarded by its own build
 * macro.  A thread other than main_thread ends only itself, handing code
 * to pthread_exit() as its exit value.
 */
3365 #ifdef AFS_DEMAND_ATTACH_FS
3366 if (programType == salvageServer) {
3367 #ifdef SALVSYNC_BUILD_CLIENT
3370 #ifdef FSSYNC_BUILD_CLIENT
3374 #endif /* AFS_DEMAND_ATTACH_FS */
3377 if (main_thread != pthread_self())
3378 pthread_exit((void *)code);
/*
 * Reap one child process and inspect how it ended.  prog names the child
 * in the "core dumped" message.
 *
 * NOTE(review): no check of the value returned by wait() is visible before
 * status is examined, and WCOREDUMP is evaluated without first testing
 * WIFSIGNALED -- confirm both are acceptable here.
 */
3391 pid = wait(&status);
3393 if (WCOREDUMP(status))
3394 Log("\"%s\" core dumped!\n", prog);
/* Death by signal or a non-zero exit status both count as failure. */
3395 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp
 *
 * Format clock, in local time, as "MM/DD/YYYY HH:MM:SS" or as
 * "MM/DD/YYYY HH:MM" (the choice is presumably made by precision --
 * confirm).
 *
 * The text is built in a static 20-byte buffer, which is exactly enough
 * for the longer form plus its terminator; each call overwrites the
 * result of the previous one.
 */
3401 TimeStamp(time_t clock, int precision)
3404 static char timestamp[20];
3405 lt = localtime(&clock);
3407 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M:%S", lt);
3409 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M", lt);
/*
 * CheckLogFile
 *
 * Rotate and open the salvager log: the existing file at log_path is
 * renamed to "<log_path>.old" and a new log is opened at log_path in
 * append mode.  The comment further down indicates stdout is used when the
 * log cannot be opened.
 *
 * NOTE(review): the ".old" name is built with strcpy/strcat into a buffer
 * of AFSDIR_PATH_MAX bytes with no length check visible -- confirm that
 * log_path always leaves room for the suffix.
 */
3414 CheckLogFile(char * log_path)
3416 char oldSlvgLog[AFSDIR_PATH_MAX];
3418 #ifndef AFS_NT40_ENV
3425 strcpy(oldSlvgLog, log_path);
3426 strcat(oldSlvgLog, ".old");
3428 renamefile(log_path, oldSlvgLog);
3429 logFile = afs_fopen(log_path, "a");
3431 if (!logFile) { /* still nothing, use stdout */
3435 #ifndef AFS_NAMEI_ENV
3436 AFS_DEBUG_IOPS_LOG(logFile);
3441 #ifndef AFS_NT40_ENV
/*
 * TimeStampLogFile
 *
 * Give the log at log_path a second, timestamped name of the form
 * "<log_path>.YYYY-MM-DD.HH:MM:SS" (local time) by hard-linking it.
 * Failure of link() is deliberately ignored.  Not built for NT.
 */
3443 TimeStampLogFile(char * log_path)
3445 char stampSlvgLog[AFSDIR_PATH_MAX];
3450 lt = localtime(&now);
/* tm_year counts from 1900 and tm_mon from 0, hence the adjustments. */
3451 (void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
3452 "%s.%04d-%02d-%02d.%02d:%02d:%02d",
3453 log_path, lt->tm_year + 1900,
3454 lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
3457 /* try to link the logfile to a timestamped filename */
3458 /* if it fails, oh well, nothing we can do */
3459 link(log_path, stampSlvgLog);
/*
 * Log display: re-open the salvager log file for reading and process it
 * line by line with fgets().  When logging goes to syslog there is no file
 * to show, which is reported on stdout; a log file that cannot be opened
 * is reported on stdout as well.
 */
3468 #ifndef AFS_NT40_ENV
3470 printf("Can't show log since using syslog.\n");
3479 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
3482 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
3485 while (fgets(line, sizeof(line), logFile))
/*
 * Log
 *
 * printf-style logging.  The message is formatted into tmp (anything that
 * does not fit in tmp is cut off by afs_vsnprintf) and then sent either to
 * syslog at LOG_INFO (non-NT builds only) or to logFile, where it is
 * prefixed with the current time from TimeStamp().
 */
3492 Log(const char *format, ...)
3498 va_start(args, format);
3499 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
3501 #ifndef AFS_NT40_ENV
/* The message is passed as an argument, never as the format string. */
3503 syslog(LOG_INFO, "%s", tmp);
3507 gettimeofday(&now, 0);
3508 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort
 *
 * printf-style reporting used on fatal paths.  The message is formatted
 * into tmp and sent either to syslog at LOG_INFO (non-NT builds only) or
 * to logFile.  Unlike Log(), the file output carries no timestamp.
 */
3514 Abort(const char *format, ...)
3519 va_start(args, format);
3520 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
3522 #ifndef AFS_NT40_ENV
3524 syslog(LOG_INFO, "%s", tmp);
3528 fprintf(logFile, "%s", tmp);
/* Room for a copy of s plus its terminating NUL. */
3543 p = (char *)malloc(strlen(s) + 1);
3549 /* Remove the FORCESALVAGE file */
/*
 * RemoveTheForce
 *
 * Build "<path>/FORCESALVAGE" and, when this is a real run (not Testing)
 * with ForceSalvage set, unlink that file if it exists.
 *
 * NOTE(review): the name is assembled with strcpy/strcat and no length
 * check is visible -- confirm target is large enough for any path.
 */
3551 RemoveTheForce(char *path)
3554 struct afs_stat force; /* so we can use afs_stat to find it */
3555 strcpy(target,path);
3556 strcat(target,"/FORCESALVAGE");
3557 if (!Testing && ForceSalvage) {
3558 if (afs_stat(target,&force) == 0) unlink(target);
3562 #ifndef AFS_AIX32_ENV
/*
 * Variant built when AFS_AIX32_ENV is not defined: returns non-zero when
 * "<path>/FORCESALVAGE" exists (afs_stat succeeds) and zero otherwise.
 *
 * NOTE(review): the name is assembled with strcpy/strcat and no length
 * check is visible -- confirm target is large enough for any path.
 */
3564 * UseTheForceLuke - see if we can use the force
3567 UseTheForceLuke(char *path)
3569 struct afs_stat force;
3571 strcpy(target,path);
3572 strcat(target,"/FORCESALVAGE");
3574 return (afs_stat(target, &force) == 0);
/*
 * Variant for the other branch of the AFS_AIX32_ENV conditional: no
 * FORCESALVAGE file is consulted and the function always returns 0.
 */
3578 * UseTheForceLuke - see if we can use the force
3581 * The VRMIX fsck will not muck with the filesystem it is supposedly
3582 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
3583 * muck directly with the root inode, which is within the normal
3585 * ListViceInodes() has a side effect of setting ForceSalvage if
3586 * it detects a need, based on root inode examination.
3589 UseTheForceLuke(char *path)
3592 return 0; /* sorry OB1 */
3597 /* NT support routines */
3599 static char execpathname[MAX_PATH];
/*
 * nt_SalvagePartition
 *
 * Start a child salvager for one partition on NT.  The path of the running
 * executable is looked up once and cached in execpathname; the child is
 * spawned from it with the saved arguments and a childJob_t carrying the
 * magic number, the job number and the partition name.
 *
 *   partName - partition to salvage, copied into job.cj_part
 *   jobn     - job number, stored in job.cj_number
 *
 * NOTE(review): the failure test compares n with 1023 although the buffer
 * length passed to GetModuleFileName is MAX_PATH - 1 -- confirm which value
 * was intended.  partName is copied with strcpy and no length check is
 * visible.
 */
3601 nt_SalvagePartition(char *partName, int jobn)
3606 if (!*execpathname) {
3607 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
3608 if (!n || n == 1023)
3611 job.cj_magic = SALVAGER_MAGIC;
3612 job.cj_number = jobn;
3613 (void)strcpy(job.cj_part, partName);
3614 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage
 *
 * Child-side counterpart of nt_SalvagePartition(): validate the job block
 * handed over by the parent (its length must equal sizeof(childJob_t) and
 * its magic must be SALVAGER_MAGIC) and open a log file of its own, named
 * from the salvager log path plus a numeric suffix, for writing.
 *
 *   datap - job block received from the parent
 *   len   - length of that block in bytes
 *
 * NOTE(review): the log name is built with sprintf into a buffer of
 * AFSDIR_PATH_MAX bytes and no length check is visible.
 */
3619 nt_SetupPartitionSalvage(void *datap, int len)
3621 childJob_t *jobp = (childJob_t *) datap;
3622 char logname[AFSDIR_PATH_MAX];
3624 if (len != sizeof(childJob_t))
3626 if (jobp->cj_magic != SALVAGER_MAGIC)
3631 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
3633 logFile = afs_fopen(logname, "w");
3641 #endif /* AFS_NT40_ENV */