2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
91 #include <sys/param.h>
95 #endif /* ITIMER_REAL */
101 #include <sys/stat.h>
106 #include <WINNT/afsevent.h>
108 #if defined(AFS_AIX_ENV) || defined(AFS_SUN4_ENV)
109 #define WCOREDUMP(x) (x & 0200)
112 #include <afs/afsint.h>
113 #include <afs/assert.h>
114 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
115 #if defined(AFS_VFSINCL_ENV)
116 #include <sys/vnode.h>
118 #include <sys/fs/ufs_inode.h>
120 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
121 #include <ufs/ufs/dinode.h>
122 #include <ufs/ffs/fs.h>
124 #include <ufs/inode.h>
127 #else /* AFS_VFSINCL_ENV */
129 #include <ufs/inode.h>
130 #else /* AFS_OSF_ENV */
131 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
132 #include <sys/inode.h>
135 #endif /* AFS_VFSINCL_ENV */
136 #endif /* AFS_SGI_ENV */
139 #include <sys/lockf.h>
143 #include <checklist.h>
145 #if defined(AFS_SGI_ENV)
150 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
153 #include <sys/mnttab.h>
154 #include <sys/mntent.h>
159 #endif /* AFS_SGI_ENV */
160 #endif /* AFS_HPUX_ENV */
165 #include <afs/osi_inode.h>
169 #include <afs/afsutil.h>
170 #include <afs/fileutil.h>
171 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
179 #include <afs/afssyscalls.h>
183 #include "partition.h"
184 #include "daemon_com.h"
186 #include "salvsync.h"
187 #include "viceinode.h"
189 #include "volinodes.h" /* header magic number, etc. stuff */
190 #include "vol-salvage.h"
191 #include "vol_internal.h"
197 /*@+fcnmacros +macrofcndecl@*/
200 extern off64_t afs_lseek(int FD, off64_t O, int F);
201 #endif /*S_SPLINT_S */
202 #define afs_lseek(FD, O, F) lseek64(FD, (off64_t) (O), F)
203 #define afs_stat stat64
204 #define afs_fstat fstat64
205 #define afs_open open64
206 #define afs_fopen fopen64
207 #else /* !O_LARGEFILE */
209 extern off_t afs_lseek(int FD, off_t O, int F);
210 #endif /*S_SPLINT_S */
211 #define afs_lseek(FD, O, F) lseek(FD, (off_t) (O), F)
212 #define afs_stat stat
213 #define afs_fstat fstat
214 #define afs_open open
215 #define afs_fopen fopen
216 #endif /* !O_LARGEFILE */
217 /*@=fcnmacros =macrofcndecl@*/
220 extern void *calloc();
222 static char *TimeStamp(time_t clock, int precision);
225 int debug; /* -d flag */
226 extern int Testing; /* -n flag */
227 int ListInodeOption; /* -i flag */
228 int ShowRootFiles; /* -r flag */
229 int RebuildDirs; /* -sal flag */
230 int Parallel = 4; /* -para X flag */
231 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
232 int forceR = 0; /* -b flag */
233 int ShowLog = 0; /* -showlog flag */
234 int ShowSuid = 0; /* -showsuid flag */
235 int ShowMounts = 0; /* -showmounts flag */
236 int orphans = ORPH_IGNORE; /* -orphans option */
241 int useSyslog = 0; /* -syslog flag */
242 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
251 #define MAXPARALLEL 32
253 int OKToZap; /* -o flag */
254 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
255 * in the volume header */
257 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
259 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
261 Device fileSysDevice; /* The device number of the current
262 * partition being salvaged */
266 char *fileSysPath; /* The path of the mounted partition currently
267 * being salvaged, i.e. the directory
268 * containing the volume headers */
270 char *fileSysPathName; /* NT needs this to make name pretty in log. */
271 IHandle_t *VGLinkH; /* Link handle for current volume group. */
272 int VGLinkH_cnt; /* # of references to lnk handle. */
273 struct DiskPartition64 *fileSysPartition; /* Partition being salvaged */
275 char *fileSysDeviceName; /* The block device where the file system
276 * being salvaged was mounted */
277 char *filesysfulldev;
279 int VolumeChanged; /* Set by any routine which would change the volume in
280 * a way which would require a callback to be broken if the
281 * volume was put back on line by an active file server */
283 VolumeDiskData VolInfo; /* A copy of the last good or salvaged volume header dealt with */
285 int nVolumesInInodeFile; /* Number of read-write volumes summarized */
286 int inodeFd; /* File descriptor for inode file */
289 struct VnodeInfo vnodeInfo[nVNODECLASSES];
292 struct VolumeSummary *volumeSummaryp; /* Holds all the volumes in a part */
293 int nVolumes; /* Number of volumes (read-write and read-only)
294 * in volume summary */
300 /* Forward declarations */
301 /*@printflike@*/ void Log(const char *format, ...);
302 /*@printflike@*/ void Abort(const char *format, ...);
303 static int IsVnodeOrphaned(VnodeId vnode);
305 /* Uniquifier stored in the Inode */
310 return (u & 0x3fffff);
312 #if defined(AFS_SGI_EXMAG)
313 return (u & SGI_UNIQMASK);
316 #endif /* AFS_SGI_EXMAG */
321 BadError(register int aerror)
323 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
325 return 0; /* otherwise may be transient, e.g. EMFILE */
330 char *save_args[MAX_ARGS];
332 extern pthread_t main_thread;
333 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
336 /* Get the salvage lock if not already held. Hold until process exits. */
338 ObtainSalvageLock(void)
344 (FD_t)CreateFile(AFSDIR_SERVER_SLVGLOCK_FILEPATH, 0, 0, NULL,
345 OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
346 if (salvageLock == INVALID_FD) {
348 "salvager: There appears to be another salvager running! Aborted.\n");
353 afs_open(AFSDIR_SERVER_SLVGLOCK_FILEPATH, O_CREAT | O_RDWR, 0666);
354 if (salvageLock < 0) {
356 "salvager: can't open salvage lock file %s, aborting\n",
357 AFSDIR_SERVER_SLVGLOCK_FILEPATH);
360 #ifdef AFS_DARWIN_ENV
361 if (flock(salvageLock, LOCK_EX) == -1) {
363 if (lockf(salvageLock, F_LOCK, 0) == -1) {
366 "salvager: There appears to be another salvager running! Aborted.\n");
373 #ifdef AFS_SGI_XFS_IOPS_ENV
374 /* Check if the given partition is mounted. For XFS, the root inode is not a
375 * constant. So we check the hard way.
378 IsPartitionMounted(char *part)
381 struct mntent *mntent;
383 assert(mntfp = setmntent(MOUNTED, "r"));
384 while (mntent = getmntent(mntfp)) {
385 if (!strcmp(part, mntent->mnt_dir))
390 return mntent ? 1 : 1;
393 /* Check if the given inode is the root of the filesystem. */
394 #ifndef AFS_SGI_XFS_IOPS_ENV
396 IsRootInode(struct afs_stat *status)
399 * The root inode is not a fixed value in XFS partitions. So we need to
400 * see if the partition is in the list of mounted partitions. This only
401 * affects the SalvageFileSys path, so we check there.
403 return (status->st_ino == ROOTINODE);
408 #ifndef AFS_NAMEI_ENV
409 /* We don't want to salvage big files filesystems, since we can't put volumes on
413 CheckIfBigFilesFS(char *mountPoint, char *devName)
415 struct superblock fs;
418 if (strncmp(devName, "/dev/", 5)) {
419 (void)sprintf(name, "/dev/%s", devName);
421 (void)strcpy(name, devName);
424 if (ReadSuper(&fs, name) < 0) {
425 Log("Unable to read superblock. Not salvaging partition %s.\n",
429 if (IsBigFilesFileSystem(&fs)) {
430 Log("Partition %s is a big files filesystem, not salvaging.\n",
440 #define HDSTR "\\Device\\Harddisk"
441 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
443 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
448 static int dowarn = 1;
450 if (!QueryDosDevice(p1->devName, res, RES_LEN - 1))
452 if (strncmp(res, HDSTR, HDLEN)) {
455 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
456 res, HDSTR, p1->devName);
460 d1 = atoi(&res[HDLEN]);
462 if (!QueryDosDevice(p2->devName, res, RES_LEN - 1))
464 if (strncmp(res, HDSTR, HDLEN)) {
467 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
468 res, HDSTR, p2->devName);
472 d2 = atoi(&res[HDLEN]);
477 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
480 /* This assumes that two partitions with the same device number divided by
481 * PartsPerDisk are on the same disk.
484 SalvageFileSysParallel(struct DiskPartition64 *partP)
487 struct DiskPartition64 *partP;
488 int pid; /* Pid for this job */
489 int jobnumb; /* Log file job number */
490 struct job *nextjob; /* Next partition on disk to salvage */
492 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
493 struct job *thisjob = 0;
494 static int numjobs = 0;
495 static int jobcount = 0;
501 char logFileName[256];
505 /* We have a partition to salvage. Copy it into thisjob */
506 thisjob = (struct job *)malloc(sizeof(struct job));
508 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
511 memset(thisjob, 0, sizeof(struct job));
512 thisjob->partP = partP;
513 thisjob->jobnumb = jobcount;
515 } else if (jobcount == 0) {
516 /* We are asking to wait for all jobs (partp == 0), yet we never
519 Log("No file system partitions named %s* found; not salvaged\n",
520 VICE_PARTITION_PREFIX);
524 if (debug || Parallel == 1) {
526 SalvageFileSys(thisjob->partP, 0);
533 /* Check to see if thisjob is for a disk that we are already
534 * salvaging. If it is, link it in as the next job to do. The
535 * jobs array has 1 entry per disk being salvaged. numjobs is
536 * the total number of disks currently being salvaged. In
537 * order to keep the jobs array compact, when a disk is
538 * completed, the highest element in the jobs array is moved
539 * down to the now open slot.
541 for (j = 0; j < numjobs; j++) {
542 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
543 /* On same disk, add it to this list and return */
544 thisjob->nextjob = jobs[j]->nextjob;
545 jobs[j]->nextjob = thisjob;
552 /* Loop until we start thisjob or until all existing jobs are finished */
553 while (thisjob || (!partP && (numjobs > 0))) {
554 startjob = -1; /* No new job to start */
556 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
557 /* Either the max jobs are running or we have to wait for all
558 * the jobs to finish. In either case, we wait for at least one
559 * job to finish. When it's done, clean up after it.
561 pid = wait(&wstatus);
563 for (j = 0; j < numjobs; j++) { /* Find which job it is */
564 if (pid == jobs[j]->pid)
568 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
569 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
572 numjobs--; /* job no longer running */
573 oldjob = jobs[j]; /* remember */
574 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
575 free(oldjob); /* free the old job */
577 /* If there is another partition on the disk to salvage, then
578 * say we will start it (startjob). If not, then put thisjob there
579 * and say we will start it.
581 if (jobs[j]) { /* Another partitions to salvage */
582 startjob = j; /* Will start it */
583 } else { /* There is not another partition to salvage */
585 jobs[j] = thisjob; /* Add thisjob */
587 startjob = j; /* Will start it */
589 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
590 startjob = -1; /* Don't start it - already running */
594 /* We don't have to wait for a job to complete */
596 jobs[numjobs] = thisjob; /* Add this job */
598 startjob = numjobs; /* Will start it */
602 /* Start up a new salvage job on a partition in job slot "startjob" */
603 if (startjob != -1) {
605 Log("Starting salvage of file system partition %s\n",
606 jobs[startjob]->partP->name);
608 /* For NT, we not only fork, but re-exec the salvager. Pass in the
609 * commands and pass the child job number via the data path.
612 nt_SalvagePartition(jobs[startjob]->partP->name,
613 jobs[startjob]->jobnumb);
614 jobs[startjob]->pid = pid;
619 jobs[startjob]->pid = pid;
625 for (fd = 0; fd < 16; fd++)
632 openlog("salvager", LOG_PID, useSyslogFacility);
636 (void)afs_snprintf(logFileName, sizeof logFileName,
638 AFSDIR_SERVER_SLVGLOG_FILEPATH,
639 jobs[startjob]->jobnumb);
640 logFile = afs_fopen(logFileName, "w");
645 SalvageFileSys1(jobs[startjob]->partP, 0);
650 } /* while ( thisjob || (!partP && numjobs > 0) ) */
652 /* If waited for all jobs to complete, now collect log files and return */
654 if (!useSyslog) /* if syslogging - no need to collect */
657 for (i = 0; i < jobcount; i++) {
658 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
659 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
660 if ((passLog = afs_fopen(logFileName, "r"))) {
661 while (fgets(buf, sizeof(buf), passLog)) {
666 (void)unlink(logFileName);
675 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
677 if (!canfork || debug || Fork() == 0) {
678 SalvageFileSys1(partP, singleVolumeNumber);
679 if (canfork && !debug) {
684 Wait("SalvageFileSys");
688 get_DevName(char *pbuffer, char *wpath)
690 char pbuf[128], *ptr;
691 strcpy(pbuf, pbuffer);
692 ptr = (char *)strrchr(pbuf, '/');
698 ptr = (char *)strrchr(pbuffer, '/');
700 strcpy(pbuffer, ptr + 1);
707 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
710 char inodeListPath[256];
711 static char tmpDevName[100];
712 static char wpath[100];
713 struct VolumeSummary *vsp, *esp;
716 fileSysPartition = partP;
717 fileSysDevice = fileSysPartition->device;
718 fileSysPathName = VPartitionPath(fileSysPartition);
721 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
722 (void)sprintf(fileSysPath, "%s\\", fileSysPathName);
723 name = partP->devName;
725 fileSysPath = fileSysPathName;
726 strcpy(tmpDevName, partP->devName);
727 name = get_DevName(tmpDevName, wpath);
728 fileSysDeviceName = name;
729 filesysfulldev = wpath;
732 VLockPartition(partP->name);
733 if (singleVolumeNumber || ForceSalvage)
736 ForceSalvage = UseTheForceLuke(fileSysPath);
738 if (singleVolumeNumber) {
739 /* salvageserver already setup fssync conn for us */
740 if ((programType != salvageServer) && !VConnectFS()) {
741 Abort("Couldn't connect to file server\n");
743 AskOffline(singleVolumeNumber, partP->name);
746 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
747 partP->name, name, (Testing ? "(READONLY mode)" : ""));
749 Log("***Forced salvage of all volumes on this partition***\n");
754 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
761 assert((dirp = opendir(fileSysPath)) != NULL);
762 while ((dp = readdir(dirp))) {
763 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
764 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
766 Log("Removing old salvager temp files %s\n", dp->d_name);
767 strcpy(npath, fileSysPath);
769 strcat(npath, dp->d_name);
775 tdir = (tmpdir ? tmpdir : fileSysPath);
777 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
778 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
780 snprintf(inodeListPath, 255, "%s/salvage.inodes.%s.%d", tdir, name,
783 if (GetInodeSummary(inodeListPath, singleVolumeNumber) < 0) {
784 unlink(inodeListPath);
788 /* Using nt_unlink here since we're really using the delete on close
789 * semantics of unlink. In most places in the salvager, we really do
790 * mean to unlink the file at that point. Those places have been
791 * modified to actually do that so that the NT crt can be used there.
794 _open_osfhandle((intptr_t)nt_open(inodeListPath, O_RDWR, 0), O_RDWR);
795 nt_unlink(inodeListPath); /* NT's crt unlink won't if file is open. */
797 inodeFd = afs_open(inodeListPath, O_RDONLY);
798 unlink(inodeListPath);
801 Abort("Temporary file %s is missing...\n", inodeListPath);
802 if (ListInodeOption) {
806 /* enumerate volumes in the partition.
807 * figure out sets of read-only + rw volumes.
808 * salvage each set, read-only volumes first, then read-write.
809 * Fix up inodes on last volume in set (whether it is read-write
812 GetVolumeSummary(singleVolumeNumber);
814 for (i = j = 0, vsp = volumeSummaryp, esp = vsp + nVolumes;
815 i < nVolumesInInodeFile; i = j) {
816 VolumeId rwvid = inodeSummary[i].RWvolumeId;
818 j < nVolumesInInodeFile && inodeSummary[j].RWvolumeId == rwvid;
820 VolumeId vid = inodeSummary[j].volumeId;
821 struct VolumeSummary *tsp;
822 /* Scan volume list (from partition root directory) looking for the
823 * current rw volume number in the volume list from the inode scan.
824 * If there is one here that is not in the inode volume list,
826 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
828 DeleteExtraVolumeHeaderFile(vsp);
830 /* Now match up the volume summary info from the root directory with the
831 * entry in the volume list obtained from scanning inodes */
832 inodeSummary[j].volSummary = NULL;
833 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
834 if (tsp->header.id == vid) {
835 inodeSummary[j].volSummary = tsp;
841 /* Salvage the group of volumes (several read-only + 1 read/write)
842 * starting with the current read-only volume we're looking at.
844 SalvageVolumeGroup(&inodeSummary[i], j - i);
847 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
848 for (; vsp < esp; vsp++) {
850 DeleteExtraVolumeHeaderFile(vsp);
853 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
854 RemoveTheForce(fileSysPath);
856 if (!Testing && singleVolumeNumber) {
857 AskOnline(singleVolumeNumber, fileSysPartition->name);
859 /* Step through the volumeSummary list and set all volumes on-line.
860 * The volumes were taken off-line in GetVolumeSummary.
862 for (j = 0; j < nVolumes; j++) {
863 AskOnline(volumeSummaryp[j].header.id, fileSysPartition->name);
867 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
868 fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
871 close(inodeFd); /* SalvageVolumeGroup was the last which needed it. */
875 DeleteExtraVolumeHeaderFile(register struct VolumeSummary *vsp)
878 sprintf(path, "%s/%s", fileSysPath, vsp->fileName);
881 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
884 Log("Unable to unlink %s (errno = %d)\n", path, errno);
891 CompareInodes(const void *_p1, const void *_p2)
893 register const struct ViceInodeInfo *p1 = _p1;
894 register const struct ViceInodeInfo *p2 = _p2;
895 if (p1->u.vnode.vnodeNumber == INODESPECIAL
896 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
897 VolumeId p1rwid, p2rwid;
899 (p1->u.vnode.vnodeNumber ==
900 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
902 (p2->u.vnode.vnodeNumber ==
903 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
908 if (p1->u.vnode.vnodeNumber == INODESPECIAL
909 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
910 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
911 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
912 if (p1->u.vnode.volumeId == p1rwid)
914 if (p2->u.vnode.volumeId == p2rwid)
916 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
918 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
919 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
920 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
922 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
924 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
926 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
928 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
930 /* The following tests are reversed, so that the most desirable
931 * of several similar inodes comes first */
932 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
934 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
935 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
939 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
940 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
945 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
947 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
948 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
952 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
953 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
958 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
960 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
961 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
965 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
966 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
971 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
973 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
974 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
978 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
979 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
988 CountVolumeInodes(register struct ViceInodeInfo *ip, int maxInodes,
989 register struct InodeSummary *summary)
991 VolumeId volume = ip->u.vnode.volumeId;
992 VolumeId rwvolume = volume;
993 register int n, nSpecial;
994 register Unique maxunique;
997 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
999 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1001 rwvolume = ip->u.special.parentId;
1002 /* This isn't quite right, as there could (in error) be different
1003 * parent inodes in different special vnodes */
1005 if (maxunique < ip->u.vnode.vnodeUniquifier)
1006 maxunique = ip->u.vnode.vnodeUniquifier;
1010 summary->volumeId = volume;
1011 summary->RWvolumeId = rwvolume;
1012 summary->nInodes = n;
1013 summary->nSpecialInodes = nSpecial;
1014 summary->maxUniquifier = maxunique;
1018 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1020 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1021 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1022 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1027 * Collect list of inodes in file named by path. If a truly fatal error,
1028 * unlink the file and abort. For lesser errors, return -1. The file will
1029 * be unlinked by the caller.
1032 GetInodeSummary(char *path, VolumeId singleVolumeNumber)
1034 struct afs_stat status;
1036 struct ViceInodeInfo *ip;
1037 struct InodeSummary summary;
1038 char summaryFileName[50];
1041 char *dev = fileSysPath;
1042 char *wpath = fileSysPath;
1044 char *dev = fileSysDeviceName;
1045 char *wpath = filesysfulldev;
1047 char *part = fileSysPath;
1050 /* This file used to come from vfsck; cobble it up ourselves now... */
1052 ListViceInodes(dev, fileSysPath, path,
1053 singleVolumeNumber ? OnlyOneVolume : 0,
1054 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1056 Log("*** I/O error %d when writing a tmp inode file %s; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, path, dev);
1060 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1062 if (forceSal && !ForceSalvage) {
1063 Log("***Forced salvage of all volumes on this partition***\n");
1066 inodeFd = afs_open(path, O_RDWR);
1067 if (inodeFd == -1 || afs_fstat(inodeFd, &status) == -1) {
1069 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1071 tdir = (tmpdir ? tmpdir : part);
1073 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1074 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp"));
1076 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1077 "%s/salvage.temp.%d", tdir, getpid());
1079 summaryFile = afs_fopen(summaryFileName, "a+");
1080 if (summaryFile == NULL) {
1083 Abort("Unable to create inode summary file\n");
1085 if (!canfork || debug || Fork() == 0) {
1087 unsigned long st_size=(unsigned long) status.st_size;
1088 nInodes = st_size / sizeof(struct ViceInodeInfo);
1090 fclose(summaryFile);
1092 unlink(summaryFileName);
1093 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1094 RemoveTheForce(fileSysPath);
1096 struct VolumeSummary *vsp;
1099 GetVolumeSummary(singleVolumeNumber);
1101 for (i = 0, vsp = volumeSummaryp; i < nVolumes; i++) {
1103 DeleteExtraVolumeHeaderFile(vsp);
1106 Log("%s vice inodes on %s; not salvaged\n",
1107 singleVolumeNumber ? "No applicable" : "No", dev);
1110 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1112 fclose(summaryFile);
1115 unlink(summaryFileName);
1117 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1120 if (read(inodeFd, ip, st_size) != st_size) {
1121 fclose(summaryFile);
1124 unlink(summaryFileName);
1125 Abort("Unable to read inode table; %s not salvaged\n", dev);
1127 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1128 if (afs_lseek(inodeFd, 0, SEEK_SET) == -1
1129 || write(inodeFd, ip, st_size) != st_size) {
1130 fclose(summaryFile);
1133 unlink(summaryFileName);
1134 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1138 CountVolumeInodes(ip, nInodes, &summary);
1139 if (fwrite(&summary, sizeof(summary), 1, summaryFile) != 1) {
1140 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1141 fclose(summaryFile);
1145 summary.index += (summary.nInodes);
1146 nInodes -= summary.nInodes;
1147 ip += summary.nInodes;
1149 /* The following fflush is not an fclose because, if it were, debug mode would not work */
1150 if (fflush(summaryFile) == EOF || fsync(fileno(summaryFile)) == -1) {
1151 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1152 fclose(summaryFile);
1156 if (canfork && !debug) {
1161 if (Wait("Inode summary") == -1) {
1162 fclose(summaryFile);
1165 unlink(summaryFileName);
1166 Exit(1); /* salvage of this partition aborted */
1169 assert(afs_fstat(fileno(summaryFile), &status) != -1);
1170 if (status.st_size != 0) {
1172 unsigned long st_status=(unsigned long)status.st_size;
1173 inodeSummary = (struct InodeSummary *)malloc(st_status);
1174 assert(inodeSummary != NULL);
1175 /* For GNU we need to do lseek to get the file pointer moved. */
1176 assert(afs_lseek(fileno(summaryFile), 0, SEEK_SET) == 0);
1177 ret = read(fileno(summaryFile), inodeSummary, st_status);
1178 assert(ret == st_status);
1180 nVolumesInInodeFile =(unsigned long)(status.st_size) / sizeof(struct InodeSummary);
1181 Log("%d nVolumesInInodeFile %d \n",nVolumesInInodeFile,(unsigned long)(status.st_size));
1182 fclose(summaryFile);
1184 unlink(summaryFileName);
1188 /* Comparison routine for volume sort.
1189 This is set up so that a read-write volume comes immediately before
1190 any read-only clones of that volume */
1192 CompareVolumes(const void *_p1, const void *_p2)
1194 register const struct VolumeSummary *p1 = _p1;
1195 register const struct VolumeSummary *p2 = _p2;
1196 if (p1->header.parent != p2->header.parent)
1197 return p1->header.parent < p2->header.parent ? -1 : 1;
1198 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1200 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1202 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1206 GetVolumeSummary(VolumeId singleVolumeNumber)
1209 afs_int32 nvols = 0;
1210 struct VolumeSummary *vsp, vs;
1211 struct VolumeDiskHeader diskHeader;
1214 /* Get headers from volume directory */
1215 dirp = opendir(fileSysPath);
1217 Abort("Can't read directory %s; not salvaged\n", fileSysPath);
1218 if (!singleVolumeNumber) {
1219 while ((dp = readdir(dirp))) {
1220 char *p = dp->d_name;
1221 p = strrchr(dp->d_name, '.');
1222 if (p != NULL && strcmp(p, VHDREXT) == 0) {
1225 sprintf(name, "%s/%s", fileSysPath, dp->d_name);
1226 if ((fd = afs_open(name, O_RDONLY)) != -1
1227 && read(fd, (char *)&diskHeader, sizeof(diskHeader))
1228 == sizeof(diskHeader)
1229 && diskHeader.stamp.magic == VOLUMEHEADERMAGIC) {
1230 DiskToVolumeHeader(&vs.header, &diskHeader);
1238 dirp = opendir("."); /* No rewinddir for NT */
1245 (struct VolumeSummary *)malloc(nvols *
1246 sizeof(struct VolumeSummary));
1249 (struct VolumeSummary *)malloc(20 * sizeof(struct VolumeSummary));
1250 assert(volumeSummaryp != NULL);
1253 vsp = volumeSummaryp;
1254 while ((dp = readdir(dirp))) {
1255 char *p = dp->d_name;
1256 p = strrchr(dp->d_name, '.');
1257 if (p != NULL && strcmp(p, VHDREXT) == 0) {
1261 sprintf(name, "%s/%s", fileSysPath, dp->d_name);
1262 if ((fd = afs_open(name, O_RDONLY)) == -1
1263 || read(fd, &diskHeader, sizeof(diskHeader))
1264 != sizeof(diskHeader)
1265 || diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
1270 if (!singleVolumeNumber) {
1272 Log("%s/%s is not a legitimate volume header file; %sdeleted\n", fileSysPathName, dp->d_name, (Testing ? "it would have been " : ""));
1277 char nameShouldBe[64];
1278 DiskToVolumeHeader(&vsp->header, &diskHeader);
1279 if (singleVolumeNumber && vsp->header.id == singleVolumeNumber
1280 && vsp->header.parent != singleVolumeNumber) {
1281 if (programType == salvageServer) {
1282 #ifdef SALVSYNC_BUILD_CLIENT
1283 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1284 vsp->header.id, vsp->header.parent);
1285 if (SALVSYNC_LinkVolume(vsp->header.parent,
1287 fileSysPartition->name,
1289 Log("schedule request failed\n");
1292 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1294 Log("%u is a read-only volume; not salvaged\n",
1295 singleVolumeNumber);
1299 if (!singleVolumeNumber
1300 || (vsp->header.id == singleVolumeNumber
1301 || vsp->header.parent == singleVolumeNumber)) {
1302 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1303 VFORMAT, afs_printable_uint32_lu(vsp->header.id));
1304 if (singleVolumeNumber
1305 && vsp->header.id != singleVolumeNumber)
1306 AskOffline(vsp->header.id, fileSysPartition->name);
1307 if (strcmp(nameShouldBe, dp->d_name)) {
1309 Log("Volume header file %s is incorrectly named; %sdeleted (it will be recreated later, if necessary)\n", dp->d_name, (Testing ? "it would have been " : ""));
1313 vsp->fileName = ToString(dp->d_name);
1323 qsort(volumeSummaryp, nVolumes, sizeof(struct VolumeSummary),
/*
 * FindLinkHandle -- locate the link table inode of a volume group.
 *
 * isp        array of nVols inode summaries.
 * nVols      number of entries in isp.
 * allInodes  base pointer; summary i's inodes start at
 *            allInodes + isp[i].index.
 *
 * Walks the first nSpecialInodes entries of each summary and returns the
 * inodeNumber of the first entry whose special type is VI_LINKTABLE.
 * NOTE(review): the value returned when no link table is found is not
 * visible in this excerpt; the caller tests the result with VALID_INO().
 */
1327 /* Find the link table. This should be associated with the RW volume or, if
1328 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
1331 FindLinkHandle(register struct InodeSummary *isp, int nVols,
1332 struct ViceInodeInfo *allInodes)
1335 struct ViceInodeInfo *ip;
1337 for (i = 0; i < nVols; i++) {
1338 ip = allInodes + isp[i].index;
1339 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1340 if (ip[j].u.special.type == VI_LINKTABLE)
1341 return ip[j].inodeNumber;
/*
 * CreateLinkTable -- create and initialize the link table of a volume group.
 *
 * isp  inode summary; supplies volumeId/RWvolumeId and, when present, the
 *      volume summary whose header.linkTable is updated.
 * ino  inode to use for the link table; when it fails VALID_INO() a new
 *      INODESPECIAL/VI_LINKTABLE inode is requested through IH_CREATE.
 *
 * Points VGLinkH at the inode, truncates the file to
 * sizeof(struct versionStamp) + sizeof(short), and writes a version stamp
 * carrying LINKTABLEMAGIC/LINKTABLEVERSION.  Every failure is fatal
 * (Abort).  The message for a failed FDH_WRITE used to repeat the
 * "truncate" text of the preceding check; it now says "write" so the log
 * names the call that actually failed.
 */
1348 CreateLinkTable(register struct InodeSummary *isp, Inode ino)
1350 struct versionStamp version;
1353 if (!VALID_INO(ino))
1355 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
1356 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1357 if (!VALID_INO(ino))
1359 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1360 isp->RWvolumeId, errno);
1361 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1362 fdP = IH_OPEN(VGLinkH);
1364 Abort("Can't open link table for volume %u (error = %d)\n",
1365 isp->RWvolumeId, errno);
1367 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1368 Abort("Can't truncate link table for volume %u (error = %d)\n",
1369 isp->RWvolumeId, errno);
1371 version.magic = LINKTABLEMAGIC;
1372 version.version = LINKTABLEVERSION;
1374 if (FDH_WRITE(fdP, (char *)&version, sizeof(version))
1376 Abort("Can't write link table for volume %u (error = %d)\n",
1377 isp->RWvolumeId, errno);
1379 FDH_REALLYCLOSE(fdP);
1381 /* If the volume summary exists (i.e., the V*.vol header file exists),
1382 * then set this inode there as well.
1384 if (isp->volSummary)
1385 isp->volSummary->header.linkTable = ino;
/* Thread body handed to pthread_create() by SalvageVolumeGroup: arg is the
 * SVGParms_t prepared by the caller; unpack it and run the real salvage. */
1394 SVGParms_t *parms = (SVGParms_t *) arg;
1395 DoSalvageVolumeGroup(parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * SalvageVolumeGroup -- salvage the nVols volumes summarized at isp.
 *
 * isp    inode summaries of the volume group.
 * nVols  number of summaries in isp.
 *
 * Clears VolInfo, then (in the code visible here, which is closed by the
 * AFS_NT40_ENV #endif) packs isp/nVols into parms, starts nt_SVG on a
 * thread created with the PTHREAD_CREATE_JOINABLE attribute and joins it,
 * so the call does not return before the salvage thread has finished.
 * Failures of the pthread calls are logged.
 * NOTE(review): what happens after a logged failure, and the code path
 * used outside AFS_NT40_ENV, are not visible in this excerpt.
 */
1400 SalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1403 pthread_attr_t tattr;
1407 /* Initialize per volume global variables, even if later code does so */
1411 memset(&VolInfo, 0, sizeof(VolInfo));
1413 parms.svgp_inodeSummaryp = isp;
1414 parms.svgp_count = nVols;
1415 code = pthread_attr_init(&tattr);
1417 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1421 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1423 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1426 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1428 Log("Failed to create thread to salvage volume group %u\n",
1432 (void)pthread_join(tid, NULL);
1434 #endif /* AFS_NT40_ENV */
/*
 * DoSalvageVolumeGroup -- salvage one volume group.
 *
 * isp    inode summaries of the group; isp[0] counts as the read/write
 *        volume when its volumeId equals its RWvolumeId and it has special
 *        inodes (haveRWvolume).
 * nVols  number of summaries in isp.
 *
 * Steps visible in this excerpt, in order:
 *  - when ForceSalvage is off, QuickCheck() is consulted first;
 *  - if canfork && !debug the process Fork()s and the parent Wait()s;
 *  - the group's ViceInodeInfo records (sum of isp[i].nInodes) are read
 *    from inodeFd into a malloc'ed array; allInodes is that array biased
 *    by -isp->index so that allInodes + isp[i].index addresses summary i;
 *  - under AFS_NAMEI_ENV the link table is located with FindLinkHandle();
 *    if it is missing or cannot be opened this is logged, CreateLinkTable()
 *    is called and every non-special inode gets link count 1 (the
 *    Testing-mode branch is only partly visible);
 *  - volumes are processed from index nVols - 1 down to salvageTo (0 with
 *    a RW volume, otherwise 1), each with a check == 1 pass followed by a
 *    check == 0 pass through SalvageVolumeHeaderFile() and SalvageVnodes();
 *    failures go to MaybeZapVolume();
 *  - remaining ip->linkCount values are applied with IH_DEC / IH_INC, the
 *    link table inode itself being adjusted last via dec_VGLinkH;
 *  - SalvageVolume(isp, VGLinkH) does the directory consistency checks and
 *    VGLinkH is released.
 *
 * NOTE(review): no NULL check of the malloc result is visible, and the
 * read() of the inode records sits inside assert(), so it would not run if
 * assert is compiled out -- confirm how assert is defined in this build.
 */
1437 DoSalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1439 struct ViceInodeInfo *inodes, *allInodes, *ip;
1440 int i, totalInodes, size, salvageTo;
1444 int dec_VGLinkH = 0;
1446 FdHandle_t *fdP = NULL;
1449 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1450 && isp->nSpecialInodes > 0);
1451 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1452 if (!ForceSalvage && QuickCheck(isp, nVols))
1455 if (ShowMounts && !haveRWvolume)
1457 if (canfork && !debug && Fork() != 0) {
1458 (void)Wait("Salvage volume group");
1461 for (i = 0, totalInodes = 0; i < nVols; i++)
1462 totalInodes += isp[i].nInodes;
1463 size = totalInodes * sizeof(struct ViceInodeInfo);
1464 inodes = (struct ViceInodeInfo *)malloc(size);
1465 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1466 * for the partition, if all the inodes
1467 * had been read into memory */
1469 (inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1471 assert(read(inodeFd, inodes, size) == size);
1473 /* Don't try to salvage a read write volume if there isn't one on this
1475 salvageTo = haveRWvolume ? 0 : 1;
1477 #ifdef AFS_NAMEI_ENV
1478 ino = FindLinkHandle(isp, nVols, allInodes);
1479 if (VALID_INO(ino)) {
1480 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1481 fdP = IH_OPEN(VGLinkH);
1483 if (!VALID_INO(ino) || fdP == NULL) {
1484 Log("%s link table for volume %u.\n",
1485 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1487 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1490 struct ViceInodeInfo *ip;
1491 CreateLinkTable(isp, ino);
1492 fdP = IH_OPEN(VGLinkH);
1493 /* Sync fake 1 link counts to the link table, now that it exists */
1495 for (i = 0; i < nVols; i++) {
1496 ip = allInodes + isp[i].index;
1497 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1499 nt_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1501 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1509 FDH_REALLYCLOSE(fdP);
1511 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1514 /* Salvage in reverse order--read/write volume last; this way any
1515 * Inodes not referenced by the time we salvage the read/write volume
1516 * can be picked up by the read/write volume */
1517 /* ACTUALLY, that's not done right now--the inodes just vanish */
1518 for (i = nVols - 1; i >= salvageTo; i--) {
1520 struct InodeSummary *lisp = &isp[i];
1521 #ifdef AFS_NAMEI_ENV
1522 /* If only the RO is present on this partition, the link table
1523 * shows up as a RW volume special file. Need to make sure the
1524 * salvager doesn't try to salvage the non-existent RW.
1526 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1527 /* If this only special inode is the link table, continue */
1528 if (inodes->u.special.type == VI_LINKTABLE) {
1535 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1536 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1537 /* Check inodes twice. The second time do things seriously. This
1538 * way the whole RO volume can be deleted, below, if anything goes wrong */
1539 for (check = 1; check >= 0; check--) {
1541 if (SalvageVolumeHeaderFile(lisp, allInodes, rw, check, &deleteMe)
1543 MaybeZapVolume(lisp, "Volume header", deleteMe, check);
1544 if (rw && deleteMe) {
1545 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1546 * volume won't be called */
1552 if (rw && check == 1)
1554 if (SalvageVnodes(isp, lisp, allInodes, check) == -1) {
1555 MaybeZapVolume(lisp, "Vnode index", 0, check);
1561 /* Fix actual inode counts */
1563 Log("totalInodes %d\n",totalInodes);
1564 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1565 static int TraceBadLinkCounts = 0;
1566 #ifdef AFS_NAMEI_ENV
1567 if (VGLinkH->ih_ino == ip->inodeNumber) {
1568 dec_VGLinkH = ip->linkCount - VGLinkH_cnt;
1569 VGLinkH_p1 = ip->u.param[0];
1570 continue; /* Deal with this last. */
1573 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1574 TraceBadLinkCounts--; /* Limit reports, per volume */
1575 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(NULL, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
1577 while (ip->linkCount > 0) {
1578 /* below used to assert, not break */
1580 if (IH_DEC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1581 Log("idec failed. inode %s errno %d\n",
1582 PrintInode(NULL, ip->inodeNumber), errno);
1588 while (ip->linkCount < 0) {
1589 /* these used to be asserts */
1591 if (IH_INC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1592 Log("iinc failed. inode %s errno %d\n",
1593 PrintInode(NULL, ip->inodeNumber), errno);
1600 #ifdef AFS_NAMEI_ENV
1601 while (dec_VGLinkH > 0) {
1602 if (IH_DEC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1603 Log("idec failed on link table, errno = %d\n", errno);
1607 while (dec_VGLinkH < 0) {
1608 if (IH_INC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1609 Log("iinc failed on link table, errno = %d\n", errno);
1616 /* Directory consistency checks on the rw volume */
1618 SalvageVolume(isp, VGLinkH);
1619 IH_RELEASE(VGLinkH);
1621 if (canfork && !debug) {
/*
 * QuickCheck -- header-only pre-check that DoSalvageVolumeGroup consults
 * when ForceSalvage is off.
 *
 * isp    array of nVols inode summaries for the group.
 * nVols  number of entries in isp.
 *
 * For each summary the volume info is read through an inode handle built
 * from the volume summary header (parent, volumeInfo).  The visible test
 * requires VOLUMEINFOMAGIC, dontSalvage == DONT_SALVAGE,
 * needsSalvaged == 0 and destroyMe == 0.  A header passing that test that
 * is still marked inUse gets inUse cleared and inService set, and is
 * written back with IH_IWRITE.
 * NOTE(review): the return statements are not visible in this excerpt; the
 * caller treats the result as a boolean.
 */
1628 QuickCheck(register struct InodeSummary *isp, int nVols)
1630 /* Check headers BEFORE forking */
1634 for (i = 0; i < nVols; i++) {
1635 struct VolumeSummary *vs = isp[i].volSummary;
1636 VolumeDiskData volHeader;
1638 /* Don't salvage just because phantom rw volume is there... */
1639 /* (If a read-only volume exists, read/write inodes must also exist) */
1640 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
1644 IH_INIT(h, fileSysDevice, vs->header.parent, vs->header.volumeInfo);
1645 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
1646 == sizeof(volHeader)
1647 && volHeader.stamp.magic == VOLUMEINFOMAGIC
1648 && volHeader.dontSalvage == DONT_SALVAGE
1649 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
1650 if (volHeader.inUse != 0) {
1651 volHeader.inUse = 0;
1652 volHeader.inService = 1;
1654 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
1655 != sizeof(volHeader)) {
/*
 * Parameters and behaviour of SalvageVolumeHeaderFile, as far as this
 * excerpt shows:
 *
 * isp       summary of the volume whose header is salvaged.
 * inodes    inode array; the volume's special inodes are
 *           inodes[isp->index] .. inodes[isp->index + nSpecialInodes - 1].
 * RW        when zero (and check is zero, and a volume summary exists)
 *           ClearROInUseBit() is applied to the summary.
 * check     non-zero appears to select the report-only pass: link counts
 *           are decremented only when it is zero, and the damaged-header
 *           message adds "; repairing" only when it is zero.
 * deleteMe  forwarded to SalvageHeader().
 *
 * A fresh tempHeader is built from isp and the special inodes (adjacent
 * special inodes of the same type are reported as duplicates, and types
 * outside 1..MAXINODETYPE as rubbish).  Each stuff[] entry is passed to
 * SalvageHeader().  When isp->volSummary is NULL a header file named by
 * VFORMAT is created under fileSysPath; otherwise the existing summary
 * header is compared with tempHeader and the file reopened for rewriting.
 * The header is written as a VolumeDiskHeader and volumeInfoHandle is
 * initialised last.
 *
 * NOTE(review): the malloc() result for the new VolumeSummary is
 * dereferenced with no visible NULL check, and strcpy() copies fileName
 * into the 64-byte headerName with no visible length check.
 */
1671 /* SalvageVolumeHeaderFile
1673 * Salvage the top level V*.vol header file. Make sure the special files
1674 * exist and that there are no duplicates.
1676 * Calls SalvageHeader for each possible type of volume special file.
1680 SalvageVolumeHeaderFile(register struct InodeSummary *isp,
1681 register struct ViceInodeInfo *inodes, int RW,
1682 int check, int *deleteMe)
1686 register struct ViceInodeInfo *ip;
1687 int allinodesobsolete = 1;
1688 struct VolumeDiskHeader diskHeader;
1692 memset(&tempHeader, 0, sizeof(tempHeader));
1693 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
1694 tempHeader.stamp.version = VOLUMEHEADERVERSION;
1695 tempHeader.id = isp->volumeId;
1696 tempHeader.parent = isp->RWvolumeId;
1697 /* Check for duplicates (inodes are sorted by type field) */
1698 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
1699 ip = &inodes[isp->index + i];
1700 if (ip->u.special.type == (ip + 1)->u.special.type) {
1702 Log("Duplicate special inodes in volume header; salvage of volume %u aborted\n", isp->volumeId);
1706 for (i = 0; i < isp->nSpecialInodes; i++) {
1707 ip = &inodes[isp->index + i];
1708 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
1710 Log("Rubbish header inode\n");
1713 Log("Rubbish header inode; deleted\n");
1714 } else if (!stuff[ip->u.special.type - 1].obsolete) {
1715 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
1716 if (!check && ip->u.special.type != VI_LINKTABLE)
1717 ip->linkCount--; /* Keep the inode around */
1718 allinodesobsolete = 0;
1722 if (allinodesobsolete) {
1729 VGLinkH_cnt++; /* one for every header. */
1731 if (!RW && !check && isp->volSummary) {
1732 ClearROInUseBit(isp->volSummary);
1736 for (i = 0; i < MAXINODETYPE; i++) {
1737 if (stuff[i].inodeType == VI_LINKTABLE) {
1738 /* Gross hack: SalvageHeader does a bcmp on the volume header.
1739 * And we may have recreated the link table earlier, so set the
1740 * RW header as well.
1742 if (VALID_INO(VGLinkH->ih_ino)) {
1743 *stuff[i].inode = VGLinkH->ih_ino;
1747 if (SalvageHeader(&stuff[i], isp, check, deleteMe) == -1 && check)
1751 if (isp->volSummary == NULL) {
1753 char headerName[64];
1754 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
1755 (void)afs_snprintf(path, sizeof path, "%s/%s", fileSysPath, headerName);
1757 Log("No header file for volume %u\n", isp->volumeId);
1761 Log("No header file for volume %u; %screating %s\n",
1762 isp->volumeId, (Testing ? "it would have been " : ""),
1764 headerFd = afs_open(path, O_RDWR | O_CREAT | O_TRUNC, 0644);
1765 assert(headerFd != -1);
1766 isp->volSummary = (struct VolumeSummary *)
1767 malloc(sizeof(struct VolumeSummary));
1768 isp->volSummary->fileName = ToString(headerName);
1771 char headerName[64];
1772 /* hack: these two fields are obsolete... */
1773 isp->volSummary->header.volumeAcl = 0;
1774 isp->volSummary->header.volumeMountTable = 0;
1777 (&isp->volSummary->header, &tempHeader,
1778 sizeof(struct VolumeHeader))) {
1779 /* We often remove the name before calling us, so we make a fake one up */
1780 if (isp->volSummary->fileName) {
1781 strcpy(headerName, isp->volSummary->fileName);
1783 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
1784 isp->volSummary->fileName = ToString(headerName);
1786 (void)afs_snprintf(path, sizeof path, "%s/%s", fileSysPath, headerName);
1788 Log("Header file %s is damaged or no longer valid%s\n", path,
1789 (check ? "" : "; repairing"));
1793 headerFd = afs_open(path, O_RDWR | O_TRUNC, 0644);
1794 assert(headerFd != -1);
1798 memcpy(&isp->volSummary->header, &tempHeader,
1799 sizeof(struct VolumeHeader));
1802 Log("It would have written a new header file for volume %u\n",
1805 VolumeHeaderToDisk(&diskHeader, &tempHeader);
1806 if (write(headerFd, &diskHeader, sizeof(struct VolumeDiskHeader))
1807 != sizeof(struct VolumeDiskHeader)) {
1808 Log("Couldn't rewrite volume header file!\n");
1815 IH_INIT(isp->volSummary->volumeInfoHandle, fileSysDevice, isp->RWvolumeId,
1816 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader -- verify, and if necessary recreate, one special file of
 * a volume header.
 *
 * sp     describes the special file: inodeType, description, size, stamp,
 *        and a pointer (sp->inode) to the slot holding its inode number.
 * isp    summary of the owning volume.
 * check  pass selector supplied by SalvageVolumeHeaderFile().
 * (Any further parameter is on a line not visible in this excerpt.)
 *
 * Visible behaviour:
 *  - without AFS_NAMEI_ENV, VI_LINKTABLE entries are special-cased (the
 *    action itself is not visible);
 *  - a zero inode slot is reported and a new INODESPECIAL inode of
 *    sp->inodeType is requested with IH_CREATE;
 *  - the inode is opened; if that fails with OKToZap set and
 *    BadError(errno) true the volume is reported as being destroyed,
 *    otherwise an open failure Abort()s;
 *  - sp->size bytes are read and header.fileHeader.magic is compared with
 *    sp->stamp.magic; a mismatch is logged as corruption;
 *  - a VI_VOLINFO file whose destroyMe equals DESTROY_ME is special-cased;
 *  - when recreate is set and Testing is off the file is truncated to 0.
 *    For VI_VOLINFO a zeroed volumeInfo is filled in (name
 *    "bogus.<volumeId>", inService = 0, blessed = 0, uniquifier =
 *    maxUniquifier + 1 + 1000, type chosen by volumeId == RWvolumeId,
 *    creationDate = current time) and written; for other types only
 *    sp->stamp is written;
 *  - for VI_VOLINFO the header is copied into VolInfo and its update (or
 *    creation) time is logged.
 */
1821 SalvageHeader(register struct stuff *sp, struct InodeSummary *isp, int check,
1825 VolumeDiskData volumeInfo;
1826 struct versionStamp fileHeader;
1835 #ifndef AFS_NAMEI_ENV
1836 if (sp->inodeType == VI_LINKTABLE)
1839 if (*(sp->inode) == 0) {
1841 Log("Missing inode in volume header (%s)\n", sp->description);
1845 Log("Missing inode in volume header (%s); %s\n", sp->description,
1846 (Testing ? "it would have recreated it" : "recreating"));
1849 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
1850 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
1851 if (!VALID_INO(*(sp->inode)))
1853 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
1854 sp->description, errno);
1859 IH_INIT(specH, fileSysDevice, isp->RWvolumeId, *(sp->inode));
1860 fdP = IH_OPEN(specH);
1861 if (OKToZap && (fdP == NULL) && BadError(errno)) {
1862 /* bail out early and destroy the volume */
1864 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
1871 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
1872 sp->description, errno);
1875 && (FDH_READ(fdP, (char *)&header, sp->size) != sp->size
1876 || header.fileHeader.magic != sp->stamp.magic)) {
1878 Log("Part of the header (%s) is corrupted\n", sp->description);
1879 FDH_REALLYCLOSE(fdP);
1883 Log("Part of the header (%s) is corrupted; recreating\n",
1887 if (sp->inodeType == VI_VOLINFO
1888 && header.volumeInfo.destroyMe == DESTROY_ME) {
1891 FDH_REALLYCLOSE(fdP);
1895 if (recreate && !Testing) {
1898 ("Internal error: recreating volume header (%s) in check mode\n",
1900 code = FDH_TRUNC(fdP, 0);
1902 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
1903 sp->description, errno);
1905 /* The following code should be moved into vutil.c */
1906 if (sp->inodeType == VI_VOLINFO) {
1908 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
1909 header.volumeInfo.stamp = sp->stamp;
1910 header.volumeInfo.id = isp->volumeId;
1911 header.volumeInfo.parentId = isp->RWvolumeId;
1912 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
1913 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
1914 isp->volumeId, isp->volumeId);
1915 header.volumeInfo.inService = 0;
1916 header.volumeInfo.blessed = 0;
1917 /* The + 1000 is a hack in case there are any files out in venus caches */
1918 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
1919 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
1920 header.volumeInfo.needsCallback = 0;
1921 gettimeofday(&tp, 0);
1922 header.volumeInfo.creationDate = tp.tv_sec;
1923 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
1925 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
1926 sp->description, errno);
1929 FDH_WRITE(fdP, (char *)&header.volumeInfo,
1930 sizeof(header.volumeInfo));
1931 if (code != sizeof(header.volumeInfo)) {
1934 ("Unable to write volume header file (%s) (errno = %d)\n",
1935 sp->description, errno);
1936 Abort("Unable to write entire volume header file (%s)\n",
1940 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
1942 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
1943 sp->description, errno);
1945 code = FDH_WRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp));
1946 if (code != sizeof(sp->stamp)) {
1949 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
1950 sp->description, errno);
1952 ("Unable to write entire version stamp in volume header file (%s)\n",
1957 FDH_REALLYCLOSE(fdP);
1959 if (sp->inodeType == VI_VOLINFO) {
1960 VolInfo = header.volumeInfo;
1963 if (VolInfo.updateDate) {
1964 strcpy(update, TimeStamp(VolInfo.updateDate, 0));
1966 Log("%s (%u) %supdated %s\n", VolInfo.name, VolInfo.id,
1967 (Testing ? "it would have been " : ""), update);
1969 strcpy(update, TimeStamp(VolInfo.creationDate, 0));
1971 Log("%s (%u) not updated (created %s)\n", VolInfo.name,
1972 VolInfo.id, update);
/*
 * SalvageVnodes -- run SalvageIndex() on the small and then the large
 * vnode index of one volume.
 *
 * rwIsp    summary supplying the inode range: the non-special inodes start
 *          at inodes[rwIsp->index + rwIsp->nSpecialInodes] and number
 *          rwIsp->nInodes - rwIsp->nSpecialInodes.
 * thisIsp  volume being salvaged; RW is true when it is rwIsp itself.
 * inodes   inode array indexed as described above.
 * check    forwarded to SalvageIndex(); when set, a small-index result of
 *          -1 is tested before the large index is processed (the action
 *          taken is not visible in this excerpt).
 *
 * Returns 0 when both index results are 0, otherwise -1.
 */
1982 SalvageVnodes(register struct InodeSummary *rwIsp,
1983 register struct InodeSummary *thisIsp,
1984 register struct ViceInodeInfo *inodes, int check)
1986 int ilarge, ismall, ioffset, RW, nInodes;
1987 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
1990 RW = (rwIsp == thisIsp);
1991 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
1993 SalvageIndex(thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
1994 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
1995 if (check && ismall == -1)
1998 SalvageIndex(thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
1999 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2000 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex -- reconcile one vnode index file with the inodes on disk.
 *
 * ino          inode of the vnode index file.
 * class        vnode class; selects VnodeClassInfo[class].
 * RW           non-zero when the read/write volume is being processed.
 * ip, nInodes  the inode records to match against the vnodes.  The loops
 *              compare ip->u.vnode.vnodeNumber with the current vnode
 *              number while walking the index upwards -- presumably the
 *              records are sorted by vnode number; confirm against the
 *              caller.
 * volSummary   supplies the volume id and the parent id for the handle.
 * check        the assert before the write-back requires that no vnode
 *              was changed when check is set.
 *
 * The index is opened with mode "r+", its size must be a whole number of
 * vcp->diskSize records, and reading starts one record into the file.  For
 * every vnode whose type is not vNull, as far as this excerpt shows:
 *  - a vnode with inode number 0, or with a vnodeMagic different from
 *    vcp->magic, is zeroed (the surrounding conditions are partly not
 *    visible);
 *  - inode records with lower vnode numbers are passed over;
 *  - with a matching inode record the vnode's uniquifier, dataVersion,
 *    inode number and length are brought in line with the record, each
 *    correction being logged, and ip->linkCount is decremented so the
 *    inode is kept;
 *  - without one, a vnode that names an inode or is a directory is logged
 *    and zeroed;
 *  - a changed vnode is written back at vnodeIndexOffset() unless Testing,
 *    and VolumeChanged is set.
 *
 * NOTE(review): STREAM_SEEK() is called inside assert(); confirm assert is
 * never compiled out in this build.
 */
2004 SalvageIndex(Inode ino, VnodeClass class, int RW,
2005 register struct ViceInodeInfo *ip, int nInodes,
2006 struct VolumeSummary *volSummary, int check)
2008 VolumeId volumeNumber;
2009 char buf[SIZEOF_LARGEDISKVNODE];
2010 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2012 StreamHandle_t *file;
2013 struct VnodeClassInfo *vcp;
2015 afs_fsize_t vnodeLength;
2016 int vnodeIndex, nVnodes;
2017 afs_ino_str_t stmp1, stmp2;
2021 volumeNumber = volSummary->header.id;
2022 IH_INIT(handle, fileSysDevice, volSummary->header.parent, ino);
2023 fdP = IH_OPEN(handle);
2024 assert(fdP != NULL);
2025 file = FDH_FDOPEN(fdP, "r+");
2026 assert(file != NULL);
2027 vcp = &VnodeClassInfo[class];
2028 size = OS_SIZE(fdP->fd_fd);
2030 nVnodes = (size / vcp->diskSize) - 1;
2032 assert((nVnodes + 1) * vcp->diskSize == size);
2033 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
2037 for (vnodeIndex = 0;
2038 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2039 nVnodes--, vnodeIndex++) {
2040 if (vnode->type != vNull) {
2041 int vnodeChanged = 0;
2042 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2043 /* Log programs that belong to root (potentially suid root);
2044 * don't bother for read-only or backup volumes */
2045 #ifdef notdef /* This is done elsewhere */
2046 if (ShowRootFiles && RW && vnode->owner == 0 && vnodeNumber != 1)
2047 Log("OWNER IS ROOT %s %u dir %u vnode %u author %u owner %u mode %o\n", VolInfo.name, volumeNumber, vnode->parent, vnodeNumber, vnode->author, vnode->owner, vnode->modeBits);
2049 if (VNDISK_GET_INO(vnode) == 0) {
2051 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2052 memset(vnode, 0, vcp->diskSize);
2056 if (vcp->magic != vnode->vnodeMagic) {
2057 /* bad magic #, probably partially created vnode */
2058 Log("Partially allocated vnode %d deleted.\n",
2060 memset(vnode, 0, vcp->diskSize);
2064 /* ****** Should do a bit more salvage here: e.g. make sure
2065 * vnode type matches what it should be given the index */
2066 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2067 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2068 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2069 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2076 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2077 /* The following doesn't work, because the version number
2078 * is not maintained correctly by the file server */
2079 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2080 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2082 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2088 /* For RW volume, look for vnode with matching inode number;
2089 * if no such match, take the first determined by our sort
2091 register struct ViceInodeInfo *lip = ip;
2092 register int lnInodes = nInodes;
2094 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2095 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2104 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2105 /* "Matching" inode */
2109 vu = vnode->uniquifier;
2110 iu = ip->u.vnode.vnodeUniquifier;
2111 vd = vnode->dataVersion;
2112 id = ip->u.vnode.inodeDataVersion;
2114 * Because of the possibility of the uniquifier overflows (> 4M)
2115 * we compare them modulo the low 22-bits; we shouldn't worry
2116 * about mismatching since they shouldn't to many old
2117 * uniquifiers of the same vnode...
2119 if (IUnique(vu) != IUnique(iu)) {
2121 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2124 vnode->uniquifier = iu;
2125 #ifdef AFS_3DISPARES
2126 vnode->dataVersion = (id >= vd ?
2129 1887437 ? vd : id) :
2132 1887437 ? id : vd));
2134 #if defined(AFS_SGI_EXMAG)
2135 vnode->dataVersion = (id >= vd ?
2138 15099494 ? vd : id) :
2141 15099494 ? id : vd));
2143 vnode->dataVersion = (id > vd ? id : vd);
2144 #endif /* AFS_SGI_EXMAG */
2145 #endif /* AFS_3DISPARES */
2148 /* don't bother checking for vd > id any more, since
2149 * partial file transfers always result in this state,
2150 * and you can't do much else anyway (you've already
2151 * found the best data you can) */
2152 #ifdef AFS_3DISPARES
2153 if (!vnodeIsDirectory(vnodeNumber)
2154 && ((vd < id && (id - vd) < 1887437)
2155 || ((vd > id && (vd - id) > 1887437)))) {
2157 #if defined(AFS_SGI_EXMAG)
2158 if (!vnodeIsDirectory(vnodeNumber)
2159 && ((vd < id && (id - vd) < 15099494)
2160 || ((vd > id && (vd - id) > 15099494)))) {
2162 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2163 #endif /* AFS_SGI_EXMAG */
2166 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2167 vnode->dataVersion = id;
2172 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2175 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2177 VNDISK_SET_INO(vnode, ip->inodeNumber);
2182 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2184 VNDISK_SET_INO(vnode, ip->inodeNumber);
2187 VNDISK_GET_LEN(vnodeLength, vnode);
2188 if (ip->byteCount != vnodeLength) {
2191 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2196 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2197 VNDISK_SET_LEN(vnode, ip->byteCount);
2201 ip->linkCount--; /* Keep the inode around */
2204 } else { /* no matching inode */
2205 if (VNDISK_GET_INO(vnode) != 0
2206 || vnode->type == vDirectory) {
2207 /* No matching inode--get rid of the vnode */
2209 if (VNDISK_GET_INO(vnode)) {
2211 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)));
2215 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2220 if (VNDISK_GET_INO(vnode)) {
2222 time_t serverModifyTime = vnode->serverModifyTime;
2223 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2227 time_t serverModifyTime = vnode->serverModifyTime;
2228 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2231 memset(vnode, 0, vcp->diskSize);
2234 /* Should not reach here because we checked for
2235 * (inodeNumber == 0) above. And where we zero the vnode,
2236 * we also goto vnodeDone.
2240 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2244 } /* VNDISK_GET_INO(vnode) != 0 */
2246 assert(!(vnodeChanged && check));
2247 if (vnodeChanged && !Testing) {
2249 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2250 (char *)vnode, vcp->diskSize)
2252 VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber -- look up the salvager's per-vnode record.
 *
 * Derives the vnode class and bit offset from vnodeNumber and returns
 * &vnodeInfo[class].vnodes[offset], or NULL when the offset is not below
 * that class's nVnodes.
 */
2263 struct VnodeEssence *
2264 CheckVnodeNumber(VnodeId vnodeNumber)
2267 struct VnodeInfo *vip;
2270 class = vnodeIdToClass(vnodeNumber);
2271 vip = &vnodeInfo[class];
2272 offset = vnodeIdToBitNumber(vnodeNumber);
2273 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite -- give a directory its own inode before it is modified.
 *
 * dir  summary of the directory.  The function leaves early when
 *      dir->copied is set or Testing is on.
 *
 * Calls DFlush(), reads the directory's large vnode, creates a new inode
 * with IH_CREATE (vnode.dataVersion is bumped inside that same call
 * expression), copies the old inode's contents with CopyInode(), stores
 * the new inode number in the vnode and writes the vnode back, then
 * repoints dir->dirHandle at the new inode with SetSalvageDirHandle().
 * The original inode is deliberately not removed here.
 *
 * NOTE(review): CopyInode() is invoked inside assert(); if assert can be
 * compiled out in this build the copy would silently not happen.
 */
2277 CopyOnWrite(register struct DirSummary *dir)
2279 /* Copy the directory unconditionally if we are going to change it:
2280 * not just if was cloned.
2282 struct VnodeDiskObject vnode;
2283 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2284 Inode oldinode, newinode;
2287 if (dir->copied || Testing)
2289 DFlush(); /* Well justified paranoia... */
2292 IH_IREAD(vnodeInfo[vLarge].handle,
2293 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2295 assert(code == sizeof(vnode));
2296 oldinode = VNDISK_GET_INO(&vnode);
2297 /* Increment the version number by a whole lot to avoid problems with
2298 * clients that were promised new version numbers--but the file server
2299 * crashed before the versions were written to disk.
2302 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2303 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2305 assert(VALID_INO(newinode));
2306 assert(CopyInode(fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2308 VNDISK_SET_INO(&vnode, newinode);
2310 IH_IWRITE(vnodeInfo[vLarge].handle,
2311 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2313 assert(code == sizeof(vnode));
2315 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2316 fileSysDevice, newinode);
2317 /* Don't delete the original inode right away, because the directory is
2318 * still being scanned.
2324 * This function should either successfully create a new dir, or give up
2325 * and leave things the way they were. In particular, if it fails to write
2326 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage -- rebuild a damaged directory into a new inode.
 *
 * dir  summary of the directory; on success its dirHandle is replaced by
 *      the handle of the rebuilt copy.
 *
 * Reads the directory's large vnode, creates a new inode with IH_CREATE
 * and has DirSalvage() rebuild the contents into it, passing the vnode's
 * parent (or the directory's own vnode number when parent is 0) and a
 * parent uniquifier taken from CheckVnodeNumber(), defaulting to 1.  If
 * the salvage reports failure or DirOK() rejects the result, the new
 * inode is released with IH_DEC and the old directory stays in use.
 * Otherwise the vnode gets the new inode number and Length(&newdir), is
 * written back, the disk is synced, the old directory handle is really
 * closed, the old inode is IH_DEC'ed and dir->dirHandle becomes newdir.
 */
2330 CopyAndSalvage(register struct DirSummary *dir)
2332 struct VnodeDiskObject vnode;
2333 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2334 Inode oldinode, newinode;
2339 afs_int32 parentUnique = 1;
2340 struct VnodeEssence *vnodeEssence;
2345 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2347 IH_IREAD(vnodeInfo[vLarge].handle,
2348 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2350 assert(lcode == sizeof(vnode));
2351 oldinode = VNDISK_GET_INO(&vnode);
2352 /* Increment the version number by a whole lot to avoid problems with
2353 * clients that were promised new version numbers--but the file server
2354 * crashed before the versions were written to disk.
2357 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2358 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2360 assert(VALID_INO(newinode));
2361 SetSalvageDirHandle(&newdir, dir->rwVid, fileSysDevice, newinode);
2363 /* Assign . and .. vnode numbers from dir and vnode.parent.
2364 * The uniquifier for . is in the vnode.
2365 * The uniquifier for .. might be set to a bogus value of 1 and
2366 * the salvager will later clean it up.
2368 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(vnode.parent))) {
2369 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2372 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2374 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2379 /* didn't really build the new directory properly, let's just give up. */
2380 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2381 Log("Directory salvage returned code %d, continuing.\n", code);
2383 Log("also failed to decrement link count on new inode");
2387 Log("Checking the results of the directory salvage...\n");
2388 if (!DirOK(&newdir)) {
2389 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2390 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2395 VNDISK_SET_INO(&vnode, newinode);
2396 length = Length(&newdir);
2397 VNDISK_SET_LEN(&vnode, length);
2399 IH_IWRITE(vnodeInfo[vLarge].handle,
2400 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2402 assert(lcode == sizeof(vnode));
2405 nt_sync(fileSysDevice);
2407 sync(); /* this is slow, but hopefully rarely called. We don't have
2408 * an open FD on the file itself to fsync.
2412 vnodeInfo[vLarge].handle->ih_synced = 1;
2414 /* make sure old directory file is really closed */
2415 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2416 FDH_REALLYCLOSE(fdP);
2418 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2420 dir->dirHandle = newdir;
2424 JudgeEntry(void *dirVal, char *name, afs_int32 vnodeNumber,
2427 struct DirSummary *dir = (struct DirSummary *)dirVal;
2428 struct VnodeEssence *vnodeEssence;
2429 afs_int32 dirOrphaned, todelete;
2431 dirOrphaned = IsVnodeOrphaned(dir->vnodeNumber);
2433 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2434 if (vnodeEssence == NULL) {
2436 Log("dir vnode %u: invalid entry deleted: %s/%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2440 assert(Delete(&dir->dirHandle, name) == 0);
2445 #ifndef AFS_NAMEI_ENV
2446 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2447 * mount inode for the partition. If this inode were deleted, it would crash
2450 if (vnodeEssence->InodeNumber == 0) {
2451 Log("dir vnode %d: invalid entry: %s/%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2454 assert(Delete(&dir->dirHandle, name) == 0);
2461 if (!(vnodeNumber & 1) && !Showmode
2462 && !(vnodeEssence->count || vnodeEssence->unique
2463 || vnodeEssence->modeBits)) {
2464 Log("dir vnode %u: invalid entry: %s/%s (vnode %u, unique %u)%s\n",
2465 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
2466 vnodeNumber, unique,
2467 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
2472 assert(Delete(&dir->dirHandle, name) == 0);
2478 /* Check if the Uniquifiers match. If not, change the directory entry
2479 * so its unique matches the vnode unique. Delete if the unique is zero
2480 * or if the directory is orphaned.
2482 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
2483 if (!vnodeEssence->unique
2484 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
2485 /* This is an orphaned directory. Don't delete the . or ..
2486 * entry. Otherwise, it will get created in the next
2487 * salvage and deleted again here. So Just skip it.
2492 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
2495 Log("dir vnode %u: %s/%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
2499 fid.Vnode = vnodeNumber;
2500 fid.Unique = vnodeEssence->unique;
2502 assert(Delete(&dir->dirHandle, name) == 0);
2504 assert(Create(&dir->dirHandle, name, &fid) == 0);
2507 return 0; /* no need to continue */
2510 if (strcmp(name, ".") == 0) {
2511 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
2514 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
2517 assert(Delete(&dir->dirHandle, ".") == 0);
2518 fid.Vnode = dir->vnodeNumber;
2519 fid.Unique = dir->unique;
2520 assert(Create(&dir->dirHandle, ".", &fid) == 0);
2523 vnodeNumber = fid.Vnode; /* Get the new Essence */
2524 unique = fid.Unique;
2525 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2528 } else if (strcmp(name, "..") == 0) {
2531 struct VnodeEssence *dotdot;
2532 pa.Vnode = dir->parent;
2533 dotdot = CheckVnodeNumber(pa.Vnode);
2534 assert(dotdot != NULL); /* XXX Should not be assert */
2535 pa.Unique = dotdot->unique;
2537 pa.Vnode = dir->vnodeNumber;
2538 pa.Unique = dir->unique;
2540 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
2542 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
2545 assert(Delete(&dir->dirHandle, "..") == 0);
2546 assert(Create(&dir->dirHandle, "..", &pa) == 0);
2549 vnodeNumber = pa.Vnode; /* Get the new Essence */
2551 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2553 dir->haveDotDot = 1;
2554 } else if (strncmp(name, ".__afs", 6) == 0) {
2556 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
2560 assert(Delete(&dir->dirHandle, name) == 0);
2562 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
2563 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
2566 if (ShowSuid && (vnodeEssence->modeBits & 06000))
2567 Log("FOUND suid/sgid file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
2568 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
2569 && !(vnodeEssence->modeBits & 0111)) {
2575 IH_INIT(ihP, fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
2576 vnodeEssence->InodeNumber);
2579 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
2583 size = FDH_SIZE(fdP);
2585 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, size, vnodeNumber);
2586 FDH_REALLYCLOSE(fdP);
2593 code = FDH_READ(fdP, buf, size);
2596 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
2597 Log("Volume %u (%s) mount point %s/%s to '%s' invalid, %s to symbolic link\n",
2598 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
2599 Testing ? "would convert" : "converted");
2600 vnodeEssence->modeBits |= 0111;
2601 vnodeEssence->changed = 1;
2602 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s/%s to '%s'\n",
2603 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
2604 dir->name ? dir->name : "??", name, buf);
2606 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
2607 dir->vname, vnodeNumber, size, code);
2609 FDH_REALLYCLOSE(fdP);
2612 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
2613 Log("FOUND root file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
2614 if (vnodeIdToClass(vnodeNumber) == vLarge
2615 && vnodeEssence->name == NULL) {
2617 if ((n = (char *)malloc(strlen(name) + 1)))
2619 vnodeEssence->name = n;
2622 /* The directory entry points to the vnode. Check to see if the
2623 * vnode points back to the directory. If not, then let the
2624 * directory claim it (else it might end up orphaned). Vnodes
2625 * already claimed by another directory are deleted from this
2626 * directory: hardlinks to the same vnode are not allowed
2627 * from different directories.
2629 if (vnodeEssence->parent != dir->vnodeNumber) {
2630 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
2631 /* Vnode does not point back to this directory.
2632 * Orphaned dirs cannot claim a file (it may belong to
2633 * another non-orphaned dir).
2636 Log("dir vnode %u: %s/%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
2638 vnodeEssence->parent = dir->vnodeNumber;
2639 vnodeEssence->changed = 1;
2641 /* Vnode was claimed by another directory */
2644 Log("dir vnode %u: %s/%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
2645 } else if (vnodeNumber == 1) {
2646 Log("dir vnode %d: %s/%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
2648 Log("dir vnode %u: %s/%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
2653 assert(Delete(&dir->dirHandle, name) == 0);
2658 /* This directory claims the vnode */
2659 vnodeEssence->claimed = 1;
2661 vnodeEssence->count--;
/*
 * DistilVnodeEssence
 *
 * Read the vnode index file of one vnode class and condense every
 * allocated entry into the in-memory VnodeEssence table (vip->vnodes)
 * consulted by the rest of the salvage.
 *
 *   rwVId - volume id used to initialize the index file handle
 *   class - vnode class; selects vnodeInfo[class] and VnodeClassInfo[class]
 *   ino   - inode of the vnode index file to read
 *   maxu  - in/out: raised to the largest uniquifier found in the index
 *
 * The number of vnodes is (file size / diskSize) - 1, and reading starts
 * at offset vcp->diskSize, so the first diskSize bytes of the index are
 * not treated as a vnode.  For the large class an additional table of
 * directory inodes (vip->inodes) is filled in.
 *
 * A directory vnode found in a non-large index is not counted as
 * allocated and its slot in the small vnode index is overwritten with
 * zeros.
 */
2666 DistilVnodeEssence(VolumeId rwVId, VnodeClass class, Inode ino, Unique * maxu)
2668 register struct VnodeInfo *vip = &vnodeInfo[class];
2669 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
2670 char buf[SIZEOF_LARGEDISKVNODE];
2671 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2673 StreamHandle_t *file;
2678 IH_INIT(vip->handle, fileSysDevice, rwVId, ino);
2679 fdP = IH_OPEN(vip->handle);
2680 assert(fdP != NULL);
2681 file = FDH_FDOPEN(fdP, "r+");
2682 assert(file != NULL);
2683 size = OS_SIZE(fdP->fd_fd);
2685 vip->nVnodes = (size / vcp->diskSize) - 1;
2686 if (vip->nVnodes > 0) {
2687 assert((vip->nVnodes + 1) * vcp->diskSize == size);
2688 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
2689 assert((vip->vnodes = (struct VnodeEssence *)
2690 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
2691 if (class == vLarge) {
2692 assert((vip->inodes = (Inode *)
2693 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
2702 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
2703 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
2704 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2705 nVnodes--, vnodeIndex++) {
2706 if (vnode->type != vNull) {
2707 register struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
2708 afs_fsize_t vnodeLength;
2709 vip->nAllocatedVnodes++;
2710 vep->count = vnode->linkCount;
2711 VNDISK_GET_LEN(vnodeLength, vnode);
2712 vep->blockCount = nBlocks(vnodeLength);
2713 vip->volumeBlockCount += vep->blockCount;
2714 vep->parent = vnode->parent;
2715 vep->unique = vnode->uniquifier;
2716 if (*maxu < vnode->uniquifier)
2717 *maxu = vnode->uniquifier;
2718 vep->modeBits = vnode->modeBits;
2719 vep->InodeNumber = VNDISK_GET_INO(vnode);
2720 vep->type = vnode->type;
2721 vep->author = vnode->author;
2722 vep->owner = vnode->owner;
2723 vep->group = vnode->group;
2724 if (vnode->type == vDirectory) {
2725 if (class != vLarge) {
2726 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2727 vip->nAllocatedVnodes--;
/* `vnode` is a pointer into buf: clear and write back the whole
 * diskSize-byte vnode image that was just read.  sizeof(vnode) would
 * only cover a pointer's worth of bytes, and &vnode would write the
 * pointer value itself into the index. */
2728 memset(vnode, 0, vcp->diskSize);
2729 IH_IWRITE(vnodeInfo[vSmall].handle,
2730 vnodeIndexOffset(vcp, vnodeNumber),
2731 (char *)vnode, vcp->diskSize);
2734 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName
 *
 * Build the path name of directory vnode `vnode` (whose essence is `vp`)
 * into the caller-supplied buffer `path`.  When the vnode has a parent and
 * a recorded name, and the parent's essence can be looked up, the parent's
 * path is built first by recursion and vp->name is then appended.
 * NOTE(review): the handling of the remaining cases and the return value
 * are on lines not present in this excerpt.
 */
2743 GetDirName(VnodeId vnode, struct VnodeEssence *vp, char *path)
2745 struct VnodeEssence *parentvp;
2751 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(vp->parent))
2752 && GetDirName(vp->parent, parentvp, path)) {
2754 strcat(path, vp->name);
2760 /* To determine if a vnode is orphaned or not, the vnode and all its parent
2761 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
/*
 * IsVnodeOrphaned
 *
 * Returns 1 when `vnode` is orphaned and 0 when it is not.  A vnode whose
 * essence cannot be found, or whose `claimed` flag is clear, is orphaned;
 * otherwise the answer is that of its parent, so the check walks up the
 * parent chain recursively.
 */
2764 IsVnodeOrphaned(VnodeId vnode)
2766 struct VnodeEssence *vep;
2769 return (1); /* Vnode zero does not exist */
2771 return (0); /* The root dir vnode is always claimed */
2772 vep = CheckVnodeNumber(vnode); /* Get the vnode essence */
2773 if (!vep || !vep->claimed)
2774 return (1); /* Vnode is not claimed - it is orphaned */
2776 return (IsVnodeOrphaned(vep->parent));
/*
 * SalvageDir
 *
 * Salvage the directory stored at index `i` of the large-vnode table
 * `dirVnodeInfo`.
 *
 *   name         - passed through unchanged to the recursive call
 *   rwVid        - volume id used when decrementing the old directory inode
 *   dirVnodeInfo - large vnode table (vnodes[] and inodes[])
 *   alinkH       - link handle stored in the DirSummary (ds_linkH)
 *   i            - index of the directory in dirVnodeInfo
 *   rootdir      - receives a copy of the DirSummary when the directory is
 *                  vnode 1 with unique 1
 *
 * The entry is marked salvaged up front so it is processed only once;
 * entries with no directory inode are ignored.  Vnode 1 has a non-zero
 * parent reset to 0; for any other directory the parent is salvaged first
 * (recursively) if that has not happened yet.  The directory is then
 * checked with DirOK() (skipped, and treated as bad, when RebuildDirs is
 * set and Testing is not) and every entry is passed to JudgeEntry() via
 * EnumerateDir().  If the directory was copied during the salvage and
 * Testing is off, the old inode is decremented and inodes[i] is updated.
 *
 * Working state lives in function-static variables (dir, dirHandle, path).
 */
2780 SalvageDir(char *name, VolumeId rwVid, struct VnodeInfo *dirVnodeInfo,
2781 IHandle_t * alinkH, int i, struct DirSummary *rootdir,
2784 static struct DirSummary dir;
2785 static struct DirHandle dirHandle;
2786 struct VnodeEssence *parent;
2787 static char path[MAXPATHLEN];
2790 if (dirVnodeInfo->vnodes[i].salvaged)
2791 return; /* already salvaged */
2794 dirVnodeInfo->vnodes[i].salvaged = 1;
2796 if (dirVnodeInfo->inodes[i] == 0)
2797 return; /* Not allocated to a directory */
2799 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
2800 if (dirVnodeInfo->vnodes[i].parent) {
2801 Log("Bad parent, vnode 1; %s...\n",
2802 (Testing ? "skipping" : "salvaging"));
2803 dirVnodeInfo->vnodes[i].parent = 0;
2804 dirVnodeInfo->vnodes[i].changed = 1;
2807 parent = CheckVnodeNumber(dirVnodeInfo->vnodes[i].parent);
2808 if (parent && parent->salvaged == 0)
2809 SalvageDir(name, rwVid, dirVnodeInfo, alinkH,
2810 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
2811 rootdir, rootdirfound);
2814 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
2815 dir.unique = dirVnodeInfo->vnodes[i].unique;
2818 dir.parent = dirVnodeInfo->vnodes[i].parent;
2819 dir.haveDot = dir.haveDotDot = 0;
2820 dir.ds_linkH = alinkH;
2821 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, fileSysDevice,
2822 dirVnodeInfo->inodes[i]);
2824 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
2827 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
2828 (Testing ? "skipping" : "salvaging"));
2831 CopyAndSalvage(&dir);
2835 dirHandle = dir.dirHandle;
2838 GetDirName(bitNumberToVnodeNumber(i, vLarge),
2839 &dirVnodeInfo->vnodes[i], path);
2842 /* If enumeration failed for random reasons, we will probably delete
2843 * too much stuff, so we guard against this instead.
2845 assert(EnumerateDir(&dirHandle, JudgeEntry, &dir) == 0);
2848 /* Delete the old directory if it was copied in order to salvage.
2849 * CopyOnWrite has written the new inode # to the disk, but we still
2850 * have the old one in our local structure here. Thus, we idec the
2854 if (dir.copied && !Testing) {
2855 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
2857 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
2860 /* Remember rootdir DirSummary _after_ it has been judged */
2861 if (dir.vnodeNumber == 1 && dir.unique == 1) {
2862 memcpy(rootdir, &dir, sizeof(struct DirSummary));
/*
 * SalvageVolume
 *
 * Salvage the vnode and directory structure of one read-write volume.
 *
 *   rwIsp  - inode summary of the volume; its volSummary header supplies
 *            the volume id and the volumeInfo / vnode index inodes
 *   alinkH - link handle passed to SalvageDir() and used when
 *            decrementing inodes of removed vnodes
 *
 * Phases, in order:
 *   1. Read the volume header and assert that it has the expected size
 *      and magic and is not marked DESTROY_ME.
 *   2. Distil both vnode indexes (large, then small) into vnodeInfo[].
 *   3. Run SalvageDir() over every large vnode, then sync.
 *   4. Scan all vnodes for unclaimed ones.  For an orphaned directory the
 *      parent's link-count delta is adjusted.  With orphans == ORPH_ATTACH
 *      (and neither todelete nor Testing) the vnode is re-parented to the
 *      root directory: ".." is recreated for directories and an entry
 *      named __ORPHANDIR__ / __ORPHANFILE__ plus vnode and unique is
 *      created in the root directory, retrying up to 10 times with a
 *      different unique.
 *   5. Decrement the old root directory inode if the root directory was
 *      copied while attaching, then DFlush().
 *   6. Write back every vnode that changed or has a non-zero link-count
 *      delta; orphans are removed here when orphans == ORPH_REMOVE or the
 *      vnode is marked todelete.
 *   7. Free the saved directory names, update the header statistics
 *      (filecount, diskused), raise the volume uniquifier if it is too
 *      low, reset the inUse / needsSalvaged / inService / dontSalvage
 *      flags, write the header back and release both index handles.
 *
 * Most I/O failures are handled with assert().
 */
2870 SalvageVolume(register struct InodeSummary *rwIsp, IHandle_t * alinkH)
2872 /* This routine, for now, will only be called for read-write volumes */
2874 int BlocksInVolume = 0, FilesInVolume = 0;
2875 register VnodeClass class;
2876 struct DirSummary rootdir, oldrootdir;
2877 struct VnodeInfo *dirVnodeInfo;
2878 struct VnodeDiskObject vnode;
2879 VolumeDiskData volHeader;
2881 int orphaned, rootdirfound = 0;
2882 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
2883 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
2884 struct VnodeEssence *vep;
2887 afs_sfsize_t nBytes;
2889 VnodeId LFVnode, ThisVnode;
2890 Unique LFUnique, ThisUnique;
2893 vid = rwIsp->volSummary->header.id;
2894 IH_INIT(h, fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
2895 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
2896 assert(nBytes == sizeof(volHeader));
2897 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
2898 assert(volHeader.destroyMe != DESTROY_ME);
2899 /* (should not have gotten this far with DESTROY_ME flag still set!) */
2901 DistilVnodeEssence(vid, vLarge, rwIsp->volSummary->header.largeVnodeIndex,
2903 DistilVnodeEssence(vid, vSmall, rwIsp->volSummary->header.smallVnodeIndex,
2906 dirVnodeInfo = &vnodeInfo[vLarge];
2907 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
2908 SalvageDir(volHeader.name, vid, dirVnodeInfo, alinkH, i, &rootdir,
2912 nt_sync(fileSysDevice);
2914 sync(); /* This used to be done lower level, for every dir */
2921 /* Parse each vnode looking for orphaned vnodes and
2922 * connect them to the tree as orphaned (if requested).
2924 oldrootdir = rootdir;
2925 for (class = 0; class < nVNODECLASSES; class++) {
2926 for (v = 0; v < vnodeInfo[class].nVnodes; v++) {
2927 vep = &(vnodeInfo[class].vnodes[v]);
2928 ThisVnode = bitNumberToVnodeNumber(v, class);
2929 ThisUnique = vep->unique;
2931 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
2932 continue; /* Ignore unused, claimed, and root vnodes */
2934 /* This vnode is orphaned. If it is a directory vnode, then the '..'
2935 * entry in this vnode had incremented the parent link count (In
2936 * JudgeEntry()). We need to go to the parent and decrement that
2937 * link count. But if the parent's unique is zero, then the parent
2938 * link count was not incremented in JudgeEntry().
2940 if (class == vLarge) { /* directory vnode */
2941 pv = vnodeIdToBitNumber(vep->parent);
2942 if (vnodeInfo[vLarge].vnodes[pv].unique != 0)
2943 vnodeInfo[vLarge].vnodes[pv].count++;
2947 continue; /* If no rootdir, can't attach orphaned files */
2949 /* Here we attach orphaned files and directories into the
2950 * root directory, LVVnode, making sure link counts stay correct.
2952 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
2953 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
2954 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
2956 /* Update this orphaned vnode's info. Its parent info and
2957 * link count (do for orphaned directories and files).
2959 vep->parent = LFVnode; /* Parent is the root dir */
2960 vep->unique = LFUnique;
2963 vep->count--; /* Inc link count (root dir will pt to it) */
2965 /* If this orphaned vnode is a directory, change '..'.
2966 * The name of the orphaned dir/file is unknown, so we
2967 * build a unique name. No need to CopyOnWrite the directory
2968 * since it is not connected to tree in BK or RO volume and
2969 * won't be visible there.
2971 if (class == vLarge) {
2975 /* Remove and recreate the ".." entry in this orphaned directory */
2976 SetSalvageDirHandle(&dh, vid, fileSysDevice,
2977 vnodeInfo[class].inodes[v]);
2979 pa.Unique = LFUnique;
2980 assert(Delete(&dh, "..") == 0);
2981 assert(Create(&dh, "..", &pa) == 0);
2983 /* The original parent's link count was decremented above.
2984 * Here we increment the new parent's link count.
2986 pv = vnodeIdToBitNumber(LFVnode);
2987 vnodeInfo[vLarge].vnodes[pv].count--;
2991 /* Go to the root dir and add this entry. The link count of the
2992 * root dir was incremented when ".." was created. Try 10 times.
2994 for (j = 0; j < 10; j++) {
2995 pa.Vnode = ThisVnode;
2996 pa.Unique = ThisUnique;
2998 (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
3000 vLarge) ? "__ORPHANDIR__" :
3001 "__ORPHANFILE__"), ThisVnode,
3004 CopyOnWrite(&rootdir);
3005 code = Create(&rootdir.dirHandle, npath, &pa);
3009 ThisUnique += 50; /* Try creating a different file */
3012 Log("Attaching orphaned %s to volume's root dir as %s\n",
3013 ((class == vLarge) ? "directory" : "file"), npath);
3015 } /* for each vnode in the class */
3016 } /* for each class of vnode */
3018 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
3020 if (!oldrootdir.copied && rootdir.copied) {
3022 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
3025 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
3028 DFlush(); /* Flush the changes */
3029 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
3030 Log("Cannot attach orphaned files and directories: Root directory not found\n");
3031 orphans = ORPH_IGNORE;
3034 /* Write out all changed vnodes. Orphaned files and directories
3035 * will get removed here also (if requested).
3037 for (class = 0; class < nVNODECLASSES; class++) {
3038 int nVnodes = vnodeInfo[class].nVnodes;
3039 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3040 struct VnodeEssence *vnodes = vnodeInfo[class].vnodes;
3041 FilesInVolume += vnodeInfo[class].nAllocatedVnodes;
3042 BlocksInVolume += vnodeInfo[class].volumeBlockCount;
3043 for (i = 0; i < nVnodes; i++) {
3044 register struct VnodeEssence *vnp = &vnodes[i];
3045 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
3047 /* If the vnode is good but is unclaimed (not listed in
3048 * any directory entries), then it is orphaned.
3051 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(vnodeNumber))) {
3052 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
3056 if (vnp->changed || vnp->count) {
3060 IH_IREAD(vnodeInfo[class].handle,
3061 vnodeIndexOffset(vcp, vnodeNumber),
3062 (char *)&vnode, sizeof(vnode));
3063 assert(nBytes == sizeof(vnode));
3065 vnode.parent = vnp->parent;
3066 oldCount = vnode.linkCount;
3067 vnode.linkCount = vnode.linkCount - vnp->count;
3070 orphaned = IsVnodeOrphaned(vnodeNumber);
3072 if (!vnp->todelete) {
3073 /* Orphans should have already been attached (if requested) */
3074 assert(orphans != ORPH_ATTACH);
3075 oblocks += vnp->blockCount;
3078 if (((orphans == ORPH_REMOVE) || vnp->todelete)
3080 BlocksInVolume -= vnp->blockCount;
3082 if (VNDISK_GET_INO(&vnode)) {
3084 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
3087 memset(&vnode, 0, sizeof(vnode));
3089 } else if (vnp->count) {
3091 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
3094 vnode.modeBits = vnp->modeBits;
3097 vnode.dataVersion++;
3100 IH_IWRITE(vnodeInfo[class].handle,
3101 vnodeIndexOffset(vcp, vnodeNumber),
3102 (char *)&vnode, sizeof(vnode));
3103 assert(nBytes == sizeof(vnode));
3109 if (!Showmode && ofiles) {
3110 Log("%s %d orphaned files and directories (approx. %u KB)\n",
3112 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
3116 for (class = 0; class < nVNODECLASSES; class++) {
3117 register struct VnodeInfo *vip = &vnodeInfo[class];
3118 for (i = 0; i < vip->nVnodes; i++)
3119 if (vip->vnodes[i].name)
3120 free(vip->vnodes[i].name);
3127 /* Set correct resource utilization statistics */
3128 volHeader.filecount = FilesInVolume;
3129 volHeader.diskused = BlocksInVolume;
3131 /* Make sure the uniquifer is big enough: maxunique is the real maxUniquifier */
3132 if (volHeader.uniquifier < (maxunique + 1)) {
3134 Log("Volume uniquifier is too low; fixed\n");
3135 /* Plus 2,000 in case there are workstations out there with
3136 * cached vnodes that have since been deleted
3138 volHeader.uniquifier = (maxunique + 1 + 2000);
3141 /* Turn off the inUse bit; the volume's been salvaged! */
3142 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
3143 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
3144 volHeader.inService = 1; /* allow service again */
3145 volHeader.needsCallback = (VolumeChanged != 0);
3146 volHeader.dontSalvage = DONT_SALVAGE;
3149 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
3150 assert(nBytes == sizeof(volHeader));
3153 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
3154 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
3155 FilesInVolume, BlocksInVolume);
3157 IH_RELEASE(vnodeInfo[vSmall].handle);
3158 IH_RELEASE(vnodeInfo[vLarge].handle);
/*
 * ClearROInUseBit
 *
 * Rewrite the volume header reached through summary->volumeInfoHandle so
 * the volume is marked as not in use, not needing salvage, in service and
 * DONT_SALVAGE.  The header is read, checked for the expected size and
 * magic with assert(), modified and written back in place.
 */
3164 ClearROInUseBit(struct VolumeSummary *summary)
3166 IHandle_t *h = summary->volumeInfoHandle;
3167 afs_sfsize_t nBytes;
3169 VolumeDiskData volHeader;
3171 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3172 assert(nBytes == sizeof(volHeader));
3173 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3174 volHeader.inUse = 0;
3175 volHeader.needsSalvaged = 0;
3176 volHeader.inService = 1;
3177 volHeader.dontSalvage = DONT_SALVAGE;
3179 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
3180 assert(nBytes == sizeof(volHeader));
3185 * Possibly delete the volume.
3187 * deleteMe - Always do so, only a partial volume.
/*
 * MaybeZapVolume
 *
 * Decide what to do with a volume whose salvage did not succeed.
 *
 *   isp      - inode summary of the volume
 *   message  - prefix used in the "salvage was unsuccessful" log line
 *   deleteMe - non-zero when the volume is only a partial volume
 *
 * For a read-only volume, or when deleteMe is set, and the volume has a
 * header file name, the reason is logged and the header file
 * fileSysPath/fileName is the target of the removal (a failure to unlink
 * is logged with errno).  Otherwise, unless `check` is set, the failure is
 * logged and the salvage is ended through Abort().
 * NOTE(review): `check` is declared on a line not present in this excerpt.
 */
3190 MaybeZapVolume(register struct InodeSummary *isp, char *message, int deleteMe,
3193 if (readOnly(isp) || deleteMe) {
3194 if (isp->volSummary && isp->volSummary->fileName) {
3197 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
3199 Log("It will be deleted on this server (you may find it elsewhere)\n");
3202 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
3204 Log("it will be deleted instead. It should be recloned.\n");
3208 sprintf(path, "%s/%s", fileSysPath, isp->volSummary->fileName);
3210 Log("Unable to unlink %s (errno = %d)\n", path, errno);
3214 } else if (!check) {
3215 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
3217 Abort("Salvage of volume %u aborted\n", isp->volumeId);
/*
 * AskOffline
 *
 * Ask the fileserver, through FSYNC_VolOp(FSYNC_VOL_OFF, FSYNC_SALVAGE),
 * to take `volumeId` on `partition` offline so it can be salvaged.  The
 * request is attempted up to three times.  A denial or a protocol
 * mismatch ends the salvage through Abort(); other failures are logged
 * and FSYNC_clientFinis() is called before trying again.  If the final
 * result is not SYNC_OK the salvage is aborted as well.
 *
 * Under AFS_DEMAND_ATTACH_FS the volume header is additionally read from
 * disk and rewritten with inUse set to programType; see the comment in
 * that section for the reason.
 */
3223 AskOffline(VolumeId volumeId, char * partition)
3227 for (i = 0; i < 3; i++) {
3228 code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_OFF, FSYNC_SALVAGE, NULL);
3230 if (code == SYNC_OK) {
3232 } else if (code == SYNC_DENIED) {
3233 #ifdef DEMAND_ATTACH_ENABLE
3234 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
3236 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
3238 Abort("Salvage aborted\n");
3239 } else if (code == SYNC_BAD_COMMAND) {
3240 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
3242 #ifdef DEMAND_ATTACH_ENABLE
3243 Log("AskOffline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
3245 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
3247 Abort("Salvage aborted\n");
3250 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
3251 FSYNC_clientFinis();
3255 if (code != SYNC_OK) {
3256 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
3257 Abort("Salvage aborted\n");
3260 #ifdef AFS_DEMAND_ATTACH_FS
3261 /* set inUse = programType in the volume header. We do this in case
3262 * the fileserver restarts/crashes while we are salvaging.
3263 * Otherwise, the fileserver could attach the volume again on
3264 * startup while we are salvaging, which would be very bad, or
3265 * schedule another salvage while we are salvaging, which would be
3270 char name[VMAXPATHLEN];
3271 struct VolumeHeader header;
3272 struct VolumeDiskHeader diskHeader;
3273 struct VolumeDiskData volHeader;
3275 afs_snprintf(name, sizeof(name), "%s/" VFORMAT, fileSysPathName,
3276 afs_printable_uint32_lu(volumeId));
3278 fd = afs_open(name, O_RDONLY);
3282 if (read(fd, &diskHeader, sizeof(diskHeader)) != sizeof(diskHeader) ||
3283 diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
3290 DiskToVolumeHeader(&header, &diskHeader);
3292 IH_INIT(h, fileSysDevice, header.parent, header.volumeInfo);
3293 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
3294 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
3300 volHeader.inUse = programType;
3302 /* If we can't re-write the header, bail out and error. We don't
3303 * assert when reading the header, since it's possible the
3304 * header isn't really there (when there's no data associated
3305 * with the volume; we just delete the vol header file in that
3306 * case). But if it's there enough that we can read it, but
3307 * somehow we cannot write to it to signify we're salvaging it,
3308 * we've got a big problem and we cannot continue. */
3309 assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
3313 #endif /* AFS_DEMAND_ATTACH_FS */
/*
 * AskOnline
 *
 * Ask the fileserver, through FSYNC_VolOp(FSYNC_VOL_ON, FSYNC_WHATEVER),
 * to bring `volumeId` on `partition` back online.  The request is
 * attempted up to three times.  Unlike AskOffline(), no failure here
 * aborts the salvage: a denial or a protocol mismatch is only logged, and
 * for other failures FSYNC_clientFinis() is called before trying again.
 */
3317 AskOnline(VolumeId volumeId, char *partition)
3321 for (i = 0; i < 3; i++) {
3322 code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
3324 if (code == SYNC_OK) {
3326 } else if (code == SYNC_DENIED) {
3327 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, partition);
3328 } else if (code == SYNC_BAD_COMMAND) {
3329 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
3331 #ifdef DEMAND_ATTACH_ENABLE
3332 Log("AskOnline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
3334 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
/* The request being retried is FSYNC_VOL_ON, so report it as an online
 * request rather than repeating AskOffline()'s wording. */
3339 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
3340 FSYNC_clientFinis();
/*
 * CopyInode
 *
 * Copy the contents of inode1 into inode2 on `device`.  Both inodes are
 * opened through handles initialized with `rwvolume`; data is moved in
 * chunks of sizeof(buf) until a read returns no more bytes, and every
 * write is asserted to be complete.  Both descriptors are closed with
 * FDH_REALLYCLOSE when done.
 * NOTE(review): only the source open is asserted non-NULL in the visible
 * lines; confirm how a failed open of the destination is handled.
 */
3347 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
3349 /* Volume parameter is passed in case iopen is upgraded in future to
3350 * require a volume Id to be passed
3353 IHandle_t *srcH, *destH;
3354 FdHandle_t *srcFdP, *destFdP;
3357 IH_INIT(srcH, device, rwvolume, inode1);
3358 srcFdP = IH_OPEN(srcH);
3359 assert(srcFdP != NULL);
3360 IH_INIT(destH, device, rwvolume, inode2);
3361 destFdP = IH_OPEN(destH);
3363 while ((n = FDH_READ(srcFdP, buf, sizeof(buf))) > 0)
3364 assert(FDH_WRITE(destFdP, buf, n) == n);
3366 FDH_REALLYCLOSE(srcFdP);
3367 FDH_REALLYCLOSE(destFdP);
/*
 * PrintInodeList
 *
 * Debugging aid: read the whole file behind inodeFd into a heap buffer
 * sized from afs_fstat() and log one line per ViceInodeInfo record
 * (inode number, link count, byte count and the four params).
 * Failures of fstat, malloc and read are caught with assert().
 */
3374 PrintInodeList(void)
3376 register struct ViceInodeInfo *ip;
3377 struct ViceInodeInfo *buf;
3378 struct afs_stat status;
3379 register int nInodes;
3381 assert(afs_fstat(inodeFd, &status) == 0);
3382 buf = (struct ViceInodeInfo *)malloc(status.st_size);
3383 assert(buf != NULL);
3384 nInodes = status.st_size / sizeof(struct ViceInodeInfo);
3385 assert(read(inodeFd, buf, status.st_size) == status.st_size);
3386 for (ip = buf; nInodes--; ip++) {
3387 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
3388 PrintInode(NULL, ip->inodeNumber), ip->linkCount,
3389 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
3390 ip->u.param[2], ip->u.param[3]);
/*
 * PrintInodeSummary
 *
 * Debugging aid: log one line for each of the nVolumesInInodeFile entries
 * of inodeSummary[] (volume ids, index, inode counts, max uniquifier).
 */
3396 PrintInodeSummary(void)
3399 struct InodeSummary *isp;
3401 for (i = 0; i < nVolumesInInodeFile; i++) {
3402 isp = &inodeSummary[i];
3403 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/*
 * PrintVolumeSummary
 *
 * Debugging aid: log the header file name of each of the nVolumes entries
 * starting at volumeSummaryp.
 */
3408 PrintVolumeSummary(void)
3411 struct VolumeSummary *vsp;
3413 for (i = 0, vsp = volumeSummaryp; i < nVolumes; vsp++, i++) {
3414 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
3424 assert(0); /* Fork is never executed in the NT code path */
3428 #ifdef AFS_DEMAND_ATTACH_FS
3429 if ((f == 0) && (programType == salvageServer)) {
3430 /* we are a salvageserver child */
3431 #ifdef FSSYNC_BUILD_CLIENT
3432 VChildProcReconnectFS_r();
3434 #ifdef SALVSYNC_BUILD_CLIENT
3438 #endif /* AFS_DEMAND_ATTACH_FS */
3439 #endif /* !AFS_NT40_ENV */
3449 #ifdef AFS_DEMAND_ATTACH_FS
3450 if (programType == salvageServer) {
3451 #ifdef SALVSYNC_BUILD_CLIENT
3454 #ifdef FSSYNC_BUILD_CLIENT
3458 #endif /* AFS_DEMAND_ATTACH_FS */
3461 if (main_thread != pthread_self())
3462 pthread_exit((void *)code);
3475 pid = wait(&status);
3477 if (WCOREDUMP(status))
3478 Log("\"%s\" core dumped!\n", prog);
3479 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp
 *
 * Format `clock` as local time into a function-static 20-byte buffer,
 * either with seconds ("%m/%d/%Y %H:%M:%S") or without ("%m/%d/%Y %H:%M").
 * Because the buffer is static, its contents are overwritten by the next
 * call.
 * NOTE(review): the test that picks the format (presumably on `precision`)
 * is on a line not present in this excerpt.
 */
3485 TimeStamp(time_t clock, int precision)
3488 static char timestamp[20];
3489 lt = localtime(&clock);
3491 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M:%S", lt);
3493 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M", lt);
/*
 * CheckLogFile
 *
 * Set up the salvager log: the existing log at `log_path` is renamed to
 * "<log_path>.old" and a new one is opened in append mode as logFile.
 * The visible comment indicates stdout is used when the open fails.
 * NOTE(review): the ".old" name is built with strcpy/strcat into an
 * AFSDIR_PATH_MAX buffer, so log_path is assumed to leave room for the
 * suffix.
 */
3498 CheckLogFile(char * log_path)
3500 char oldSlvgLog[AFSDIR_PATH_MAX];
3502 #ifndef AFS_NT40_ENV
3509 strcpy(oldSlvgLog, log_path);
3510 strcat(oldSlvgLog, ".old");
3512 renamefile(log_path, oldSlvgLog);
3513 logFile = afs_fopen(log_path, "a");
3515 if (!logFile) { /* still nothing, use stdout */
3519 #ifndef AFS_NAMEI_ENV
3520 AFS_DEBUG_IOPS_LOG(logFile);
3525 #ifndef AFS_NT40_ENV
/*
 * TimeStampLogFile
 *
 * Create a hard link to the log file under a name carrying the current
 * local time, "<log_path>.YYYY-MM-DD.HH:MM:SS".  The result of link() is
 * deliberately ignored.
 */
3527 TimeStampLogFile(char * log_path)
3529 char stampSlvgLog[AFSDIR_PATH_MAX];
3534 lt = localtime(&now);
3535 (void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
3536 "%s.%04d-%02d-%02d.%02d:%02d:%02d",
3537 log_path, lt->tm_year + 1900,
3538 lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
3541 /* try to link the logfile to a timestamped filename */
3542 /* if it fails, oh well, nothing we can do */
3543 link(log_path, stampSlvgLog);
3552 #ifndef AFS_NT40_ENV
3554 printf("Can't show log since using syslog.\n");
3563 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
3566 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
3569 while (fgets(line, sizeof(line), logFile))
/*
 * Log
 *
 * printf-style logging.  The message is formatted into a local buffer
 * with afs_vsnprintf() and then either handed to syslog() at LOG_INFO
 * (non-NT builds only) or written to logFile prefixed with a
 * TimeStamp() of the current time.
 * NOTE(review): the condition choosing between syslog and logFile is on a
 * line not present in this excerpt.
 */
3576 Log(const char *format, ...)
3582 va_start(args, format);
3583 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
3585 #ifndef AFS_NT40_ENV
3587 syslog(LOG_INFO, "%s", tmp);
3591 gettimeofday(&now, 0);
3592 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort
 *
 * printf-style reporting of a fatal condition.  The message is formatted
 * with afs_vsnprintf() and sent to syslog() at LOG_INFO (non-NT builds
 * only) or written to logFile, without a timestamp.
 * NOTE(review): how the process is terminated afterwards is on lines not
 * present in this excerpt.
 */
3598 Abort(const char *format, ...)
3603 va_start(args, format);
3604 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
3606 #ifndef AFS_NT40_ENV
3608 syslog(LOG_INFO, "%s", tmp);
3612 fprintf(logFile, "%s", tmp);
3627 p = (char *)malloc(strlen(s) + 1);
3633 /* Remove the FORCESALVAGE file */
/*
 * RemoveTheForce
 *
 * Unlink "<path>/FORCESALVAGE", but only when not Testing, ForceSalvage
 * is set and the file is found by afs_stat().
 * NOTE(review): `target` is declared on a line not present in this
 * excerpt; `path` is assumed to fit in it together with the suffix.
 */
3635 RemoveTheForce(char *path)
3638 struct afs_stat force; /* so we can use afs_stat to find it */
3639 strcpy(target,path);
3640 strcat(target,"/FORCESALVAGE");
3641 if (!Testing && ForceSalvage) {
3642 if (afs_stat(target,&force) == 0) unlink(target);
3646 #ifndef AFS_AIX32_ENV
3648 * UseTheForceLuke - see if we can use the force
/*
 * Variant built when AFS_AIX32_ENV is not defined: returns non-zero when
 * "<path>/FORCESALVAGE" exists, i.e. when afs_stat() on it succeeds.
 */
3651 UseTheForceLuke(char *path)
3653 struct afs_stat force;
3655 strcpy(target,path);
3656 strcat(target,"/FORCESALVAGE");
3658 return (afs_stat(target, &force) == 0);
3662 * UseTheForceLuke - see if we can use the force
3665 * The VRMIX fsck will not muck with the filesystem it is supposedly
3666 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
3667 * muck directly with the root inode, which is within the normal
3669 * ListViceInodes() has a side effect of setting ForceSalvage if
3670 * it detects a need, based on root inode examination.
/*
 * Alternate variant: never reports a forced salvage and always returns 0.
 */
3673 UseTheForceLuke(char *path)
3676 return 0; /* sorry OB1 */
3681 /* NT support routines */
3683 static char execpathname[MAX_PATH];
/*
 * nt_SalvagePartition
 *
 * NT only: start a child salvager for partition `partName` as job number
 * `jobn`.  The path of the running executable is looked up once with
 * GetModuleFileName() and cached in execpathname; the child is spawned
 * with the saved command-line arguments and a job record carrying
 * SALVAGER_MAGIC, the job number and the partition name.
 * NOTE(review): partName is copied with strcpy and is assumed to fit in
 * job.cj_part.
 */
3685 nt_SalvagePartition(char *partName, int jobn)
3690 if (!*execpathname) {
3691 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
3692 if (!n || n == 1023)
3695 job.cj_magic = SALVAGER_MAGIC;
3696 job.cj_number = jobn;
3697 (void)strcpy(job.cj_part, partName);
3698 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage
 *
 * NT only: child-side counterpart of nt_SalvagePartition().  `datap` is
 * interpreted as a childJob_t; its length and magic number are checked
 * before use.  A log file named "<salvage log path>.<n>" is then opened
 * for writing as logFile.
 * NOTE(review): the number used in the log name and the return values are
 * on lines not present in this excerpt.
 */
3703 nt_SetupPartitionSalvage(void *datap, int len)
3705 childJob_t *jobp = (childJob_t *) datap;
3706 char logname[AFSDIR_PATH_MAX];
3708 if (len != sizeof(childJob_t))
3710 if (jobp->cj_magic != SALVAGER_MAGIC)
3715 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
3717 logFile = afs_fopen(logname, "w");
3725 #endif /* AFS_NT40_ENV */