2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
91 #include <sys/param.h>
95 #endif /* ITIMER_REAL */
101 #include <sys/stat.h>
106 #include <WINNT/afsevent.h>
109 #define WCOREDUMP(x) ((x) & 0200)
112 #include <afs/afsint.h>
113 #include <afs/assert.h>
114 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
115 #if defined(AFS_VFSINCL_ENV)
116 #include <sys/vnode.h>
118 #include <sys/fs/ufs_inode.h>
120 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
121 #include <ufs/ufs/dinode.h>
122 #include <ufs/ffs/fs.h>
124 #include <ufs/inode.h>
127 #else /* AFS_VFSINCL_ENV */
129 #include <ufs/inode.h>
130 #else /* AFS_OSF_ENV */
131 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
132 #include <sys/inode.h>
135 #endif /* AFS_VFSINCL_ENV */
136 #endif /* AFS_SGI_ENV */
139 #include <sys/lockf.h>
143 #include <checklist.h>
145 #if defined(AFS_SGI_ENV)
150 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
153 #include <sys/mnttab.h>
154 #include <sys/mntent.h>
159 #endif /* AFS_SGI_ENV */
160 #endif /* AFS_HPUX_ENV */
165 #include <afs/osi_inode.h>
169 #include <afs/afsutil.h>
170 #include <afs/fileutil.h>
171 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
179 #include <afs/afssyscalls.h>
183 #include "partition.h"
184 #include "daemon_com.h"
186 #include "volume_inline.h"
187 #include "salvsync.h"
188 #include "viceinode.h"
190 #include "volinodes.h" /* header magic number, etc. stuff */
191 #include "vol-salvage.h"
193 #include "vol_internal.h"
195 #include <afs/prs_fs.h>
197 #ifdef FSSYNC_BUILD_CLIENT
198 #include "vg_cache.h"
205 /*@+fcnmacros +macrofcndecl@*/
208 extern off64_t afs_lseek(int FD, off64_t O, int F);
209 #endif /*S_SPLINT_S */
210 #define afs_lseek(FD, O, F) lseek64(FD, (off64_t) (O), F)
211 #define afs_stat stat64
212 #define afs_fstat fstat64
213 #define afs_open open64
214 #define afs_fopen fopen64
215 #else /* !O_LARGEFILE */
217 extern off_t afs_lseek(int FD, off_t O, int F);
218 #endif /*S_SPLINT_S */
219 #define afs_lseek(FD, O, F) lseek(FD, (off_t) (O), F)
220 #define afs_stat stat
221 #define afs_fstat fstat
222 #define afs_open open
223 #define afs_fopen fopen
224 #endif /* !O_LARGEFILE */
225 /*@=fcnmacros =macrofcndecl@*/
228 extern void *calloc();
230 static char *TimeStamp(time_t clock, int precision);
233 int debug; /* -d flag */
234 extern int Testing; /* -n flag */
235 int ListInodeOption; /* -i flag */
236 int ShowRootFiles; /* -r flag */
237 int RebuildDirs; /* -sal flag */
238 int Parallel = 4; /* -para X flag */
239 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
240 int forceR = 0; /* -b flag */
241 int ShowLog = 0; /* -showlog flag */
242 int ShowSuid = 0; /* -showsuid flag */
243 int ShowMounts = 0; /* -showmounts flag */
244 int orphans = ORPH_IGNORE; /* -orphans option */
249 int useSyslog = 0; /* -syslog flag */
250 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
259 #define MAXPARALLEL 32
261 int OKToZap; /* -o flag */
262 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
263 * in the volume header */
265 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
267 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
270 * information that is 'global' to a particular salvage job.
273 Device fileSysDevice; /**< The device number of the current partition
275 char fileSysPath[8]; /**< The path of the mounted partition currently
276 * being salvaged, i.e. the directory containing
277 * the volume headers */
278 char *fileSysPathName; /**< NT needs this to make name pretty log. */
279 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
280 int VGLinkH_cnt; /**< # of references to lnk handle. */
281 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
284 char *fileSysDeviceName; /**< The block device where the file system being
285 * salvaged was mounted */
286 char *filesysfulldev;
288 int VolumeChanged; /**< Set by any routine which would change the
289 * volume in a way which would require callbacks
290 * to be broken if the volume was put back on
291 * on line by an active file server */
293 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
294 * header dealt with */
296 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
297 int inodeFd; /**< File descriptor for inode file */
299 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
300 int nVolumes; /**< Number of volumes (read-write and read-only)
301 * in volume summary */
302 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
305 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
306 * vnodes in the volume that
307 * we are currently looking
315 /* Forward declarations */
316 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
317 static int AskVolumeSummary(struct SalvInfo *salvinfo,
318 VolumeId singleVolumeNumber);
320 #ifdef AFS_DEMAND_ATTACH_FS
321 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
322 #endif /* AFS_DEMAND_ATTACH_FS */
324 /* Uniquifier stored in the Inode */
329 return (u & 0x3fffff);
331 #if defined(AFS_SGI_EXMAG)
332 return (u & SGI_UNIQMASK);
335 #endif /* AFS_SGI_EXMAG */
342 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
344 return 0; /* otherwise may be transient, e.g. EMFILE */
349 char *save_args[MAX_ARGS];
351 extern pthread_t main_thread;
352 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
356 * Get the salvage lock if not already held. Hold until process exits.
358 * @param[in] locktype READ_LOCK or WRITE_LOCK
/* _ObtainSalvageLock: take the salvager lock file
 * (AFSDIR_SERVER_SLVGLOCK_FILEPATH) in the mode given by locktype.
 * The lock file object is initialised with VLockFileInit() and the result of
 * VLockFileLock() is stored in code.  The two string fragments below are the
 * diagnostics for "another salvager is running" and for any other lock
 * error.
 * NOTE(review): the statements that test code and emit these messages are
 * not visible in this listing -- confirm against the full source. */
361 _ObtainSalvageLock(int locktype)
363 struct VLockFile salvageLock;
368 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
370 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
373 "salvager: There appears to be another salvager running! "
378 "salvager: Error %d trying to acquire salvage lock! "
/* ObtainSalvageLock: acquire the salvage lock in WRITE_LOCK mode by
 * delegating to _ObtainSalvageLock(). */
384 ObtainSalvageLock(void)
386 _ObtainSalvageLock(WRITE_LOCK);
/* ObtainSharedSalvageLock: acquire the salvage lock in READ_LOCK mode by
 * delegating to _ObtainSalvageLock(). */
389 ObtainSharedSalvageLock(void)
391 _ObtainSalvageLock(READ_LOCK);
395 #ifdef AFS_SGI_XFS_IOPS_ENV
/**
 * Check whether the given partition is currently mounted.
 *
 * For XFS the root inode number is not a constant, so the mount table is
 * searched instead of testing the inode number.
 *
 * @param[in] part  mount-point directory to look for; it is compared against
 *                  the mnt_dir field of every mount-table entry
 *
 * @return whether a mount-table entry for part exists
 *   @retval 1 part is listed in the mount table
 *   @retval 0 part is not listed in the mount table
 */
int
IsPartitionMounted(char *part)
{
    FILE *mntfp;
    struct mntent *mntent;
    int mounted = 0;

    /* Keep the assignment out of the assert expression so the table is still
     * opened if assert() is ever compiled out. */
    mntfp = setmntent(MOUNTED, "r");
    assert(mntfp != NULL);

    while ((mntent = getmntent(mntfp)) != NULL) {
        if (strcmp(part, mntent->mnt_dir) == 0) {
            mounted = 1;
            break;
        }
    }
    endmntent(mntfp);

    /* The previous "mntent ? 1 : 1" reported every partition as mounted. */
    return mounted;
}
415 /* Check if the given inode is the root of the filesystem. */
416 #ifndef AFS_SGI_XFS_IOPS_ENV
/* IsRootInode: true when status->st_ino equals ROOTINODE.  This definition
 * is only compiled when AFS_SGI_XFS_IOPS_ENV is not defined (see the
 * enclosing #ifndef); the text below explains why XFS needs a mount-table
 * check instead. */
418 IsRootInode(struct afs_stat *status)
421 * The root inode is not a fixed value in XFS partitions. So we need to
422 * see if the partition is in the list of mounted partitions. This only
423 * affects the SalvageFileSys path, so we check there.
425 return (status->st_ino == ROOTINODE);
430 #ifndef AFS_NAMEI_ENV
431 /* We don't want to salvage big files filesystems, since we can't put volumes on
 *
 * Summary added in review: CheckIfBigFilesFS builds the device path in
 * name -- "/dev/" is prepended when devName does not already start with
 * it, otherwise devName is copied unchanged -- then reads the superblock
 * with ReadSuper() and tests it with IsBigFilesFileSystem().  Both the read
 * failure and the big-files case are logged as "not salvaging".
 * NOTE(review): sprintf()/strcpy() into name are unbounded and the size of
 * name is not visible in this listing -- confirm devName always fits.
435 CheckIfBigFilesFS(char *mountPoint, char *devName)
437 struct superblock fs;
440 if (strncmp(devName, "/dev/", 5)) {
441 (void)sprintf(name, "/dev/%s", devName);
443 (void)strcpy(name, devName);
446 if (ReadSuper(&fs, name) < 0) {
447 Log("Unable to read superblock. Not salvaging partition %s.\n",
451 if (IsBigFilesFileSystem(&fs)) {
452 Log("Partition %s is a big files filesystem, not salvaging.\n",
462 #define HDSTR "\\Device\\Harddisk"
463 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
/* SameDisk (function variant using QueryDosDevice): resolve each partition's
 * devName with QueryDosDevice() and parse the number that follows the HDSTR
 * prefix into d1 (for p1) and d2 (for p2).  When the returned name does not
 * start with HDSTR a warning is logged.
 * NOTE(review): dowarn is a static flag initialised to 1, presumably so the
 * warning is only printed once; the test that uses it, the early returns
 * and the final comparison of d1 and d2 are not visible in this listing. */
465 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
470 static int dowarn = 1;
472 if (!QueryDosDevice(p1->devName, res, RES_LEN - 1))
474 if (strncmp(res, HDSTR, HDLEN)) {
477 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
478 res, HDSTR, p1->devName);
482 d1 = atoi(&res[HDLEN]);
484 if (!QueryDosDevice(p2->devName, res, RES_LEN - 1))
486 if (strncmp(res, HDSTR, HDLEN)) {
489 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
490 res, HDSTR, p2->devName);
494 d2 = atoi(&res[HDLEN]);
499 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
502 /* This assumes that two partitions with the same device number divided by
503 * PartsPerDisk are on the same disk.
 *
 * Summary added in review: SalvageFileSysParallel queues partP as a job and
 * runs jobs concurrently:
 *  - partitions that SameDisk() places on one disk are chained through
 *    nextjob and salvaged one after another;
 *  - when numjobs has reached Parallel, or when the function is called with
 *    partP == 0 to drain the queue, it blocks in wait() until a job ends;
 *  - when debug is set or Parallel == 1 the partition is salvaged directly
 *    through SalvageFileSys();
 *  - after draining, and unless useSyslog is set, the per-job log files
 *    named "<AFSDIR_SERVER_SLVGLOG_FILEPATH>.<n>" are read back and
 *    unlinked.
506 SalvageFileSysParallel(struct DiskPartition64 *partP)
509 struct DiskPartition64 *partP;
510 int pid; /* Pid for this job */
511 int jobnumb; /* Log file job number */
512 struct job *nextjob; /* Next partition on disk to salvage */
514 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
515 struct job *thisjob = 0;
516 static int numjobs = 0;
517 static int jobcount = 0;
523 char logFileName[256];
527 /* We have a partition to salvage. Copy it into thisjob */
528 thisjob = (struct job *)malloc(sizeof(struct job));
530 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
533 memset(thisjob, 0, sizeof(struct job));
534 thisjob->partP = partP;
535 thisjob->jobnumb = jobcount;
537 } else if (jobcount == 0) {
538 /* We are asking to wait for all jobs (partp == 0), yet we never
541 Log("No file system partitions named %s* found; not salvaged\n",
542 VICE_PARTITION_PREFIX);
546 if (debug || Parallel == 1) {
548 SalvageFileSys(thisjob->partP, 0);
555 /* Check to see if thisjob is for a disk that we are already
556 * salvaging. If it is, link it in as the next job to do. The
557 * jobs array has 1 entry per disk being salvaged. numjobs is
558 * the total number of disks currently being salvaged. In
559 * order to keep the jobs array compact, when a disk is
560 * completed, the highest element in the jobs array is moved
561 * down to the now open slot.
563 for (j = 0; j < numjobs; j++) {
564 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
565 /* On same disk, add it to this list and return */
566 thisjob->nextjob = jobs[j]->nextjob;
567 jobs[j]->nextjob = thisjob;
574 /* Loop until we start thisjob or until all existing jobs are finished */
575 while (thisjob || (!partP && (numjobs > 0))) {
576 startjob = -1; /* No new job to start */
578 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
579 /* Either the max jobs are running or we have to wait for all
580 * the jobs to finish. In either case, we wait for at least one
581 * job to finish. When it's done, clean up after it.
583 pid = wait(&wstatus);
585 for (j = 0; j < numjobs; j++) { /* Find which job it is */
586 if (pid == jobs[j]->pid)
590 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
591 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
594 numjobs--; /* job no longer running */
595 oldjob = jobs[j]; /* remember */
596 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
597 free(oldjob); /* free the old job */
599 /* If there is another partition on the disk to salvage, then
600 * say we will start it (startjob). If not, then put thisjob there
601 * and say we will start it.
603 if (jobs[j]) { /* Another partition to salvage */
604 startjob = j; /* Will start it */
605 } else { /* There is not another partition to salvage */
607 jobs[j] = thisjob; /* Add thisjob */
609 startjob = j; /* Will start it */
611 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
612 startjob = -1; /* Don't start it - already running */
616 /* We don't have to wait for a job to complete */
618 jobs[numjobs] = thisjob; /* Add this job */
620 startjob = numjobs; /* Will start it */
624 /* Start up a new salvage job on a partition in job slot "startjob" */
625 if (startjob != -1) {
627 Log("Starting salvage of file system partition %s\n",
628 jobs[startjob]->partP->name);
630 /* For NT, we not only fork, but re-exec the salvager. Pass in the
631 * commands and pass the child job number via the data path.
634 nt_SalvagePartition(jobs[startjob]->partP->name,
635 jobs[startjob]->jobnumb);
636 jobs[startjob]->pid = pid;
641 jobs[startjob]->pid = pid;
647 for (fd = 0; fd < 16; fd++)
654 openlog("salvager", LOG_PID, useSyslogFacility);
658 (void)afs_snprintf(logFileName, sizeof logFileName,
660 AFSDIR_SERVER_SLVGLOG_FILEPATH,
661 jobs[startjob]->jobnumb);
662 logFile = afs_fopen(logFileName, "w");
667 SalvageFileSys1(jobs[startjob]->partP, 0);
672 } /* while ( thisjob || (!partP && numjobs > 0) ) */
674 /* If waited for all jobs to complete, now collect log files and return */
676 if (!useSyslog) /* if syslogging - no need to collect */
679 for (i = 0; i < jobcount; i++) {
680 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
681 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
682 if ((passLog = afs_fopen(logFileName, "r"))) {
683 while (fgets(buf, sizeof(buf), passLog)) {
688 (void)unlink(logFileName);
/* SalvageFileSys: salvage one partition, or one volume of it when
 * singleVolumeNumber is non-zero, through SalvageFileSys1().  The work runs
 * in the calling process when canfork is false or debug is set, otherwise
 * in the child created by Fork().  Wait("SalvageFileSys") collects the
 * child -- presumably on the parent side; the else branch itself is not
 * visible in this listing. */
697 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
699 if (!canfork || debug || Fork() == 0) {
700 SalvageFileSys1(partP, singleVolumeNumber);
701 if (canfork && !debug) {
706 Wait("SalvageFileSys");
/* get_DevName: split a device path at its last '/'.  A copy of pbuffer is
 * made in pbuf and searched with strrchr(); pbuffer itself is then searched
 * and overwritten in place with the text that follows its last '/'.
 * NOTE(review): what is stored through wpath and what is returned are not
 * visible in this listing.
 * NOTE(review): strcpy(pbuffer, ptr + 1) copies between overlapping regions
 * of the same buffer (ptr points into pbuffer), which strcpy() does not
 * define; memmove() would be the safe form.
 * NOTE(review): strcpy() into pbuf[128] is unbounded; the caller visible in
 * this file passes a 100-byte buffer. */
710 get_DevName(char *pbuffer, char *wpath)
712 char pbuf[128], *ptr;
713 strcpy(pbuf, pbuffer);
714 ptr = (char *)strrchr(pbuf, '/');
720 ptr = (char *)strrchr(pbuffer, '/');
722 strcpy(pbuffer, ptr + 1);
/* SalvageFileSys1: salvage the volume groups found on partP, or only the
 * group selected by singleVolumeNumber when that is non-zero.
 *
 * Steps visible in this listing, in order:
 *  1. A local struct SalvInfo is zeroed and filled with the partition, its
 *     device number, path and device name.  The attempt is abandoned with
 *     Abort() once tries exceeds VOL_MAX_CHECKOUT_RETRIES.
 *  2. Locking.  For a single volume: non-DAFS builds lock the partition,
 *     the fileserver is contacted (VConnectFS unless running as the
 *     salvageserver), AskOffline() is called and, under
 *     AFS_DEMAND_ATTACH_FS, LockVolume().  Otherwise VLockPartition().
 *  3. ForceSalvage is taken from UseTheForceLuke(); leftover
 *     "salvage.inodes.*" / "salvage.temp.*" entries in the partition
 *     directory are found and logged as being removed.
 *  4. The inode list temp file is created, unlinked straight away, and
 *     filled by GetInodeSummary().
 *  5. GetVolumeSummary() is called, then inodeSummary is walked in runs
 *     sharing one RWvolumeId: header files that precede the run in the
 *     volume list are passed to DeleteExtraVolumeHeaderFile(), each inode
 *     summary is matched to its VolumeSummary, and SalvageVolumeGroup() is
 *     called for the run.  Header files left over at the end are passed to
 *     DeleteExtraVolumeHeaderFile() as well.
 *  6. RemoveTheForce() for whole-partition runs, AskOnline() for the
 *     volume(s), a completion message, and fclose() of the inode file.
 */
729 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
732 char inodeListPath[256];
733 FILE *inodeFile = NULL;
734 static char tmpDevName[100];
735 static char wpath[100];
736 struct VolumeSummary *vsp, *esp;
740 struct SalvInfo l_salvinfo;
741 struct SalvInfo *salvinfo = &l_salvinfo;
744 memset(salvinfo, 0, sizeof(*salvinfo));
751 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
752 Abort("Raced too many times with fileserver restarts while trying to "
753 "checkout/lock volumes; Aborted\n");
755 #ifdef AFS_DEMAND_ATTACH_FS
757 /* unlock all previous volume locks, since we're about to lock them
759 VLockFileReinit(&partP->volLockFile);
761 #endif /* AFS_DEMAND_ATTACH_FS */
763 salvinfo->fileSysPartition = partP;
764 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
765 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
768 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
769 (void)sprintf(salvinfo->fileSysPath, "%s\\", salvinfo->fileSysPathName);
770 name = partP->devName;
772 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
773 strcpy(tmpDevName, partP->devName);
774 name = get_DevName(tmpDevName, wpath);
775 salvinfo->fileSysDeviceName = name;
776 salvinfo->filesysfulldev = wpath;
779 if (singleVolumeNumber) {
780 #ifndef AFS_DEMAND_ATTACH_FS
781 /* only non-DAFS locks the partition when salvaging a single volume;
782 * DAFS will lock the individual volumes in the VG */
783 VLockPartition(partP->name);
784 #endif /* !AFS_DEMAND_ATTACH_FS */
788 /* salvageserver already setup fssync conn for us */
789 if ((programType != salvageServer) && !VConnectFS()) {
790 Abort("Couldn't connect to file server\n");
793 AskOffline(salvinfo, singleVolumeNumber);
794 #ifdef AFS_DEMAND_ATTACH_FS
795 if (LockVolume(salvinfo, singleVolumeNumber)) {
798 #endif /* AFS_DEMAND_ATTACH_FS */
801 VLockPartition(partP->name);
805 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
808 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
809 partP->name, name, (Testing ? "(READONLY mode)" : ""));
811 Log("***Forced salvage of all volumes on this partition***\n");
816 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
823 assert((dirp = opendir(salvinfo->fileSysPath)) != NULL);
824 while ((dp = readdir(dirp))) {
825 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
826 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
828 Log("Removing old salvager temp files %s\n", dp->d_name);
829 strcpy(npath, salvinfo->fileSysPath);
831 strcat(npath, dp->d_name);
837 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
839 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
/* NOTE(review): the pointer returned by _tempnam() is passed straight to
 * strncpy(), so it is neither checked for NULL nor freed here; strncpy()
 * also leaves inodeListPath unterminated if the name is 255 bytes or more. */
840 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
842 snprintf(inodeListPath, 255, "%s/salvage.inodes.%s.%d", tdir, name,
846 inodeFile = fopen(inodeListPath, "w+b");
848 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
851 /* Using nt_unlink here since we're really using the delete on close
852 * semantics of unlink. In most places in the salvager, we really do
853 * mean to unlink the file at that point. Those places have been
854 * modified to actually do that so that the NT crt can be used there.
856 code = nt_unlink(inodeListPath);
858 code = unlink(inodeListPath);
861 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
864 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
868 salvinfo->inodeFd = fileno(inodeFile);
869 if (salvinfo->inodeFd == -1)
870 Abort("Temporary file %s is missing...\n", inodeListPath);
871 afs_lseek(salvinfo->inodeFd, 0L, SEEK_SET);
872 if (ListInodeOption) {
873 PrintInodeList(salvinfo);
876 /* enumerate volumes in the partition.
877 * figure out sets of read-only + rw volumes.
878 * salvage each set, read-only volumes first, then read-write.
879 * Fix up inodes on last volume in set (whether it is read-write
882 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
886 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
887 i < salvinfo->nVolumesInInodeFile; i = j) {
888 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
890 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
892 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
893 struct VolumeSummary *tsp;
894 /* Scan volume list (from partition root directory) looking for the
895 * current rw volume number in the volume list from the inode scan.
896 * If there is one here that is not in the inode volume list,
898 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
900 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
902 /* Now match up the volume summary info from the root directory with the
903 * entry in the volume list obtained from scanning inodes */
904 salvinfo->inodeSummary[j].volSummary = NULL;
905 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
906 if (tsp->header.id == vid) {
907 salvinfo->inodeSummary[j].volSummary = tsp;
913 /* Salvage the group of volumes (several read-only + 1 read/write)
914 * starting with the current read-only volume we're looking at.
916 SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
919 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
920 for (; vsp < esp; vsp++) {
922 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
925 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
926 RemoveTheForce(salvinfo->fileSysPath);
928 if (!Testing && singleVolumeNumber) {
929 #ifdef AFS_DEMAND_ATTACH_FS
930 /* unlock vol headers so the fs can attach them when we AskOnline */
931 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
932 #endif /* AFS_DEMAND_ATTACH_FS */
934 AskOnline(salvinfo, singleVolumeNumber);
936 /* Step through the volumeSummary list and set all volumes on-line.
937 * The volumes were taken off-line in GetVolumeSummary.
939 for (j = 0; j < salvinfo->nVolumes; j++) {
940 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
944 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
945 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
948 fclose(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
/* DeleteExtraVolumeHeaderFile: handle a volume header file
 * (salvinfo->fileSysPath "/" vsp->fileName) that has no data behind it.
 * The condition is logged, worded as "would have been deleted" when Testing
 * is set.  The header is destroyed with VDestroyVolumeDiskHeader() and the
 * file itself is unlinked; ENOENT from unlink() is tolerated.
 * NOTE(review): the test that skips the deletion in Testing mode, and the
 * declaration (and size) of path, are not visible in this listing. */
952 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
955 sprintf(path, "%s/%s", salvinfo->fileSysPath, vsp->fileName);
958 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
961 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
963 Log("Error %ld destroying volume disk header for volume %lu\n",
964 afs_printable_int32_ld(code),
965 afs_printable_uint32_lu(vsp->header.id));
968 /* make sure we actually delete the fileName file; ENOENT
969 * is fine, since VDestroyVolumeDiskHeader probably already
971 if (unlink(path) && errno != ENOENT) {
972 Log("Unable to unlink %s (errno = %d)\n", path, errno);
/* CompareInodes: comparison routine for struct ViceInodeInfo records; it is
 * handed to qsort() when the inode table is sorted.
 *
 * Ordering visible in this listing:
 *  - If either record is a special inode (vnodeNumber == INODESPECIAL) a
 *    read-write volume id is derived for each record first: parentId for a
 *    special inode, volumeId otherwise.  Two special inodes of the same
 *    volume are ordered by special type; otherwise the volume ids and their
 *    relation to the derived read-write ids decide.
 *  - Ordinary inodes are compared by volumeId, then by vnodeNumber.
 *  - Remaining ties are decided by vnodeUniquifier and then by
 *    inodeDataVersion, reversed so the most desirable inode sorts first.
 *    The AFS_3DISPARES / AFS_SGI_EXMAG branches treat a value in the top
 *    10% of the counter range against one in the bottom 10% as a special
 *    case -- presumably counter wrap-around.
 * NOTE(review): most return statements are not visible in this listing. */
979 CompareInodes(const void *_p1, const void *_p2)
981 const struct ViceInodeInfo *p1 = _p1;
982 const struct ViceInodeInfo *p2 = _p2;
983 if (p1->u.vnode.vnodeNumber == INODESPECIAL
984 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
985 VolumeId p1rwid, p2rwid;
987 (p1->u.vnode.vnodeNumber ==
988 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
990 (p2->u.vnode.vnodeNumber ==
991 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
996 if (p1->u.vnode.vnodeNumber == INODESPECIAL
997 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
998 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
999 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1000 if (p1->u.vnode.volumeId == p1rwid)
1002 if (p2->u.vnode.volumeId == p2rwid)
1004 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1006 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1007 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1008 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1010 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1012 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1014 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1016 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1018 /* The following tests are reversed, so that the most desirable
1019 * of several similar inodes comes first */
1020 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1021 #ifdef AFS_3DISPARES
1022 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1023 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1026 #ifdef AFS_SGI_EXMAG
1027 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1028 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1033 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1034 #ifdef AFS_3DISPARES
1035 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1036 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1039 #ifdef AFS_SGI_EXMAG
1040 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1041 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1046 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1047 #ifdef AFS_3DISPARES
1048 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1049 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1052 #ifdef AFS_SGI_EXMAG
1053 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1054 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1059 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1060 #ifdef AFS_3DISPARES
1061 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1062 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1065 #ifdef AFS_SGI_EXMAG
1066 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1067 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/* CountVolumeInodes: scan the run of records starting at ip -- at most
 * maxInodes of them -- that share the first record's volumeId, and fill
 * *summary with:
 *   volumeId        the volume id of the run
 *   RWvolumeId      parentId taken from a special inode, else the volume id
 *   nInodes         n
 *   nSpecialInodes  nSpecial
 *   maxUniquifier   largest vnodeUniquifier seen
 * The first record is read before maxInodes is tested, so the caller must
 * pass at least one record.
 * NOTE(review): the statements that advance ip and increment n / nSpecial
 * are not visible in this listing. */
1076 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1077 struct InodeSummary *summary)
1079 VolumeId volume = ip->u.vnode.volumeId;
1080 VolumeId rwvolume = volume;
1085 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1087 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1089 rwvolume = ip->u.special.parentId;
1090 /* This isn't quite right, as there could (in error) be different
1091 * parent inodes in different special vnodes */
1093 if (maxunique < ip->u.vnode.vnodeUniquifier)
1094 maxunique = ip->u.vnode.vnodeUniquifier;
1098 summary->volumeId = volume;
1099 summary->RWvolumeId = rwvolume;
1100 summary->nInodes = n;
1101 summary->nSpecialInodes = nSpecial;
1102 summary->maxUniquifier = maxunique;
/* OnlyOneVolume: inode filter handed to ListViceInodes() when a single
 * volume is being salvaged.  Returns non-zero when inodeinfo belongs to
 * singleVolumeNumber: a special inode is matched on its parentId, any other
 * inode on its volumeId.  rock is not used by the visible code. */
1106 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1108 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1109 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1110 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1115 * Collect list of inodes in file named by path. If a truly fatal error,
1116 * unlink the file and abort. For lesser errors, return -1. The file will
1117 * be unlinked by the caller.
/* Steps visible in this listing:
 *  1. ListViceInodes() writes the partition's inode records to inodeFile,
 *     filtered by OnlyOneVolume when singleVolumeNumber is non-zero.  A
 *     negative result is logged as an I/O error or ends in Abort().
 *  2. The size of inodeFile is taken with afs_fstat(); a second temporary
 *     file, the summary file, is created and unlinked straight away.
 *  3. In the child created by Fork() -- or in this process when canfork is
 *     false or debug is set -- the inode table is read into memory, sorted
 *     with CompareInodes, written back through inodeFd, and one struct
 *     InodeSummary per volume (CountVolumeInodes) is written to the summary
 *     file, which is then flushed and fsync'ed.
 *  4. After Wait("Inode summary") the summary file is read into
 *     salvinfo->inodeSummary, salvinfo->nVolumesInInodeFile is derived from
 *     its size, and every volSummary pointer is reset to NULL.
 *
 * NOTE(review): on the _tempnam() path the result is strcpy'd into the
 * 50-byte summaryFileName without a length check.
 * NOTE(review): nInodes*sizeof(struct ViceInodeInfo) is not checked for
 * overflow before malloc(), and st_size is narrowed to unsigned long.
 */
1120 GetInodeSummary(struct SalvInfo *salvinfo, FILE *inodeFile, VolumeId singleVolumeNumber)
1122 struct afs_stat status;
1125 struct ViceInodeInfo *ip, *ip_save;
1126 struct InodeSummary summary;
1127 char summaryFileName[50];
1130 char *dev = salvinfo->fileSysPath;
1131 char *wpath = salvinfo->fileSysPath;
1133 char *dev = salvinfo->fileSysDeviceName;
1134 char *wpath = salvinfo->filesysfulldev;
1136 char *part = salvinfo->fileSysPath;
1140 /* This file used to come from vfsck; cobble it up ourselves now... */
1142 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1143 singleVolumeNumber ? OnlyOneVolume : 0,
1144 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1146 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1149 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1151 if (forceSal && !ForceSalvage) {
1152 Log("***Forced salvage of all volumes on this partition***\n");
1155 fseek(inodeFile, 0L, SEEK_SET);
1156 salvinfo->inodeFd = fileno(inodeFile);
1157 if (salvinfo->inodeFd == -1 || afs_fstat(salvinfo->inodeFd, &status) == -1) {
1158 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1160 tdir = (tmpdir ? tmpdir : part);
1162 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1163 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp"));
1165 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1166 "%s/salvage.temp.%d", tdir, getpid());
1168 summaryFile = afs_fopen(summaryFileName, "a+");
1169 if (summaryFile == NULL) {
1170 Abort("Unable to create inode summary file\n");
1174 /* Using nt_unlink here since we're really using the delete on close
1175 * semantics of unlink. In most places in the salvager, we really do
1176 * mean to unlink the file at that point. Those places have been
1177 * modified to actually do that so that the NT crt can be used there.
1179 code = nt_unlink(summaryFileName);
1181 code = unlink(summaryFileName);
1184 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1187 if (!canfork || debug || Fork() == 0) {
1189 unsigned long st_size=(unsigned long) status.st_size;
1190 nInodes = st_size / sizeof(struct ViceInodeInfo);
1192 fclose(summaryFile);
1193 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1194 RemoveTheForce(salvinfo->fileSysPath);
1196 struct VolumeSummary *vsp;
1199 GetVolumeSummary(salvinfo, singleVolumeNumber);
1201 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1203 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1206 Log("%s vice inodes on %s; not salvaged\n",
1207 singleVolumeNumber ? "No applicable" : "No", dev);
1210 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1212 fclose(summaryFile);
1214 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1217 if (read(salvinfo->inodeFd, ip, st_size) != st_size) {
1218 fclose(summaryFile);
1219 Abort("Unable to read inode table; %s not salvaged\n", dev);
1221 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1222 if (afs_lseek(salvinfo->inodeFd, 0, SEEK_SET) == -1
1223 || write(salvinfo->inodeFd, ip, st_size) != st_size) {
1224 fclose(summaryFile);
1225 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1230 CountVolumeInodes(ip, nInodes, &summary);
1231 if (fwrite(&summary, sizeof(summary), 1, summaryFile) != 1) {
1232 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1233 fclose(summaryFile);
1236 summary.index += (summary.nInodes);
1237 nInodes -= summary.nInodes;
1238 ip += summary.nInodes;
1241 ip = ip_save = NULL;
1242 /* Following fflush is not fclose, because if it was debug mode would not work */
1243 if (fflush(summaryFile) == EOF || fsync(fileno(summaryFile)) == -1) {
1244 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1245 fclose(summaryFile);
1248 if (canfork && !debug) {
1253 if (Wait("Inode summary") == -1) {
1254 fclose(summaryFile);
1255 Exit(1); /* salvage of this partition aborted */
1258 assert(afs_fstat(fileno(summaryFile), &status) != -1);
1259 if (status.st_size != 0) {
1261 unsigned long st_status=(unsigned long)status.st_size;
1262 salvinfo->inodeSummary = (struct InodeSummary *)malloc(st_status);
1263 assert(salvinfo->inodeSummary != NULL);
1264 /* For GNU we need to do lseek to get the file pointer moved. */
1265 assert(afs_lseek(fileno(summaryFile), 0, SEEK_SET) == 0);
1266 ret = read(fileno(summaryFile), salvinfo->inodeSummary, st_status);
1267 assert(ret == st_status);
1269 salvinfo->nVolumesInInodeFile =(unsigned long)(status.st_size) / sizeof(struct InodeSummary);
1270 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1271 salvinfo->inodeSummary[i].volSummary = NULL;
1273 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)(status.st_size));
1274 fclose(summaryFile);
1278 /* Comparison routine for volume sort.
1279 This is setup so that a read-write volume comes immediately before
1280 any read-only clones of that volume */
1282 CompareVolumes(const void *_p1, const void *_p2)
1284 const struct VolumeSummary *p1 = _p1;
1285 const struct VolumeSummary *p2 = _p2;
1286 if (p1->header.parent != p2->header.parent)
1287 return p1->header.parent < p2->header.parent ? -1 : 1;
1288 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1290 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1292 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1296 * Gleans volumeSummary information by asking the fileserver
1298 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1299 * salvaging a whole partition
1301 * @return whether we obtained the volume summary information or not
1302 * @retval 0 success; we obtained the volume summary information
1303 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1305 * @retval 1 we did not get the volume summary information; either the
1306 * fileserver responded with an error, or we are not supposed to
1307 * ask the fileserver for the information (e.g. we are salvaging
1308 * the entire partition or we are not the salvageserver)
1310 * @note for non-DAFS, always returns 1
/*
 * AskVolumeSummary: when built with FSSYNC_BUILD_CLIENT and
 * AFS_DEMAND_ATTACH_FS, running as the salvageserver and salvaging a single
 * volume, ask the fileserver (FSYNC_VGCQuery) for the members of the volume
 * group instead of scanning the partition.
 *
 * Visible steps:
 *  - repeat the query while the fileserver answers FSYNC_PART_SCANNING,
 *    sleeping between attempts (the delay grows up to 10 seconds);
 *  - treat FSYNC_UNKNOWN_VOLID as an empty volume group whose RW id is
 *    singleVolumeNumber;
 *  - if the queried volume is not the RW of its group, schedule a salvage
 *    of the group via SALVSYNC_LinkVolume and exit with
 *    SALSRV_EXIT_VOLGROUP_LINK;
 *  - otherwise allocate VOL_VG_MAX_VOLS VolumeSummary slots, take each other
 *    child offline and lock it, read its disk header and record it, then
 *    sort the result with qsort.
 *
 * NOTE(review): several lines of this function (declarations, returns,
 * closing braces) are not present in this view, so the exact control flow
 * between the steps above should be confirmed against the full file.
 */
1313 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1316 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1317 if (programType == salvageServer) {
1318 if (singleVolumeNumber) {
1319 FSSYNC_VGQry_response_t q_res;
1321 struct VolumeSummary *vsp;
1323 struct VolumeDiskHeader diskHdr;
1325 memset(&res, 0, sizeof(res));
1327 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1330 * We must wait for the partition to finish scanning before
1331 * can continue, since we will not know if we got the entire
1332 * VG membership unless the partition is fully scanned.
1333 * We could, in theory, just scan the partition ourselves if
1334 * the VG cache is not ready, but we would be doing the exact
1335 * same scan the fileserver is doing; it will almost always
1336 * be faster to wait for the fileserver. The only exceptions
1337 * are if the partition does not take very long to scan, and
1338 * in that case it's fast either way, so who cares?
1340 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1341 Log("waiting for fileserver to finish scanning partition %s...\n",
1342 salvinfo->fileSysPartition->name);
1344 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1345 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1346 * just so small partitions don't need to wait over 10
1347 * seconds every time, and large partitions are generally
1348 * polled only once every ten seconds. */
1349 sleep((i > 10) ? (i = 10) : i);
1351 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1355 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1356 /* This can happen if there's no header for the volume
1357 * we're salvaging, or no headers exist for the VG (if
1358 * we're salvaging an RW). Act as if we got a response
1359 * with no VG members. The headers may be created during
1360 * salvaging, if there are inodes in this VG. */
1362 memset(&q_res, 0, sizeof(q_res));
1363 q_res.rw = singleVolumeNumber;
1367 Log("fileserver refused VGCQuery request for volume %lu on "
1368 "partition %s, code %ld reason %ld\n",
1369 afs_printable_uint32_lu(singleVolumeNumber),
1370 salvinfo->fileSysPartition->name,
1371 afs_printable_int32_ld(code),
1372 afs_printable_int32_ld(res.hdr.reason));
1376 if (q_res.rw != singleVolumeNumber) {
1377 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1378 afs_printable_uint32_lu(singleVolumeNumber),
1379 afs_printable_uint32_lu(q_res.rw));
1380 #ifdef SALVSYNC_BUILD_CLIENT
1381 if (SALVSYNC_LinkVolume(q_res.rw,
1383 salvinfo->fileSysPartition->name,
1385 Log("schedule request failed\n");
1387 #endif /* SALVSYNC_BUILD_CLIENT */
1388 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1391 salvinfo->volumeSummaryp = malloc(VOL_VG_MAX_VOLS * sizeof(struct VolumeSummary));
1392 assert(salvinfo->volumeSummaryp != NULL);
1394 salvinfo->nVolumes = 0;
1395 vsp = salvinfo->volumeSummaryp;
1397 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1398 char name[VMAXPATHLEN];
1400 if (!q_res.children[i]) {
1404 /* AskOffline for singleVolumeNumber was called much earlier */
1405 if (q_res.children[i] != singleVolumeNumber) {
1406 AskOffline(salvinfo, q_res.children[i]);
1407 if (LockVolume(salvinfo, q_res.children[i])) {
1413 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1415 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1416 afs_printable_uint32_lu(q_res.children[i]));
1421 DiskToVolumeHeader(&vsp->header, &diskHdr);
1422 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1423 vsp->fileName = ToString(name);
1424 salvinfo->nVolumes++;
1428 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1433 Log("Cannot get volume summary from fileserver; falling back to scanning "
1434 "entire partition\n");
1437 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
1442 * count how many volume headers are found by VWalkVolumeHeaders.
1444 * @param[in] dp the disk partition (unused)
1445 * @param[in] name full path to the .vol header (unused)
1446 * @param[in] hdr the header data (unused)
1447 * @param[in] last whether this is the last try or not (unused)
1448 * @param[in] rock actually an afs_int32*; the running count of how many
1449 * volumes we have found
/*
 * CountHeader: VWalkVolumeHeaders callback used to count volume headers.
 * rock is interpreted as a pointer to an afs_int32 counter; dp, name, hdr and
 * last are documented as unused.
 * NOTE(review): the statements that update the counter and return are not
 * visible in this view -- presumably the counter is incremented once per
 * header; confirm against the full file.
 */
1454 CountHeader(struct DiskPartition64 *dp, const char *name,
1455 struct VolumeDiskHeader *hdr, int last, void *rock)
1457 afs_int32 *nvols = (afs_int32 *)rock;
1463 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
/*
 * State shared with the RecordHeader and UnlinkHeader callbacks during a
 * VWalkVolumeHeaders scan; GetVolumeSummary fills it in before the walk and
 * reads nVolumes back afterwards.
 */
1466 struct SalvageScanParams {
1467 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1468 * vol id of the VG we're salvaging */
1469 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1470 * we're filling in */
1471 afs_int32 nVolumes; /**< # of vols we've encountered */
1472 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1473 * # of vols we've alloc'd memory for) */
1474 int retry; /**< do we need to retry vol lock/checkout? */
1475 struct SalvInfo *salvinfo; /**< salvage job info */
1479 * records volume summary info found from VWalkVolumeHeaders.
1481 * Found volumes are also taken offline if they are in the specific volume
1482 * group we are looking for.
1484 * @param[in] dp the disk partition
1485 * @param[in] name full path to the .vol header
1486 * @param[in] hdr the header data
1487 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1488 * @param[in] rock actually a struct SalvageScanParams*, containing the
1489 * information needed to record the volume summary data
1491 * @return operation status
1493 * @retval -1 volume locking raced with fileserver restart; checking out
1494 * and locking volumes needs to be retried
1495 * @retval 1 volume header is mis-named and should be deleted
/*
 * RecordHeader: VWalkVolumeHeaders callback that records one volume header
 * into the summary array described by rock (a struct SalvageScanParams).
 *
 * Visible behavior:
 *  - converts the on-disk header with DiskToVolumeHeader;
 *  - if a single volume was requested and it turns out to be a clone (its
 *    parent differs from its id), the salvageserver schedules a salvage of
 *    the parent volume group and exits with SALSRV_EXIT_VOLGROUP_LINK,
 *    while the other branch logs that the read-only volume is not salvaged;
 *  - for headers that belong to the scan (partition salvage, or id/parent
 *    equal to singleVolumeNumber) compares the file name with the VFORMAT
 *    name for the volume id, takes the volume offline and locks it where
 *    appropriate, and copies the summary into params->vsp;
 *  - aborts if more headers are seen than params->totalVolumes.
 *
 * NOTE(review): the lines that update params (nVolumes, vsp, retry), the
 * badname flag and the return statements are not present in this view;
 * confirm the exact return values against the full file.
 */
1498 RecordHeader(struct DiskPartition64 *dp, const char *name,
1499 struct VolumeDiskHeader *hdr, int last, void *rock)
1501 char nameShouldBe[64];
1502 struct SalvageScanParams *params;
1503 struct VolumeSummary summary;
1504 VolumeId singleVolumeNumber;
1505 struct SalvInfo *salvinfo;
1507 params = (struct SalvageScanParams *)rock;
1509 singleVolumeNumber = params->singleVolumeNumber;
1510 salvinfo = params->salvinfo;
1512 DiskToVolumeHeader(&summary.header, hdr);
1514 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1515 && summary.header.parent != singleVolumeNumber) {
1517 if (programType == salvageServer) {
1518 #ifdef SALVSYNC_BUILD_CLIENT
1519 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1520 summary.header.id, summary.header.parent);
1521 if (SALVSYNC_LinkVolume(summary.header.parent,
1525 Log("schedule request failed\n");
1528 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1531 Log("%u is a read-only volume; not salvaged\n",
1532 singleVolumeNumber);
1537 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1538 || summary.header.parent == singleVolumeNumber) {
1540 /* check if the header file is incorrectly named */
1542 const char *base = strrchr(name, '/');
1549 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1550 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1553 if (strcmp(nameShouldBe, base)) {
1554 /* .vol file has wrong name; retry/delete */
1558 if (!badname || last) {
1559 /* only offline the volume if the header is good, or if this is
1560 * the last try looking at it; avoid AskOffline'ing the same vol
1563 if (singleVolumeNumber
1564 && summary.header.id != singleVolumeNumber) {
1565 /* don't offline singleVolumeNumber; we already did that
1568 AskOffline(salvinfo, summary.header.id);
1570 #ifdef AFS_DEMAND_ATTACH_FS
1572 /* don't lock the volume if the header is bad, since we're
1573 * about to delete it anyway. */
1574 if (LockVolume(salvinfo, summary.header.id)) {
1579 #endif /* AFS_DEMAND_ATTACH_FS */
1583 if (last && !Showmode) {
1584 Log("Volume header file %s is incorrectly named (should be %s "
1585 "not %s); %sdeleted (it will be recreated later, if "
1586 "necessary)\n", name, nameShouldBe, base,
1587 (Testing ? "it would have been " : ""));
1592 summary.fileName = ToString(base);
1595 if (params->nVolumes > params->totalVolumes) {
1596 /* We found more volumes than we found on the first partition walk;
1597 * apparently something created a volume while we were
1598 * partition-salvaging, or we found more than 20 vols when salvaging a
1599 * particular volume. Abort if we detect this, since other programs
1600 * supposed to not touch the partition while it is partition-salvaging,
1601 * and we shouldn't find more than 20 vols in a VG.
1603 Abort("Found %ld vol headers, but should have found at most %ld! "
1604 "Make sure the volserver/fileserver are not running at the "
1605 "same time as a partition salvage\n",
1606 afs_printable_int32_ld(params->nVolumes),
1607 afs_printable_int32_ld(params->totalVolumes));
1610 memcpy(params->vsp, &summary, sizeof(summary));
1618 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1620 * If the header could not be read in at all, the header is always unlinked.
1621 * If instead RecordHeader said the header was bad (that is, the header file
1622 * is mis-named), we only unlink if we are doing a partition salvage, as
1623 * opposed to salvaging a specific volume group.
1625 * @param[in] dp the disk partition
1626 * @param[in] name full path to the .vol header
1627 * @param[in] hdr header data, or NULL if the header could not be read
1628 * @param[in] rock actually a struct SalvageScanParams*, with some information
/*
 * UnlinkHeader: VWalkVolumeHeaders callback that decides whether a bad
 * volume header file should be removed, and removes it with unlink(name)
 * when dounlink is set; an unlink failure is only logged.
 * Two cases are distinguished: no readable header at all (logged as not a
 * legitimate volume header file), and a readable header that RecordHeader
 * rejected, which is only considered during a partition salvage
 * (params->singleVolumeNumber == 0).
 * NOTE(review): the lines that set dounlink (and any Testing check) are not
 * visible in this view.
 */
1632 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1633 struct VolumeDiskHeader *hdr, void *rock)
1635 struct SalvageScanParams *params;
1638 params = (struct SalvageScanParams *)rock;
1641 /* no header; header is too bogus to read in at all */
1643 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1649 } else if (!params->singleVolumeNumber) {
1650 /* We were able to read in a header, but RecordHeader said something
1651 * was wrong with it. We only unlink those if we are doing a partition
1658 if (dounlink && unlink(name)) {
1659 Log("Error %d while trying to unlink %s\n", errno, name);
1664 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1665 * the fileserver for VG information, or by scanning the /vicepX partition.
1667 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1668 * are salvaging, or 0 if this is a partition
1671 * @return operation status
1673 * @retval -1 we raced with a fileserver restart; checking out and locking
1674 * volumes must be retried
1677 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1679 afs_int32 nvols = 0;
1680 struct SalvageScanParams params;
1683 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1685 /* we successfully got the vol information from the fileserver; no
1686 * need to scan the partition */
1690 /* we need to retry volume checkout */
1694 if (!singleVolumeNumber) {
1695 /* Count how many volumes we have in /vicepX */
1696 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1699 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1704 nvols = VOL_VG_MAX_VOLS;
1707 salvinfo->volumeSummaryp = malloc(nvols * sizeof(struct VolumeSummary));
1708 assert(salvinfo->volumeSummaryp != NULL);
1710 params.singleVolumeNumber = singleVolumeNumber;
1711 params.vsp = salvinfo->volumeSummaryp;
1712 params.nVolumes = 0;
1713 params.totalVolumes = nvols;
1715 params.salvinfo = salvinfo;
1717 /* walk the partition directory of volume headers and record the info
1718 * about them; unlinking invalid headers */
1719 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1720 UnlinkHeader, ¶ms);
1722 /* we apparently need to retry checking-out/locking volumes */
1726 Abort("Failed to get volume header summary\n");
1728 salvinfo->nVolumes = params.nVolumes;
1730 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1736 /* Find the link table. This should be associated with the RW volume or, if
1737 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
/*
 * FindLinkHandle: scan the special inodes of each of the nVols volumes in
 * isp (located at allInodes + isp[i].index) and return the inode number of
 * the first one whose special type is VI_LINKTABLE.
 * NOTE(review): the value returned when no link table inode is found is not
 * visible in this view; the visible caller tests the result with VALID_INO.
 */
1740 FindLinkHandle(struct InodeSummary *isp, int nVols,
1741 struct ViceInodeInfo *allInodes)
1744 struct ViceInodeInfo *ip;
1746 for (i = 0; i < nVols; i++) {
1747 ip = allInodes + isp[i].index;
1748 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1749 if (ip[j].u.special.type == VI_LINKTABLE)
1750 return ip[j].inodeNumber;
1757 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1759 struct versionStamp version;
1762 if (!VALID_INO(ino))
1764 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
1765 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1766 if (!VALID_INO(ino))
1768 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1769 isp->RWvolumeId, errno);
1770 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1771 fdP = IH_OPEN(salvinfo->VGLinkH);
1773 Abort("Can't open link table for volume %u (error = %d)\n",
1774 isp->RWvolumeId, errno);
1776 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1777 Abort("Can't truncate link table for volume %u (error = %d)\n",
1778 isp->RWvolumeId, errno);
1780 version.magic = LINKTABLEMAGIC;
1781 version.version = LINKTABLEVERSION;
1783 if (FDH_WRITE(fdP, (char *)&version, sizeof(version))
1785 Abort("Can't truncate link table for volume %u (error = %d)\n",
1786 isp->RWvolumeId, errno);
1788 FDH_REALLYCLOSE(fdP);
1790 /* If the volume summary exits (i.e., the V*.vol header file exists),
1791 * then set this inode there as well.
1793 if (isp->volSummary)
1794 isp->volSummary->header.linkTable = ino;
1803 SVGParms_t *parms = (SVGParms_t *) arg;
1804 DoSalvageVolumeGroup(parms->salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * SalvageVolumeGroup (variant that runs the salvage in a separate thread):
 * resets the per-volume-group fields of salvinfo, packs the arguments into
 * an SVGParms_t, starts nt_SVG in a joinable pthread and waits for it with
 * pthread_join. Failures of the pthread calls are logged.
 * NOTE(review): the #endif that follows suggests this variant is only
 * compiled under AFS_NT40_ENV; the opening #ifdef is not visible here.
 */
1809 SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1812 pthread_attr_t tattr;
1816 /* Initialize per volume global variables, even if later code does so */
1817 salvinfo->VolumeChanged = 0;
1818 salvinfo->VGLinkH = NULL;
1819 salvinfo->VGLinkH_cnt = 0;
1820 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1822 parms.svgp_inodeSummaryp = isp;
1823 parms.svgp_count = nVols;
1824 parms.svgp_salvinfo = salvinfo;
1825 code = pthread_attr_init(&tattr);
1827 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1831 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1833 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1836 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1838 Log("Failed to create thread to salvage volume group %u\n",
1842 (void)pthread_join(tid, NULL);
1844 #endif /* AFS_NT40_ENV */
/*
 * DoSalvageVolumeGroup: salvage the nVols volumes of one volume group,
 * described by consecutive InodeSummary entries starting at isp.
 *
 * Visible steps:
 *  - unless ForceSalvage is set, QuickCheck may short-circuit the salvage;
 *  - when forking is allowed and debug is off, the work is done in a child
 *    process and the parent waits for it;
 *  - the inode records of the whole group are read from salvinfo->inodeFd
 *    into one buffer; allInodes is that buffer rebased so that it can be
 *    indexed with the partition-wide isp[i].index values;
 *  - under AFS_NAMEI_ENV the link table is located, or recreated and seeded
 *    with a link count of 1 for every non-special inode;
 *  - volumes are processed from last to first (read-write volume last),
 *    each one twice: a check pass (check == 1) followed by a repair pass;
 *  - remaining link count differences are then applied with IH_DEC/IH_INC,
 *    the link table inode itself being adjusted last;
 *  - finally SalvageVolume runs the directory checks and the link handle
 *    is released.
 *
 * NOTE(review): no NULL check of the malloc result is visible before the
 * buffer is used, and the read of the inode records is wrapped in assert(),
 * so it would not be executed if NDEBUG were defined -- confirm.
 * NOTE(review): a number of lines (declarations, returns, braces) are not
 * present in this view.
 */
1847 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1849 struct ViceInodeInfo *inodes, *allInodes, *ip;
1850 int i, totalInodes, size, salvageTo;
1854 int dec_VGLinkH = 0;
1856 FdHandle_t *fdP = NULL;
1858 salvinfo->VGLinkH_cnt = 0;
1859 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1860 && isp->nSpecialInodes > 0);
1861 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1862 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1865 if (ShowMounts && !haveRWvolume)
1867 if (canfork && !debug && Fork() != 0) {
1868 (void)Wait("Salvage volume group");
1871 for (i = 0, totalInodes = 0; i < nVols; i++)
1872 totalInodes += isp[i].nInodes;
1873 size = totalInodes * sizeof(struct ViceInodeInfo);
1874 inodes = (struct ViceInodeInfo *)malloc(size);
1875 allInodes = inodes - isp->index; /* this would the base of all the inodes
1876 * for the partition, if all the inodes
1877 * had been read into memory */
1879 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1881 assert(read(salvinfo->inodeFd, inodes, size) == size);
1883 /* Don't try to salvage a read write volume if there isn't one on this
1885 salvageTo = haveRWvolume ? 0 : 1;
1887 #ifdef AFS_NAMEI_ENV
1888 ino = FindLinkHandle(isp, nVols, allInodes);
1889 if (VALID_INO(ino)) {
1890 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1891 fdP = IH_OPEN(salvinfo->VGLinkH);
1893 if (!VALID_INO(ino) || fdP == NULL) {
1894 Log("%s link table for volume %u.\n",
1895 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1897 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1900 struct ViceInodeInfo *ip;
1901 CreateLinkTable(salvinfo, isp, ino);
1902 fdP = IH_OPEN(salvinfo->VGLinkH);
1903 /* Sync fake 1 link counts to the link table, now that it exists */
1905 for (i = 0; i < nVols; i++) {
1906 ip = allInodes + isp[i].index;
1907 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1909 nt_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1911 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1919 FDH_REALLYCLOSE(fdP);
1921 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1924 /* Salvage in reverse order--read/write volume last; this way any
1925 * Inodes not referenced by the time we salvage the read/write volume
1926 * can be picked up by the read/write volume */
1927 /* ACTUALLY, that's not done right now--the inodes just vanish */
1928 for (i = nVols - 1; i >= salvageTo; i--) {
1930 struct InodeSummary *lisp = &isp[i];
1931 #ifdef AFS_NAMEI_ENV
1932 /* If only the RO is present on this partition, the link table
1933 * shows up as a RW volume special file. Need to make sure the
1934 * salvager doesn't try to salvage the non-existent RW.
1936 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1937 /* If this only special inode is the link table, continue */
1938 if (inodes->u.special.type == VI_LINKTABLE) {
1945 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1946 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1947 /* Check inodes twice. The second time do things seriously. This
1948 * way the whole RO volume can be deleted, below, if anything goes wrong */
1949 for (check = 1; check >= 0; check--) {
1951 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
1953 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
1954 if (rw && deleteMe) {
1955 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1956 * volume won't be called */
1962 if (rw && check == 1)
1964 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
1965 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
1971 /* Fix actual inode counts */
1973 Log("totalInodes %d\n",totalInodes);
1974 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1975 static int TraceBadLinkCounts = 0;
1976 #ifdef AFS_NAMEI_ENV
1977 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
1978 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
1979 VGLinkH_p1 = ip->u.param[0];
1980 continue; /* Deal with this last. */
1983 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1984 TraceBadLinkCounts--; /* Limit reports, per volume */
1985 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(NULL, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
1987 while (ip->linkCount > 0) {
1988 /* below used to assert, not break */
1990 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1991 Log("idec failed. inode %s errno %d\n",
1992 PrintInode(NULL, ip->inodeNumber), errno);
1998 while (ip->linkCount < 0) {
1999 /* these used to be asserts */
2001 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2002 Log("iinc failed. inode %s errno %d\n",
2003 PrintInode(NULL, ip->inodeNumber), errno);
2010 #ifdef AFS_NAMEI_ENV
2011 while (dec_VGLinkH > 0) {
2012 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2013 Log("idec failed on link table, errno = %d\n", errno);
2017 while (dec_VGLinkH < 0) {
2018 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2019 Log("iinc failed on link table, errno = %d\n", errno);
2026 /* Directory consistency checks on the rw volume */
2028 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2029 IH_RELEASE(salvinfo->VGLinkH);
2031 if (canfork && !debug) {
/*
 * QuickCheck: inspect the volume headers of a volume group, before any
 * fork, to decide whether the group can be skipped. For each of the nVols
 * summaries the volume info inode named in the volume header is read; the
 * header passes when the magic is VOLUMEINFOMAGIC, dontSalvage equals
 * DONT_SALVAGE and neither needsSalvaged nor destroyMe is set. A passing
 * header that is still marked inUse is rewritten with inUse = 0 and
 * inService = 1.
 * NOTE(review): the return statements are not visible in this view;
 * presumably a nonzero result tells DoSalvageVolumeGroup that no salvage is
 * needed -- confirm the exact values.
 */
2038 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2040 /* Check headers BEFORE forking */
2044 for (i = 0; i < nVols; i++) {
2045 struct VolumeSummary *vs = isp[i].volSummary;
2046 VolumeDiskData volHeader;
2048 /* Don't salvage just because phantom rw volume is there... */
2049 /* (If a read-only volume exists, read/write inodes must also exist) */
2050 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2054 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2055 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2056 == sizeof(volHeader)
2057 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2058 && volHeader.dontSalvage == DONT_SALVAGE
2059 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2060 if (volHeader.inUse != 0) {
2061 volHeader.inUse = 0;
2062 volHeader.inService = 1;
2064 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2065 != sizeof(volHeader)) {
2081 /* SalvageVolumeHeaderFile
2083 * Salvage the top level V*.vol header file. Make sure the special files
2084 * exist and that there are no duplicates.
2086 * Calls SalvageHeader for each possible type of volume special file.
/*
 * SalvageVolumeHeaderFile: rebuild the volume header information for one
 * volume from its special inodes.
 *
 * Visible steps:
 *  - special inodes that the existing header already references are
 *    remembered in goodspecial[], so that when two special inodes of the
 *    same type are found the referenced one can be preferred (the skip[]
 *    array tracks the ones to ignore);
 *  - a fresh tempHeader is filled from the special inodes; out-of-range
 *    types are reported as rubbish, skipped duplicates as orphans;
 *  - SalvageHeader is called for every entry of stuff[];
 *  - if the volume has no summary, a new header file is created through
 *    VCreateVolumeDiskHeader; if the existing header differs from
 *    tempHeader it is rewritten through VWriteVolumeDiskHeader;
 *  - finally the volume info handle is initialised from the header.
 *
 * check appears to select the report-only pass (messages drop the repairing
 * suffix when it is set); deleteMe is passed through to SalvageHeader.
 *
 * NOTE(review): the result of the malloc for a new VolumeSummary is
 * dereferenced on the next visible line without a NULL check, and fileName
 * is copied into the 64-byte headerName with strcpy -- worth confirming the
 * length is bounded.
 * NOTE(review): many lines (returns, braces, Testing checks) are not
 * present in this view.
 */
2090 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2091 struct ViceInodeInfo *inodes, int RW,
2092 int check, int *deleteMe)
2095 struct ViceInodeInfo *ip;
2096 int allinodesobsolete = 1;
2097 struct VolumeDiskHeader diskHeader;
2098 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2100 struct VolumeHeader tempHeader;
2101 struct afs_inode_info stuff[MAXINODETYPE];
2103 /* keeps track of special inodes that are probably 'good'; they are
2104 * referenced in the vol header, and are included in the given inodes
2109 } goodspecial[MAXINODETYPE];
2114 memset(goodspecial, 0, sizeof(goodspecial));
2116 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2118 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2120 Log("cannot allocate memory for inode skip array when salvaging "
2121 "volume %lu; not performing duplicate special inode recovery\n",
2122 afs_printable_uint32_lu(isp->volumeId));
2123 /* still try to perform the salvage; the skip array only does anything
2124 * if we detect duplicate special inodes */
2127 init_inode_info(&tempHeader, stuff);
2130 * First, look at the special inodes and see if any are referenced by
2131 * the existing volume header. If we find duplicate special inodes, we
2132 * can use this information to use the referenced inode (it's more
2133 * likely to be the 'good' one), and throw away the duplicates.
2135 if (isp->volSummary && skip) {
2136 /* use tempHeader, so we can use the stuff[] array to easily index
2137 * into the isp->volSummary special inodes */
2138 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2140 for (i = 0; i < isp->nSpecialInodes; i++) {
2141 ip = &inodes[isp->index + i];
2142 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2143 /* will get taken care of in a later loop */
2146 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2147 goodspecial[ip->u.special.type-1].valid = 1;
2148 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2153 memset(&tempHeader, 0, sizeof(tempHeader));
2154 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2155 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2156 tempHeader.id = isp->volumeId;
2157 tempHeader.parent = isp->RWvolumeId;
2159 /* Check for duplicates (inodes are sorted by type field) */
2160 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2161 ip = &inodes[isp->index + i];
2162 if (ip->u.special.type == (ip + 1)->u.special.type) {
2163 afs_ino_str_t stmp1, stmp2;
2165 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2166 /* Will be caught in the loop below */
2170 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2171 ip->u.special.type, isp->volumeId,
2172 PrintInode(stmp1, ip->inodeNumber),
2173 PrintInode(stmp2, (ip+1)->inodeNumber));
2175 if (skip && goodspecial[ip->u.special.type-1].valid) {
2176 Inode gi = goodspecial[ip->u.special.type-1].inode;
2179 Log("using special inode referenced by vol header (%s)\n",
2180 PrintInode(stmp1, gi));
2183 /* the volume header references some special inode of
2184 * this type in the inodes array; are we it? */
2185 if (ip->inodeNumber != gi) {
2187 } else if ((ip+1)->inodeNumber != gi) {
2188 /* in case this is the last iteration; we need to
2189 * make sure we check ip+1, too */
2194 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2202 for (i = 0; i < isp->nSpecialInodes; i++) {
2203 ip = &inodes[isp->index + i];
2204 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2206 Log("Rubbish header inode %s of type %d\n",
2207 PrintInode(NULL, ip->inodeNumber),
2208 ip->u.special.type);
2214 Log("Rubbish header inode %s of type %d; deleted\n",
2215 PrintInode(NULL, ip->inodeNumber),
2216 ip->u.special.type);
2217 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2218 if (skip && skip[i]) {
2219 if (orphans == ORPH_REMOVE) {
2220 Log("Removing orphan special inode %s of type %d\n",
2221 PrintInode(NULL, ip->inodeNumber), ip->u.special.type);
2224 Log("Ignoring orphan special inode %s of type %d\n",
2225 PrintInode(NULL, ip->inodeNumber), ip->u.special.type);
2226 /* fall through to the ip->linkCount--; line below */
2229 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2230 allinodesobsolete = 0;
2232 if (!check && ip->u.special.type != VI_LINKTABLE)
2233 ip->linkCount--; /* Keep the inode around */
2241 if (allinodesobsolete) {
2248 salvinfo->VGLinkH_cnt++; /* one for every header. */
2250 if (!RW && !check && isp->volSummary) {
2251 ClearROInUseBit(isp->volSummary);
2255 for (i = 0; i < MAXINODETYPE; i++) {
2256 if (stuff[i].inodeType == VI_LINKTABLE) {
2257 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2258 * And we may have recreated the link table earlier, so set the
2259 * RW header as well.
2261 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2262 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2266 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2270 if (isp->volSummary == NULL) {
2272 char headerName[64];
2273 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2274 (void)afs_snprintf(path, sizeof path, "%s/%s", salvinfo->fileSysPath, headerName);
2276 Log("No header file for volume %u\n", isp->volumeId);
2280 Log("No header file for volume %u; %screating %s\n",
2281 isp->volumeId, (Testing ? "it would have been " : ""),
2283 isp->volSummary = (struct VolumeSummary *)
2284 malloc(sizeof(struct VolumeSummary));
2285 isp->volSummary->fileName = ToString(headerName);
2287 writefunc = VCreateVolumeDiskHeader;
2290 char headerName[64];
2291 /* hack: these two fields are obsolete... */
2292 isp->volSummary->header.volumeAcl = 0;
2293 isp->volSummary->header.volumeMountTable = 0;
2296 (&isp->volSummary->header, &tempHeader,
2297 sizeof(struct VolumeHeader))) {
2298 /* We often remove the name before calling us, so we make a fake one up */
2299 if (isp->volSummary->fileName) {
2300 strcpy(headerName, isp->volSummary->fileName);
2302 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2303 isp->volSummary->fileName = ToString(headerName);
2305 (void)afs_snprintf(path, sizeof path, "%s/%s", salvinfo->fileSysPath, headerName);
2307 Log("Header file %s is damaged or no longer valid%s\n", path,
2308 (check ? "" : "; repairing"));
2312 writefunc = VWriteVolumeDiskHeader;
2316 memcpy(&isp->volSummary->header, &tempHeader,
2317 sizeof(struct VolumeHeader));
2320 Log("It would have written a new header file for volume %u\n",
2324 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2325 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2327 Log("Error %ld writing volume header file for volume %lu\n",
2328 afs_printable_int32_ld(code),
2329 afs_printable_uint32_lu(diskHeader.id));
2334 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2335 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader: check one special inode of a volume header (described by
 * sp) and recreate it when it is missing or corrupt.
 *
 * What the code below shows:
 *  - a zero *(sp->inode) is logged as "Missing inode in volume header" and
 *    a replacement inode is requested with IH_CREATE; an invalid result is
 *    reported as "Unable to allocate inode";
 *  - the inode is opened through IH_INIT/IH_OPEN; when OKToZap is set and
 *    the open fails with an errno accepted by BadError(), the volume is
 *    reported as being destroyed, otherwise the failure goes to Abort();
 *  - sp->size bytes are read and the magic number is compared with
 *    sp->stamp.magic; on mismatch the header part is reported as corrupted
 *    and the in-memory header is zeroed so that no garbage is used later;
 *  - a VI_VOLINFO header whose destroyMe field equals DESTROY_ME is
 *    special-cased (the file handle is closed at that point);
 *  - when recreating and not Testing, the file is truncated; a VI_VOLINFO
 *    header is rebuilt with name "bogus.<volumeId>", inService = 0,
 *    blessed = 0, uniquifier = maxUniquifier + 1 + 1000, a type derived from
 *    volumeId == RWvolumeId, and the current time as creationDate; a write
 *    of just sp->stamp also appears further down;
 *  - for VI_VOLINFO the header is copied into salvinfo->VolInfo and the
 *    update time (or creation time when updateDate is zero) is logged.
 *
 * NOTE(review): the return value and the exact use of `check` and
 * `deleteMe` are not visible in this part of the file -- confirm against
 * the callers before relying on them.
 */
2340 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2341 struct InodeSummary *isp, int check, int *deleteMe)
2344 VolumeDiskData volumeInfo;
2345 struct versionStamp fileHeader;
2354 #ifndef AFS_NAMEI_ENV
2355 if (sp->inodeType == VI_LINKTABLE)
2358 if (*(sp->inode) == 0) {
2360 Log("Missing inode in volume header (%s)\n", sp->description);
2364 Log("Missing inode in volume header (%s); %s\n", sp->description,
2365 (Testing ? "it would have recreated it" : "recreating"));
2368 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2369 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2370 if (!VALID_INO(*(sp->inode)))
2372 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2373 sp->description, errno);
2378 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2379 fdP = IH_OPEN(specH);
2380 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2381 /* bail out early and destroy the volume */
2383 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2390 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2391 sp->description, errno);
2394 && (FDH_READ(fdP, (char *)&header, sp->size) != sp->size
2395 || header.fileHeader.magic != sp->stamp.magic)) {
2397 Log("Part of the header (%s) is corrupted\n", sp->description);
2398 FDH_REALLYCLOSE(fdP);
2402 Log("Part of the header (%s) is corrupted; recreating\n",
2405 /* header can be garbage; make sure we don't read garbage data from
2407 memset(&header, 0, sizeof(header));
2409 if (sp->inodeType == VI_VOLINFO
2410 && header.volumeInfo.destroyMe == DESTROY_ME) {
2413 FDH_REALLYCLOSE(fdP);
2417 if (recreate && !Testing) {
2420 ("Internal error: recreating volume header (%s) in check mode\n",
2422 nBytes = FDH_TRUNC(fdP, 0);
2424 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2425 sp->description, errno);
2427 /* The following code should be moved into vutil.c */
2428 if (sp->inodeType == VI_VOLINFO) {
2430 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2431 header.volumeInfo.stamp = sp->stamp;
2432 header.volumeInfo.id = isp->volumeId;
2433 header.volumeInfo.parentId = isp->RWvolumeId;
2434 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2435 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2436 isp->volumeId, isp->volumeId);
2437 header.volumeInfo.inService = 0;
2438 header.volumeInfo.blessed = 0;
2439 /* The + 1000 is a hack in case there are any files out in venus caches */
2440 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2441 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2442 header.volumeInfo.needsCallback = 0;
2443 gettimeofday(&tp, 0);
2444 header.volumeInfo.creationDate = tp.tv_sec;
2445 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
2447 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
2448 sp->description, errno);
2451 FDH_WRITE(fdP, (char *)&header.volumeInfo,
2452 sizeof(header.volumeInfo));
2453 if (nBytes != sizeof(header.volumeInfo)) {
2456 ("Unable to write volume header file (%s) (errno = %d)\n",
2457 sp->description, errno);
2458 Abort("Unable to write entire volume header file (%s)\n",
2462 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
2464 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
2465 sp->description, errno);
2467 nBytes = FDH_WRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp));
2468 if (nBytes != sizeof(sp->stamp)) {
2471 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2472 sp->description, errno);
2474 ("Unable to write entire version stamp in volume header file (%s)\n",
2479 FDH_REALLYCLOSE(fdP);
2481 if (sp->inodeType == VI_VOLINFO) {
2482 salvinfo->VolInfo = header.volumeInfo;
2486 if (salvinfo->VolInfo.updateDate) {
2487 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2489 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2490 salvinfo->VolInfo.id,
2491 (Testing ? "it would have been " : ""), update);
2493 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2495 Log("%s (%u) not updated (created %s)\n",
2496 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
/*
 * SalvageVnodes: run SalvageIndex over the small and then the large vnode
 * index of the volume described by thisIsp.
 *
 * The inode list always comes from the read/write volume rwIsp: ioffset
 * skips rwIsp's special inodes so that &inodes[ioffset] is the first
 * ordinary inode, and nInodes is the number of ordinary inodes. RW is
 * non-zero when thisIsp is the read/write volume itself.
 *
 * Returns 0 only when both ismall and ilarge are 0, and -1 otherwise.
 * In check mode an ismall of -1 is tested before the large index is
 * processed.
 */
2506 SalvageVnodes(struct SalvInfo *salvinfo,
2507 struct InodeSummary *rwIsp,
2508 struct InodeSummary *thisIsp,
2509 struct ViceInodeInfo *inodes, int check)
2511 int ilarge, ismall, ioffset, RW, nInodes;
2512 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2515 RW = (rwIsp == thisIsp);
2516 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2518 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2519 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2520 if (check && ismall == -1)
2523 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2524 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2525 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex: walk one vnode index file (inode `ino`, vnode class
 * `class`) and reconcile every allocated vnode with the inode list
 * ip[0..nInodes).
 *
 * The index is opened through IH_INIT/IH_OPEN and read as a stream. The
 * number of vnodes is (file size / vcp->diskSize) - 1, and reading starts
 * one diskSize into the file. The file size must be an exact multiple of
 * diskSize (asserted).
 *
 * For every vnode whose type is not vNull the visible code:
 *  - zeroes vnodes whose inode number is 0;
 *  - zeroes vnodes whose vnodeMagic differs from vcp->magic ("Partially
 *    allocated vnode ... deleted");
 *  - advances ip past inodes with a smaller vnode number, then looks among
 *    the inodes carrying this vnode number for one whose inode number
 *    matches the vnode;
 *  - for a matching inode, repairs the uniquifier (compared via IUnique()),
 *    the data version, the inode number and the length from the inode, and
 *    decrements ip->linkCount so the inode is kept;
 *  - without a matching inode, reports the vnode and zeroes it;
 *  - writes a changed vnode back at vnodeIndexOffset(vcp, vnodeNumber) when
 *    not Testing and sets salvinfo->VolumeChanged.
 *
 * A vnode must never be changed while `check` is set (asserted).
 *
 * NOTE(review): the meaning of the return value is not visible in this
 * part of the file; SalvageVnodes compares it with 0 and -1.
 */
2529 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2530 struct ViceInodeInfo *ip, int nInodes,
2531 struct VolumeSummary *volSummary, int check)
2533 VolumeId volumeNumber;
2534 char buf[SIZEOF_LARGEDISKVNODE];
2535 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2537 StreamHandle_t *file;
2538 struct VnodeClassInfo *vcp;
2540 afs_sfsize_t nVnodes;
2541 afs_fsize_t vnodeLength;
2543 afs_ino_str_t stmp1, stmp2;
2547 volumeNumber = volSummary->header.id;
2548 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2549 fdP = IH_OPEN(handle);
2550 assert(fdP != NULL);
2551 file = FDH_FDOPEN(fdP, "r+");
2552 assert(file != NULL);
2553 vcp = &VnodeClassInfo[class];
2554 size = OS_SIZE(fdP->fd_fd);
2556 nVnodes = (size / vcp->diskSize) - 1;
2558 assert((nVnodes + 1) * vcp->diskSize == size);
2559 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
2563 for (vnodeIndex = 0;
2564 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2565 nVnodes--, vnodeIndex++) {
2566 if (vnode->type != vNull) {
2567 int vnodeChanged = 0;
2568 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2569 /* Log programs that belong to root (potentially suid root);
2570 * don't bother for read-only or backup volumes */
2571 #ifdef notdef /* This is done elsewhere */
2572 if (ShowRootFiles && RW && vnode->owner == 0 && vnodeNumber != 1)
2573 Log("OWNER IS ROOT %s %u dir %u vnode %u author %u owner %u mode %o\n", salvinfo->VolInfo.name, volumeNumber, vnode->parent, vnodeNumber, vnode->author, vnode->owner, vnode->modeBits);
2575 if (VNDISK_GET_INO(vnode) == 0) {
2577 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2578 memset(vnode, 0, vcp->diskSize);
2582 if (vcp->magic != vnode->vnodeMagic) {
2583 /* bad magic #, probably partially created vnode */
2584 Log("Partially allocated vnode %d deleted.\n",
2586 memset(vnode, 0, vcp->diskSize);
2590 /* ****** Should do a bit more salvage here: e.g. make sure
2591 * vnode type matches what it should be given the index */
2592 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2593 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2594 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2595 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2602 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2603 /* The following doesn't work, because the version number
2604 * is not maintained correctly by the file server */
2605 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2606 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2608 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2614 /* For RW volume, look for vnode with matching inode number;
2615 * if no such match, take the first determined by our sort
2617 struct ViceInodeInfo *lip = ip;
2618 int lnInodes = nInodes;
2620 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2621 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2630 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2631 /* "Matching" inode */
2635 vu = vnode->uniquifier;
2636 iu = ip->u.vnode.vnodeUniquifier;
2637 vd = vnode->dataVersion;
2638 id = ip->u.vnode.inodeDataVersion;
2640 * Because of the possibility of the uniquifier overflows (> 4M)
2641 * we compare them modulo the low 22-bits; we shouldn't worry
2642 * about mismatching since they shouldn't to many old
2643 * uniquifiers of the same vnode...
2645 if (IUnique(vu) != IUnique(iu)) {
2647 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2650 vnode->uniquifier = iu;
2651 #ifdef AFS_3DISPARES
2652 vnode->dataVersion = (id >= vd ?
2655 1887437 ? vd : id) :
2658 1887437 ? id : vd));
2660 #if defined(AFS_SGI_EXMAG)
2661 vnode->dataVersion = (id >= vd ?
2664 15099494 ? vd : id) :
2667 15099494 ? id : vd));
2669 vnode->dataVersion = (id > vd ? id : vd);
2670 #endif /* AFS_SGI_EXMAG */
2671 #endif /* AFS_3DISPARES */
2674 /* don't bother checking for vd > id any more, since
2675 * partial file transfers always result in this state,
2676 * and you can't do much else anyway (you've already
2677 * found the best data you can) */
2678 #ifdef AFS_3DISPARES
2679 if (!vnodeIsDirectory(vnodeNumber)
2680 && ((vd < id && (id - vd) < 1887437)
2681 || ((vd > id && (vd - id) > 1887437)))) {
2683 #if defined(AFS_SGI_EXMAG)
2684 if (!vnodeIsDirectory(vnodeNumber)
2685 && ((vd < id && (id - vd) < 15099494)
2686 || ((vd > id && (vd - id) > 15099494)))) {
2688 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2689 #endif /* AFS_SGI_EXMAG */
2692 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2693 vnode->dataVersion = id;
2698 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2701 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2703 VNDISK_SET_INO(vnode, ip->inodeNumber);
2708 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2710 VNDISK_SET_INO(vnode, ip->inodeNumber);
2713 VNDISK_GET_LEN(vnodeLength, vnode);
2714 if (ip->byteCount != vnodeLength) {
2717 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2722 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2723 VNDISK_SET_LEN(vnode, ip->byteCount);
2727 ip->linkCount--; /* Keep the inode around */
2730 } else { /* no matching inode */
2731 if (VNDISK_GET_INO(vnode) != 0
2732 || vnode->type == vDirectory) {
2733 /* No matching inode--get rid of the vnode */
2735 if (VNDISK_GET_INO(vnode)) {
2737 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)));
2741 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2746 if (VNDISK_GET_INO(vnode)) {
2748 time_t serverModifyTime = vnode->serverModifyTime;
2749 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2753 time_t serverModifyTime = vnode->serverModifyTime;
2754 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2757 memset(vnode, 0, vcp->diskSize);
2760 /* Should not reach here becuase we checked for
2761 * (inodeNumber == 0) above. And where we zero the vnode,
2762 * we also goto vnodeDone.
2766 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2770 } /* VNDISK_GET_INO(vnode) != 0 */
2772 assert(!(vnodeChanged && check));
2773 if (vnodeChanged && !Testing) {
2775 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2776 (char *)vnode, vcp->diskSize)
2778 salvinfo->VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber: map a vnode number to its in-memory VnodeEssence.
 *
 * The vnode number selects a vnode class (vnodeIdToClass) and a slot
 * within that class (vnodeIdToBitNumber). Returns a pointer into
 * salvinfo->vnodeInfo[class].vnodes, or NULL when the slot is not below
 * that class's nVnodes, i.e. the number does not refer to a vnode in the
 * index.
 */
2789 struct VnodeEssence *
2790 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2793 struct VnodeInfo *vip;
2796 class = vnodeIdToClass(vnodeNumber);
2797 vip = &salvinfo->vnodeInfo[class];
2798 offset = vnodeIdToBitNumber(vnodeNumber);
2799 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite: give directory `dir` a private copy of its data inode
 * before the salvager modifies it.
 *
 * Nothing is copied when dir->copied is already set or when Testing.
 * Otherwise the visible code: flushes the directory buffers (DFlush),
 * reads the directory's vnode from the large vnode index, creates a new
 * inode with IH_CREATE (bumping vnode.dataVersion in the process), copies
 * the old inode into it with CopyInode, stores the new inode number in
 * the vnode and writes the vnode back, and finally points dir->dirHandle
 * at the new inode via SetSalvageDirHandle.
 *
 * The old inode is deliberately left alone here because the directory is
 * still being scanned; SalvageDir decrements it after enumeration.
 * Read, create, copy and write failures are all fatal (asserted).
 */
2803 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2805 /* Copy the directory unconditionally if we are going to change it:
2806 * not just if was cloned.
2808 struct VnodeDiskObject vnode;
2809 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2810 Inode oldinode, newinode;
2813 if (dir->copied || Testing)
2815 DFlush(); /* Well justified paranoia... */
2818 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2819 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2821 assert(code == sizeof(vnode));
2822 oldinode = VNDISK_GET_INO(&vnode);
2823 /* Increment the version number by a whole lot to avoid problems with
2824 * clients that were promised new version numbers--but the file server
2825 * crashed before the versions were written to disk.
2828 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2829 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2831 assert(VALID_INO(newinode));
2832 assert(CopyInode(salvinfo->fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2834 VNDISK_SET_INO(&vnode, newinode);
2836 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2837 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2839 assert(code == sizeof(vnode));
2841 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2842 salvinfo->fileSysDevice, newinode,
2843 &salvinfo->VolumeChanged);
2844 /* Don't delete the original inode right away, because the directory is
2845 * still being scanned.
2851 * This function should either successfully create a new dir, or give up
2852 * and leave things the way they were. In particular, if it fails to write
2853 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage: rebuild a damaged directory into a freshly created
 * inode and switch the directory over to it.
 *
 * Steps visible below:
 *  - read the directory's vnode from the large vnode index and remember
 *    the old inode;
 *  - create a new inode with IH_CREATE (bumping vnode.dataVersion) and
 *    bind the local handle `newdir` to it;
 *  - pick the uniquifier for ".." from the parent's VnodeEssence, falling
 *    back to 1 when the parent is unknown or its unique is 0;
 *  - run DirSalvage from the old directory into newdir; on failure the new
 *    inode is released with IH_DEC and the routine gives up;
 *  - verify the result with DirOK(); a bad result also releases the new
 *    inode ("restoring old version of the directory");
 *  - on success store the new inode number and Length(&newdir) in the
 *    vnode, write the vnode back, sync, make sure the old directory file
 *    is really closed, release the old inode with IH_DEC and replace
 *    dir->dirHandle with newdir.
 */
2857 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2859 struct VnodeDiskObject vnode;
2860 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2861 Inode oldinode, newinode;
2866 afs_int32 parentUnique = 1;
2867 struct VnodeEssence *vnodeEssence;
2872 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2874 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2875 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2877 assert(lcode == sizeof(vnode));
2878 oldinode = VNDISK_GET_INO(&vnode);
2879 /* Increment the version number by a whole lot to avoid problems with
2880 * clients that were promised new version numbers--but the file server
2881 * crashed before the versions were written to disk.
2884 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2885 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2887 assert(VALID_INO(newinode));
2888 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2889 &salvinfo->VolumeChanged);
2891 /* Assign . and .. vnode numbers from dir and vnode.parent.
2892 * The uniquifier for . is in the vnode.
2893 * The uniquifier for .. might be set to a bogus value of 1 and
2894 * the salvager will later clean it up.
2896 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2897 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2900 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2902 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2907 /* didn't really build the new directory properly, let's just give up. */
2908 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2909 Log("Directory salvage returned code %d, continuing.\n", code);
2911 Log("also failed to decrement link count on new inode");
2915 Log("Checking the results of the directory salvage...\n");
2916 if (!DirOK(&newdir)) {
2917 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2918 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2923 VNDISK_SET_INO(&vnode, newinode);
2924 length = Length(&newdir);
2925 VNDISK_SET_LEN(&vnode, length);
2927 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2928 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2930 assert(lcode == sizeof(vnode));
2933 nt_sync(salvinfo->fileSysDevice);
2935 sync(); /* this is slow, but hopefully rarely called. We don't have
2936 * an open FD on the file itself to fsync.
2940 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
2942 /* make sure old directory file is really closed */
2943 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2944 FDH_REALLYCLOSE(fdP);
2946 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2948 dir->dirHandle = newdir;
2952 * arguments for JudgeEntry.
/*
 * Context bundle handed to JudgeEntry through the opaque callback
 * argument of EnumerateDir(); SalvageDir fills in both members before
 * enumerating a directory.
 */
2954 struct judgeEntry_params {
2955 struct DirSummary *dir; /**< directory we're examining entries in */
2956 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/*
 * JudgeEntry: per-entry callback used while enumerating a directory
 * (SalvageDir passes it to EnumerateDir). `arock` is a struct
 * judgeEntry_params; `name`/`vnodeNumber`/`unique` describe the entry.
 *
 * Every repair goes through CopyOnWrite() first so the directory is
 * modified in a private copy. In the order the checks appear below:
 *  - entries whose vnode number has no VnodeEssence are deleted;
 *  - without AFS_NAMEI_ENV, entries whose vnode has InodeNumber 0 are
 *    deleted;
 *  - entries with an even vnode number whose vnode has zero count, unique
 *    and modeBits are reported as invalid and may be deleted;
 *  - when the entry's unique differs from the vnode's, the entry is
 *    re-created with the vnode's unique, or deleted when the vnode's
 *    unique is 0 or the directory is orphaned; "." and ".." of a vnode
 *    with zero unique are skipped instead;
 *  - "." must name the directory itself and ".." its parent (or the
 *    directory itself when that branch applies); wrong entries are
 *    deleted and re-created;
 *  - ".__afs*" entries are removed and their vnode is marked unclaimed
 *    and todelete;
 *  - for all other entries: suid/sgid and root-owned files are logged on
 *    request; a symlink without execute bits is read as a mount point and,
 *    unless it starts with '#' or '%' and ends with '.', gets mode 0111
 *    added so it becomes a plain symlink; the name is remembered for large
 *    vnodes; and a vnode whose parent is another directory is either
 *    adopted by this one (parent rewritten) or the entry is deleted.
 *
 * At the end the vnode is marked claimed and its count is decremented.
 */
2960 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
2963 struct judgeEntry_params *params = arock;
2964 struct DirSummary *dir = params->dir;
2965 struct SalvInfo *salvinfo = params->salvinfo;
2966 struct VnodeEssence *vnodeEssence;
2967 afs_int32 dirOrphaned, todelete;
2969 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
2971 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
2972 if (vnodeEssence == NULL) {
2974 Log("dir vnode %u: invalid entry deleted: %s/%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2977 CopyOnWrite(salvinfo, dir);
2978 assert(Delete(&dir->dirHandle, name) == 0);
2983 #ifndef AFS_NAMEI_ENV
2984 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2985 * mount inode for the partition. If this inode were deleted, it would crash
2988 if (vnodeEssence->InodeNumber == 0) {
2989 Log("dir vnode %d: invalid entry: %s/%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2991 CopyOnWrite(salvinfo, dir);
2992 assert(Delete(&dir->dirHandle, name) == 0);
2999 if (!(vnodeNumber & 1) && !Showmode
3000 && !(vnodeEssence->count || vnodeEssence->unique
3001 || vnodeEssence->modeBits)) {
3002 Log("dir vnode %u: invalid entry: %s/%s (vnode %u, unique %u)%s\n",
3003 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3004 vnodeNumber, unique,
3005 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3009 CopyOnWrite(salvinfo, dir);
3010 assert(Delete(&dir->dirHandle, name) == 0);
3016 /* Check if the Uniquifiers match. If not, change the directory entry
3017 * so its unique matches the vnode unique. Delete if the unique is zero
3018 * or if the directory is orphaned.
3020 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3021 if (!vnodeEssence->unique
3022 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3023 /* This is an orphaned directory. Don't delete the . or ..
3024 * entry. Otherwise, it will get created in the next
3025 * salvage and deleted again here. So Just skip it.
3030 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3033 Log("dir vnode %u: %s/%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3037 fid.Vnode = vnodeNumber;
3038 fid.Unique = vnodeEssence->unique;
3039 CopyOnWrite(salvinfo, dir);
3040 assert(Delete(&dir->dirHandle, name) == 0);
3042 assert(Create(&dir->dirHandle, name, &fid) == 0);
3045 return 0; /* no need to continue */
3048 if (strcmp(name, ".") == 0) {
3049 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3052 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3054 CopyOnWrite(salvinfo, dir);
3055 assert(Delete(&dir->dirHandle, ".") == 0);
3056 fid.Vnode = dir->vnodeNumber;
3057 fid.Unique = dir->unique;
3058 assert(Create(&dir->dirHandle, ".", &fid) == 0);
3061 vnodeNumber = fid.Vnode; /* Get the new Essence */
3062 unique = fid.Unique;
3063 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3066 } else if (strcmp(name, "..") == 0) {
3069 struct VnodeEssence *dotdot;
3070 pa.Vnode = dir->parent;
3071 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3072 assert(dotdot != NULL); /* XXX Should not be assert */
3073 pa.Unique = dotdot->unique;
3075 pa.Vnode = dir->vnodeNumber;
3076 pa.Unique = dir->unique;
3078 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3080 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3082 CopyOnWrite(salvinfo, dir);
3083 assert(Delete(&dir->dirHandle, "..") == 0);
3084 assert(Create(&dir->dirHandle, "..", &pa) == 0);
3087 vnodeNumber = pa.Vnode; /* Get the new Essence */
3089 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3091 dir->haveDotDot = 1;
3092 } else if (strncmp(name, ".__afs", 6) == 0) {
3094 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3097 CopyOnWrite(salvinfo, dir);
3098 assert(Delete(&dir->dirHandle, name) == 0);
3100 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3101 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3104 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3105 Log("FOUND suid/sgid file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3106 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3107 && !(vnodeEssence->modeBits & 0111)) {
3114 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3115 vnodeEssence->InodeNumber);
3118 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3122 size = FDH_SIZE(fdP);
3124 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3125 FDH_REALLYCLOSE(fdP);
3132 nBytes = FDH_READ(fdP, buf, size);
3133 if (nBytes == size) {
3135 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3136 Log("Volume %u (%s) mount point %s/%s to '%s' invalid, %s to symbolic link\n",
3137 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3138 Testing ? "would convert" : "converted");
3139 vnodeEssence->modeBits |= 0111;
3140 vnodeEssence->changed = 1;
3141 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s/%s to '%s'\n",
3142 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3143 dir->name ? dir->name : "??", name, buf);
3145 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3146 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3148 FDH_REALLYCLOSE(fdP);
3151 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3152 Log("FOUND root file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3153 if (vnodeIdToClass(vnodeNumber) == vLarge
3154 && vnodeEssence->name == NULL) {
3156 if ((n = (char *)malloc(strlen(name) + 1)))
3158 vnodeEssence->name = n;
3161 /* The directory entry points to the vnode. Check to see if the
3162 * vnode points back to the directory. If not, then let the
3163 * directory claim it (else it might end up orphaned). Vnodes
3164 * already claimed by another directory are deleted from this
3165 * directory: hardlinks to the same vnode are not allowed
3166 * from different directories.
3168 if (vnodeEssence->parent != dir->vnodeNumber) {
3169 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3170 /* Vnode does not point back to this directory.
3171 * Orphaned dirs cannot claim a file (it may belong to
3172 * another non-orphaned dir).
3175 Log("dir vnode %u: %s/%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3177 vnodeEssence->parent = dir->vnodeNumber;
3178 vnodeEssence->changed = 1;
3180 /* Vnode was claimed by another directory */
3183 Log("dir vnode %u: %s/%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3184 } else if (vnodeNumber == 1) {
3185 Log("dir vnode %d: %s/%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3187 Log("dir vnode %u: %s/%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3191 CopyOnWrite(salvinfo, dir);
3192 assert(Delete(&dir->dirHandle, name) == 0);
3197 /* This directory claims the vnode */
3198 vnodeEssence->claimed = 1;
3200 vnodeEssence->count--;
/*
 * DistilVnodeEssence: read one vnode index (inode `ino`, class `class`)
 * of read/write volume rwVId and summarize every allocated vnode into the
 * in-memory table salvinfo->vnodeInfo[class].vnodes.
 *
 * The number of vnodes is (file size / vcp->diskSize) - 1 and reading
 * starts one diskSize into the file. For each vnode whose type is not
 * vNull the link count, block count, parent, uniquifier, mode bits, inode
 * number, type, author, owner and group are copied into its VnodeEssence.
 * vip->nAllocatedVnodes and vip->volumeBlockCount are accumulated, and
 * *maxu is raised to the largest uniquifier seen. For the large class the
 * directory's data inode is also recorded in vip->inodes[].
 *
 * A vnode of type vDirectory found in a non-large index is invalid: it is
 * dropped from the allocation count, zeroed, and the zeroed record is
 * written back to the small vnode index.
 */
3205 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3206 VnodeClass class, Inode ino, Unique * maxu)
3208 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3209 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3210 char buf[SIZEOF_LARGEDISKVNODE];
3211 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3213 StreamHandle_t *file;
3218 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3219 fdP = IH_OPEN(vip->handle);
3220 assert(fdP != NULL);
3221 file = FDH_FDOPEN(fdP, "r+");
3222 assert(file != NULL);
3223 size = OS_SIZE(fdP->fd_fd);
3225 vip->nVnodes = (size / vcp->diskSize) - 1;
3226 if (vip->nVnodes > 0) {
3227 assert((vip->nVnodes + 1) * vcp->diskSize == size);
3228 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
3229 assert((vip->vnodes = (struct VnodeEssence *)
3230 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3231 if (class == vLarge) {
3232 assert((vip->inodes = (Inode *)
3233 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3242 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3243 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3244 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3245 nVnodes--, vnodeIndex++) {
3246 if (vnode->type != vNull) {
3247 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3248 afs_fsize_t vnodeLength;
3249 vip->nAllocatedVnodes++;
3250 vep->count = vnode->linkCount;
3251 VNDISK_GET_LEN(vnodeLength, vnode);
3252 vep->blockCount = nBlocks(vnodeLength);
3253 vip->volumeBlockCount += vep->blockCount;
3254 vep->parent = vnode->parent;
3255 vep->unique = vnode->uniquifier;
3256 if (*maxu < vnode->uniquifier)
3257 *maxu = vnode->uniquifier;
3258 vep->modeBits = vnode->modeBits;
3259 vep->InodeNumber = VNDISK_GET_INO(vnode);
3260 vep->type = vnode->type;
3261 vep->author = vnode->author;
3262 vep->owner = vnode->owner;
3263 vep->group = vnode->group;
3264 if (vnode->type == vDirectory) {
3265 if (class != vLarge) {
3266 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3267 vip->nAllocatedVnodes--;
/* `vnode` is a pointer into buf, so sizeof(vnode) is only the size of a
 * pointer. Clear the whole on-disk record that was just read, i.e.
 * vcp->diskSize bytes. */
3268 memset(vnode, 0, vcp->diskSize);
3269 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3270 vnodeIndexOffset(vcp, vnodeNumber),
/* Write the zeroed record itself, not the bytes of the pointer variable. */
3271 (char *)vnode, vcp->diskSize);
3272 salvinfo->VolumeChanged = 1;
3274 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName: build the path of directory vnode `vnode` (essence `vp`)
 * into the caller-supplied buffer `path`.
 *
 * For a vnode that has a parent and a remembered name, the parent's path
 * is produced first by recursing on vp->parent, and vp->name is then
 * appended. The recursion only proceeds when the parent resolves through
 * CheckVnodeNumber().
 *
 * NOTE(review): the appends use strcat() with no length check; the only
 * visible caller passes a MAXPATHLEN buffer -- confirm that deep trees
 * cannot overflow it.
 */
3283 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3286 struct VnodeEssence *parentvp;
3292 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3293 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3295 strcat(path, vp->name);
3301 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3302 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
/*
 * IsVnodeOrphaned: return 1 when `vnode` is orphaned, 0 when it is not.
 *
 * A vnode is orphaned when it has no VnodeEssence or its `claimed` flag
 * is clear; otherwise the answer is that of its parent, determined
 * recursively. The visible return comments give the two base cases:
 * vnode zero does not exist (orphaned) and the root directory vnode is
 * always claimed (not orphaned).
 */
3305 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3307 struct VnodeEssence *vep;
3310 return (1); /* Vnode zero does not exist */
3312 return (0); /* The root dir vnode is always claimed */
3313 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3314 if (!vep || !vep->claimed)
3315 return (1); /* Vnode is not claimed - it is orphaned */
3317 return (IsVnodeOrphaned(salvinfo, vep->parent));
/*
 * SalvageDir: salvage the directory stored in slot `i` of the large vnode
 * table dirVnodeInfo, after first salvaging its parent.
 *
 * Visible behaviour:
 *  - a slot already marked `salvaged` is skipped; otherwise it is marked
 *    before any further work, and a slot with inodes[i] == 0 (not a
 *    directory) is skipped;
 *  - vnode 1 must not have a parent: a non-zero parent is reset to 0 and
 *    the essence marked changed; for other vnodes an unsalvaged parent is
 *    salvaged first by recursion;
 *  - the static DirSummary `dir` is filled in and bound to the directory's
 *    inode; the directory is checked with DirOK() unless RebuildDirs is
 *    set and Testing is not, in which case it is treated as bad;
 *  - a bad directory is logged and rebuilt through CopyAndSalvage(), and
 *    inodes[i] is updated from the resulting handle;
 *  - every entry is then examined by JudgeEntry via EnumerateDir(), which
 *    must succeed (asserted);
 *  - when JudgeEntry caused a copy (dir.copied) and Testing is off, the
 *    old inode still referenced by the local dirHandle is released with
 *    IH_DEC and inodes[i] is updated;
 *  - the summary of vnode 1 / unique 1 is copied into *rootdir after it
 *    has been judged.
 *
 * `dir`, `dirHandle` and `path` are static, so the recursive call for the
 * parent is made before they are filled in for this directory.
 */
3321 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3322 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3323 struct DirSummary *rootdir, int *rootdirfound)
3325 static struct DirSummary dir;
3326 static struct DirHandle dirHandle;
3327 struct VnodeEssence *parent;
3328 static char path[MAXPATHLEN];
3331 if (dirVnodeInfo->vnodes[i].salvaged)
3332 return; /* already salvaged */
3335 dirVnodeInfo->vnodes[i].salvaged = 1;
3337 if (dirVnodeInfo->inodes[i] == 0)
3338 return; /* Not allocated to a directory */
3340 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3341 if (dirVnodeInfo->vnodes[i].parent) {
3342 Log("Bad parent, vnode 1; %s...\n",
3343 (Testing ? "skipping" : "salvaging"));
3344 dirVnodeInfo->vnodes[i].parent = 0;
3345 dirVnodeInfo->vnodes[i].changed = 1;
3348 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3349 if (parent && parent->salvaged == 0)
3350 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3351 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3352 rootdir, rootdirfound);
3355 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3356 dir.unique = dirVnodeInfo->vnodes[i].unique;
3359 dir.parent = dirVnodeInfo->vnodes[i].parent;
3360 dir.haveDot = dir.haveDotDot = 0;
3361 dir.ds_linkH = alinkH;
3362 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3363 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
3365 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3368 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3369 (Testing ? "skipping" : "salvaging"));
3372 CopyAndSalvage(salvinfo, &dir);
3374 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3377 dirHandle = dir.dirHandle;
3380 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3381 &dirVnodeInfo->vnodes[i], path);
3384 /* If enumeration failed for random reasons, we will probably delete
3385 * too much stuff, so we guard against this instead.
3387 struct judgeEntry_params judge_params;
3388 judge_params.salvinfo = salvinfo;
3389 judge_params.dir = &dir;
3391 assert(EnumerateDir(&dirHandle, JudgeEntry, &judge_params) == 0);
3394 /* Delete the old directory if it was copied in order to salvage.
3395 * CopyOnWrite has written the new inode # to the disk, but we still
3396 * have the old one in our local structure here. Thus, we idec the
3400 if (dir.copied && !Testing) {
3401 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3403 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3406 /* Remember rootdir DirSummary _after_ it has been judged */
3407 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3408 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3416 * Get a new FID that can be used to create a new file.