2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
89 #include <afs/procmgmt.h>
92 #ifdef HAVE_SYS_FILE_H
93 # include <sys/file.h>
97 #include <WINNT/afsevent.h>
100 #define WCOREDUMP(x) ((x) & 0200)
103 #include <afs/afsint.h>
104 #include <afs/afs_assert.h>
105 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
106 #if defined(AFS_VFSINCL_ENV)
107 #include <sys/vnode.h>
109 #include <sys/fs/ufs_inode.h>
111 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
112 #include <ufs/ufs/dinode.h>
113 #include <ufs/ffs/fs.h>
115 #include <ufs/inode.h>
118 #else /* AFS_VFSINCL_ENV */
120 #include <ufs/inode.h>
121 #else /* AFS_OSF_ENV */
122 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_ARM_DARWIN_ENV)
123 #include <sys/inode.h>
126 #endif /* AFS_VFSINCL_ENV */
127 #endif /* AFS_SGI_ENV */
130 #include <sys/lockf.h>
133 #include <checklist.h>
135 #if defined(AFS_SGI_ENV)
138 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
140 #include <sys/mnttab.h>
141 #include <sys/mntent.h>
146 #endif /* AFS_SGI_ENV */
147 #endif /* AFS_HPUX_ENV */
151 #include <afs/osi_inode.h>
155 #include <afs/afsutil.h>
156 #include <afs/fileutil.h>
161 #include <afs/afssyscalls.h>
165 #include "partition.h"
166 #include "daemon_com.h"
167 #include "daemon_com_inline.h"
169 #include "volume_inline.h"
170 #include "salvsync.h"
171 #include "viceinode.h"
173 #include "volinodes.h" /* header magic number, etc. stuff */
174 #include "vol-salvage.h"
176 #include "vol_internal.h"
178 #include <afs/prs_fs.h>
180 #ifdef FSSYNC_BUILD_CLIENT
181 #include "vg_cache.h"
189 extern void *calloc();
191 static char *TimeStamp(time_t clock, int precision);
194 int debug; /* -d flag */
195 extern int Testing; /* -n flag */
196 int ListInodeOption; /* -i flag */
197 int ShowRootFiles; /* -r flag */
198 int RebuildDirs; /* -sal flag */
199 int Parallel = 4; /* -para X flag */
200 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
201 int forceR = 0; /* -b flag */
202 int ShowLog = 0; /* -showlog flag */
203 int ShowSuid = 0; /* -showsuid flag */
204 int ShowMounts = 0; /* -showmounts flag */
205 int orphans = ORPH_IGNORE; /* -orphans option */
210 int useSyslog = 0; /* -syslog flag */
211 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
220 #define MAXPARALLEL 32
222 int OKToZap; /* -o flag */
223 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
224 * in the volume header */
226 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
228 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
231 * information that is 'global' to a particular salvage job.
234 Device fileSysDevice; /**< The device number of the current partition
236 char fileSysPath[8]; /**< The path of the mounted partition currently
237 * being salvaged, i.e. the directory containing
238 * the volume headers */
239 char *fileSysPathName; /**< NT needs this to make name pretty log. */
240 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
241 int VGLinkH_cnt; /**< # of references to lnk handle. */
242 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
245 char *fileSysDeviceName; /**< The block device where the file system being
246 * salvaged was mounted */
247 char *filesysfulldev;
249 int VolumeChanged; /**< Set by any routine which would change the
250 * volume in a way which would require callbacks
251 * to be broken if the volume was put back on
252 * on line by an active file server */
254 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
255 * header dealt with */
257 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
258 FD_t inodeFd; /**< File descriptor for inode file */
260 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
261 int nVolumes; /**< Number of volumes (read-write and read-only)
262 * in volume summary */
263 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
266 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
267 * vnodes in the volume that
268 * we are currently looking
270 int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
271 * to contact the fileserver over FSYNC */
278 /* Forward declarations */
279 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
280 static int AskVolumeSummary(struct SalvInfo *salvinfo,
281 VolumeId singleVolumeNumber);
282 static void MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId);
283 static void AskError(struct SalvInfo *salvinfo, VolumeId volumeId);
285 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
286 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
287 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
289 /* Uniquifier stored in the Inode */
294 return (u & 0x3fffff);
296 #if defined(AFS_SGI_EXMAG)
297 return (u & SGI_UNIQMASK);
300 #endif /* AFS_SGI_EXMAG */
307 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
309 return 0; /* otherwise may be transient, e.g. EMFILE */
314 char *save_args[MAX_ARGS];
316 extern pthread_t main_thread;
317 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
321 * Get the salvage lock if not already held. Hold until process exits.
323 * @param[in] locktype READ_LOCK or WRITE_LOCK
/*
 * Common helper behind the exclusive and shared salvage lock entry points.
 * It initialises a VLockFile on AFSDIR_SERVER_SLVGLOCK_FILEPATH and asks
 * VLockFileLock() for a lock of the requested type.  The two string
 * fragments below belong to the diagnostics for the case where another
 * salvager holds the lock and for any other lock error.
 * NOTE(review): the declarations of offset, nonblock and code, and the
 * branches that choose between the two messages, are elided in this excerpt.
 */
326 _ObtainSalvageLock(int locktype)
328 struct VLockFile salvageLock;
333 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
335 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
338 "salvager: There appears to be another salvager running! "
343 "salvager: Error %d trying to acquire salvage lock! "
/* Take the salvage lock as a WRITE_LOCK by delegating to _ObtainSalvageLock(). */
349 ObtainSalvageLock(void)
351 _ObtainSalvageLock(WRITE_LOCK);
/* Take the salvage lock as a READ_LOCK by delegating to _ObtainSalvageLock(). */
354 ObtainSharedSalvageLock(void)
356 _ObtainSalvageLock(READ_LOCK);
360 #ifdef AFS_SGI_XFS_IOPS_ENV
361 /* Check if the given partition is mounted. For XFS, the root inode is not a
362 * constant. So we check the hard way.
/*
 * Scan the mount table opened from MOUNTED for an entry whose mnt_dir equals
 * part.  Returns 1 when such an entry was found and 0 when the table was
 * exhausted without a match (getmntent() then leaves mntent NULL).  Both arms
 * of the final conditional previously yielded 1, so the scan could never
 * report an unmounted partition.
 * NOTE(review): assumes the elided statement under the strcmp test leaves the
 * loop on a match -- confirm against the full file.
 */
365 IsPartitionMounted(char *part)
368 struct mntent *mntent;
370 osi_Assert(mntfp = setmntent(MOUNTED, "r"));
371 while (mntent = getmntent(mntfp)) {
372 if (!strcmp(part, mntent->mnt_dir))
377 return mntent ? 1 : 0;
380 /* Check if the given inode is the root of the filesystem. */
381 #ifndef AFS_SGI_XFS_IOPS_ENV
/*
 * Compares the st_ino field of the supplied stat result with ROOTINODE and
 * returns the result of that comparison (non-zero when they are equal).
 */
383 IsRootInode(struct afs_stat_st *status)
386 * The root inode is not a fixed value in XFS partitions. So we need to
387 * see if the partition is in the list of mounted partitions. This only
388 * affects the SalvageFileSys path, so we check there.
390 return (status->st_ino == ROOTINODE);
395 #ifndef AFS_NAMEI_ENV
396 /* We don't want to salvage big files filesystems, since we can't put volumes on
/*
 * Builds the device path for devName in the local buffer name (the /dev/
 * prefix is added when devName does not already start with it), reads the
 * superblock through ReadSuper() and logs a message either when the
 * superblock cannot be read or when IsBigFilesFileSystem() reports a big
 * files file system.
 * NOTE(review): the declaration of name and the return statements are elided
 * in this excerpt.  The sprintf and strcpy into name are unbounded, so
 * confirm the buffer is large enough for every devName.
 */
400 CheckIfBigFilesFS(char *mountPoint, char *devName)
402 struct superblock fs;
405 if (strncmp(devName, "/dev/", 5)) {
406 (void)sprintf(name, "/dev/%s", devName);
408 (void)strcpy(name, devName);
411 if (ReadSuper(&fs, name) < 0) {
412 Log("Unable to read superblock. Not salvaging partition %s.\n",
416 if (IsBigFilesFileSystem(&fs)) {
417 Log("Partition %s is a big files filesystem, not salvaging.\n",
427 #define HDSTR "\\Device\\Harddisk"
428 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
/*
 * Function form of the same-disk test.  Each partition devName is resolved
 * with QueryDosDevice() into res1 and res2; a result that does not start
 * with HDSTR is reported through a WARNING log line.  The final answer is a
 * case-insensitive comparison of the two resolved strings over at most
 * RES_LEN - 1 characters.
 * NOTE(review): the return values taken when QueryDosDevice() fails, and the
 * way the static flag dowarn gates the warning, are elided in this excerpt.
 */
430 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
436 static int dowarn = 1;
438 if (!QueryDosDevice(p1->devName, res1, RES_LEN - 1))
440 if (strncmp(res1, HDSTR, HDLEN)) {
443 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
444 res1, HDSTR, p1->devName);
447 if (!QueryDosDevice(p2->devName, res2, RES_LEN - 1))
449 if (strncmp(res2, HDSTR, HDLEN)) {
452 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
453 res2, HDSTR, p2->devName);
457 return (0 == _strnicmp(res1, res2, RES_LEN - 1));
460 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
463 /* This assumes that two partitions with the same device number divided by
464 * PartsPerDisk are on the same disk.
/*
 * Schedule the salvage of partP alongside the salvages already running.
 *
 * With a partition argument a struct job is allocated for it and numbered
 * from the static counter jobcount.  In debug mode or when Parallel is 1 the
 * partition is salvaged directly through SalvageFileSys().  Otherwise the
 * static jobs array keeps one running job per disk: a job for a disk that
 * is already busy is chained onto that disk's nextjob list, and a job for a
 * new disk is started once fewer than Parallel jobs are running.  When the
 * limit is reached, or when partP is NULL (wait for everything), the
 * function blocks in wait(), retires the finished job and starts the next
 * partition queued for that disk.  Each started job writes to a log file
 * named after AFSDIR_SERVER_SLVGLOG_FILEPATH plus its job number.
 *
 * After waiting for all jobs, and unless syslog is in use, the per-job log
 * files are read back and unlinked.
 * NOTE(review): many lines (the struct job header, the fork call, several
 * braces and returns) are elided in this excerpt.
 */
467 SalvageFileSysParallel(struct DiskPartition64 *partP)
470 struct DiskPartition64 *partP;
471 int pid; /* Pid for this job */
472 int jobnumb; /* Log file job number */
473 struct job *nextjob; /* Next partition on disk to salvage */
475 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
476 struct job *thisjob = 0;
477 static int numjobs = 0;
478 static int jobcount = 0;
484 char logFileName[256];
488 /* We have a partition to salvage. Copy it into thisjob */
489 thisjob = (struct job *)malloc(sizeof(struct job));
491 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
494 memset(thisjob, 0, sizeof(struct job));
495 thisjob->partP = partP;
496 thisjob->jobnumb = jobcount;
498 } else if (jobcount == 0) {
499 /* We are asking to wait for all jobs (partp == 0), yet we never
502 Log("No file system partitions named %s* found; not salvaged\n",
503 VICE_PARTITION_PREFIX);
507 if (debug || Parallel == 1) {
509 SalvageFileSys(thisjob->partP, 0);
516 /* Check to see if thisjob is for a disk that we are already
517 * salvaging. If it is, link it in as the next job to do. The
518 * jobs array has 1 entry per disk being salvaged. numjobs is
519 * the total number of disks currently being salvaged. In
520 * order to keep the jobs array compact, when a disk is
521 * completed, the highest element in the jobs array is moved
522 * down to the now-open slot.
524 for (j = 0; j < numjobs; j++) {
525 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
526 /* On same disk, add it to this list and return */
527 thisjob->nextjob = jobs[j]->nextjob;
528 jobs[j]->nextjob = thisjob;
535 /* Loop until we start thisjob or until all existing jobs are finished */
536 while (thisjob || (!partP && (numjobs > 0))) {
537 startjob = -1; /* No new job to start */
539 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
540 /* Either the max jobs are running or we have to wait for all
541 * the jobs to finish. In either case, we wait for at least one
542 * job to finish. When it's done, clean up after it.
544 pid = wait(&wstatus);
545 osi_Assert(pid != -1);
546 for (j = 0; j < numjobs; j++) { /* Find which job it is */
547 if (pid == jobs[j]->pid)
550 osi_Assert(j < numjobs);
551 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
552 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
555 numjobs--; /* job no longer running */
556 oldjob = jobs[j]; /* remember */
557 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
558 free(oldjob); /* free the old job */
560 /* If there is another partition on the disk to salvage, then
561 * say we will start it (startjob). If not, then put thisjob there
562 * and say we will start it.
564 if (jobs[j]) { /* Another partition to salvage */
565 startjob = j; /* Will start it */
566 } else { /* There is not another partition to salvage */
568 jobs[j] = thisjob; /* Add thisjob */
570 startjob = j; /* Will start it */
572 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
573 startjob = -1; /* Don't start it - already running */
577 /* We don't have to wait for a job to complete */
579 jobs[numjobs] = thisjob; /* Add this job */
581 startjob = numjobs; /* Will start it */
585 /* Start up a new salvage job on a partition in job slot "startjob" */
586 if (startjob != -1) {
588 Log("Starting salvage of file system partition %s\n",
589 jobs[startjob]->partP->name);
591 /* For NT, we not only fork, but re-exec the salvager. Pass in the
592 * commands and pass the child job number via the data path.
595 nt_SalvagePartition(jobs[startjob]->partP->name,
596 jobs[startjob]->jobnumb);
597 jobs[startjob]->pid = pid;
602 jobs[startjob]->pid = pid;
608 for (fd = 0; fd < 16; fd++)
615 openlog("salvager", LOG_PID, useSyslogFacility);
619 snprintf(logFileName, sizeof logFileName, "%s.%d",
620 AFSDIR_SERVER_SLVGLOG_FILEPATH,
621 jobs[startjob]->jobnumb);
622 logFile = afs_fopen(logFileName, "w");
627 SalvageFileSys1(jobs[startjob]->partP, 0);
632 } /* while ( thisjob || (!partP && numjobs > 0) ) */
634 /* If waited for all jobs to complete, now collect log files and return */
636 if (!useSyslog) /* if syslogging - no need to collect */
/* Read back every per-job log file by job number, then unlink it. */
639 for (i = 0; i < jobcount; i++) {
640 snprintf(logFileName, sizeof logFileName, "%s.%d",
641 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
642 if ((passLog = afs_fopen(logFileName, "r"))) {
643 while (fgets(buf, sizeof(buf), passLog)) {
648 (void)unlink(logFileName);
/*
 * Entry point for salvaging a partition, or a single volume on it when
 * singleVolumeNumber is non-zero.  The real work is done by
 * SalvageFileSys1(): in this process when canfork is clear or debug is set,
 * otherwise in the process for which Fork() returns 0.
 * NOTE(review): the statements run by the child after SalvageFileSys1() and
 * the branch that reaches the Wait() call are elided in this excerpt;
 * presumably the parent waits for the child there -- confirm.
 */
657 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
659 if (!canfork || debug || Fork() == 0) {
660 SalvageFileSys1(partP, singleVolumeNumber);
661 if (canfork && !debug) {
666 Wait("SalvageFileSys");
/*
 * Splits the device path held in pbuffer at its last OS_DIRSEPC.  A copy of
 * the path is made in the local pbuf and the last separator is located in
 * both the copy and the original; the text following the separator is then
 * moved to the front of pbuffer.
 * NOTE(review): how wpath is filled in and which pointer is returned is
 * elided in this excerpt.
 * NOTE(review): strcpy(pbuf, pbuffer) is not bounded by the 128 bytes of
 * pbuf, and strcpy(pbuffer, ptr + 1) copies between overlapping regions of
 * the same buffer, which the C standard leaves undefined; memmove over
 * strlen(ptr + 1) + 1 bytes would be the well-defined form.
 */
670 get_DevName(char *pbuffer, char *wpath)
672 char pbuf[128], *ptr;
673 strcpy(pbuf, pbuffer);
674 ptr = (char *)strrchr(pbuf, OS_DIRSEPC);
680 ptr = (char *)strrchr(pbuffer, OS_DIRSEPC);
682 strcpy(pbuffer, ptr + 1);
/*
 * Salvage the partition partP, or only the volume group of
 * singleVolumeNumber when that argument is non-zero.
 *
 * Steps visible in this excerpt, in order:
 *  - a zeroed local SalvInfo is filled in with the partition, its device
 *    and its path;
 *  - for a single volume the fileserver connection is set up, the volume is
 *    taken offline with AskOffline() and, in demand-attach builds, locked
 *    with LockVolume(); otherwise useFSYNC is cleared and the partition is
 *    locked with VLockPartition();
 *  - ForceSalvage is refreshed from UseTheForceLuke();
 *  - leftover salvage.inodes. and salvage.temp. files are looked up in the
 *    partition directory;
 *  - the temporary inode list file is created, unlinked straight away and
 *    filled through GetInodeSummary();
 *  - with ListInodeOption set the inode list is printed;
 *  - volume headers are read through GetVolumeSummary(), then the inode
 *    summary is walked one read-write volume id at a time: header files
 *    without matching inodes go to DeleteExtraVolumeHeaderFile() and each
 *    group goes to SalvageVolumeGroup();
 *  - for a whole-partition run the FORCESALVAGE file is removed;
 *  - for a single-volume run outside Testing mode the volumes are handed
 *    back to the fileserver, singleVolumeNumber first;
 *  - the inode list file is closed.
 *
 * The counter tries is checked against VOL_MAX_CHECKOUT_RETRIES and the run
 * aborts when it is exceeded.
 * NOTE(review): many lines, including the retry label and most closing
 * braces, are elided in this excerpt, so the exact nesting of the steps
 * above should be confirmed against the full file.
 * NOTE(review): one path formats fileSysPath with an unbounded sprintf;
 * confirm the partition path always fits in that array.
 */
689 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
692 char inodeListPath[256];
693 FD_t inodeFile = INVALID_FD;
694 static char tmpDevName[100];
695 static char wpath[100];
696 struct VolumeSummary *vsp, *esp;
700 struct SalvInfo l_salvinfo;
701 struct SalvInfo *salvinfo = &l_salvinfo;
704 memset(salvinfo, 0, sizeof(*salvinfo));
707 if (inodeFile != INVALID_FD) {
709 inodeFile = INVALID_FD;
711 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
712 Abort("Raced too many times with fileserver restarts while trying to "
713 "checkout/lock volumes; Aborted\n");
715 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
717 /* unlock all previous volume locks, since we're about to lock them
719 VLockFileReinit(&partP->volLockFile);
721 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
/* Record which partition, device and path this run works on. */
723 salvinfo->fileSysPartition = partP;
724 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
725 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
728 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
729 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
730 name = partP->devName;
732 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
733 strcpy(tmpDevName, partP->devName);
734 name = get_DevName(tmpDevName, wpath);
735 salvinfo->fileSysDeviceName = name;
736 salvinfo->filesysfulldev = wpath;
739 if (singleVolumeNumber) {
740 #if !(defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL))
741 /* only non-DAFS locks the partition when salvaging a single volume;
742 * DAFS will lock the individual volumes in the VG */
743 VLockPartition(partP->name);
744 #endif /* !(AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL) */
748 /* salvageserver already setup fssync conn for us */
749 if ((programType != salvageServer) && !VConnectFS()) {
750 Abort("Couldn't connect to file server\n");
753 salvinfo->useFSYNC = 1;
754 AskOffline(salvinfo, singleVolumeNumber);
755 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
756 if (LockVolume(salvinfo, singleVolumeNumber)) {
759 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
762 salvinfo->useFSYNC = 0;
763 VLockPartition(partP->name);
767 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
770 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
771 partP->name, name, (Testing ? "(READONLY mode)" : ""));
773 Log("***Forced salvage of all volumes on this partition***\n");
778 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
785 osi_Assert((dirp = opendir(salvinfo->fileSysPath)) != NULL);
786 while ((dp = readdir(dirp))) {
787 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
788 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
790 Log("Removing old salvager temp files %s\n", dp->d_name);
791 strcpy(npath, salvinfo->fileSysPath);
792 strcat(npath, OS_DIRSEP);
793 strcat(npath, dp->d_name);
799 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
801 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
802 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
804 snprintf(inodeListPath, 255, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
/* Create (or truncate) the temporary inode list file. */
808 inodeFile = OS_OPEN(inodeListPath, O_RDWR|O_TRUNC|O_CREAT, 0666);
809 if (inodeFile == INVALID_FD) {
810 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
813 /* Using nt_unlink here since we're really using the delete on close
814 * semantics of unlink. In most places in the salvager, we really do
815 * mean to unlink the file at that point. Those places have been
816 * modified to actually do that so that the NT crt can be used there.
818 * jaltman - On NT delete on close cannot be applied to a file while the
819 * process has an open file handle that does not have DELETE file
820 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
821 * delete privileges. As a result the nt_unlink() call will always
824 code = nt_unlink(inodeListPath);
826 code = unlink(inodeListPath);
829 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
832 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
834 if (singleVolumeNumber) {
835 /* the volume group -- let alone the volume -- does not exist,
836 * but we checked it out, so give it back to the fileserver */
837 AskDelete(salvinfo, singleVolumeNumber);
841 salvinfo->inodeFd = inodeFile;
842 if (salvinfo->inodeFd == INVALID_FD)
843 Abort("Temporary file %s is missing...\n", inodeListPath);
844 OS_SEEK(salvinfo->inodeFd, 0L, SEEK_SET);
845 if (ListInodeOption) {
846 PrintInodeList(salvinfo);
847 if (singleVolumeNumber) {
848 /* We've checked out the volume from the fileserver, and we need
849 * to give it back. We don't know if the volume exists or not,
850 * so we don't know whether to AskOnline or not. Try to determine
851 * if the volume exists by trying to read the volume header, and
852 * AskOnline if it is readable. */
853 MaybeAskOnline(salvinfo, singleVolumeNumber);
857 /* enumerate volumes in the partition.
858 * figure out sets of read-only + rw volumes.
859 * salvage each set, read-only volumes first, then read-write.
860 * Fix up inodes on last volume in set (whether it is read-write
863 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
867 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
868 i < salvinfo->nVolumesInInodeFile; i = j) {
869 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
871 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
873 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
874 struct VolumeSummary *tsp;
875 /* Scan volume list (from partition root directory) looking for the
876 * current rw volume number in the volume list from the inode scan.
877 * If there is one here that is not in the inode volume list,
879 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
881 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
883 /* Now match up the volume summary info from the root directory with the
884 * entry in the volume list obtained from scanning inodes */
885 salvinfo->inodeSummary[j].volSummary = NULL;
886 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
887 if (tsp->header.id == vid) {
888 salvinfo->inodeSummary[j].volSummary = tsp;
894 /* Salvage the group of volumes (several read-only + 1 read/write)
895 * starting with the current read-only volume we're looking at.
897 SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
900 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
901 for (; vsp < esp; vsp++) {
903 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
906 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
907 RemoveTheForce(salvinfo->fileSysPath);
909 if (!Testing && singleVolumeNumber) {
911 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
912 /* unlock vol headers so the fs can attach them when we AskOnline */
913 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
914 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
916 /* Step through the volumeSummary list and set all volumes on-line.
917 * Most volumes were taken off-line in GetVolumeSummary.
918 * If a volume was deleted, don't tell the fileserver anything, since
919 * we already told the fileserver the volume was deleted back when we
920 * destroyed the volume header.
921 * Also, make sure we bring the singleVolumeNumber back online first.
924 for (j = 0; j < salvinfo->nVolumes; j++) {
925 if (salvinfo->volumeSummaryp[j].header.id == singleVolumeNumber) {
927 if (!salvinfo->volumeSummaryp[j].deleted) {
928 AskOnline(salvinfo, singleVolumeNumber);
934 /* If singleVolumeNumber is not in our volumeSummary, it means that
935 * at least one other volume in the VG is on the partition, but the
936 * RW volume is not. We've already AskOffline'd it by now, though,
937 * so make sure we don't still have the volume checked out. */
938 AskDelete(salvinfo, singleVolumeNumber);
/* Then bring every other volume that was not deleted back online. */
941 for (j = 0; j < salvinfo->nVolumes; j++) {
942 if (salvinfo->volumeSummaryp[j].header.id != singleVolumeNumber) {
943 if (!salvinfo->volumeSummaryp[j].deleted) {
944 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
950 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
951 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
954 OS_CLOSE(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
/*
 * Handle a volume header file that has no matching inode data.  The full
 * path is built from salvinfo->fileSysPath and vsp->fileName and reported in
 * the log (the wording changes in Testing mode).  The header is destroyed
 * through VDestroyVolumeDiskHeader(), with a log line on failure, the file
 * is unlinked (ENOENT is not treated as an error), and when useFSYNC is set
 * the fileserver is told through AskDelete().
 * NOTE(review): the conditions guarding each step and the declaration of
 * path are elided in this excerpt; the sprintf into path is unbounded, so
 * confirm the buffer size against the longest partition path and file name.
 */
958 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
961 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, vsp->fileName);
964 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
967 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
969 Log("Error %ld destroying volume disk header for volume %lu\n",
970 afs_printable_int32_ld(code),
971 afs_printable_uint32_lu(vsp->header.id));
974 /* make sure we actually delete the fileName file; ENOENT
975 * is fine, since VDestroyVolumeDiskHeader probably already
977 if (unlink(path) && errno != ENOENT) {
978 Log("Unable to unlink %s (errno = %d)\n", path, errno);
980 if (salvinfo->useFSYNC) {
981 AskDelete(salvinfo, vsp->header.id);
/*
 * Comparison callback over two struct ViceInodeInfo records, taking the
 * const void pointers that qsort hands to its comparator.
 *
 * When either record is a special inode (vnodeNumber == INODESPECIAL) the
 * pair is first ordered by read-write volume id, taken from
 * u.special.parentId for a special inode and from u.vnode.volumeId
 * otherwise.  Ties are then settled with the volume ids and, for two special
 * inodes of the same volume, with u.special.type.
 * Ordinary inodes are ordered by volumeId and then by vnodeNumber.  For the
 * same vnode the vnodeUniquifier and inodeDataVersion tests are reversed so
 * that the most desirable inode comes first.  Under AFS_3DISPARES and
 * AFS_SGI_EXMAG a value above 90% of the range paired with one below 10%
 * gets separate handling (presumably counter wrap-around -- confirm).
 * NOTE(review): most return statements are elided in this excerpt.
 */
989 CompareInodes(const void *_p1, const void *_p2)
991 const struct ViceInodeInfo *p1 = _p1;
992 const struct ViceInodeInfo *p2 = _p2;
993 if (p1->u.vnode.vnodeNumber == INODESPECIAL
994 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
995 VolumeId p1rwid, p2rwid;
997 (p1->u.vnode.vnodeNumber ==
998 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
1000 (p2->u.vnode.vnodeNumber ==
1001 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1002 if (p1rwid < p2rwid)
1004 if (p1rwid > p2rwid)
1006 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1007 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
1008 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1009 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1010 if (p1->u.vnode.volumeId == p1rwid)
1012 if (p2->u.vnode.volumeId == p2rwid)
1014 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1016 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1017 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1018 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1020 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1022 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1024 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1026 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1028 /* The following tests are reversed, so that the most desirable
1029 * of several similar inodes comes first */
1030 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1031 #ifdef AFS_3DISPARES
1032 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1033 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1036 #ifdef AFS_SGI_EXMAG
1037 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1038 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1043 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1044 #ifdef AFS_3DISPARES
1045 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1046 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1049 #ifdef AFS_SGI_EXMAG
1050 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1051 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1056 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1057 #ifdef AFS_3DISPARES
1058 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1059 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1062 #ifdef AFS_SGI_EXMAG
1063 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1064 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1069 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1070 #ifdef AFS_3DISPARES
1071 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1072 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1075 #ifdef AFS_SGI_EXMAG
1076 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1077 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/*
 * Summarise the run of inode records at ip that belong to one volume.
 * Starting from the volume id of the first record, the loop advances while
 * records remain (at most maxInodes) and the volume id is unchanged.  A
 * special inode supplies the read-write volume id through its parentId; the
 * largest vnodeUniquifier seen is tracked in maxunique.  The results are
 * stored in summary: volumeId, RWvolumeId, nInodes, nSpecialInodes and
 * maxUniquifier.
 * NOTE(review): the declarations and increments of n and nSpecial and the
 * pointer advance are elided in this excerpt.
 */
1086 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1087 struct InodeSummary *summary)
1089 VolumeId volume = ip->u.vnode.volumeId;
1090 VolumeId rwvolume = volume;
1095 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1097 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1099 rwvolume = ip->u.special.parentId;
1100 /* This isn't quite right, as there could (in error) be different
1101 * parent inodes in different special vnodes */
1103 if (maxunique < ip->u.vnode.vnodeUniquifier)
1104 maxunique = ip->u.vnode.vnodeUniquifier;
1108 summary->volumeId = volume;
1109 summary->RWvolumeId = rwvolume;
1110 summary->nInodes = n;
1111 summary->nSpecialInodes = nSpecial;
1112 summary->maxUniquifier = maxunique;
/*
 * Inode filter for single-volume salvages: returns non-zero when the inode
 * belongs to singleVolumeNumber.  A special inode is matched on its
 * parentId, any other inode on its volumeId.  The rock argument is not used
 * by the visible code.
 */
1116 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1118 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1119 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1120 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1125 * Collect list of inodes in file named by path. If a truly fatal error,
1126 unlink the file and abort. For lesser errors, return -1. The file will
1127 * be unlinked by the caller.
1130 GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolumeNumber)
1134 struct ViceInodeInfo *ip, *ip_save;
1135 struct InodeSummary summary;
1136 char summaryFileName[50];
1137 FD_t summaryFile = INVALID_FD;
1139 char *dev = salvinfo->fileSysPath;
1140 char *wpath = salvinfo->fileSysPath;
1142 char *dev = salvinfo->fileSysDeviceName;
1143 char *wpath = salvinfo->filesysfulldev;
1145 char *part = salvinfo->fileSysPath;
1150 afs_sfsize_t st_size;
1152 /* This file used to come from vfsck; cobble it up ourselves now... */
1154 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1155 singleVolumeNumber ? OnlyOneVolume : 0,
1156 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1158 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1162 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1164 if (forceSal && !ForceSalvage) {
1165 Log("***Forced salvage of all volumes on this partition***\n");
1168 OS_SEEK(inodeFile, 0L, SEEK_SET);
1169 salvinfo->inodeFd = inodeFile;
1170 if (salvinfo->inodeFd == INVALID_FD ||
1171 (st_size = OS_SIZE(salvinfo->inodeFd)) == -1) {
1172 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1174 tdir = (tmpdir ? tmpdir : part);
1176 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1177 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp."));
1179 snprintf(summaryFileName, sizeof summaryFileName,
1180 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1182 summaryFile = OS_OPEN(summaryFileName, O_RDWR|O_APPEND|O_CREAT, 0666);
1183 if (summaryFile == INVALID_FD) {
1184 Abort("Unable to create inode summary file\n");
1188 /* Using nt_unlink here since we're really using the delete on close
1189 * semantics of unlink. In most places in the salvager, we really do
1190 * mean to unlink the file at that point. Those places have been
1191 * modified to actually do that so that the NT crt can be used there.
1193 * jaltman - As commented elsewhere, this cannot work because fopen()
1194 * does not open files with DELETE and FILE_SHARE_DELETE.
1196 code = nt_unlink(summaryFileName);
1198 code = unlink(summaryFileName);
1201 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1204 if (!canfork || debug || Fork() == 0) {
1205 int nInodes = st_size / sizeof(struct ViceInodeInfo);
1207 OS_CLOSE(summaryFile);
1208 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1209 RemoveTheForce(salvinfo->fileSysPath);
1211 struct VolumeSummary *vsp;
1215 GetVolumeSummary(salvinfo, singleVolumeNumber);
1217 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1218 if (vsp->fileName) {
1219 if (vsp->header.id == singleVolumeNumber) {
1222 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1228 MaybeAskOnline(salvinfo, singleVolumeNumber);
1230 /* make sure we get rid of stray .vol headers, even if
1231 * they're not in our volume summary (might happen if
1232 * e.g. something else created them and they're not in the
1233 * fileserver VGC) */
1234 VDestroyVolumeDiskHeader(salvinfo->fileSysPartition,
1235 singleVolumeNumber, 0 /*parent*/);
1236 AskDelete(salvinfo, singleVolumeNumber);
1240 Log("%s vice inodes on %s; not salvaged\n",
1241 singleVolumeNumber ? "No applicable" : "No", dev);
1246 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1248 OS_CLOSE(summaryFile);
1250 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1253 if (OS_READ(salvinfo->inodeFd, ip, st_size) != st_size) {
1254 OS_CLOSE(summaryFile);
1255 Abort("Unable to read inode table; %s not salvaged\n", dev);
1257 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1258 if (OS_SEEK(salvinfo->inodeFd, 0, SEEK_SET) == -1
1259 || OS_WRITE(salvinfo->inodeFd, ip, st_size) != st_size) {
1260 OS_CLOSE(summaryFile);
1261 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1266 CountVolumeInodes(ip, nInodes, &summary);
1267 if (OS_WRITE(summaryFile, &summary, sizeof(summary)) != sizeof(summary)) {
1268 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1269 OS_CLOSE(summaryFile);
1273 summary.index += (summary.nInodes);
1274 nInodes -= summary.nInodes;
1275 ip += summary.nInodes;
1278 ip = ip_save = NULL;
1279 /* Following fflush is not fclose, because if it was debug mode would not work */
1280 if (OS_SYNC(summaryFile) == -1) {
1281 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1282 OS_CLOSE(summaryFile);
1286 if (canfork && !debug) {
1291 if (Wait("Inode summary") == -1) {
1292 OS_CLOSE(summaryFile);
1293 Exit(1); /* salvage of this partition aborted */
1297 st_size = OS_SIZE(summaryFile);
1298 osi_Assert(st_size >= 0);
1301 salvinfo->inodeSummary = (struct InodeSummary *)malloc(st_size);
1302 osi_Assert(salvinfo->inodeSummary != NULL);
1303 /* For GNU we need to do lseek to get the file pointer moved. */
1304 osi_Assert(OS_SEEK(summaryFile, 0, SEEK_SET) == 0);
1305 ret = OS_READ(summaryFile, salvinfo->inodeSummary, st_size);
1306 osi_Assert(ret == st_size);
1308 salvinfo->nVolumesInInodeFile = st_size / sizeof(struct InodeSummary);
1309 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1310 salvinfo->inodeSummary[i].volSummary = NULL;
1312 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)st_size);
1313 OS_CLOSE(summaryFile);
1316 if (retcode && singleVolumeNumber && !deleted) {
1317 AskError(salvinfo, singleVolumeNumber);
1323 /* Comparison routine for volume sort.
1324 This is setup so that a read-write volume comes immediately before
1325 any read-only clones of that volume */
1327 CompareVolumes(const void *_p1, const void *_p2)
1329 const struct VolumeSummary *p1 = _p1;
1330 const struct VolumeSummary *p2 = _p2;
1331 if (p1->header.parent != p2->header.parent)
1332 return p1->header.parent < p2->header.parent ? -1 : 1;
1333 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1335 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1337 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1341 * Gleans volumeSummary information by asking the fileserver
1343 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1344 * salvaging a whole partition
1346 * @return whether we obtained the volume summary information or not
1347 * @retval 0 success; we obtained the volume summary information
1348 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1350 * @retval 1 we did not get the volume summary information; either the
1351 * fileserver responded with an error, or we are not supposed to
1352 * ask the fileserver for the information (e.g. we are salvaging
1353 * the entire partition or we are not the salvageserver)
1355 * @note for non-DAFS, always returns 1
1358 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1361 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1362 if (programType == salvageServer) {
1363 if (singleVolumeNumber) {
1364 FSSYNC_VGQry_response_t q_res;
1366 struct VolumeSummary *vsp;
1368 struct VolumeDiskHeader diskHdr;
1370 memset(&res, 0, sizeof(res));
1372 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1375 * We must wait for the partition to finish scanning before
1376 * can continue, since we will not know if we got the entire
1377 * VG membership unless the partition is fully scanned.
1378 * We could, in theory, just scan the partition ourselves if
1379 * the VG cache is not ready, but we would be doing the exact
1380 * same scan the fileserver is doing; it will almost always
1381 * be faster to wait for the fileserver. The only exceptions
1382 * are if the partition does not take very long to scan, and
1383 * in that case it's fast either way, so who cares?
1385 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1386 Log("waiting for fileserver to finish scanning partition %s...\n",
1387 salvinfo->fileSysPartition->name);
1389 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1390 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1391 * just so small partitions don't need to wait over 10
1392 * seconds every time, and large partitions are generally
1393 * polled only once every ten seconds. */
1394 sleep((i > 10) ? (i = 10) : i);
1396 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1400 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1401 /* This can happen if there's no header for the volume
1402 * we're salvaging, or no headers exist for the VG (if
1403 * we're salvaging an RW). Act as if we got a response
1404 * with no VG members. The headers may be created during
1405 * salvaging, if there are inodes in this VG. */
1407 memset(&q_res, 0, sizeof(q_res));
1408 q_res.rw = singleVolumeNumber;
1412 Log("fileserver refused VGCQuery request for volume %lu on "
1413 "partition %s, code %ld reason %ld\n",
1414 afs_printable_uint32_lu(singleVolumeNumber),
1415 salvinfo->fileSysPartition->name,
1416 afs_printable_int32_ld(code),
1417 afs_printable_int32_ld(res.hdr.reason));
1421 if (q_res.rw != singleVolumeNumber) {
1422 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1423 afs_printable_uint32_lu(singleVolumeNumber),
1424 afs_printable_uint32_lu(q_res.rw));
1425 #ifdef SALVSYNC_BUILD_CLIENT
1426 if (SALVSYNC_LinkVolume(q_res.rw,
1428 salvinfo->fileSysPartition->name,
1430 Log("schedule request failed\n");
1432 #endif /* SALVSYNC_BUILD_CLIENT */
1433 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1436 salvinfo->volumeSummaryp = calloc(VOL_VG_MAX_VOLS, sizeof(struct VolumeSummary));
1437 osi_Assert(salvinfo->volumeSummaryp != NULL);
1439 salvinfo->nVolumes = 0;
1440 vsp = salvinfo->volumeSummaryp;
1442 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1443 char name[VMAXPATHLEN];
1445 if (!q_res.children[i]) {
1449 /* AskOffline for singleVolumeNumber was called much earlier */
1450 if (q_res.children[i] != singleVolumeNumber) {
1451 AskOffline(salvinfo, q_res.children[i]);
1452 if (LockVolume(salvinfo, q_res.children[i])) {
1458 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1460 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1461 afs_printable_uint32_lu(q_res.children[i]));
1466 DiskToVolumeHeader(&vsp->header, &diskHdr);
1467 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1468 vsp->fileName = ToString(name);
1469 salvinfo->nVolumes++;
1473 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1478 Log("Cannot get volume summary from fileserver; falling back to scanning "
1479 "entire partition\n");
1482 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
1487 * count how many volume headers are found by VWalkVolumeHeaders.
1489 * @param[in] dp the disk partition (unused)
1490 * @param[in] name full path to the .vol header (unused)
1491 * @param[in] hdr the header data (unused)
1492 * @param[in] last whether this is the last try or not (unused)
1493 * @param[in] rock actually an afs_int32*; the running count of how many
1494 * volumes we have found
1499 CountHeader(struct DiskPartition64 *dp, const char *name,
1500 struct VolumeDiskHeader *hdr, int last, void *rock)
1502 afs_int32 *nvols = (afs_int32 *)rock;
1508 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
1511 struct SalvageScanParams {
1512 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1513 * vol id of the VG we're salvaging */
1514 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1515 * we're filling in */
1516 afs_int32 nVolumes; /**< # of vols we've encountered */
1517 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1518 * # of vols we've alloc'd memory for) */
1519 int retry; /**< do we need to retry vol lock/checkout? */
1520 struct SalvInfo *salvinfo; /**< salvage job info */
1524 * records volume summary info found from VWalkVolumeHeaders.
1526 * Found volumes are also taken offline if they are in the specific volume
1527 * group we are looking for.
1529 * @param[in] dp the disk partition
1530 * @param[in] name full path to the .vol header
1531 * @param[in] hdr the header data
1532 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1533 * @param[in] rock actually a struct SalvageScanParams*, containing the
1534 * information needed to record the volume summary data
1536 * @return operation status
1538 * @retval -1 volume locking raced with fileserver restart; checking out
1539 * and locking volumes needs to be retried
1540 * @retval 1 volume header is mis-named and should be deleted
1543 RecordHeader(struct DiskPartition64 *dp, const char *name,
1544 struct VolumeDiskHeader *hdr, int last, void *rock)
1546 char nameShouldBe[64];
1547 struct SalvageScanParams *params;
1548 struct VolumeSummary summary;
1549 VolumeId singleVolumeNumber;
1550 struct SalvInfo *salvinfo;
1552 params = (struct SalvageScanParams *)rock;
1554 singleVolumeNumber = params->singleVolumeNumber;
1555 salvinfo = params->salvinfo;
1557 DiskToVolumeHeader(&summary.header, hdr);
1559 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1560 && summary.header.parent != singleVolumeNumber) {
1562 if (programType == salvageServer) {
1563 #ifdef SALVSYNC_BUILD_CLIENT
1564 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1565 summary.header.id, summary.header.parent);
1566 if (SALVSYNC_LinkVolume(summary.header.parent,
1570 Log("schedule request failed\n");
1573 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1576 Log("%u is a read-only volume; not salvaged\n",
1577 singleVolumeNumber);
1582 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1583 || summary.header.parent == singleVolumeNumber) {
1585 /* check if the header file is incorrectly named */
1587 const char *base = strrchr(name, OS_DIRSEPC);
1594 snprintf(nameShouldBe, sizeof nameShouldBe,
1595 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1598 if (strcmp(nameShouldBe, base)) {
1599 /* .vol file has wrong name; retry/delete */
1603 if (!badname || last) {
1604 /* only offline the volume if the header is good, or if this is
1605 * the last try looking at it; avoid AskOffline'ing the same vol
1608 if (singleVolumeNumber
1609 && summary.header.id != singleVolumeNumber) {
1610 /* don't offline singleVolumeNumber; we already did that
1613 AskOffline(salvinfo, summary.header.id);
1615 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
1617 /* don't lock the volume if the header is bad, since we're
1618 * about to delete it anyway. */
1619 if (LockVolume(salvinfo, summary.header.id)) {
1624 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
1628 if (last && !Showmode) {
1629 Log("Volume header file %s is incorrectly named (should be %s "
1630 "not %s); %sdeleted (it will be recreated later, if "
1631 "necessary)\n", name, nameShouldBe, base,
1632 (Testing ? "it would have been " : ""));
1637 summary.fileName = ToString(base);
1640 if (params->nVolumes > params->totalVolumes) {
1641 /* We found more volumes than we found on the first partition walk;
1642 * apparently something created a volume while we were
1643 * partition-salvaging, or we found more than 20 vols when salvaging a
1644 * particular volume. Abort if we detect this, since other programs
1645 * supposed to not touch the partition while it is partition-salvaging,
1646 * and we shouldn't find more than 20 vols in a VG.
1648 Abort("Found %ld vol headers, but should have found at most %ld! "
1649 "Make sure the volserver/fileserver are not running at the "
1650 "same time as a partition salvage\n",
1651 afs_printable_int32_ld(params->nVolumes),
1652 afs_printable_int32_ld(params->totalVolumes));
1655 memcpy(params->vsp, &summary, sizeof(summary));
1663 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1665 * If the header could not be read in at all, the header is always unlinked.
1666 * If instead RecordHeader said the header was bad (that is, the header file
1667 * is mis-named), we only unlink if we are doing a partition salvage, as
1668 * opposed to salvaging a specific volume group.
1670 * @param[in] dp the disk partition
1671 * @param[in] name full path to the .vol header
1672 * @param[in] hdr header data, or NULL if the header could not be read
1673 * @param[in] rock actually a struct SalvageScanParams*, with some information
1677 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1678 struct VolumeDiskHeader *hdr, void *rock)
1680 struct SalvageScanParams *params;
1683 params = (struct SalvageScanParams *)rock;
1686 /* no header; header is too bogus to read in at all */
1688 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1694 } else if (!params->singleVolumeNumber) {
1695 /* We were able to read in a header, but RecordHeader said something
1696 * was wrong with it. We only unlink those if we are doing a partition
1703 if (dounlink && unlink(name)) {
1704 Log("Error %d while trying to unlink %s\n", errno, name);
1709 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1710 * the fileserver for VG information, or by scanning the /vicepX partition.
1712 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1713 * are salvaging, or 0 if this is a partition
1716 * @return operation status
1718 * @retval -1 we raced with a fileserver restart; checking out and locking
1719 * volumes must be retried
1722 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1724 afs_int32 nvols = 0;
1725 struct SalvageScanParams params;
1728 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1730 /* we successfully got the vol information from the fileserver; no
1731 * need to scan the partition */
1735 /* we need to retry volume checkout */
1739 if (!singleVolumeNumber) {
1740 /* Count how many volumes we have in /vicepX */
1741 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1744 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1749 nvols = VOL_VG_MAX_VOLS;
1752 salvinfo->volumeSummaryp = calloc(nvols, sizeof(struct VolumeSummary));
1753 osi_Assert(salvinfo->volumeSummaryp != NULL);
1755 params.singleVolumeNumber = singleVolumeNumber;
1756 params.vsp = salvinfo->volumeSummaryp;
1757 params.nVolumes = 0;
1758 params.totalVolumes = nvols;
1760 params.salvinfo = salvinfo;
1762 /* walk the partition directory of volume headers and record the info
1763 * about them; unlinking invalid headers */
1764 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1765 UnlinkHeader, ¶ms);
1767 /* we apparently need to retry checking-out/locking volumes */
1771 Abort("Failed to get volume header summary\n");
1773 salvinfo->nVolumes = params.nVolumes;
1775 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1781 /* Find the link table. This should be associated with the RW volume or, if
1782 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
1785 FindLinkHandle(struct InodeSummary *isp, int nVols,
1786 struct ViceInodeInfo *allInodes)
1789 struct ViceInodeInfo *ip;
1791 for (i = 0; i < nVols; i++) {
1792 ip = allInodes + isp[i].index;
1793 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1794 if (ip[j].u.special.type == VI_LINKTABLE)
1795 return ip[j].inodeNumber;
1802 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1804 struct versionStamp version;
1807 if (!VALID_INO(ino))
1809 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
1810 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1811 if (!VALID_INO(ino))
1813 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1814 isp->RWvolumeId, errno);
1815 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1816 fdP = IH_OPEN(salvinfo->VGLinkH);
1818 Abort("Can't open link table for volume %u (error = %d)\n",
1819 isp->RWvolumeId, errno);
1821 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1822 Abort("Can't truncate link table for volume %u (error = %d)\n",
1823 isp->RWvolumeId, errno);
1825 version.magic = LINKTABLEMAGIC;
1826 version.version = LINKTABLEVERSION;
1828 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1830 Abort("Can't truncate link table for volume %u (error = %d)\n",
1831 isp->RWvolumeId, errno);
1833 FDH_REALLYCLOSE(fdP);
1835 /* If the volume summary exits (i.e., the V*.vol header file exists),
1836 * then set this inode there as well.
1838 if (isp->volSummary)
1839 isp->volSummary->header.linkTable = ino;
#ifdef AFS_NT40_ENV
/**
 * Thread entry point used by SalvageVolumeGroup.
 *
 * @param[in] arg  actually an SVGParms_t* describing the volume group
 *
 * @return NULL always
 */
void *
nt_SVG(void *arg)
{
    SVGParms_t *parms = (SVGParms_t *) arg;
    DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
    return NULL;
}

/**
 * Salvage one volume group by running DoSalvageVolumeGroup on a separate
 * joinable thread and waiting for that thread to finish.
 *
 * @param[in] salvinfo  salvage job info; its per-volume-group state is
 *                      reset before the thread is started
 * @param[in] isp       inode summaries of the volumes in the group
 * @param[in] nVols     number of entries in isp
 *
 * @note if the thread cannot be set up or created, the failure is logged
 *       and the group is not salvaged
 */
void
SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
{
    pthread_t tid;
    pthread_attr_t tattr;
    int code;
    SVGParms_t parms;

    /* Initialize per volume global variables, even if later code does so */
    salvinfo->VolumeChanged = 0;
    salvinfo->VGLinkH = NULL;
    salvinfo->VGLinkH_cnt = 0;
    memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));

    /* parms lives on this stack frame; that is safe because we join the
     * thread before returning */
    parms.svgp_inodeSummaryp = isp;
    parms.svgp_count = nVols;
    parms.svgp_salvinfo = salvinfo;
    code = pthread_attr_init(&tattr);
    if (code) {
        Log("Failed to salvage volume group %u: pthread_attr_init()\n",
            isp->RWvolumeId);
        return;
    }
    code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
    if (code) {
        Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
        return;
    }
    code = pthread_create(&tid, &tattr, nt_SVG, &parms);
    if (code) {
        Log("Failed to create thread to salvage volume group %u\n",
            isp->RWvolumeId);
        return;
    }
    (void)pthread_join(tid, NULL);
}
#endif /* AFS_NT40_ENV */
1892 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1894 struct ViceInodeInfo *inodes, *allInodes, *ip;
1895 int i, totalInodes, size, salvageTo;
1899 int dec_VGLinkH = 0;
1901 FdHandle_t *fdP = NULL;
1903 salvinfo->VGLinkH_cnt = 0;
1904 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1905 && isp->nSpecialInodes > 0);
1906 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1907 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1910 if (ShowMounts && !haveRWvolume)
1912 if (canfork && !debug && Fork() != 0) {
1913 (void)Wait("Salvage volume group");
1916 for (i = 0, totalInodes = 0; i < nVols; i++)
1917 totalInodes += isp[i].nInodes;
1918 size = totalInodes * sizeof(struct ViceInodeInfo);
1919 inodes = (struct ViceInodeInfo *)malloc(size);
1920 allInodes = inodes - isp->index; /* this would the base of all the inodes
1921 * for the partition, if all the inodes
1922 * had been read into memory */
1924 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1926 osi_Assert(OS_READ(salvinfo->inodeFd, inodes, size) == size);
1928 /* Don't try to salvage a read write volume if there isn't one on this
1930 salvageTo = haveRWvolume ? 0 : 1;
1932 #ifdef AFS_NAMEI_ENV
1933 ino = FindLinkHandle(isp, nVols, allInodes);
1934 if (VALID_INO(ino)) {
1935 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1936 fdP = IH_OPEN(salvinfo->VGLinkH);
1938 if (!VALID_INO(ino) || fdP == NULL) {
1939 Log("%s link table for volume %u.\n",
1940 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1942 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1945 struct ViceInodeInfo *ip;
1946 CreateLinkTable(salvinfo, isp, ino);
1947 fdP = IH_OPEN(salvinfo->VGLinkH);
1948 /* Sync fake 1 link counts to the link table, now that it exists */
1950 for (i = 0; i < nVols; i++) {
1951 ip = allInodes + isp[i].index;
1952 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1953 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1960 FDH_REALLYCLOSE(fdP);
1962 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1965 /* Salvage in reverse order--read/write volume last; this way any
1966 * Inodes not referenced by the time we salvage the read/write volume
1967 * can be picked up by the read/write volume */
1968 /* ACTUALLY, that's not done right now--the inodes just vanish */
1969 for (i = nVols - 1; i >= salvageTo; i--) {
1971 struct InodeSummary *lisp = &isp[i];
1972 #ifdef AFS_NAMEI_ENV
1973 /* If only the RO is present on this partition, the link table
1974 * shows up as a RW volume special file. Need to make sure the
1975 * salvager doesn't try to salvage the non-existent RW.
1977 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1978 /* If this only special inode is the link table, continue */
1979 if (inodes->u.special.type == VI_LINKTABLE) {
1986 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1987 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1988 /* Check inodes twice. The second time do things seriously. This
1989 * way the whole RO volume can be deleted, below, if anything goes wrong */
1990 for (check = 1; check >= 0; check--) {
1992 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
1994 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
1995 if (rw && deleteMe) {
1996 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1997 * volume won't be called */
2003 if (rw && check == 1)
2005 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
2006 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
2012 /* Fix actual inode counts */
2015 Log("totalInodes %d\n",totalInodes);
2016 for (ip = inodes; totalInodes; ip++, totalInodes--) {
2017 static int TraceBadLinkCounts = 0;
2018 #ifdef AFS_NAMEI_ENV
2019 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
2020 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
2021 VGLinkH_p1 = ip->u.param[0];
2022 continue; /* Deal with this last. */
2025 if (ip->linkCount != 0 && TraceBadLinkCounts) {
2026 TraceBadLinkCounts--; /* Limit reports, per volume */
2027 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
2029 while (ip->linkCount > 0) {
2030 /* below used to assert, not break */
2032 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2033 Log("idec failed. inode %s errno %d\n",
2034 PrintInode(stmp, ip->inodeNumber), errno);
2040 while (ip->linkCount < 0) {
2041 /* these used to be asserts */
2043 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2044 Log("iinc failed. inode %s errno %d\n",
2045 PrintInode(stmp, ip->inodeNumber), errno);
2052 #ifdef AFS_NAMEI_ENV
2053 while (dec_VGLinkH > 0) {
2054 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2055 Log("idec failed on link table, errno = %d\n", errno);
2059 while (dec_VGLinkH < 0) {
2060 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2061 Log("iinc failed on link table, errno = %d\n", errno);
2068 /* Directory consistency checks on the rw volume */
2070 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2071 IH_RELEASE(salvinfo->VGLinkH);
2073 if (canfork && !debug) {
2080 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2082 /* Check headers BEFORE forking */
2086 for (i = 0; i < nVols; i++) {
2087 struct VolumeSummary *vs = isp[i].volSummary;
2088 VolumeDiskData volHeader;
2090 /* Don't salvage just because phantom rw volume is there... */
2091 /* (If a read-only volume exists, read/write inodes must also exist) */
2092 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2096 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2097 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2098 == sizeof(volHeader)
2099 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2100 && volHeader.dontSalvage == DONT_SALVAGE
2101 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2102 if (volHeader.inUse != 0) {
2103 volHeader.inUse = 0;
2104 volHeader.inService = 1;
2106 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2107 != sizeof(volHeader)) {
2123 /* SalvageVolumeHeaderFile
2125 * Salvage the top level V*.vol header file. Make sure the special files
2126 * exist and that there are no duplicates.
2128 * Calls SalvageHeader for each possible type of volume special file.
2132 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2133 struct ViceInodeInfo *inodes, int RW,
2134 int check, int *deleteMe)
2137 struct ViceInodeInfo *ip;
2138 int allinodesobsolete = 1;
2139 struct VolumeDiskHeader diskHeader;
2140 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2142 struct VolumeHeader tempHeader;
2143 struct afs_inode_info stuff[MAXINODETYPE];
2145 /* keeps track of special inodes that are probably 'good'; they are
2146 * referenced in the vol header, and are included in the given inodes
2151 } goodspecial[MAXINODETYPE];
2156 memset(goodspecial, 0, sizeof(goodspecial));
2158 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2160 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2162 Log("cannot allocate memory for inode skip array when salvaging "
2163 "volume %lu; not performing duplicate special inode recovery\n",
2164 afs_printable_uint32_lu(isp->volumeId));
2165 /* still try to perform the salvage; the skip array only does anything
2166 * if we detect duplicate special inodes */
2169 init_inode_info(&tempHeader, stuff);
2172 * First, look at the special inodes and see if any are referenced by
2173 * the existing volume header. If we find duplicate special inodes, we
2174 * can use this information to use the referenced inode (it's more
2175 * likely to be the 'good' one), and throw away the duplicates.
2177 if (isp->volSummary && skip) {
2178 /* use tempHeader, so we can use the stuff[] array to easily index
2179 * into the isp->volSummary special inodes */
2180 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2182 for (i = 0; i < isp->nSpecialInodes; i++) {
2183 ip = &inodes[isp->index + i];
2184 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2185 /* will get taken care of in a later loop */
2188 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2189 goodspecial[ip->u.special.type-1].valid = 1;
2190 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2195 memset(&tempHeader, 0, sizeof(tempHeader));
2196 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2197 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2198 tempHeader.id = isp->volumeId;
2199 tempHeader.parent = isp->RWvolumeId;
2201 /* Check for duplicates (inodes are sorted by type field) */
2202 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2203 ip = &inodes[isp->index + i];
2204 if (ip->u.special.type == (ip + 1)->u.special.type) {
2205 afs_ino_str_t stmp1, stmp2;
2207 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2208 /* Will be caught in the loop below */
2212 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2213 ip->u.special.type, isp->volumeId,
2214 PrintInode(stmp1, ip->inodeNumber),
2215 PrintInode(stmp2, (ip+1)->inodeNumber));
2217 if (skip && goodspecial[ip->u.special.type-1].valid) {
2218 Inode gi = goodspecial[ip->u.special.type-1].inode;
2221 Log("using special inode referenced by vol header (%s)\n",
2222 PrintInode(stmp1, gi));
2225 /* the volume header references some special inode of
2226 * this type in the inodes array; are we it? */
2227 if (ip->inodeNumber != gi) {
2229 } else if ((ip+1)->inodeNumber != gi) {
2230 /* in case this is the last iteration; we need to
2231 * make sure we check ip+1, too */
2236 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2244 for (i = 0; i < isp->nSpecialInodes; i++) {
2246 ip = &inodes[isp->index + i];
2247 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2249 Log("Rubbish header inode %s of type %d\n",
2250 PrintInode(stmp, ip->inodeNumber),
2251 ip->u.special.type);
2257 Log("Rubbish header inode %s of type %d; deleted\n",
2258 PrintInode(stmp, ip->inodeNumber),
2259 ip->u.special.type);
2260 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2261 if (skip && skip[i]) {
2262 if (orphans == ORPH_REMOVE) {
2263 Log("Removing orphan special inode %s of type %d\n",
2264 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2267 Log("Ignoring orphan special inode %s of type %d\n",
2268 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2269 /* fall through to the ip->linkCount--; line below */
2272 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2273 allinodesobsolete = 0;
2275 if (!check && ip->u.special.type != VI_LINKTABLE)
2276 ip->linkCount--; /* Keep the inode around */
2284 if (allinodesobsolete) {
2291 salvinfo->VGLinkH_cnt++; /* one for every header. */
2293 if (!RW && !check && isp->volSummary) {
2294 ClearROInUseBit(isp->volSummary);
2298 for (i = 0; i < MAXINODETYPE; i++) {
2299 if (stuff[i].inodeType == VI_LINKTABLE) {
2300 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2301 * And we may have recreated the link table earlier, so set the
2302 * RW header as well.
2304 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2305 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2309 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2313 if (isp->volSummary == NULL) {
2315 char headerName[64];
2316 snprintf(headerName, sizeof headerName, VFORMAT,
2317 afs_printable_uint32_lu(isp->volumeId));
2318 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2319 salvinfo->fileSysPath, headerName);
2321 Log("No header file for volume %u\n", isp->volumeId);
2325 Log("No header file for volume %u; %screating %s\n",
2326 isp->volumeId, (Testing ? "it would have been " : ""),
2328 isp->volSummary = calloc(1, sizeof(struct VolumeSummary));
2329 isp->volSummary->fileName = ToString(headerName);
2331 writefunc = VCreateVolumeDiskHeader;
2334 char headerName[64];
2335 /* hack: these two fields are obsolete... */
2336 isp->volSummary->header.volumeAcl = 0;
2337 isp->volSummary->header.volumeMountTable = 0;
2340 (&isp->volSummary->header, &tempHeader,
2341 sizeof(struct VolumeHeader))) {
2342 /* We often remove the name before calling us, so we make a fake one up */
2343 if (isp->volSummary->fileName) {
2344 strcpy(headerName, isp->volSummary->fileName);
2346 snprintf(headerName, sizeof headerName, VFORMAT,
2347 afs_printable_uint32_lu(isp->volumeId));
2348 isp->volSummary->fileName = ToString(headerName);
2350 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2351 salvinfo->fileSysPath, headerName);
2353 Log("Header file %s is damaged or no longer valid%s\n", path,
2354 (check ? "" : "; repairing"));
2358 writefunc = VWriteVolumeDiskHeader;
2362 memcpy(&isp->volSummary->header, &tempHeader,
2363 sizeof(struct VolumeHeader));
2366 Log("It would have written a new header file for volume %u\n",
2370 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2371 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2373 Log("Error %ld writing volume header file for volume %lu\n",
2374 afs_printable_int32_ld(code),
2375 afs_printable_uint32_lu(diskHeader.id));
2380 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2381 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader: validate one special inode referenced by a volume header
 * and recreate its contents when it is missing or corrupted.
 *
 * salvinfo - state of the current salvage job; salvinfo->VolInfo is filled
 *            in when sp describes the VI_VOLINFO inode.
 * sp       - descriptor of the special inode under test: where its inode
 *            number is stored, its type, expected size, version stamp and a
 *            printable description.
 * isp      - inode summary of the volume (volumeId, RWvolumeId,
 *            maxUniquifier are read here).
 * check    - check-only mode; the log messages without a "recreating"
 *            suffix appear to belong to this mode, and recreating while
 *            check is set is reported as an internal error.
 * deleteMe - presumably set when the volume info carries DESTROY_ME; the
 *            assignment is on a line not shown here -- TODO confirm.
 *
 * NOTE(review): the return statements are not visible in this listing; the
 * one visible caller only compares the result against -1.
 */
2386 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2387 struct InodeSummary *isp, int check, int *deleteMe)
2390 VolumeDiskData volumeInfo;
2391 struct versionStamp fileHeader;
2400 #ifndef AFS_NAMEI_ENV
2401 if (sp->inodeType == VI_LINKTABLE)
/* A zero inode number means this header component is missing entirely. */
2404 if (*(sp->inode) == 0) {
2406 Log("Missing inode in volume header (%s)\n", sp->description);
2410 Log("Missing inode in volume header (%s); %s\n", sp->description,
2411 (Testing ? "it would have recreated it" : "recreating"));
2414 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2415 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2416 if (!VALID_INO(*(sp->inode)))
2418 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2419 sp->description, errno);
/* Open the special inode so that its stored header can be read and checked. */
2424 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2425 fdP = IH_OPEN(specH);
2426 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2427 /* bail out early and destroy the volume */
2429 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2436 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2437 sp->description, errno);
2440 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2441 || header.fileHeader.magic != sp->stamp.magic)) {
2443 Log("Part of the header (%s) is corrupted\n", sp->description);
2444 FDH_REALLYCLOSE(fdP);
2448 Log("Part of the header (%s) is corrupted; recreating\n",
2451 /* header can be garbage; make sure we don't read garbage data from
2453 memset(&header, 0, sizeof(header));
2455 if (sp->inodeType == VI_VOLINFO
2456 && header.volumeInfo.destroyMe == DESTROY_ME) {
2459 FDH_REALLYCLOSE(fdP);
2463 if (recreate && !Testing) {
2466 ("Internal error: recreating volume header (%s) in check mode\n",
2468 nBytes = FDH_TRUNC(fdP, 0);
2470 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2471 sp->description, errno);
2473 /* The following code should be moved into vutil.c */
/*
 * Volume info is rebuilt as a placeholder: name "bogus.<id>", out of
 * service, not blessed, uniquifier pushed past the largest one seen.
 * The later stamp-only write presumably serves the other special inode
 * types -- the branch keyword is on a line not shown here.
 */
2474 if (sp->inodeType == VI_VOLINFO) {
2476 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2477 header.volumeInfo.stamp = sp->stamp;
2478 header.volumeInfo.id = isp->volumeId;
2479 header.volumeInfo.parentId = isp->RWvolumeId;
2480 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2481 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2482 isp->volumeId, isp->volumeId);
2483 header.volumeInfo.inService = 0;
2484 header.volumeInfo.blessed = 0;
2485 /* The + 1000 is a hack in case there are any files out in venus caches */
2486 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2487 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2488 header.volumeInfo.needsCallback = 0;
2489 gettimeofday(&tp, NULL);
2490 header.volumeInfo.creationDate = tp.tv_sec;
2492 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2493 sizeof(header.volumeInfo), 0);
2494 if (nBytes != sizeof(header.volumeInfo)) {
2497 ("Unable to write volume header file (%s) (errno = %d)\n",
2498 sp->description, errno);
2499 Abort("Unable to write entire volume header file (%s)\n",
2503 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2504 if (nBytes != sizeof(sp->stamp)) {
2507 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2508 sp->description, errno);
2510 ("Unable to write entire version stamp in volume header file (%s)\n",
2515 FDH_REALLYCLOSE(fdP);
/*
 * For the volume info inode, publish what was read (or rebuilt) to the
 * salvage state and log the update date, or the creation date when the
 * volume has no update date.
 */
2517 if (sp->inodeType == VI_VOLINFO) {
2518 salvinfo->VolInfo = header.volumeInfo;
2522 if (salvinfo->VolInfo.updateDate) {
2523 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2525 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2526 salvinfo->VolInfo.id,
2527 (Testing ? "it would have been " : ""), update);
2529 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2531 Log("%s (%u) not updated (created %s)\n",
2532 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
/*
 * SalvageVnodes: salvage both vnode indexes (small, then large) of one
 * volume in a volume group.
 *
 * salvinfo - state of the current salvage job.
 * rwIsp    - inode summary of the read/write volume; it supplies the slice
 *            of the inode array that is passed down.
 * thisIsp  - inode summary of the volume whose indexes are salvaged; it
 *            is the read/write volume itself when thisIsp == rwIsp.
 * inodes   - inode array for the whole group.
 * check    - check-only flag, forwarded to SalvageIndex.
 *
 * Returns 0 when both SalvageIndex results are 0, otherwise -1.
 */
2542 SalvageVnodes(struct SalvInfo *salvinfo,
2543 struct InodeSummary *rwIsp,
2544 struct InodeSummary *thisIsp,
2545 struct ViceInodeInfo *inodes, int check)
2547 int ilarge, ismall, ioffset, RW, nInodes;
/* Skip the special inodes at the front of the read/write summary slice. */
2548 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2551 RW = (rwIsp == thisIsp);
2552 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
/* Small vnode index first; its result is tested as ismall just below. */
2554 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2555 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
/* In check mode a failed small index is acted on before the large index
 * is examined; the action itself is on a line not shown here. */
2556 if (check && ismall == -1)
2559 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2560 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2561 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex: reconcile one vnode index file with the inodes that were
 * actually found on disk for the volume group.
 *
 * salvinfo   - state of the current salvage job; VolumeChanged is set when
 *              a vnode is rewritten.
 * ino        - inode number of the vnode index file to salvage.
 * class      - vnode class of that index (selects VnodeClassInfo[class]).
 * RW         - nonzero when the index belongs to the read/write volume.
 * ip/nInodes - inode records to match against; the loops below advance
 *              through them by ascending vnode number.
 * volSummary - summary of the volume owning the index (header.parent is
 *              used to open the index).
 * check      - check-only flag; the function asserts that no vnode was
 *              changed while check is set.
 *
 * For every allocated vnode the code zeroes records with no inode number or
 * a bad magic number, picks the matching inode, brings the uniquifier,
 * data version, inode number and length in the vnode in line with that
 * inode, and zeroes vnodes whose inode is missing.  Changed vnodes are
 * written back unless Testing is set.
 *
 * NOTE(review): the return statements are not visible in this listing; the
 * visible caller treats 0 as success and tests for -1.
 */
2565 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2566 struct ViceInodeInfo *ip, int nInodes,
2567 struct VolumeSummary *volSummary, int check)
2569 char buf[SIZEOF_LARGEDISKVNODE];
2570 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2572 StreamHandle_t *file;
2573 struct VnodeClassInfo *vcp;
2575 afs_sfsize_t nVnodes;
2576 afs_fsize_t vnodeLength;
2578 afs_ino_str_t stmp1, stmp2;
/* Open the index file; any failure to open or size it is fatal (assert). */
2582 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2583 fdP = IH_OPEN(handle);
2584 osi_Assert(fdP != NULL);
2585 file = FDH_FDOPEN(fdP, "r+");
2586 osi_Assert(file != NULL);
2587 vcp = &VnodeClassInfo[class];
2588 size = OS_SIZE(fdP->fd_fd);
2589 osi_Assert(size != -1);
/* The first diskSize bytes of the file are skipped and not counted as a vnode. */
2590 nVnodes = (size / vcp->diskSize) - 1;
2592 osi_Assert((nVnodes + 1) * vcp->diskSize == size);
2593 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
/* Walk the vnode records in index order. */
2597 for (vnodeIndex = 0;
2598 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2599 nVnodes--, vnodeIndex++) {
2600 if (vnode->type != vNull) {
2601 int vnodeChanged = 0;
2602 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2603 if (VNDISK_GET_INO(vnode) == 0) {
2605 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2606 memset(vnode, 0, vcp->diskSize);
2610 if (vcp->magic != vnode->vnodeMagic) {
2611 /* bad magic #, probably partially created vnode */
2613 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2614 vnodeNumber, afs_printable_uint32_lu(vnode->vnodeMagic),
2615 afs_printable_uint32_lu(vcp->magic));
2616 memset(vnode, 0, vcp->diskSize);
2620 Log("Partially allocated vnode %d deleted.\n",
2622 memset(vnode, 0, vcp->diskSize);
2626 /* ****** Should do a bit more salvage here: e.g. make sure
2627 * vnode type matches what it should be given the index */
2628 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2629 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2630 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2631 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2638 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2639 /* The following doesn't work, because the version number
2640 * is not maintained correctly by the file server */
2641 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2642 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2644 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2650 /* For RW volume, look for vnode with matching inode number;
2651 * if no such match, take the first determined by our sort
2653 struct ViceInodeInfo *lip = ip;
2654 int lnInodes = nInodes;
2656 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2657 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2666 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2667 /* "Matching" inode */
2671 vu = vnode->uniquifier;
2672 iu = ip->u.vnode.vnodeUniquifier;
2673 vd = vnode->dataVersion;
2674 id = ip->u.vnode.inodeDataVersion;
2676 * Because of the possibility of the uniquifier overflows (> 4M)
2677 * we compare them modulo the low 22-bits; we shouldn't worry
2678 * about mismatching since they shouldn't to many old
2679 * uniquifiers of the same vnode...
2681 if (IUnique(vu) != IUnique(iu)) {
2683 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2686 vnode->uniquifier = iu;
2687 #ifdef AFS_3DISPARES
2688 vnode->dataVersion = (id >= vd ?
2691 1887437 ? vd : id) :
2694 1887437 ? id : vd));
2696 #if defined(AFS_SGI_EXMAG)
2697 vnode->dataVersion = (id >= vd ?
2700 15099494 ? vd : id) :
2703 15099494 ? id : vd));
2705 vnode->dataVersion = (id > vd ? id : vd);
2706 #endif /* AFS_SGI_EXMAG */
2707 #endif /* AFS_3DISPARES */
2710 /* don't bother checking for vd > id any more, since
2711 * partial file transfers always result in this state,
2712 * and you can't do much else anyway (you've already
2713 * found the best data you can) */
2714 #ifdef AFS_3DISPARES
2715 if (!vnodeIsDirectory(vnodeNumber)
2716 && ((vd < id && (id - vd) < 1887437)
2717 || ((vd > id && (vd - id) > 1887437)))) {
2719 #if defined(AFS_SGI_EXMAG)
2720 if (!vnodeIsDirectory(vnodeNumber)
2721 && ((vd < id && (id - vd) < 15099494)
2722 || ((vd > id && (vd - id) > 15099494)))) {
2724 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2725 #endif /* AFS_SGI_EXMAG */
2728 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2729 vnode->dataVersion = id;
/* The inode found on disk is authoritative for the inode number. */
2734 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2737 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2739 VNDISK_SET_INO(vnode, ip->inodeNumber);
2744 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2746 VNDISK_SET_INO(vnode, ip->inodeNumber);
/* Likewise the byte count of the inode overrides the length in the vnode. */
2749 VNDISK_GET_LEN(vnodeLength, vnode);
2750 if (ip->byteCount != vnodeLength) {
2753 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2758 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2759 VNDISK_SET_LEN(vnode, ip->byteCount);
2763 ip->linkCount--; /* Keep the inode around */
2766 } else { /* no matching inode */
2768 if (VNDISK_GET_INO(vnode) != 0
2769 || vnode->type == vDirectory) {
2770 /* No matching inode--get rid of the vnode */
2772 if (VNDISK_GET_INO(vnode)) {
2774 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2778 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2783 if (VNDISK_GET_INO(vnode)) {
2785 time_t serverModifyTime = vnode->serverModifyTime;
2786 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2790 time_t serverModifyTime = vnode->serverModifyTime;
2791 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2794 memset(vnode, 0, vcp->diskSize);
2797 /* Should not reach here because we checked for
2798 * (inodeNumber == 0) above. And where we zero the vnode,
2799 * we also goto vnodeDone.
2803 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2807 } /* VNDISK_GET_INO(vnode) != 0 */
/* A changed vnode in check mode is treated as a program error; otherwise
 * the record is written back in place unless this is a dry run. */
2809 osi_Assert(!(vnodeChanged && check));
2810 if (vnodeChanged && !Testing) {
2811 osi_Assert(IH_IWRITE
2812 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2813 (char *)vnode, vcp->diskSize)
2815 salvinfo->VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber: look up the in-memory summary for a vnode number.
 *
 * The vnode number is split into its class and its bit offset within that
 * class; the result points into salvinfo->vnodeInfo[class].vnodes.
 * Returns NULL when the offset is not below nVnodes for that class, so
 * callers must test the result before dereferencing it.
 */
2826 struct VnodeEssence *
2827 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2830 struct VnodeInfo *vip;
2833 class = vnodeIdToClass(vnodeNumber);
2834 vip = &salvinfo->vnodeInfo[class];
2835 offset = vnodeIdToBitNumber(vnodeNumber);
2836 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite: give a directory a private copy of its data before the
 * salvager modifies it.
 *
 * salvinfo - state of the current salvage job (large vnode index handle,
 *            device, path, VolumeChanged).
 * dir      - directory being examined; its dirHandle is switched to the new
 *            inode.
 *
 * The copy is skipped when dir->copied is already set or Testing is on
 * (the statement taken in that case is on a line not shown here).
 * Otherwise a new inode is created with a bumped data version, the old
 * contents are copied into it, and the directory vnode is rewritten to
 * reference it.  Every step is asserted, so a failure aborts the salvager.
 */
2840 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2842 /* Copy the directory unconditionally if we are going to change it:
2843 * not just if was cloned.
2845 struct VnodeDiskObject vnode;
2846 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2847 Inode oldinode, newinode;
2850 if (dir->copied || Testing)
2852 DFlush(); /* Well justified paranoia... */
/* Fetch the directory vnode from the large vnode index. */
2855 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2856 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2858 osi_Assert(code == sizeof(vnode));
2859 oldinode = VNDISK_GET_INO(&vnode);
2860 /* Increment the version number by a whole lot to avoid problems with
2861 * clients that were promised new version numbers--but the file server
2862 * crashed before the versions were written to disk.
2865 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2866 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2868 osi_Assert(VALID_INO(newinode));
2869 osi_Assert(CopyInode(salvinfo->fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2871 VNDISK_SET_INO(&vnode, newinode);
2873 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2874 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2876 osi_Assert(code == sizeof(vnode));
2878 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2879 salvinfo->fileSysDevice, newinode,
2880 &salvinfo->VolumeChanged);
2881 /* Don't delete the original inode right away, because the directory is
2882 * still being scanned.
2888 * This function should either successfully create a new dir, or give up
2889 * and leave things the way they were. In particular, if it fails to write
2890 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage: rebuild a damaged directory into a freshly created inode
 * and, if the rebuilt copy checks out, make the vnode reference it.
 *
 * salvinfo - state of the current salvage job.
 * dir      - directory to rebuild; on success dir->dirHandle is replaced by
 *            the handle of the new directory.
 *
 * Steps visible here: read the directory vnode, create a new inode with a
 * bumped data version, run DirSalvage from the old handle into the new one,
 * verify the result with DirOK, then update inode number and length in the
 * vnode, write it back, flush to disk and release the old inode.  If
 * DirSalvage or DirOK fails, the new inode is released instead.
 */
2894 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2896 struct VnodeDiskObject vnode;
2897 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2898 Inode oldinode, newinode;
/* Fallback uniquifier for the ".." entry when the parent is unknown. */
2903 afs_int32 parentUnique = 1;
2904 struct VnodeEssence *vnodeEssence;
2909 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2911 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2912 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2914 osi_Assert(lcode == sizeof(vnode));
2915 oldinode = VNDISK_GET_INO(&vnode);
2916 /* Increment the version number by a whole lot to avoid problems with
2917 * clients that were promised new version numbers--but the file server
2918 * crashed before the versions were written to disk.
2921 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2922 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2924 osi_Assert(VALID_INO(newinode));
2925 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2926 &salvinfo->VolumeChanged);
2928 /* Assign . and .. vnode numbers from dir and vnode.parent.
2929 * The uniquifier for . is in the vnode.
2930 * The uniquifier for .. might be set to a bogus value of 1 and
2931 * the salvager will later clean it up.
2933 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2934 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2937 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2939 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2944 /* didn't really build the new directory properly, let's just give up. */
2945 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2946 Log("Directory salvage returned code %d, continuing.\n", code);
/* NOTE(review): unlike the other messages here, the next one has no
 * trailing newline -- confirm whether that is intended. */
2948 Log("also failed to decrement link count on new inode");
2952 Log("Checking the results of the directory salvage...\n");
2953 if (!DirOK(&newdir)) {
2954 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2955 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2956 osi_Assert(code == 0);
/* The rebuilt directory passed DirOK: reference it from the vnode and
 * record its length. */
2960 VNDISK_SET_INO(&vnode, newinode);
2961 length = Length(&newdir);
2962 VNDISK_SET_LEN(&vnode, length);
2964 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2965 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2967 osi_Assert(lcode == sizeof(vnode));
2970 nt_sync(salvinfo->fileSysDevice);
2972 sync(); /* this is slow, but hopefully rarely called. We don't have
2973 * an open FD on the file itself to fsync.
2977 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
2979 /* make sure old directory file is really closed */
/* NOTE(review): the handle returned by IH_OPEN is passed on with no check
 * visible in this listing -- confirm it cannot be NULL here. */
2980 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2981 FDH_REALLYCLOSE(fdP);
/* Release the old directory inode, then adopt the new handle. */
2983 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2984 osi_Assert(code == 0);
2985 dir->dirHandle = newdir;
2989 * arguments for JudgeEntry.
/*
 * Context handed to JudgeEntry through the opaque callback argument of
 * EnumerateDir; SalvageDir fills in both members before enumerating.
 */
2991 struct judgeEntry_params {
2992 struct DirSummary *dir; /**< directory we're examining entries in */
2993 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/*
 * JudgeEntry: per-entry callback used while enumerating a directory; it
 * decides whether the entry is kept, repaired or deleted.
 *
 * arock       - really a struct judgeEntry_params (directory summary plus
 *               salvage job state).
 * name        - name of the directory entry.
 * vnodeNumber - vnode number stored in the entry.
 * unique      - uniquifier stored in the entry (declared on a signature
 *               line not shown here).
 *
 * Visible actions, in order:
 *  - delete entries whose vnode number lies outside the vnode index;
 *  - without AFS_NAMEI_ENV, delete entries whose vnode has no inode;
 *  - handle entries for small vnodes that look unallocated (count, unique
 *    and mode bits all zero);
 *  - repair or delete entries whose uniquifier differs from the vnode;
 *  - repair "." and ".." entries, and delete ".__afs" leftovers;
 *  - report suid/sgid and root-owned files when those options are on;
 *  - inspect mount points and turn invalid ones into plain symlinks;
 *  - let the directory claim the vnode, or delete the entry when the vnode
 *    belongs to another directory;
 *  - mark the vnode claimed and decrement its expected link count.
 *
 * Every directory modification goes through CopyOnWrite first and is
 * asserted to succeed.
 */
2997 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
3000 struct judgeEntry_params *params = arock;
3001 struct DirSummary *dir = params->dir;
3002 struct SalvInfo *salvinfo = params->salvinfo;
3003 struct VnodeEssence *vnodeEssence;
3004 afs_int32 dirOrphaned, todelete;
/* Whether the directory itself is orphaned affects what it may claim. */
3006 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
/* A NULL essence means the vnode number is outside the vnode index. */
3008 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3009 if (vnodeEssence == NULL) {
3011 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
3014 CopyOnWrite(salvinfo, dir);
3015 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3020 #ifndef AFS_NAMEI_ENV
3021 /* On AIX machines, don't allow entries to point to inode 0. That is a special
3022 * mount inode for the partition. If this inode were deleted, it would crash
3025 if (vnodeEssence->InodeNumber == 0) {
3026 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
3028 CopyOnWrite(salvinfo, dir);
3029 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3036 if (!(vnodeNumber & 1) && !Showmode
3037 && !(vnodeEssence->count || vnodeEssence->unique
3038 || vnodeEssence->modeBits)) {
3039 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
3040 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3041 vnodeNumber, unique,
3042 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3046 CopyOnWrite(salvinfo, dir);
3047 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3053 /* Check if the Uniquifiers match. If not, change the directory entry
3054 * so its unique matches the vnode unique. Delete if the unique is zero
3055 * or if the directory is orphaned.
3057 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3058 if (!vnodeEssence->unique
3059 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3060 /* This is an orphaned directory. Don't delete the . or ..
3061 * entry. Otherwise, it will get created in the next
3062 * salvage and deleted again here. So Just skip it.
3067 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3070 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3074 fid.Vnode = vnodeNumber;
3075 fid.Unique = vnodeEssence->unique;
3076 CopyOnWrite(salvinfo, dir);
3077 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3079 osi_Assert(Create(&dir->dirHandle, name, &fid) == 0);
3082 return 0; /* no need to continue */
/* The "." entry must name this directory with its own uniquifier. */
3085 if (strcmp(name, ".") == 0) {
3086 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3089 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3091 CopyOnWrite(salvinfo, dir);
3092 osi_Assert(Delete(&dir->dirHandle, ".") == 0);
3093 fid.Vnode = dir->vnodeNumber;
3094 fid.Unique = dir->unique;
3095 osi_Assert(Create(&dir->dirHandle, ".", &fid) == 0);
3098 vnodeNumber = fid.Vnode; /* Get the new Essence */
3099 unique = fid.Unique;
3100 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
/* The ".." entry must name the parent directory, or this directory itself
 * in the alternative case whose condition is not shown here. */
3103 } else if (strcmp(name, "..") == 0) {
3106 struct VnodeEssence *dotdot;
3107 pa.Vnode = dir->parent;
3108 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3109 osi_Assert(dotdot != NULL); /* XXX Should not be assert */
3110 pa.Unique = dotdot->unique;
3112 pa.Vnode = dir->vnodeNumber;
3113 pa.Unique = dir->unique;
3115 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3117 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3119 CopyOnWrite(salvinfo, dir);
3120 osi_Assert(Delete(&dir->dirHandle, "..") == 0);
3121 osi_Assert(Create(&dir->dirHandle, "..", &pa) == 0);
3124 vnodeNumber = pa.Vnode; /* Get the new Essence */
3126 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3128 dir->haveDotDot = 1;
3129 } else if (strncmp(name, ".__afs", 6) == 0) {
3131 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3134 CopyOnWrite(salvinfo, dir);
3135 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3137 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3138 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3141 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3142 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/* A symlink with no execute bits set is handled as a mount point; its
 * target text is read and checked below. */
3143 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3144 && !(vnodeEssence->modeBits & 0111)) {
3145 afs_sfsize_t nBytes;
3151 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3152 vnodeEssence->InodeNumber);
3155 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3159 size = FDH_SIZE(fdP);
3161 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3162 FDH_REALLYCLOSE(fdP);
3169 nBytes = FDH_PREAD(fdP, buf, size, 0);
3170 if (nBytes == size) {
3172 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3173 Log("Volume %u (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3174 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3175 Testing ? "would convert" : "converted");
3176 vnodeEssence->modeBits |= 0111;
3177 vnodeEssence->changed = 1;
3178 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3179 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3180 dir->name ? dir->name : "??", name, buf);
3182 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3183 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3185 FDH_REALLYCLOSE(fdP);
3188 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3189 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/* Record the entry name of a large-class vnode the first time one is
 * seen; GetDirName reads it when building paths. */
3190 if (vnodeIdToClass(vnodeNumber) == vLarge
3191 && vnodeEssence->name == NULL) {
3193 if ((n = (char *)malloc(strlen(name) + 1)))
3195 vnodeEssence->name = n;
3198 /* The directory entry points to the vnode. Check to see if the
3199 * vnode points back to the directory. If not, then let the
3200 * directory claim it (else it might end up orphaned). Vnodes
3201 * already claimed by another directory are deleted from this
3202 * directory: hardlinks to the same vnode are not allowed
3203 * from different directories.
3205 if (vnodeEssence->parent != dir->vnodeNumber) {
3206 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3207 /* Vnode does not point back to this directory.
3208 * Orphaned dirs cannot claim a file (it may belong to
3209 * another non-orphaned dir).
3212 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3214 vnodeEssence->parent = dir->vnodeNumber;
3215 vnodeEssence->changed = 1;
3217 /* Vnode was claimed by another directory */
3220 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3221 } else if (vnodeNumber == 1) {
3222 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3224 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3228 CopyOnWrite(salvinfo, dir);
3229 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3234 /* This directory claims the vnode */
3235 vnodeEssence->claimed = 1;
/* One expected link has now been accounted for by a directory entry. */
3237 vnodeEssence->count--;
/*
 * DistilVnodeEssence: read one vnode index file and build the in-memory
 * summary (struct VnodeEssence per vnode) used by the later directory
 * checks.
 *
 * salvinfo - state of the current salvage job; vnodeInfo[class] is filled.
 * rwVId    - id of the read/write volume, used to open the index inode.
 * class    - vnode class of the index being read.
 * ino      - inode number of the index file.
 * maxu     - in/out: raised to the largest uniquifier found in the index.
 *
 * For the large class the inode number of every directory vnode is also
 * recorded in vip->inodes[].  A directory vnode found in any other class is
 * invalid: it is removed from the allocated count and its record is zeroed
 * on disk.
 *
 * The zeroing clears and writes vcp->diskSize bytes of the record buffer.
 * Taking sizeof of the pointer, or its address, would touch only
 * pointer-size bytes and would store the pointer value in the index file.
 */
3242 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3243 VnodeClass class, Inode ino, Unique * maxu)
3245 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3246 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3247 char buf[SIZEOF_LARGEDISKVNODE];
3248 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3250 StreamHandle_t *file;
/* Open the index file; failures are fatal (assert). */
3255 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3256 fdP = IH_OPEN(vip->handle);
3257 osi_Assert(fdP != NULL);
3258 file = FDH_FDOPEN(fdP, "r+");
3259 osi_Assert(file != NULL);
3260 size = OS_SIZE(fdP->fd_fd);
3261 osi_Assert(size != -1);
/* The first diskSize bytes of the file are skipped and not counted as a vnode. */
3262 vip->nVnodes = (size / vcp->diskSize) - 1;
3263 if (vip->nVnodes > 0) {
3264 osi_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3265 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
3266 osi_Assert((vip->vnodes = (struct VnodeEssence *)
3267 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3268 if (class == vLarge) {
3269 osi_Assert((vip->inodes = (Inode *)
3270 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3279 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
/* Copy the fields the salvager needs from every allocated vnode. */
3280 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3281 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3282 nVnodes--, vnodeIndex++) {
3283 if (vnode->type != vNull) {
3284 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3285 afs_fsize_t vnodeLength;
3286 vip->nAllocatedVnodes++;
3287 vep->count = vnode->linkCount;
3288 VNDISK_GET_LEN(vnodeLength, vnode);
3289 vep->blockCount = nBlocks(vnodeLength);
3290 vip->volumeBlockCount += vep->blockCount;
3291 vep->parent = vnode->parent;
3292 vep->unique = vnode->uniquifier;
3293 if (*maxu < vnode->uniquifier)
3294 *maxu = vnode->uniquifier;
3295 vep->modeBits = vnode->modeBits;
3296 vep->InodeNumber = VNDISK_GET_INO(vnode);
3297 vep->type = vnode->type;
3298 vep->author = vnode->author;
3299 vep->owner = vnode->owner;
3300 vep->group = vnode->group;
3301 if (vnode->type == vDirectory) {
3302 if (class != vLarge) {
3303 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3304 vip->nAllocatedVnodes--;
/* Zero the whole on-disk record, the same way SalvageIndex deletes vnodes.
 * NOTE(review): this write is not guarded by Testing and its result is not
 * checked, unlike the write-back in SalvageIndex -- confirm intent. */
3305 memset(vnode, 0, vcp->diskSize);
3306 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3307 vnodeIndexOffset(vcp, vnodeNumber),
3308 (char *)vnode, vcp->diskSize);
3309 salvinfo->VolumeChanged = 1;
3311 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName: build the path of a directory vnode by recursing through its
 * parents and appending one component per level.
 *
 * salvinfo - state of the current salvage job (used to look up parents).
 * vnode    - vnode number of the directory being named.
 * vp       - summary of that vnode; vp->parent and vp->name are read.
 * path     - output buffer (declared on a signature line not shown here).
 *
 * When the vnode has a parent and a recorded name, and the parent path
 * could be produced, OS_DIRSEP and the name are appended to path.  The
 * other cases and the return values are on lines not shown here.
 *
 * NOTE(review): the strcat calls do not bound the write into path; the
 * visible caller passes a MAXPATHLEN buffer -- confirm deep trees cannot
 * overflow it.
 */
3320 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3323 struct VnodeEssence *parentvp;
3329 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3330 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3331 strcat(path, OS_DIRSEP);
3332 strcat(path, vp->name);
/*
 * IsVnodeOrphaned: return 1 when the vnode, or any directory on its chain
 * of parents, has not been claimed by a directory entry; return 0 when the
 * chain reaches the root directory.
 *
 * NOTE(review): the recursion follows vep->parent and stops only at vnode
 * zero, at the root, or at an unclaimed vnode; a cycle of claimed parents
 * would apparently recurse without bound -- confirm callers rule that out.
 */
3338 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3339 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
3342 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3344 struct VnodeEssence *vep;
3347 return (1); /* Vnode zero does not exist */
3349 return (0); /* The root dir vnode is always claimed */
3350 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3351 if (!vep || !vep->claimed)
3352 return (1); /* Vnode is not claimed - it is orphaned */
3354 return (IsVnodeOrphaned(salvinfo, vep->parent));
/*
 * SalvageDir: salvage the directory stored at index i of the large vnode
 * summary, after first salvaging its parent directory.
 *
 * salvinfo     - state of the current salvage job.
 * name         - forwarded unchanged to the recursive call; any other use
 *                is on lines not shown here.
 * rwVid        - id of the read/write volume (used when releasing inodes).
 * dirVnodeInfo - large vnode summary; vnodes[] and inodes[] are read and
 *                updated.
 * alinkH       - link table handle, stored in the directory summary.
 * i            - bit number (index) of the directory vnode to process.
 * rootdir      - receives a copy of the directory summary when the
 *                directory is vnode 1 with uniquifier 1.
 * rootdirfound - presumably set when that copy is made; the assignment is
 *                on a line not shown here -- TODO confirm.
 *
 * The directory is marked salvaged up front, so each one is processed at
 * most once.  A directory that fails DirOK, or any directory when
 * RebuildDirs is set outside Testing, is rebuilt with CopyAndSalvage.  Its
 * entries are then judged one by one through EnumerateDir and JudgeEntry.
 */
3358 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3359 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3360 struct DirSummary *rootdir, int *rootdirfound)
3362 static struct DirSummary dir;
3363 static struct DirHandle dirHandle;
3364 struct VnodeEssence *parent;
3365 static char path[MAXPATHLEN];
3368 if (dirVnodeInfo->vnodes[i].salvaged)
3369 return; /* already salvaged */
3372 dirVnodeInfo->vnodes[i].salvaged = 1;
3374 if (dirVnodeInfo->inodes[i] == 0)
3375 return; /* Not allocated to a directory */
/* Vnode 1 must not have a parent; any recorded parent is cleared. */
3377 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3378 if (dirVnodeInfo->vnodes[i].parent) {
3379 Log("Bad parent, vnode 1; %s...\n",
3380 (Testing ? "skipping" : "salvaging"));
3381 dirVnodeInfo->vnodes[i].parent = 0;
3382 dirVnodeInfo->vnodes[i].changed = 1;
/* Salvage the parent directory before this one. */
3385 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3386 if (parent && parent->salvaged == 0)
3387 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3388 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3389 rootdir, rootdirfound);
/* The static locals are filled in only after the recursive call above has
 * returned, so the nested invocation does not disturb this level. */
3392 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3393 dir.unique = dirVnodeInfo->vnodes[i].unique;
3396 dir.parent = dirVnodeInfo->vnodes[i].parent;
3397 dir.haveDot = dir.haveDotDot = 0;
3398 dir.ds_linkH = alinkH;
3399 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3400 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
/* Forced rebuilds treat the directory as bad without checking it. */
3402 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3405 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3406 (Testing ? "skipping" : "salvaging"));
3409 CopyAndSalvage(salvinfo, &dir);
3411 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
/* Keep a copy of the handle as it is now; dir.dirHandle may be switched to
 * a new inode by CopyOnWrite while the entries are judged. */
3414 dirHandle = dir.dirHandle;
3417 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3418 &dirVnodeInfo->vnodes[i], path);
3421 /* If enumeration failed for random reasons, we will probably delete
3422 * too much stuff, so we guard against this instead.
3424 struct judgeEntry_params judge_params;
3425 judge_params.salvinfo = salvinfo;
3426 judge_params.dir = &dir;
3428 osi_Assert(EnumerateDir(&dirHandle, JudgeEntry, &judge_params) == 0);
3431 /* Delete the old directory if it was copied in order to salvage.
3432 * CopyOnWrite has written the new inode # to the disk, but we still
3433 * have the old one in our local structure here. Thus, we idec the
3437 if (dir.copied && !Testing) {
3438 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3439 osi_Assert(code == 0);
3440 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3443 /* Remember rootdir DirSummary _after_ it has been judged */
3444 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3445 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3453 * Get a new FID that can be used to create a new file.
3455 * @param[in] volHeader vol header for the volume
3456 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3457 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3458 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3459 * updated to the new max unique if we create a new
3463 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3464 VnodeClass class, AFSFid *afid, Unique *maxunique)
3467 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3468 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
3472 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3473 /* no free vnodes; make a new one */
3474 salvinfo->vnodeInfo[class].nVnodes++;
3475 salvinfo->vnodeInfo[class].vnodes =
3476 realloc(salvinfo->vnodeInfo[class].vnodes,
3477 sizeof(struct VnodeEssence) * (i+1));
3479 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
3482 afid->Vnode = bitNumberToVnodeNumber(i, class);
3484 if (volHeader->uniquifier < (*maxunique + 1)) {
3485 /* header uniq is bad; it will get bumped by 2000 later */
3486 afid->Unique = *maxunique + 1 + 2000;
3489 /* header uniq seems okay; just use that */
3490 afid->Unique = *maxunique = volHeader->uniquifier++;
3495 * Create a vnode for a README file explaining not to use a recreated-root vol.
3497 * @param[in] volHeader vol header for the volume
3498 * @param[in] alinkH ihandle for i/o for the volume
3499 * @param[in] vid volume id
3500 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3501 * updated to the new max unique if we create a new
3503 * @param[out] afid FID for the new readme vnode
3504 * @param[out] ainode the inode for the new readme file
3506 * @return operation status
/*
 * CreateReadme: create the README file that is placed in a recreated root
 * directory.
 *
 * Steps visible in this listing: obtain a free small-vnode FID with
 * GetNewFID(), create an inode for the file with IH_CREATE and write the
 * readme text into it, write a matching small disk vnode into the small
 * vnode index, mirror the new vnode's attributes into the in-memory
 * VnodeEssence, and hand the inode back through *ainode.
 *
 * NOTE(review): this listing has lost its short lines (declarations, braces,
 * error-path jumps and returns), so the control flow between the statements
 * below should be confirmed against the complete file.
 */
3511 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3512 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3516 struct VnodeDiskObject *rvnode = NULL;
3518 IHandle_t *readmeH = NULL;
3519 struct VnodeEssence *vep;
3521 time_t now = time(NULL);
3523 /* Try to make the note brief, but informative. Only administrators should
3524 * be able to read this file at first, so we can hopefully assume they
3525 * know what AFS is, what a volume is, etc. */
3527 "This volume has been salvaged, but has lost its original root directory.\n"
3528 "The root directory that exists now has been recreated from orphan files\n"
3529 "from the rest of the volume. This recreated root directory may interfere\n"
3530 "with old cached data on clients, and there is no way the salvager can\n"
3531 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3532 "use this volume, but only copy the salvaged data to a new volume.\n"
3533 "Continuing to use this volume as it exists now may cause some clients to\n"
3534 "behave oddly when accessing this volume.\n"
3535 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3536 /* ^ the person reading this probably just lost some data, so they could
3537 * use some cheering up. */
3539 /* -1 for the trailing NUL */
3540 length = sizeof(readme) - 1;
/* pick the FID first; vep then points at the in-memory slot for that vnode */
3542 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
3544 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3546 /* create the inode and write the contents */
3547 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3548 salvinfo->fileSysPath, 0, vid,
3549 afid->Vnode, afid->Unique, 1);
3550 if (!VALID_INO(readmeinode)) {
3551 Log("CreateReadme: readme IH_CREATE failed\n");
3555 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3556 bytes = IH_IWRITE(readmeH, 0, readme, length);
3557 IH_RELEASE(readmeH);
/* NOTE(review): the comparison uses length (sizeof(readme) - 1) but the
 * message below reports sizeof(readme) as the expected count. */
3559 if (bytes != length) {
3560 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3561 (int)sizeof(readme));
3565 /* create the vnode and write it out */
3566 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
/* NOTE(review): the message below names CreateRootDir although it is logged
 * from CreateReadme. */
3568 Log("CreateRootDir: error alloc'ing memory\n");
3572 rvnode->type = vFile;
3574 rvnode->modeBits = 0777;
3575 rvnode->linkCount = 1;
3576 VNDISK_SET_LEN(rvnode, length);
3577 rvnode->uniquifier = afid->Unique;
3578 rvnode->dataVersion = 1;
3579 VNDISK_SET_INO(rvnode, readmeinode);
3580 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3585 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
3587 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3588 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3589 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3591 if (bytes != SIZEOF_SMALLDISKVNODE) {
3592 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3593 (int)SIZEOF_SMALLDISKVNODE);
3597 /* update VnodeEssence for new readme vnode */
3598 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
3600 vep->blockCount = nBlocks(length);
3601 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3602 vep->parent = rvnode->parent;
3603 vep->unique = rvnode->uniquifier;
3604 vep->modeBits = rvnode->modeBits;
3605 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3606 vep->type = rvnode->type;
3607 vep->author = rvnode->author;
3608 vep->owner = rvnode->owner;
3609 vep->group = rvnode->group;
3619 *ainode = readmeinode;
/* recovery: drop the reference on the readme inode created above */
3624 if (IH_DEC(alinkH, readmeinode, vid)) {
3625 Log("CreateReadme (recovery): IH_DEC failed\n");
3637 * create a root dir for a volume that lacks one.
3639 * @param[in] volHeader vol header for the volume
3640 * @param[in] alinkH ihandle for disk access for this volume group
3641 * @param[in] vid volume id we're dealing with
3642 * @param[out] rootdir populated with info about the new root dir
3643 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3644 * updated to the new max unique if we create a new
3647 * @return operation status
/*
 * CreateRootDir: fabricate a root directory (vnode 1, uniquifier 1) for a
 * volume that has none.
 *
 * Steps visible in this listing: refuse when the volume has no vnodes at all
 * or when vnode 1 is already in use; create the README via CreateReadme();
 * create a directory inode, initialise it with MakeDir() and add a
 * "README.ROOTDIR" entry; build a large disk vnode whose ACL grants only
 * read and lookup to system:administrators and write it to the large vnode
 * index; finally update the in-memory VnodeEssence and the caller's
 * DirSummary.
 *
 * NOTE(review): short lines (declarations, braces, gotos, returns, some
 * assignments) are missing from this listing; confirm the error paths
 * against the complete file.
 */
3652 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3653 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
3657 int decroot = 0, decreadme = 0;
3658 AFSFid did, readmeid;
3661 struct VnodeDiskObject *rootvnode = NULL;
3662 struct acl_accessList *ACL;
3665 struct VnodeEssence *vep;
3667 time_t now = time(NULL);
/* with neither vnode table present there is nothing to re-attach */
3669 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3670 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3674 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3675 /* We don't have any large vnodes in the volume; allocate room
3676 * for one so we can recreate the root dir */
3677 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3678 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3679 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3681 osi_Assert(salvinfo->vnodeInfo[vLarge].vnodes);
3682 osi_Assert(salvinfo->vnodeInfo[vLarge].inodes);
/* vep and ip address the large-vnode slot for vnode number 1 */
3685 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3686 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3687 if (vep->type != vNull) {
3688 Log("Not creating new root dir; existing vnode 1 is non-null\n");
3692 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3693 &readmeinode) != 0) {
3698 /* set the DV to a very high number, so it is unlikely that we collide
3699 * with a cached DV */
3702 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3704 if (!VALID_INO(rootinode)) {
3705 Log("CreateRootDir: IH_CREATE failed\n");
3710 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3711 rootinode, &salvinfo->VolumeChanged);
/* the same FID is passed for both MakeDir FID arguments */
3715 if (MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3716 Log("CreateRootDir: MakeDir failed\n");
3719 if (Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3720 Log("CreateRootDir: Create failed\n");
3724 length = Length(&rootdir->dirHandle);
3725 DZap((void *)&rootdir->dirHandle);
3727 /* create the new root dir vnode */
3728 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
/* NOTE(review): the message says malloc; the allocation above is calloc */
3730 Log("CreateRootDir: malloc failed\n");
3734 /* only give 'rl' permissions to 'system:administrators'. We do this to
3735 * try to catch the attention of an administrator, that they should not
3736 * be writing to this directory or continue to use it. */
3737 ACL = VVnodeDiskACL(rootvnode);
3738 ACL->size = sizeof(struct acl_accessList);
3739 ACL->version = ACL_ACLVERSION;
3743 ACL->entries[0].id = -204; /* system:administrators */
3744 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
3746 rootvnode->type = vDirectory;
3747 rootvnode->cloned = 0;
3748 rootvnode->modeBits = 0777;
3749 rootvnode->linkCount = 2;
3750 VNDISK_SET_LEN(rootvnode, length);
3751 rootvnode->uniquifier = 1;
3752 rootvnode->dataVersion = dv;
3753 VNDISK_SET_INO(rootvnode, rootinode);
3754 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3755 rootvnode->author = 0;
3756 rootvnode->owner = 0;
3757 rootvnode->parent = 0;
3758 rootvnode->group = 0;
3759 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3761 /* write it out to disk */
3762 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3763 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3764 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3766 if (bytes != SIZEOF_LARGEDISKVNODE) {
3767 /* just cast to int and don't worry about printing real 64-bit ints;
3768 * a large disk vnode isn't anywhere near the 32-bit limit */
3769 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3770 (int)SIZEOF_LARGEDISKVNODE);
3774 /* update VnodeEssence for the new root vnode */
3775 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3777 vep->blockCount = nBlocks(length);
3778 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3779 vep->parent = rootvnode->parent;
3780 vep->unique = rootvnode->uniquifier;
3781 vep->modeBits = rootvnode->modeBits;
3782 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3783 vep->type = rootvnode->type;
3784 vep->author = rootvnode->author;
3785 vep->owner = rootvnode->owner;
3786 vep->group = rootvnode->group;
3796 /* update DirSummary for the new root vnode */
3797 rootdir->vnodeNumber = 1;
3798 rootdir->unique = 1;
3799 rootdir->haveDot = 1;
3800 rootdir->haveDotDot = 1;
3801 rootdir->rwVid = vid;
3802 rootdir->copied = 0;
3803 rootdir->parent = 0;
/* NOTE(review): the strdup result is used without a NULL check */
3804 rootdir->name = strdup(".");
3805 rootdir->vname = volHeader->name;
3806 rootdir->ds_linkH = alinkH;
/* recovery: decroot and decreadme select which created inodes get released */
3813 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3814 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3816 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3817 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3827 * salvage a volume group.
3829 * @param[in] salvinfo information for the current salvage job
3830 * @param[in] rwIsp inode summary for rw volume
3831 * @param[in] alinkH link table inode handle
3833 * @return operation status
/*
 * SalvageVolume: salvage one volume group, driven from its read-write volume.
 *
 * As far as this listing shows, the routine:
 *   - reads the volume header through the volumeInfo inode and asserts on its
 *     size, magic and destroyMe flag;
 *   - distils the large and small vnode indices and runs SalvageDir over
 *     every large-vnode slot;
 *   - when no root directory was found, orphans == ORPH_ATTACH and Testing is
 *     off, tries CreateRootDir;
 *   - walks every vnode looking for unclaimed (orphaned) ones and, when
 *     requested, attaches them to the root directory under generated names;
 *   - rewrites changed vnodes, corrects link counts, and clears vnodes that
 *     are to be deleted;
 *   - updates the file and block counts and the uniquifier in the volume
 *     header, may ask the fileserver to break callbacks, and writes the
 *     header back.
 *
 * NOTE(review): short lines (declarations, braces, else branches, returns)
 * are missing from this listing, so the exact branch structure should be
 * confirmed against the complete file.
 */
3837 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3839 /* This routine, for now, will only be called for read-write volumes */
3841 int BlocksInVolume = 0, FilesInVolume = 0;
3843 struct DirSummary rootdir, oldrootdir;
3844 struct VnodeInfo *dirVnodeInfo;
3845 struct VnodeDiskObject vnode;
3846 VolumeDiskData volHeader;
3848 int orphaned, rootdirfound = 0;
3849 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3850 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3851 struct VnodeEssence *vep;
3854 afs_sfsize_t nBytes;
3856 VnodeId LFVnode, ThisVnode;
3857 Unique LFUnique, ThisUnique;
/* load the volume header from the volumeInfo inode and sanity-check it */
3861 vid = rwIsp->volSummary->header.id;
3862 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3863 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3864 osi_Assert(nBytes == sizeof(volHeader));
3865 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3866 osi_Assert(volHeader.destroyMe != DESTROY_ME);
3867 /* (should not have gotten this far with DESTROY_ME flag still set!) */
/* both passes share maxunique */
3869 DistilVnodeEssence(salvinfo, vid, vLarge,
3870 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
3871 DistilVnodeEssence(salvinfo, vid, vSmall,
3872 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
3874 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
3875 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3876 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
3877 &rootdir, &rootdirfound);
3880 nt_sync(salvinfo->fileSysDevice);
3882 sync(); /* This used to be done lower level, for every dir */
/* no root directory turned up: try to fabricate one so orphans can attach */
3889 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
3891 Log("Cannot find root directory for volume %lu; attempting to create "
3892 "a new one\n", afs_printable_uint32_lu(vid));
3894 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
3899 salvinfo->VolumeChanged = 1;
3903 /* Parse each vnode looking for orphaned vnodes and
3904 * connect them to the tree as orphaned (if requested).
3906 oldrootdir = rootdir;
3907 for (class = 0; class < nVNODECLASSES; class++) {
3908 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
3909 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
3910 ThisVnode = bitNumberToVnodeNumber(v, class);
3911 ThisUnique = vep->unique;
3913 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3914 continue; /* Ignore unused, claimed, and root vnodes */
3916 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3917 * entry in this vnode had incremented the parent link count (In
3918 * JudgeEntry()). We need to go to the parent and decrement that
3919 * link count. But if the parent's unique is zero, then the parent
3920 * link count was not incremented in JudgeEntry().
3922 if (class == vLarge) { /* directory vnode */
3923 pv = vnodeIdToBitNumber(vep->parent);
3924 if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
3925 if (vep->parent == 1 && newrootdir) {
3926 /* this vnode's parent was the volume root, and
3927 * we just created the volume root. So, the parent
3928 * dir didn't exist during JudgeEntry, so the link
3929 * count was not inc'd there, so don't dec it here.
3935 salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
3941 continue; /* If no rootdir, can't attach orphaned files */
3943 /* Here we attach orphaned files and directories into the
3944 * root directory, LFVnode, making sure link counts stay correct.
3946 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3947 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3948 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3950 /* Update this orphaned vnode's info. Its parent info and
3951 * link count (do for orphaned directories and files).
3953 vep->parent = LFVnode; /* Parent is the root dir */
3954 vep->unique = LFUnique;
/* count is subtracted from the stored linkCount when vnodes are written
 * out further down, so decrementing it here raises the final link count */
3957 vep->count--; /* Inc link count (root dir will pt to it) */
3959 /* If this orphaned vnode is a directory, change '..'.
3960 * The name of the orphaned dir/file is unknown, so we
3961 * build a unique name. No need to CopyOnWrite the directory
3962 * since it is not connected to tree in BK or RO volume and
3963 * won't be visible there.
3965 if (class == vLarge) {
3969 /* Remove and recreate the ".." entry in this orphaned directory */
3970 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
3971 salvinfo->vnodeInfo[class].inodes[v],
3972 &salvinfo->VolumeChanged);
3974 pa.Unique = LFUnique;
3975 osi_Assert(Delete(&dh, "..") == 0);
3976 osi_Assert(Create(&dh, "..", &pa) == 0);
3978 /* The original parent's link count was decremented above.
3979 * Here we increment the new parent's link count.
3981 pv = vnodeIdToBitNumber(LFVnode);
3982 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
3986 /* Go to the root dir and add this entry. The link count of the
3987 * root dir was incremented when ".." was created. Try 10 times.
3989 for (j = 0; j < 10; j++) {
3990 pa.Vnode = ThisVnode;
3991 pa.Unique = ThisUnique;
3993 snprintf(npath, sizeof npath, "%s.%u.%u",
3994 ((class == vLarge) ? "__ORPHANDIR__"
3995 : "__ORPHANFILE__"),
3996 ThisVnode, ThisUnique);
3998 CopyOnWrite(salvinfo, &rootdir);
3999 code = Create(&rootdir.dirHandle, npath, &pa);
4003 ThisUnique += 50; /* Try creating a different file */
4005 osi_Assert(code == 0);
4006 Log("Attaching orphaned %s to volume's root dir as %s\n",
4007 ((class == vLarge) ? "directory" : "file"), npath);
4009 } /* for each vnode in the class */
4010 } /* for each class of vnode */
4012 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
4014 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
4016 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
4018 osi_Assert(code == 0);
4019 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
4022 DFlush(); /* Flush the changes */
4023 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
4024 Log("Cannot attach orphaned files and directories: Root directory not found\n");
4025 orphans = ORPH_IGNORE;
4028 /* Write out all changed vnodes. Orphaned files and directories
4029 * will get removed here also (if requested).
4031 for (class = 0; class < nVNODECLASSES; class++) {
4032 afs_sfsize_t nVnodes = salvinfo->vnodeInfo[class].nVnodes;
4033 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
4034 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
4035 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
4036 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
4037 for (i = 0; i < nVnodes; i++) {
4038 struct VnodeEssence *vnp = &vnodes[i];
4039 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
4041 /* If the vnode is good but is unclaimed (not listed in
4042 * any directory entries), then it is orphaned.
4045 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
4046 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
4050 if (vnp->changed || vnp->count) {
4053 IH_IREAD(salvinfo->vnodeInfo[class].handle,
4054 vnodeIndexOffset(vcp, vnodeNumber),
4055 (char *)&vnode, sizeof(vnode));
4056 osi_Assert(nBytes == sizeof(vnode));
4058 vnode.parent = vnp->parent;
4059 oldCount = vnode.linkCount;
/* vnp->count is the correction to apply to the stored link count */
4060 vnode.linkCount = vnode.linkCount - vnp->count;
4063 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4065 if (!vnp->todelete) {
4066 /* Orphans should have already been attached (if requested) */
4067 osi_Assert(orphans != ORPH_ATTACH);
4068 oblocks += vnp->blockCount;
4071 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4073 BlocksInVolume -= vnp->blockCount;
4075 if (VNDISK_GET_INO(&vnode)) {
4077 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4078 osi_Assert(code == 0);
4080 memset(&vnode, 0, sizeof(vnode));
4082 } else if (vnp->count) {
4084 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4087 vnode.modeBits = vnp->modeBits;
4090 vnode.dataVersion++;
4093 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4094 vnodeIndexOffset(vcp, vnodeNumber),
4095 (char *)&vnode, sizeof(vnode));
4096 osi_Assert(nBytes == sizeof(vnode));
4098 salvinfo->VolumeChanged = 1;
4102 if (!Showmode && ofiles) {
4103 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4105 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
4109 for (class = 0; class < nVNODECLASSES; class++) {
4110 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4111 for (i = 0; i < vip->nVnodes; i++)
4112 if (vip->vnodes[i].name)
4113 free(vip->vnodes[i].name);
4120 /* Set correct resource utilization statistics */
4121 volHeader.filecount = FilesInVolume;
4122 volHeader.diskused = BlocksInVolume;
4124 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
4125 if (volHeader.uniquifier < (maxunique + 1)) {
4127 Log("Volume uniquifier is too low; fixed\n");
4128 /* Plus 2,000 in case there are workstations out there with
4129 * cached vnodes that have since been deleted
4131 volHeader.uniquifier = (maxunique + 1 + 2000);
4135 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4136 "Only use this salvaged volume to copy data to another volume; "
4137 "do not continue to use this volume (%lu) as-is.\n",
4138 afs_printable_uint32_lu(vid));
4141 #ifdef FSSYNC_BUILD_CLIENT
4142 if (!Testing && salvinfo->VolumeChanged && salvinfo->useFSYNC) {
4143 afs_int32 fsync_code;
4145 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4147 Log("Error trying to tell the fileserver to break callbacks for "
4148 "changed volume %lu; error code %ld\n",
4149 afs_printable_uint32_lu(vid),
4150 afs_printable_int32_ld(fsync_code));
4152 salvinfo->VolumeChanged = 0;
4155 #endif /* FSSYNC_BUILD_CLIENT */
4157 /* Turn off the inUse bit; the volume's been salvaged! */
4158 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4159 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4160 volHeader.inService = 1; /* allow service again */
4161 volHeader.needsCallback = (salvinfo->VolumeChanged != 0);
4162 volHeader.dontSalvage = DONT_SALVAGE;
4163 salvinfo->VolumeChanged = 0;
4165 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4166 osi_Assert(nBytes == sizeof(volHeader));
4169 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
4170 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
4171 FilesInVolume, BlocksInVolume);
4174 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4175 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
/*
 * ClearROInUseBit: rewrite a volume's header with the in-use state cleared.
 *
 * Reads the VolumeDiskData through summary->volumeInfoHandle, asserts on the
 * read size and magic, clears inUse and needsSalvaged, sets inService and
 * dontSalvage, and writes the header back. Any short read or write trips an
 * assertion.
 */
4181 ClearROInUseBit(struct VolumeSummary *summary)
4183 IHandle_t *h = summary->volumeInfoHandle;
4184 afs_sfsize_t nBytes;
4186 VolumeDiskData volHeader;
4188 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4189 osi_Assert(nBytes == sizeof(volHeader));
4190 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
4191 volHeader.inUse = 0;
4192 volHeader.needsSalvaged = 0;
4193 volHeader.inService = 1;
4194 volHeader.dontSalvage = DONT_SALVAGE;
4196 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4197 osi_Assert(nBytes == sizeof(volHeader));
4202 * Possibly delete the volume.
4204 * deleteMe - Always do so, only a partial volume.
/*
 * MaybeZapVolume: decide what to do with a volume that could not be salvaged.
 *
 * When the volume is read-only or deleteMe is set, and it has a header file
 * name, the visible code logs why, destroys the volume disk header, unlinks
 * the header file, optionally notifies the fileserver through AskDelete(),
 * and marks the summary as deleted. Otherwise, when check is zero, the
 * failure is logged and the salvage is aborted.
 *
 * NOTE(review): short lines (braces, else branches, the declaration of path
 * and code) are missing from this listing.
 */
4207 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4208 char *message, int deleteMe, int check)
4210 if (readOnly(isp) || deleteMe) {
4211 if (isp->volSummary && isp->volSummary->fileName) {
/* NOTE(review): the message below has an unbalanced "(" and spells
 * "occurred" as "occured" */
4214 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
4216 Log("It will be deleted on this server (you may find it elsewhere)\n");
4219 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
4221 Log("it will be deleted instead. It should be recloned.\n");
/* NOTE(review): sprintf is unbounded; the size of path is not visible here */
4226 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, isp->volSummary->fileName);
4228 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4230 Log("Error %ld destroying volume disk header for volume %lu\n",
4231 afs_printable_int32_ld(code),
4232 afs_printable_uint32_lu(isp->volumeId));
4235 /* make sure we actually delete the fileName file; ENOENT
4236 * is fine, since VDestroyVolumeDiskHeader probably already
4238 if (unlink(path) && errno != ENOENT) {
4239 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4241 if (salvinfo->useFSYNC) {
4242 AskDelete(salvinfo, isp->volumeId);
4244 isp->volSummary->deleted = 1;
4247 } else if (!check) {
4248 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
4250 Abort("Salvage of volume %u aborted\n", isp->volumeId);
4254 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4256 * Locks a volume on disk for salvaging.
4258 * @param[in] volumeId volume ID to lock
4260 * @return operation status
4262 * @retval -1 volume lock raced with a fileserver restart; all volumes must
4263 * be checked out and locked again
/*
 * LockVolume: take the on-disk lock for a volume that is about to be
 * salvaged and mark its header as in use by this program.
 *
 * Visible behaviour: a non-blocking lock is requested with
 * VLockVolumeByIdNB(); EBUSY and other lock errors abort the salvage. The
 * checkout is then verified with FSYNC_VerifyCheckout(); SYNC_DENIED is
 * treated as "retry checking out volumes", any other non-OK code aborts.
 * Finally the volume header is read, inUse is set to programType, and the
 * header is written back under an assertion.
 *
 * NOTE(review): the return statements are among the lines missing from this
 * listing; the documented -1 for the retry case should be confirmed there.
 */
4268 LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
4273 /* should always be WRITE_LOCK, but keep the lock-type logic all
4274 * in one place, in VVolLockType. Params will be ignored, but
4275 * try to provide what we're logically doing. */
4276 locktype = VVolLockType(V_VOLUPD, 1);
4278 code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
4280 if (code == EBUSY) {
4281 Abort("Someone else appears to be using volume %lu; Aborted\n",
4282 afs_printable_uint32_lu(volumeId));
4284 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4285 afs_printable_int32_ld(code),
4286 afs_printable_uint32_lu(volumeId));
4289 code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPathName, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4290 if (code == SYNC_DENIED) {
4291 /* need to retry checking out volumes */
4294 if (code != SYNC_OK) {
4295 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4296 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4299 /* set inUse = programType in the volume header to ensure that nobody
4300 * tries to use this volume again without salvaging, if we somehow crash
4301 * or otherwise exit before finishing the salvage.
4305 struct VolumeHeader header;
4306 struct VolumeDiskHeader diskHeader;
4307 struct VolumeDiskData volHeader;
4309 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
4314 DiskToVolumeHeader(&header, &diskHeader);
4316 IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
4317 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4318 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4324 volHeader.inUse = programType;
4326 /* If we can't re-write the header, bail out and error. We don't
4327 * assert when reading the header, since it's possible the
4328 * header isn't really there (when there's no data associated
4329 * with the volume; we just delete the vol header file in that
4330 * case). But if it's there enough that we can read it, but
4331 * somehow we cannot write to it to signify we're salvaging it,
4332 * we've got a big problem and we cannot continue. */
4333 osi_Assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
4340 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
/*
 * AskError: ask the fileserver to force a volume into the error state.
 *
 * Only does anything in demand-attach builds: a single FSYNC_VolOp with
 * FSYNC_VOL_FORCE_ERROR is issued, and a failure is logged rather than
 * treated as fatal.
 */
4343 AskError(struct SalvInfo *salvinfo, VolumeId volumeId)
4345 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4347 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4348 FSYNC_VOL_FORCE_ERROR, FSYNC_WHATEVER, NULL);
4349 if (code != SYNC_OK) {
4350 Log("AskError: failed to force volume %lu into error state; "
4351 "SYNC error code %ld (%s)\n", (long unsigned)volumeId,
4352 (long)code, SYNC_res2string(code));
4354 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
/*
 * AskOffline: ask the fileserver to take a volume offline for salvaging.
 *
 * Issues FSYNC_VOL_OFF up to three times. A denial or a protocol mismatch
 * (SYNC_BAD_COMMAND) is logged and aborts the salvage; other failures log a
 * retry message and call FSYNC_clientFinis() before the next attempt. If the
 * final code is still not SYNC_OK the salvage is aborted.
 *
 * NOTE(review): the conditions choosing between the paired log messages
 * below are on lines missing from this listing.
 */
4358 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
4363 memset(&res, 0, sizeof(res));
4365 for (i = 0; i < 3; i++) {
4366 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4367 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4369 if (code == SYNC_OK) {
4371 } else if (code == SYNC_DENIED) {
4373 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4375 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4376 Abort("Salvage aborted\n");
4377 } else if (code == SYNC_BAD_COMMAND) {
4378 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
4381 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4382 Log("AskOffline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4384 Log("AskOffline: fileserver is DAFS but we are not.\n");
4387 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4388 Log("AskOffline: fileserver is not DAFS but we are.\n");
4390 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4393 Abort("Salvage aborted\n");
4396 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4397 FSYNC_clientFinis();
4401 if (code != SYNC_OK) {
4402 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4403 Abort("Salvage aborted\n");
/*
 * Query helper that asks the fileserver, through FSYNC_VOL_QUERY_VOP on
 * volume id 1, what kind of server it is. The result is cached in the
 * file-scope isDAFS variable, which starts at -1.
 *
 * NOTE(review): the signature line is missing from this listing; the name
 * AskDAFS is taken from the log message below. How each response code maps
 * to the cached answer is also on missing lines.
 */
4407 /* don't want to pass around state; remember it here */
4408 static int isDAFS = -1;
4412 afs_int32 code, i, ret = 0;
4415 /* we don't care if we race. the answer shouldn't change */
4419 memset(&res, 0, sizeof(res));
4421 for (i = 0; i < 3; i++) {
4422 code = FSYNC_VolOp(1, NULL,
4423 FSYNC_VOL_QUERY_VOP, FSYNC_SALVAGE, &res);
4425 if (code == SYNC_OK) {
4428 } else if (code == SYNC_DENIED) {
4431 } else if (code == SYNC_BAD_COMMAND) {
4434 } else if (code == SYNC_FAILED) {
4435 if (res.hdr.reason == FSYNC_UNKNOWN_VOLID)
4442 Log("AskDAFS: request to query fileserver failed; trying again...\n");
4443 FSYNC_clientFinis();
/*
 * MaybeAskOnline: bring a volume back online only when its disk header can
 * still be read; a failed VReadVolumeDiskHeader() is taken to mean the
 * volume no longer exists, in which case nothing is requested.
 */
4453 MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4455 struct VolumeDiskHeader diskHdr;
4457 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHdr);
4459 /* volume probably does not exist; no need to bring back online */
4462 AskOnline(salvinfo, volumeId);
/*
 * AskOnline: ask the fileserver to put a volume back online.
 *
 * Issues FSYNC_VOL_ON up to three times. Denials and protocol mismatches
 * are logged; no abort is visible on any path in this listing. Between
 * attempts FSYNC_clientFinis() is called.
 */
4466 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4470 for (i = 0; i < 3; i++) {
4471 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4472 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4474 if (code == SYNC_OK) {
4476 } else if (code == SYNC_DENIED) {
4477 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4478 } else if (code == SYNC_BAD_COMMAND) {
4479 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4481 Log("AskOnline: please make sure file server binaries are same version.\n");
4485 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
4486 FSYNC_clientFinis();
4493 AskDelete(struct SalvInfo *salvinfo, VolumeId volumeId)
4498 for (i = 0; i < 3; i++) {
4499 memset(&res, 0, sizeof(res));
4500 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4501 FSYNC_VOL_DONE, FSYNC_SALVAGE, &res);
4503 if (code == SYNC_OK) {
4505 } else if (code == SYNC_DENIED) {
4506 Log("AskOnline: file server denied DONE request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4507 } else if (code == SYNC_BAD_COMMAND) {
4508 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4511 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4512 Log("AskOnline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4514 Log("AskOnline: fileserver is DAFS but we are not.\n");
4517 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4518 Log("AskOnline: fileserver is not DAFS but we are.\n");
4520 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4524 } else if (code == SYNC_FAILED &&
4525 (res.hdr.reason == FSYNC_UNKNOWN_VOLID ||
4526 res.hdr.reason == FSYNC_WRONG_PART)) {
4527 /* volume is already effectively 'deleted' */
4531 Log("AskOnline: request for fileserver to delete volume failed; trying again...\n");
4532 FSYNC_clientFinis();
/*
 * CopyInode: copy the contents of inode1 into inode2 on the given device.
 *
 * Both inodes are opened through inode handles built with rwvolume; data is
 * moved in buf-sized chunks with FDH_PREAD and FDH_PWRITE at the running
 * offset, and every write and the final read result are asserted.
 *
 * NOTE(review): srcFdP is asserted non-NULL after IH_OPEN but destFdP is
 * not, so a failed open of the destination is only caught indirectly by the
 * write assertion. The buffer declaration, offset update and handle release
 * are on lines missing from this listing.
 */
4539 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4541 /* Volume parameter is passed in case iopen is upgraded in future to
4542 * require a volume Id to be passed
4545 IHandle_t *srcH, *destH;
4546 FdHandle_t *srcFdP, *destFdP;
4548 afs_foff_t size = 0;
4550 IH_INIT(srcH, device, rwvolume, inode1);
4551 srcFdP = IH_OPEN(srcH);
4552 osi_Assert(srcFdP != NULL);
4553 IH_INIT(destH, device, rwvolume, inode2);
4554 destFdP = IH_OPEN(destH);
4555 while ((nBytes = FDH_PREAD(srcFdP, buf, sizeof(buf), size)) > 0) {
4556 osi_Assert(FDH_PWRITE(destFdP, buf, nBytes, size) == nBytes);
4559 osi_Assert(nBytes == 0);
4560 FDH_REALLYCLOSE(srcFdP);
4561 FDH_REALLYCLOSE(destFdP);
4568 PrintInodeList(struct SalvInfo *salvinfo)
4570 struct ViceInodeInfo *ip;
4571 struct ViceInodeInfo *buf;
4574 afs_sfsize_t st_size;
4576 st_size = OS_SIZE(salvinfo->inodeFd);
4577 osi_Assert(st_size >= 0);
4578 buf = (struct ViceInodeInfo *)malloc(st_size);
4579 osi_Assert(buf != NULL);
4580 nInodes = st_size / sizeof(struct ViceInodeInfo);
4581 osi_Assert(OS_READ(salvinfo->inodeFd, buf, st_size) == st_size);
4582 for (ip = buf; nInodes--; ip++) {
4583 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
4584 PrintInode(stmp, ip->inodeNumber), ip->linkCount,
4585 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
4586 ip->u.param[2], ip->u.param[3]);
4592 PrintInodeSummary(struct SalvInfo *salvinfo)
4595 struct InodeSummary *isp;
4597 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4598 isp = &salvinfo->inodeSummary[i];
4599 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
4604 PrintVolumeSummary(struct SalvInfo *salvinfo)
4607 struct VolumeSummary *vsp;
4609 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; vsp++, i++) {
4610 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
4620 osi_Assert(0); /* Fork is never executed in the NT code path */
4624 #ifdef AFS_DEMAND_ATTACH_FS
4625 if ((f == 0) && (programType == salvageServer)) {
4626 /* we are a salvageserver child */
4627 #ifdef FSSYNC_BUILD_CLIENT
4628 VChildProcReconnectFS_r();
4630 #ifdef SALVSYNC_BUILD_CLIENT
4634 #endif /* AFS_DEMAND_ATTACH_FS */
4635 #endif /* !AFS_NT40_ENV */
4645 #ifdef AFS_DEMAND_ATTACH_FS
4646 if (programType == salvageServer) {
4647 #ifdef SALVSYNC_BUILD_CLIENT
4650 #ifdef FSSYNC_BUILD_CLIENT
4654 #endif /* AFS_DEMAND_ATTACH_FS */
4657 if (main_thread != pthread_self())
4658 pthread_exit((void *)code);
4671 pid = wait(&status);
4672 osi_Assert(pid != -1);
4673 if (WCOREDUMP(status))
4674 Log("\"%s\" core dumped!\n", prog);
4675 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp - format a time as local "MM/DD/YYYY HH:MM[:SS]".
 *
 * clock      time to format
 * precision  non-zero to include seconds, zero to stop at minutes
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call, so the result must be consumed (or copied) before calling again.
 * If the time cannot be converted to local time the returned string is
 * empty.
 */
char *
TimeStamp(time_t clock, int precision)
{
    struct tm *lt;
    static char timestamp[20];

    lt = localtime(&clock);
    if (lt == NULL) {
        /* localtime() may fail for unrepresentable times; handing NULL to
         * strftime() would be undefined behaviour */
        timestamp[0] = '\0';
        return timestamp;
    }

    if (precision)
        (void)strftime(timestamp, sizeof(timestamp), "%m/%d/%Y %H:%M:%S", lt);
    else
        (void)strftime(timestamp, sizeof(timestamp), "%m/%d/%Y %H:%M", lt);
    return timestamp;
}
4694 CheckLogFile(char * log_path)
4696 char oldSlvgLog[AFSDIR_PATH_MAX];
4698 #ifndef AFS_NT40_ENV
4705 strcpy(oldSlvgLog, log_path);
4706 strcat(oldSlvgLog, ".old");
4708 renamefile(log_path, oldSlvgLog);
4709 logFile = afs_fopen(log_path, "a");
4711 if (!logFile) { /* still nothing, use stdout */
4715 #ifndef AFS_NAMEI_ENV
4716 AFS_DEBUG_IOPS_LOG(logFile);
4721 #ifndef AFS_NT40_ENV
4723 TimeStampLogFile(char * log_path)
4725 char stampSlvgLog[AFSDIR_PATH_MAX];
4730 lt = localtime(&now);
4731 snprintf(stampSlvgLog, sizeof stampSlvgLog,
4732 "%s.%04d-%02d-%02d.%02d:%02d:%02d", log_path,
4733 lt->tm_year + 1900, lt->tm_mon + 1, lt->tm_mday, lt->tm_hour,
4734 lt->tm_min, lt->tm_sec);
4736 /* try to link the logfile to a timestamped filename */
4737 /* if it fails, oh well, nothing we can do */
4738 link(log_path, stampSlvgLog);
4747 #ifndef AFS_NT40_ENV
4749 printf("Can't show log since using syslog.\n");
4760 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
4763 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
4766 while (fgets(line, sizeof(line), logFile))
4773 Log(const char *format, ...)
4779 va_start(args, format);
4780 vsnprintf(tmp, sizeof tmp, format, args);
4782 #ifndef AFS_NT40_ENV
4784 syslog(LOG_INFO, "%s", tmp);
4788 gettimeofday(&now, NULL);
4789 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
4795 Abort(const char *format, ...)
4800 va_start(args, format);
4801 vsnprintf(tmp, sizeof tmp, format, args);
4803 #ifndef AFS_NT40_ENV
4805 syslog(LOG_INFO, "%s", tmp);
4809 fprintf(logFile, "%s", tmp);
/*
 * ToString - return a freshly malloc'ed copy of the string s.
 *
 * Allocation failure is fatal through osi_Assert, so the result is never
 * NULL.  The caller owns the returned memory.
 */
char *
ToString(const char *s)
{
    size_t nbytes = strlen(s) + 1;	/* include the terminating NUL */
    char *copy = malloc(nbytes);

    osi_Assert(copy != NULL);
    memcpy(copy, s, nbytes);
    return copy;
}
4830 /* Remove the FORCESALVAGE file */
4832 RemoveTheForce(char *path)
4835 struct afs_stat_st force; /* so we can use afs_stat to find it */
4836 strcpy(target,path);
4837 strcat(target,"/FORCESALVAGE");
4838 if (!Testing && ForceSalvage) {
4839 if (afs_stat(target,&force) == 0) unlink(target);
4843 #ifndef AFS_AIX32_ENV
4845 * UseTheForceLuke - see if we can use the force
4848 UseTheForceLuke(char *path)
4850 struct afs_stat_st force;
4852 strcpy(target,path);
4853 strcat(target,"/FORCESALVAGE");
4855 return (afs_stat(target, &force) == 0);
4859 * UseTheForceLuke - see if we can use the force
4862 * The VRMIX fsck will not muck with the filesystem it is supposedly
4863 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4864 * muck directly with the root inode, which is within the normal
4866 * ListViceInodes() has a side effect of setting ForceSalvage if
4867 * it detects a need, based on root inode examination.
4870 UseTheForceLuke(char *path)
4873 return 0; /* sorry OB1 */
4878 /* NT support routines */
4880 static char execpathname[MAX_PATH];
4882 nt_SalvagePartition(char *partName, int jobn)
4887 if (!*execpathname) {
4888 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
4889 if (!n || n == 1023)
4892 job.cj_magic = SALVAGER_MAGIC;
4893 job.cj_number = jobn;
4894 (void)strcpy(job.cj_part, partName);
4895 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
4900 nt_SetupPartitionSalvage(void *datap, int len)
4902 childJob_t *jobp = (childJob_t *) datap;
4903 char logname[AFSDIR_PATH_MAX];
4905 if (len != sizeof(childJob_t))
4907 if (jobp->cj_magic != SALVAGER_MAGIC)
4912 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
4914 logFile = afs_fopen(logname, "w");
4922 #endif /* AFS_NT40_ENV */