2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
89 #include <afs/procmgmt.h>
92 #ifdef HAVE_SYS_FILE_H
93 # include <sys/file.h>
97 #include <WINNT/afsevent.h>
100 #define WCOREDUMP(x) ((x) & 0200)
103 #include <afs/afsint.h>
104 #include <afs/afs_assert.h>
105 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
106 #if defined(AFS_VFSINCL_ENV)
107 #include <sys/vnode.h>
109 #include <sys/fs/ufs_inode.h>
111 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
112 #include <ufs/ufs/dinode.h>
113 #include <ufs/ffs/fs.h>
115 #include <ufs/inode.h>
118 #else /* AFS_VFSINCL_ENV */
120 #include <ufs/inode.h>
121 #else /* AFS_OSF_ENV */
122 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_DARWIN_ENV)
123 #include <sys/inode.h>
126 #endif /* AFS_VFSINCL_ENV */
127 #endif /* AFS_SGI_ENV */
130 #include <sys/lockf.h>
133 #include <checklist.h>
135 #if defined(AFS_SGI_ENV)
138 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
140 #include <sys/mnttab.h>
141 #include <sys/mntent.h>
146 #endif /* AFS_SGI_ENV */
147 #endif /* AFS_HPUX_ENV */
151 #include <afs/osi_inode.h>
155 #include <afs/afsutil.h>
156 #include <afs/fileutil.h>
161 #include <afs/afssyscalls.h>
165 #include "partition.h"
166 #include "daemon_com.h"
167 #include "daemon_com_inline.h"
169 #include "volume_inline.h"
170 #include "salvsync.h"
171 #include "viceinode.h"
173 #include "volinodes.h" /* header magic number, etc. stuff */
174 #include "vol-salvage.h"
176 #include "vol_internal.h"
178 #include <afs/prs_fs.h>
180 #ifdef FSSYNC_BUILD_CLIENT
181 #include "vg_cache.h"
189 extern void *calloc();
191 static char *TimeStamp(time_t clock, int precision);
194 int debug; /* -d flag */
195 extern int Testing; /* -n flag */
196 int ListInodeOption; /* -i flag */
197 int ShowRootFiles; /* -r flag */
198 int RebuildDirs; /* -sal flag */
199 int Parallel = 4; /* -para X flag */
200 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
201 int forceR = 0; /* -b flag */
202 int ShowLog = 0; /* -showlog flag */
203 int ShowSuid = 0; /* -showsuid flag */
204 int ShowMounts = 0; /* -showmounts flag */
205 int orphans = ORPH_IGNORE; /* -orphans option */
210 int useSyslog = 0; /* -syslog flag */
211 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
220 #define MAXPARALLEL 32
222 int OKToZap; /* -o flag */
223 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
224 * in the volume header */
226 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
228 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
231 * information that is 'global' to a particular salvage job.
234 Device fileSysDevice; /**< The device number of the current partition
236 char fileSysPath[8]; /**< The path of the mounted partition currently
237 * being salvaged, i.e. the directory containing
238 * the volume headers */
239 char *fileSysPathName; /**< NT needs this to make name pretty log. */
240 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
241 int VGLinkH_cnt; /**< # of references to lnk handle. */
242 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
245 char *fileSysDeviceName; /**< The block device where the file system being
246 * salvaged was mounted */
247 char *filesysfulldev;
249 int VolumeChanged; /**< Set by any routine which would change the
250 * volume in a way which would require callbacks
251 * to be broken if the volume was put back on
252 * line by an active file server */
254 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
255 * header dealt with */
257 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
258 FD_t inodeFd; /**< File descriptor for inode file */
260 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
261 int nVolumes; /**< Number of volumes (read-write and read-only)
262 * in volume summary */
263 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
266 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
267 * vnodes in the volume that
268 * we are currently looking
270 int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
271 * to contact the fileserver over FSYNC */
278 /* Forward declarations */
279 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
280 static int AskVolumeSummary(struct SalvInfo *salvinfo,
281 VolumeId singleVolumeNumber);
282 static void MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId);
283 static void AskError(struct SalvInfo *salvinfo, VolumeId volumeId);
285 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
286 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
287 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
289 /* Uniquifier stored in the Inode */
294 return (u & 0x3fffff);
296 #if defined(AFS_SGI_EXMAG)
297 return (u & SGI_UNIQMASK);
300 #endif /* AFS_SGI_EXMAG */
307 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
309 return 0; /* otherwise may be transient, e.g. EMFILE */
314 char *save_args[MAX_ARGS];
316 extern pthread_t main_thread;
317 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
321 * Get the salvage lock if not already held. Hold until process exits.
323 * @param[in] locktype READ_LOCK or WRITE_LOCK
/*
 * Initialise a VLockFile on AFSDIR_SERVER_SLVGLOCK_FILEPATH and try to take
 * it with the requested lock type through VLockFileLock.
 *
 * locktype is passed straight through to VLockFileLock.
 *
 * The two visible message fragments distinguish the case where another
 * salvager appears to be running from a generic error code returned while
 * acquiring the lock.
 *
 * NOTE(review): the declarations of offset, nonblock and code, and the
 * branches that emit the messages, are not visible in this listing.
 */
326 _ObtainSalvageLock(int locktype)
328 struct VLockFile salvageLock;
333 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
335 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
338 "salvager: There appears to be another salvager running! "
343 "salvager: Error %d trying to acquire salvage lock! "
/* Take the salvage lock as WRITE_LOCK by delegating to _ObtainSalvageLock. */
349 ObtainSalvageLock(void)
351 _ObtainSalvageLock(WRITE_LOCK);
/* Take the salvage lock as READ_LOCK by delegating to _ObtainSalvageLock. */
354 ObtainSharedSalvageLock(void)
356 _ObtainSalvageLock(READ_LOCK);
360 #ifdef AFS_SGI_XFS_IOPS_ENV
361 /* Check if the given partition is mounted. For XFS, the root inode is not a
362 * constant. So we check the hard way.
/*
 * Walk the mount table opened with setmntent(MOUNTED) and compare each
 * entry mnt_dir against part with strcmp.
 *
 * NOTE(review): the visible return statement yields 1 on both arms of the
 * conditional, so the result does not depend on whether a matching entry
 * was found. Confirm whether the second arm was meant to be 0.
 */
365 IsPartitionMounted(char *part)
368 struct mntent *mntent;
370 osi_Assert(mntfp = setmntent(MOUNTED, "r"));
371 while (mntent = getmntent(mntfp)) {
372 if (!strcmp(part, mntent->mnt_dir))
377 return mntent ? 1 : 1;
380 /* Check if the given inode is the root of the filesystem. */
381 #ifndef AFS_SGI_XFS_IOPS_ENV
/*
 * Report whether the supplied stat record refers to the file system root:
 * the visible return compares status->st_ino with ROOTINODE.
 */
383 IsRootInode(struct afs_stat_st *status)
386 * The root inode is not a fixed value in XFS partitions. So we need to
387 * see if the partition is in the list of mounted partitions. This only
388 * affects the SalvageFileSys path, so we check there.
390 return (status->st_ino == ROOTINODE);
395 #ifndef AFS_NAMEI_ENV
396 /* We don't want to salvage big files filesystems, since we can't put volumes on
/*
 * Inspect the superblock of the device backing mountPoint.
 *
 * The device name is normalised into name: when devName does not start with
 * /dev/ the prefix is added, otherwise it is copied unchanged. The superblock
 * is then read with ReadSuper; a read failure and a positive answer from
 * IsBigFilesFileSystem are each reported through Log.
 *
 * NOTE(review): the declaration of name and the return statements are not
 * visible here. Both sprintf and strcpy into name are unbounded, so the
 * buffer must be large enough for any devName the caller supplies.
 */
400 CheckIfBigFilesFS(char *mountPoint, char *devName)
402 struct superblock fs;
405 if (strncmp(devName, "/dev/", 5)) {
406 (void)sprintf(name, "/dev/%s", devName);
408 (void)strcpy(name, devName);
411 if (ReadSuper(&fs, name) < 0) {
412 Log("Unable to read superblock. Not salvaging partition %s.\n",
416 if (IsBigFilesFileSystem(&fs)) {
417 Log("Partition %s is a big files filesystem, not salvaging.\n",
427 #define HDSTR "\\Device\\Harddisk"
428 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
/*
 * Decide whether two partitions resolve to the same device string.
 *
 * Each partition devName is resolved with QueryDosDevice into res1 / res2
 * (at most RES_LEN - 1 characters). When a result does not begin with HDSTR
 * a warning is logged; the static dowarn flag is presumably used to limit
 * how often that happens (its use is not visible here). The final answer is
 * nonzero when the two resolved strings compare equal, ignoring case, over
 * RES_LEN - 1 characters.
 *
 * NOTE(review): the values returned when QueryDosDevice fails or the prefix
 * check fails are not visible in this listing.
 */
430 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
436 static int dowarn = 1;
438 if (!QueryDosDevice(p1->devName, res1, RES_LEN - 1))
440 if (strncmp(res1, HDSTR, HDLEN)) {
443 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
444 res1, HDSTR, p1->devName);
447 if (!QueryDosDevice(p2->devName, res2, RES_LEN - 1))
449 if (strncmp(res2, HDSTR, HDLEN)) {
452 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
453 res2, HDSTR, p2->devName);
457 return (0 == _strnicmp(res1, res2, RES_LEN - 1));
460 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
463 /* This assumes that two partitions with the same device number divided by
464 * PartsPerDisk are on the same disk.
/*
 * Schedule the salvage of one partition, or drain all outstanding salvage
 * jobs when partP is null.
 *
 * Visible behaviour:
 *  - With a partition, a struct job is allocated, zeroed and tagged with the
 *    partition and the current jobcount as its job number.
 *  - With no partition and jobcount equal to 0, a message is logged saying no
 *    partitions with VICE_PARTITION_PREFIX were found.
 *  - When debug is set or Parallel is 1, the partition is salvaged directly
 *    through SalvageFileSys.
 *  - Otherwise a job whose partition is on the same disk (SameDisk) as an
 *    entry of the static jobs array is chained behind that entry via nextjob.
 *  - The main loop waits with wait() whenever numjobs has reached Parallel or
 *    the caller is draining, asserts that the reaped pid belongs to a known
 *    job, logs a core dump if WCOREDUMP says so, frees the finished job and
 *    advances that slot to the next job on the same disk.
 *  - A started job writes to its own log file, named from
 *    AFSDIR_SERVER_SLVGLOG_FILEPATH plus the job number unless syslog is in
 *    use, and runs SalvageFileSys1.
 *  - After draining, unless useSyslog is set, each per-job log file is read
 *    back line by line and then unlinked.
 *
 * NOTE(review): the fork call, the child/parent split and several branch
 * lines are not visible in this listing; the job start presumably happens in
 * a forked child on non-NT builds.
 */
467 SalvageFileSysParallel(struct DiskPartition64 *partP)
470 struct DiskPartition64 *partP;
471 int pid; /* Pid for this job */
472 int jobnumb; /* Log file job number */
473 struct job *nextjob; /* Next partition on disk to salvage */
475 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
476 struct job *thisjob = 0;
477 static int numjobs = 0;
478 static int jobcount = 0;
484 char logFileName[256];
488 /* We have a partition to salvage. Copy it into thisjob */
489 thisjob = (struct job *)malloc(sizeof(struct job));
491 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
494 memset(thisjob, 0, sizeof(struct job));
495 thisjob->partP = partP;
496 thisjob->jobnumb = jobcount;
498 } else if (jobcount == 0) {
499 /* We are asking to wait for all jobs (partp == 0), yet we never
502 Log("No file system partitions named %s* found; not salvaged\n",
503 VICE_PARTITION_PREFIX);
507 if (debug || Parallel == 1) {
509 SalvageFileSys(thisjob->partP, 0);
516 /* Check to see if thisjob is for a disk that we are already
517 * salvaging. If it is, link it in as the next job to do. The
518 * jobs array has 1 entry per disk being salvaged. numjobs is
519 * the total number of disks currently being salvaged. In
520 * order to keep the jobs array compact, when a disk is
521 * completed, the highest element in the jobs array is moved
522 * down to the now open slot.
524 for (j = 0; j < numjobs; j++) {
525 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
526 /* On same disk, add it to this list and return */
527 thisjob->nextjob = jobs[j]->nextjob;
528 jobs[j]->nextjob = thisjob;
535 /* Loop until we start thisjob or until all existing jobs are finished */
536 while (thisjob || (!partP && (numjobs > 0))) {
537 startjob = -1; /* No new job to start */
539 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
540 /* Either the max jobs are running or we have to wait for all
541 * the jobs to finish. In either case, we wait for at least one
542 * job to finish. When it's done, clean up after it.
544 pid = wait(&wstatus);
545 osi_Assert(pid != -1);
546 for (j = 0; j < numjobs; j++) { /* Find which job it is */
547 if (pid == jobs[j]->pid)
550 osi_Assert(j < numjobs);
551 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
552 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
555 numjobs--; /* job no longer running */
556 oldjob = jobs[j]; /* remember */
557 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
558 free(oldjob); /* free the old job */
560 /* If there is another partition on the disk to salvage, then
561 * say we will start it (startjob). If not, then put thisjob there
562 * and say we will start it.
564 if (jobs[j]) { /* Another partition to salvage */
565 startjob = j; /* Will start it */
566 } else { /* There is not another partition to salvage */
568 jobs[j] = thisjob; /* Add thisjob */
570 startjob = j; /* Will start it */
572 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
573 startjob = -1; /* Don't start it - already running */
577 /* We don't have to wait for a job to complete */
579 jobs[numjobs] = thisjob; /* Add this job */
581 startjob = numjobs; /* Will start it */
585 /* Start up a new salvage job on a partition in job slot "startjob" */
586 if (startjob != -1) {
588 Log("Starting salvage of file system partition %s\n",
589 jobs[startjob]->partP->name);
591 /* For NT, we not only fork, but re-exec the salvager. Pass in the
592 * commands and pass the child job number via the data path.
595 nt_SalvagePartition(jobs[startjob]->partP->name,
596 jobs[startjob]->jobnumb);
597 jobs[startjob]->pid = pid;
602 jobs[startjob]->pid = pid;
608 for (fd = 0; fd < 16; fd++)
615 openlog("salvager", LOG_PID, useSyslogFacility);
619 snprintf(logFileName, sizeof logFileName, "%s.%d",
620 AFSDIR_SERVER_SLVGLOG_FILEPATH,
621 jobs[startjob]->jobnumb);
622 logFile = afs_fopen(logFileName, "w");
627 SalvageFileSys1(jobs[startjob]->partP, 0);
632 } /* while ( thisjob || (!partP && numjobs > 0) ) */
634 /* If waited for all jobs to complete, now collect log files and return */
636 if (!useSyslog) /* if syslogging - no need to collect */
639 for (i = 0; i < jobcount; i++) {
640 snprintf(logFileName, sizeof logFileName, "%s.%d",
641 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
642 if ((passLog = afs_fopen(logFileName, "r"))) {
643 while (fgets(buf, sizeof(buf), passLog)) {
648 (void)unlink(logFileName);
/*
 * Salvage one partition (or a single volume on it) through SalvageFileSys1.
 *
 * SalvageFileSys1 runs in the current process when canfork is false or debug
 * is set, and otherwise in the process where Fork() returns 0. The Wait call
 * tagged SalvageFileSys is presumably the parent side of that fork; the
 * else line separating the two paths is not visible in this listing.
 */
657 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
659 if (!canfork || debug || Fork() == 0) {
660 SalvageFileSys1(partP, singleVolumeNumber);
661 if (canfork && !debug) {
666 Wait("SalvageFileSys");
/*
 * Reduce the path held in pbuffer to its last component.
 *
 * pbuffer is first copied into the local 128-byte pbuf, and the last
 * OS_DIRSEPC is located in both pbuf and pbuffer. pbuffer is then overwritten
 * in place with the text that follows its last separator.
 *
 * NOTE(review): how wpath is filled and what is returned is not visible in
 * this listing.
 * NOTE(review): strcpy(pbuf, pbuffer) is unbounded, so pbuffer must hold
 * fewer than 128 characters.
 * NOTE(review): strcpy(pbuffer, ptr + 1) copies between overlapping regions
 * of the same array, which the C standard leaves undefined; memmove would be
 * the well-defined equivalent.
 */
670 get_DevName(char *pbuffer, char *wpath)
672 char pbuf[128], *ptr;
673 strcpy(pbuf, pbuffer);
674 ptr = (char *)strrchr(pbuf, OS_DIRSEPC);
680 ptr = (char *)strrchr(pbuffer, OS_DIRSEPC);
682 strcpy(pbuffer, ptr + 1);
/*
 * Salvage every volume group on partP, or only the group containing
 * singleVolumeNumber when that argument is nonzero.
 *
 * Visible steps, in order:
 *  1. Zero a local SalvInfo, close any previously opened inode file and abort
 *     once tries exceeds VOL_MAX_CHECKOUT_RETRIES.
 *  2. Record the partition, its device and its path in salvinfo; derive the
 *     device name with get_DevName on one branch.
 *  3. Single-volume mode: lock the partition (non-DAFS builds only), connect
 *     to the file server unless running as salvageServer, set useFSYNC,
 *     AskOffline the volume and, on DAFS builds, LockVolume it. Otherwise
 *     clear useFSYNC and lock the whole partition.
 *  4. Set ForceSalvage from UseTheForceLuke and log the start banner.
 *  5. Remove leftover salvage.inodes.* and salvage.temp.* files found in the
 *     partition directory.
 *  6. Create the inode list file, unlink it straight away, and fill it through
 *     GetInodeSummary.
 *  7. With ListInodeOption, print the inode list; in single-volume mode call
 *     MaybeAskOnline.
 *  8. Load the volume summary, then walk inodeSummary grouped by RWvolumeId:
 *     headers with no matching inodes go to DeleteExtraVolumeHeaderFile, each
 *     inode summary is paired with its volume summary, and every group is
 *     handed to SalvageVolumeGroup.
 *  9. In whole-partition mode, remove the FORCESALVAGE marker with
 *     RemoveTheForce.
 * 10. When not Testing and in single-volume mode, bring volumes back online:
 *     singleVolumeNumber first (or AskDelete if it is absent from the
 *     summary), then every other non-deleted volume.
 * 11. Log completion and close the inode file.
 *
 * NOTE(review): many lines (retry label, declarations, returns, preprocessor
 * conditionals) are not visible in this listing.
 * NOTE(review): sprintf into fileSysPath, strcpy into tmpDevName and the
 * strcpy/strcat sequence into npath are unbounded, and strncpy with 255 does
 * not guarantee termination of inodeListPath.
 */
689 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
692 char inodeListPath[256];
693 FD_t inodeFile = INVALID_FD;
694 static char tmpDevName[100];
695 static char wpath[100];
696 struct VolumeSummary *vsp, *esp;
700 struct SalvInfo l_salvinfo;
701 struct SalvInfo *salvinfo = &l_salvinfo;
704 memset(salvinfo, 0, sizeof(*salvinfo));
707 if (inodeFile != INVALID_FD) {
709 inodeFile = INVALID_FD;
711 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
712 Abort("Raced too many times with fileserver restarts while trying to "
713 "checkout/lock volumes; Aborted\n");
715 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
717 /* unlock all previous volume locks, since we're about to lock them
719 VLockFileReinit(&partP->volLockFile);
721 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
723 salvinfo->fileSysPartition = partP;
724 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
725 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
728 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
729 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
730 name = partP->devName;
732 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
733 strcpy(tmpDevName, partP->devName);
734 name = get_DevName(tmpDevName, wpath);
735 salvinfo->fileSysDeviceName = name;
736 salvinfo->filesysfulldev = wpath;
739 if (singleVolumeNumber) {
740 #if !(defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL))
741 /* only non-DAFS locks the partition when salvaging a single volume;
742 * DAFS will lock the individual volumes in the VG */
743 VLockPartition(partP->name);
744 #endif /* !(AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL) */
748 /* salvageserver already setup fssync conn for us */
749 if ((programType != salvageServer) && !VConnectFS()) {
750 Abort("Couldn't connect to file server\n");
753 salvinfo->useFSYNC = 1;
754 AskOffline(salvinfo, singleVolumeNumber);
755 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
756 if (LockVolume(salvinfo, singleVolumeNumber)) {
759 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
762 salvinfo->useFSYNC = 0;
763 VLockPartition(partP->name);
767 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
770 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
771 partP->name, name, (Testing ? "(READONLY mode)" : ""));
773 Log("***Forced salvage of all volumes on this partition***\n");
778 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
785 osi_Assert((dirp = opendir(salvinfo->fileSysPath)) != NULL);
786 while ((dp = readdir(dirp))) {
787 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
788 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
790 Log("Removing old salvager temp files %s\n", dp->d_name);
791 strcpy(npath, salvinfo->fileSysPath);
792 strcat(npath, OS_DIRSEP);
793 strcat(npath, dp->d_name);
799 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
801 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
802 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
804 snprintf(inodeListPath, 255, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
808 inodeFile = OS_OPEN(inodeListPath, O_RDWR|O_TRUNC|O_CREAT, 0666);
809 if (inodeFile == INVALID_FD) {
810 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
813 /* Using nt_unlink here since we're really using the delete on close
814 * semantics of unlink. In most places in the salvager, we really do
815 * mean to unlink the file at that point. Those places have been
816 * modified to actually do that so that the NT crt can be used there.
818 * jaltman - On NT delete on close cannot be applied to a file while the
819 * process has an open file handle that does not have DELETE file
820 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
821 * delete privileges. As a result the nt_unlink() call will always
824 code = nt_unlink(inodeListPath);
826 code = unlink(inodeListPath);
829 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
832 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
836 salvinfo->inodeFd = inodeFile;
837 if (salvinfo->inodeFd == INVALID_FD)
838 Abort("Temporary file %s is missing...\n", inodeListPath);
839 OS_SEEK(salvinfo->inodeFd, 0L, SEEK_SET);
840 if (ListInodeOption) {
841 PrintInodeList(salvinfo);
842 if (singleVolumeNumber) {
843 /* We've checked out the volume from the fileserver, and we need
844 * to give it back. We don't know if the volume exists or not,
845 * so we don't know whether to AskOnline or not. Try to determine
846 * if the volume exists by trying to read the volume header, and
847 * AskOnline if it is readable. */
848 MaybeAskOnline(salvinfo, singleVolumeNumber);
852 /* enumerate volumes in the partition.
853 * figure out sets of read-only + rw volumes.
854 * salvage each set, read-only volumes first, then read-write.
855 * Fix up inodes on last volume in set (whether it is read-write
858 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
862 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
863 i < salvinfo->nVolumesInInodeFile; i = j) {
864 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
866 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
868 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
869 struct VolumeSummary *tsp;
870 /* Scan volume list (from partition root directory) looking for the
871 * current rw volume number in the volume list from the inode scan.
872 * If there is one here that is not in the inode volume list,
874 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
876 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
878 /* Now match up the volume summary info from the root directory with the
879 * entry in the volume list obtained from scanning inodes */
880 salvinfo->inodeSummary[j].volSummary = NULL;
881 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
882 if (tsp->header.id == vid) {
883 salvinfo->inodeSummary[j].volSummary = tsp;
889 /* Salvage the group of volumes (several read-only + 1 read/write)
890 * starting with the current read-only volume we're looking at.
892 SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
895 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
896 for (; vsp < esp; vsp++) {
898 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
901 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
902 RemoveTheForce(salvinfo->fileSysPath);
904 if (!Testing && singleVolumeNumber) {
906 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
907 /* unlock vol headers so the fs can attach them when we AskOnline */
908 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
909 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
911 /* Step through the volumeSummary list and set all volumes on-line.
912 * Most volumes were taken off-line in GetVolumeSummary.
913 * If a volume was deleted, don't tell the fileserver anything, since
914 * we already told the fileserver the volume was deleted back when we
915 * destroyed the volume header.
916 * Also, make sure we bring the singleVolumeNumber back online first.
919 for (j = 0; j < salvinfo->nVolumes; j++) {
920 if (salvinfo->volumeSummaryp[j].header.id == singleVolumeNumber) {
922 if (!salvinfo->volumeSummaryp[j].deleted) {
923 AskOnline(salvinfo, singleVolumeNumber);
929 /* If singleVolumeNumber is not in our volumeSummary, it means that
930 * at least one other volume in the VG is on the partition, but the
931 * RW volume is not. We've already AskOffline'd it by now, though,
932 * so make sure we don't still have the volume checked out. */
933 AskDelete(salvinfo, singleVolumeNumber);
936 for (j = 0; j < salvinfo->nVolumes; j++) {
937 if (salvinfo->volumeSummaryp[j].header.id != singleVolumeNumber) {
938 if (!salvinfo->volumeSummaryp[j].deleted) {
939 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
945 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
946 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
949 OS_CLOSE(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
/*
 * Dispose of a volume header file that has no inode data behind it.
 *
 * The full path is built from salvinfo->fileSysPath and vsp->fileName and the
 * situation is logged; the message wording depends on Testing. The header is
 * then destroyed with VDestroyVolumeDiskHeader (failures are logged), the
 * file itself is unlinked with ENOENT treated as success, and when
 * salvinfo->useFSYNC is set AskDelete is called for the volume id.
 *
 * NOTE(review): the declaration of path and the conditions guarding the
 * destructive steps are not visible in this listing; sprintf into path is
 * unbounded.
 */
953 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
956 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, vsp->fileName);
959 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
962 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
964 Log("Error %ld destroying volume disk header for volume %lu\n",
965 afs_printable_int32_ld(code),
966 afs_printable_uint32_lu(vsp->header.id));
969 /* make sure we actually delete the fileName file; ENOENT
970 * is fine, since VDestroyVolumeDiskHeader probably already
972 if (unlink(path) && errno != ENOENT) {
973 Log("Unable to unlink %s (errno = %d)\n", path, errno);
975 if (salvinfo->useFSYNC) {
976 AskDelete(salvinfo, vsp->header.id);
/*
 * Ordering callback over struct ViceInodeInfo records (both arguments are
 * cast to that type).
 *
 * Visible ordering rules:
 *  - If either record is special (vnodeNumber equal to INODESPECIAL), a
 *    read-write volume id is derived for each side: parentId for a special
 *    record, volumeId otherwise. Two special records of the same volume are
 *    ordered by special type; the remaining visible branches order records by
 *    whether their volumeId equals their derived read-write id and by
 *    volumeId.
 *  - Otherwise records are ordered by volumeId, then by vnodeNumber.
 *  - Ties are then broken on vnodeUniquifier and inodeDataVersion with the
 *    sense reversed so the most desirable inode comes first. Under
 *    AFS_3DISPARES and AFS_SGI_EXMAG an extra test treats a value above 90
 *    percent of the range paired with one below 10 percent as a special case
 *    (presumably counter wrap-around).
 *
 * NOTE(review): most return statements and the comparison of the derived
 * read-write ids are not visible in this listing.
 */
984 CompareInodes(const void *_p1, const void *_p2)
986 const struct ViceInodeInfo *p1 = _p1;
987 const struct ViceInodeInfo *p2 = _p2;
988 if (p1->u.vnode.vnodeNumber == INODESPECIAL
989 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
990 VolumeId p1rwid, p2rwid;
992 (p1->u.vnode.vnodeNumber ==
993 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
995 (p2->u.vnode.vnodeNumber ==
996 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1001 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1002 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
1003 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1004 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1005 if (p1->u.vnode.volumeId == p1rwid)
1007 if (p2->u.vnode.volumeId == p2rwid)
1009 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1011 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1012 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1013 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1015 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1017 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1019 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1021 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1023 /* The following tests are reversed, so that the most desirable
1024 * of several similar inodes comes first */
1025 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1026 #ifdef AFS_3DISPARES
1027 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1028 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1031 #ifdef AFS_SGI_EXMAG
1032 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1033 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1038 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1039 #ifdef AFS_3DISPARES
1040 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1041 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1044 #ifdef AFS_SGI_EXMAG
1045 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1046 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1051 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1052 #ifdef AFS_3DISPARES
1053 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1054 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1057 #ifdef AFS_SGI_EXMAG
1058 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1059 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1064 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1065 #ifdef AFS_3DISPARES
1066 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1067 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1070 #ifdef AFS_SGI_EXMAG
1071 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1072 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/*
 * Summarise the leading run of entries in ip that share one volumeId.
 *
 * Starting from the first entry, the loop consumes at most maxInodes entries
 * while the volumeId stays the same. For a special entry (vnodeNumber equal to
 * INODESPECIAL) the parentId is taken as the read-write volume id; the largest
 * vnodeUniquifier seen is tracked. The totals are stored in summary: volumeId,
 * RWvolumeId, nInodes, nSpecialInodes and maxUniquifier.
 *
 * NOTE(review): the counters n, nSpecial and maxunique are declared and
 * advanced on lines not visible in this listing, as is the return statement.
 */
1081 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1082 struct InodeSummary *summary)
1084 VolumeId volume = ip->u.vnode.volumeId;
1085 VolumeId rwvolume = volume;
1090 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1092 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1094 rwvolume = ip->u.special.parentId;
1095 /* This isn't quite right, as there could (in error) be different
1096 * parent inodes in different special vnodes */
1098 if (maxunique < ip->u.vnode.vnodeUniquifier)
1099 maxunique = ip->u.vnode.vnodeUniquifier;
1103 summary->volumeId = volume;
1104 summary->RWvolumeId = rwvolume;
1105 summary->nInodes = n;
1106 summary->nSpecialInodes = nSpecial;
1107 summary->maxUniquifier = maxunique;
/*
 * Inode filter that accepts only inodes belonging to singleVolumeNumber.
 *
 * A special inode (vnodeNumber equal to INODESPECIAL) matches on its parentId;
 * any other inode matches on its volumeId. Returns nonzero on a match. The
 * rock argument is not used by the visible lines.
 */
1111 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1113 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1114 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1115 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1120 * Collect list of inodes in file named by path. If a truly fatal error,
1121 unlink the file and abort. For lesser errors, return -1. The file will
1122 * be unlinked by the caller.
1125 GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolumeNumber)
1129 struct ViceInodeInfo *ip, *ip_save;
1130 struct InodeSummary summary;
1131 char summaryFileName[50];
1132 FD_t summaryFile = INVALID_FD;
1134 char *dev = salvinfo->fileSysPath;
1135 char *wpath = salvinfo->fileSysPath;
1137 char *dev = salvinfo->fileSysDeviceName;
1138 char *wpath = salvinfo->filesysfulldev;
1140 char *part = salvinfo->fileSysPath;
1145 afs_sfsize_t st_size;
1147 /* This file used to come from vfsck; cobble it up ourselves now... */
1149 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1150 singleVolumeNumber ? OnlyOneVolume : 0,
1151 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1153 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1157 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1159 if (forceSal && !ForceSalvage) {
1160 Log("***Forced salvage of all volumes on this partition***\n");
1163 OS_SEEK(inodeFile, 0L, SEEK_SET);
1164 salvinfo->inodeFd = inodeFile;
1165 if (salvinfo->inodeFd == INVALID_FD ||
1166 (st_size = OS_SIZE(salvinfo->inodeFd)) == -1) {
1167 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1169 tdir = (tmpdir ? tmpdir : part);
1171 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1172 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp."));
1174 snprintf(summaryFileName, sizeof summaryFileName,
1175 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1177 summaryFile = OS_OPEN(summaryFileName, O_RDWR|O_APPEND|O_CREAT, 0666);
1178 if (summaryFile == INVALID_FD) {
1179 Abort("Unable to create inode summary file\n");
1183 /* Using nt_unlink here since we're really using the delete on close
1184 * semantics of unlink. In most places in the salvager, we really do
1185 * mean to unlink the file at that point. Those places have been
1186 * modified to actually do that so that the NT crt can be used there.
1188 * jaltman - As commented elsewhere, this cannot work because fopen()
1189 * does not open files with DELETE and FILE_SHARE_DELETE.
1191 code = nt_unlink(summaryFileName);
1193 code = unlink(summaryFileName);
1196 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1199 if (!canfork || debug || Fork() == 0) {
1200 int nInodes = st_size / sizeof(struct ViceInodeInfo);
1202 OS_CLOSE(summaryFile);
1203 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1204 RemoveTheForce(salvinfo->fileSysPath);
1206 struct VolumeSummary *vsp;
1210 GetVolumeSummary(salvinfo, singleVolumeNumber);
1212 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1213 if (vsp->fileName) {
1214 if (vsp->header.id == singleVolumeNumber) {
1217 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1223 MaybeAskOnline(salvinfo, singleVolumeNumber);
1225 /* make sure we get rid of stray .vol headers, even if
1226 * they're not in our volume summary (might happen if
1227 * e.g. something else created them and they're not in the
1228 * fileserver VGC) */
1229 VDestroyVolumeDiskHeader(salvinfo->fileSysPartition,
1230 singleVolumeNumber, 0 /*parent*/);
1231 AskDelete(salvinfo, singleVolumeNumber);
1235 Log("%s vice inodes on %s; not salvaged\n",
1236 singleVolumeNumber ? "No applicable" : "No", dev);
1241 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1243 OS_CLOSE(summaryFile);
1245 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1248 if (OS_READ(salvinfo->inodeFd, ip, st_size) != st_size) {
1249 OS_CLOSE(summaryFile);
1250 Abort("Unable to read inode table; %s not salvaged\n", dev);
1252 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1253 if (OS_SEEK(salvinfo->inodeFd, 0, SEEK_SET) == -1
1254 || OS_WRITE(salvinfo->inodeFd, ip, st_size) != st_size) {
1255 OS_CLOSE(summaryFile);
1256 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1261 CountVolumeInodes(ip, nInodes, &summary);
1262 if (OS_WRITE(summaryFile, &summary, sizeof(summary)) != sizeof(summary)) {
1263 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1264 OS_CLOSE(summaryFile);
1268 summary.index += (summary.nInodes);
1269 nInodes -= summary.nInodes;
1270 ip += summary.nInodes;
1273 ip = ip_save = NULL;
1274 /* Following fflush is not fclose, because if it was debug mode would not work */
1275 if (OS_SYNC(summaryFile) == -1) {
1276 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1277 OS_CLOSE(summaryFile);
1281 if (canfork && !debug) {
1286 if (Wait("Inode summary") == -1) {
1287 OS_CLOSE(summaryFile);
1288 Exit(1); /* salvage of this partition aborted */
1292 st_size = OS_SIZE(summaryFile);
1293 osi_Assert(st_size >= 0);
1296 salvinfo->inodeSummary = (struct InodeSummary *)malloc(st_size);
1297 osi_Assert(salvinfo->inodeSummary != NULL);
1298 /* For GNU we need to do lseek to get the file pointer moved. */
1299 osi_Assert(OS_SEEK(summaryFile, 0, SEEK_SET) == 0);
1300 ret = OS_READ(summaryFile, salvinfo->inodeSummary, st_size);
1301 osi_Assert(ret == st_size);
1303 salvinfo->nVolumesInInodeFile = st_size / sizeof(struct InodeSummary);
1304 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1305 salvinfo->inodeSummary[i].volSummary = NULL;
1307 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)st_size);
1308 OS_CLOSE(summaryFile);
1311 if (retcode && singleVolumeNumber && !deleted) {
1312 AskError(salvinfo, singleVolumeNumber);
1318 /* Comparison routine for volume sort.
1319 This is setup so that a read-write volume comes immediately before
1320 any read-only clones of that volume */
/*
 * qsort-style comparator over two struct VolumeSummary entries.
 *
 * Ordering, as far as the visible statements show:
 *   1. entries are ordered by header.parent (ascending);
 *   2. within one parent, an entry whose header.id equals header.parent is
 *      the read-write volume and is special-cased ahead of the clones;
 *   3. two clones of the same parent are ordered by header.id (ascending).
 *
 * _p1 and _p2 are untyped element pointers; both are read as
 * const struct VolumeSummary and never written.
 *
 * NOTE(review): the return statements of the two read-write branches are not
 * visible in this listing, and none of the visible returns can yield 0.  If
 * the sort routine ever compares an element with itself, the result would be
 * non-zero -- confirm that this is acceptable for the qsort in use.
 */
1322 CompareVolumes(const void *_p1, const void *_p2)
1324 const struct VolumeSummary *p1 = _p1;
1325 const struct VolumeSummary *p2 = _p2;
1326 if (p1->header.parent != p2->header.parent)
1327 return p1->header.parent < p2->header.parent ? -1 : 1;
1328 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1330 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1332 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1336 * Gleans volumeSummary information by asking the fileserver
1338 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1339 * salvaging a whole partition
1341 * @return whether we obtained the volume summary information or not
1342 * @retval 0 success; we obtained the volume summary information
1343 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1345 * @retval 1 we did not get the volume summary information; either the
1346 * fileserver responded with an error, or we are not supposed to
1347 * ask the fileserver for the information (e.g. we are salvaging
1348 * the entire partition or we are not the salvageserver)
1350 * @note for non-DAFS, always returns 1
/*
 * Implementation notes for AskVolumeSummary (only the statements visible in
 * this listing are described; several short lines are not shown).
 *
 * - The whole body is compiled only when both FSSYNC_BUILD_CLIENT and
 *   AFS_DEMAND_ATTACH_FS are defined, and it only talks to the fileserver
 *   when programType is salvageServer and singleVolumeNumber is non-zero.
 * - The volume group membership is requested with FSYNC_VGCQuery().  While
 *   the answer is SYNC_FAILED with reason FSYNC_PART_SCANNING the query is
 *   repeated after a sleep; the sleep grows by one second per round and the
 *   loop counter is clamped to 10 inside the sleep() argument.
 * - SYNC_FAILED with reason FSYNC_UNKNOWN_VOLID is turned into an empty
 *   response: q_res is zeroed and q_res.rw is set to singleVolumeNumber.
 * - A refusal is logged with the volume id, partition name, code and reason.
 * - When q_res.rw differs from singleVolumeNumber the request was for a
 *   clone: the situation is logged, SALVSYNC_LinkVolume() is called when
 *   SALVSYNC_BUILD_CLIENT is defined, and the process leaves through
 *   Exit(SALSRV_EXIT_VOLGROUP_LINK).
 * - salvinfo->volumeSummaryp is allocated with room for VOL_VG_MAX_VOLS
 *   entries (asserted non-NULL) and salvinfo->nVolumes restarts at 0.
 * - For each non-zero q_res.children[] entry: volumes other than
 *   singleVolumeNumber go through AskOffline() and LockVolume(); the on-disk
 *   header is read with VReadVolumeDiskHeader() (a failure is logged as
 *   "trying to salvage group anyway"); otherwise the header is converted
 *   with DiskToVolumeHeader(), the external file name is generated and
 *   copied with ToString(), and salvinfo->nVolumes is incremented.
 * - The collected summaries are sorted with qsort().
 * - "Cannot get volume summary from fileserver; falling back to scanning
 *   entire partition" is logged on a path whose condition is not visible.
 */
1353 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1356 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1357 if (programType == salvageServer) {
1358 if (singleVolumeNumber) {
1359 FSSYNC_VGQry_response_t q_res;
1361 struct VolumeSummary *vsp;
1363 struct VolumeDiskHeader diskHdr;
1365 memset(&res, 0, sizeof(res));
1367 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1370 * We must wait for the partition to finish scanning before
1371 * can continue, since we will not know if we got the entire
1372 * VG membership unless the partition is fully scanned.
1373 * We could, in theory, just scan the partition ourselves if
1374 * the VG cache is not ready, but we would be doing the exact
1375 * same scan the fileserver is doing; it will almost always
1376 * be faster to wait for the fileserver. The only exceptions
1377 * are if the partition does not take very long to scan, and
1378 * in that case it's fast either way, so who cares?
1380 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1381 Log("waiting for fileserver to finish scanning partition %s...\n",
1382 salvinfo->fileSysPartition->name);
1384 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1385 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1386 * just so small partitions don't need to wait over 10
1387 * seconds every time, and large partitions are generally
1388 * polled only once every ten seconds. */
1389 sleep((i > 10) ? (i = 10) : i);
1391 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1395 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1396 /* This can happen if there's no header for the volume
1397 * we're salvaging, or no headers exist for the VG (if
1398 * we're salvaging an RW). Act as if we got a response
1399 * with no VG members. The headers may be created during
1400 * salvaging, if there are inodes in this VG. */
1402 memset(&q_res, 0, sizeof(q_res));
1403 q_res.rw = singleVolumeNumber;
1407 Log("fileserver refused VGCQuery request for volume %lu on "
1408 "partition %s, code %ld reason %ld\n",
1409 afs_printable_uint32_lu(singleVolumeNumber),
1410 salvinfo->fileSysPartition->name,
1411 afs_printable_int32_ld(code),
1412 afs_printable_int32_ld(res.hdr.reason));
1416 if (q_res.rw != singleVolumeNumber) {
1417 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1418 afs_printable_uint32_lu(singleVolumeNumber),
1419 afs_printable_uint32_lu(q_res.rw));
1420 #ifdef SALVSYNC_BUILD_CLIENT
1421 if (SALVSYNC_LinkVolume(q_res.rw,
1423 salvinfo->fileSysPartition->name,
1425 Log("schedule request failed\n");
1427 #endif /* SALVSYNC_BUILD_CLIENT */
1428 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1431 salvinfo->volumeSummaryp = calloc(VOL_VG_MAX_VOLS, sizeof(struct VolumeSummary));
1432 osi_Assert(salvinfo->volumeSummaryp != NULL);
1434 salvinfo->nVolumes = 0;
1435 vsp = salvinfo->volumeSummaryp;
1437 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1438 char name[VMAXPATHLEN];
1440 if (!q_res.children[i]) {
1444 /* AskOffline for singleVolumeNumber was called much earlier */
1445 if (q_res.children[i] != singleVolumeNumber) {
1446 AskOffline(salvinfo, q_res.children[i]);
1447 if (LockVolume(salvinfo, q_res.children[i])) {
1453 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1455 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1456 afs_printable_uint32_lu(q_res.children[i]));
1461 DiskToVolumeHeader(&vsp->header, &diskHdr);
1462 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1463 vsp->fileName = ToString(name);
1464 salvinfo->nVolumes++;
1468 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1473 Log("Cannot get volume summary from fileserver; falling back to scanning "
1474 "entire partition\n");
1477 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
1482 * count how many volume headers are found by VWalkVolumeHeaders.
1484 * @param[in] dp the disk partition (unused)
1485 * @param[in] name full path to the .vol header (unused)
1486 * @param[in] hdr the header data (unused)
1487 * @param[in] last whether this is the last try or not (unused)
1488 * @param[in] rock actually an afs_int32*; the running count of how many
1489 * volumes we have found
/*
 * Header-walk callback used for counting: GetVolumeSummary passes it to
 * VWalkVolumeHeaders.  The only parameter the visible code touches is rock,
 * which is reinterpreted as a pointer to an afs_int32 counter.
 *
 * NOTE(review): the statement that updates the counter and the return value
 * are not visible in this listing.
 */
1494 CountHeader(struct DiskPartition64 *dp, const char *name,
1495 struct VolumeDiskHeader *hdr, int last, void *rock)
1497 afs_int32 *nvols = (afs_int32 *)rock;
1503 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
/*
 * Rock handed to the RecordHeader and UnlinkHeader callbacks.
 *
 * GetVolumeSummary fills it in before walking the volume headers: vsp starts
 * at salvinfo->volumeSummaryp, nVolumes starts at 0 and totalVolumes is the
 * number of entries that were allocated.  RecordHeader copies each accepted
 * summary to the entry vsp points at and aborts when nVolumes exceeds
 * totalVolumes; GetVolumeSummary copies nVolumes back into the salvage job.
 */
1506 struct SalvageScanParams {
1507 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1508 * vol id of the VG we're salvaging */
1509 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1510 * we're filling in */
1511 afs_int32 nVolumes; /**< # of vols we've encountered */
1512 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1513 * # of vols we've alloc'd memory for) */
1514 int retry; /**< do we need to retry vol lock/checkout? */
1515 struct SalvInfo *salvinfo; /**< salvage job info */
1519 * records volume summary info found from VWalkVolumeHeaders.
1521 * Found volumes are also taken offline if they are in the specific volume
1522 * group we are looking for.
1524 * @param[in] dp the disk partition
1525 * @param[in] name full path to the .vol header
1526 * @param[in] hdr the header data
1527 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1528 * @param[in] rock actually a struct SalvageScanParams*, containing the
1529 * information needed to record the volume summary data
1531 * @return operation status
1533 * @retval -1 volume locking raced with fileserver restart; checking out
1534 * and locking volumes needs to be retried
1535 * @retval 1 volume header is mis-named and should be deleted
/*
 * Implementation notes for RecordHeader (only the statements visible in this
 * listing are described; several short lines are not shown).
 *
 * - rock is a struct SalvageScanParams; the single-volume id and the salvage
 *   job are taken from it.
 * - The on-disk header is converted into a zeroed local struct VolumeSummary
 *   with DiskToVolumeHeader().
 * - Clone requested directly (header.id equals singleVolumeNumber but
 *   header.parent does not): a salvageserver logs the situation, calls
 *   SALVSYNC_LinkVolume() for the parent when SALVSYNC_BUILD_CLIENT is
 *   defined and leaves through Exit(SALSRV_EXIT_VOLGROUP_LINK); the other
 *   visible branch logs that a read-only volume is not salvaged.
 * - A header is considered further only for a partition salvage, or when its
 *   id or parent equals singleVolumeNumber.
 * - Name check: the expected file name is formatted with VFORMAT from
 *   header.id into nameShouldBe and compared with a name derived from
 *   strrchr(name, OS_DIRSEPC).
 * - When the name is good, or on the last try, volumes other than
 *   singleVolumeNumber are taken offline with AskOffline(); under
 *   AFS_DEMAND_ATTACH_FS or AFS_DEMAND_ATTACH_UTIL LockVolume() is also
 *   called.
 * - On the last try, unless Showmode is set, a mis-named header is reported
 *   together with the name it should have had.
 * - The accepted summary gets a copy of the base file name via ToString(),
 *   the walk aborts if params->nVolumes exceeds params->totalVolumes, and
 *   the summary is copied to the slot params->vsp points at.
 *
 * NOTE(review): the result of strrchr() is used by the visible strcmp() and
 * ToString() calls; whether a NULL result is handled is on lines that are
 * not visible here.
 */
1538 RecordHeader(struct DiskPartition64 *dp, const char *name,
1539 struct VolumeDiskHeader *hdr, int last, void *rock)
1541 char nameShouldBe[64];
1542 struct SalvageScanParams *params;
1543 struct VolumeSummary summary;
1544 VolumeId singleVolumeNumber;
1545 struct SalvInfo *salvinfo;
1547 params = (struct SalvageScanParams *)rock;
1549 memset(&summary, 0, sizeof(summary));
1551 singleVolumeNumber = params->singleVolumeNumber;
1552 salvinfo = params->salvinfo;
1554 DiskToVolumeHeader(&summary.header, hdr);
1556 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1557 && summary.header.parent != singleVolumeNumber) {
1559 if (programType == salvageServer) {
1560 #ifdef SALVSYNC_BUILD_CLIENT
1561 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1562 summary.header.id, summary.header.parent);
1563 if (SALVSYNC_LinkVolume(summary.header.parent,
1567 Log("schedule request failed\n");
1570 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1573 Log("%u is a read-only volume; not salvaged\n",
1574 singleVolumeNumber);
1579 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1580 || summary.header.parent == singleVolumeNumber) {
1582 /* check if the header file is incorrectly named */
1584 const char *base = strrchr(name, OS_DIRSEPC);
1591 snprintf(nameShouldBe, sizeof nameShouldBe,
1592 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1595 if (strcmp(nameShouldBe, base)) {
1596 /* .vol file has wrong name; retry/delete */
1600 if (!badname || last) {
1601 /* only offline the volume if the header is good, or if this is
1602 * the last try looking at it; avoid AskOffline'ing the same vol
1605 if (singleVolumeNumber
1606 && summary.header.id != singleVolumeNumber) {
1607 /* don't offline singleVolumeNumber; we already did that
1610 AskOffline(salvinfo, summary.header.id);
1612 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
1614 /* don't lock the volume if the header is bad, since we're
1615 * about to delete it anyway. */
1616 if (LockVolume(salvinfo, summary.header.id)) {
1621 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
1625 if (last && !Showmode) {
1626 Log("Volume header file %s is incorrectly named (should be %s "
1627 "not %s); %sdeleted (it will be recreated later, if "
1628 "necessary)\n", name, nameShouldBe, base,
1629 (Testing ? "it would have been " : ""));
1634 summary.fileName = ToString(base);
1637 if (params->nVolumes > params->totalVolumes) {
1638 /* We found more volumes than we found on the first partition walk;
1639 * apparently something created a volume while we were
1640 * partition-salvaging, or we found more than 20 vols when salvaging a
1641 * particular volume. Abort if we detect this, since other programs
1642 * supposed to not touch the partition while it is partition-salvaging,
1643 * and we shouldn't find more than 20 vols in a VG.
1645 Abort("Found %ld vol headers, but should have found at most %ld! "
1646 "Make sure the volserver/fileserver are not running at the "
1647 "same time as a partition salvage\n",
1648 afs_printable_int32_ld(params->nVolumes),
1649 afs_printable_int32_ld(params->totalVolumes));
1652 memcpy(params->vsp, &summary, sizeof(summary));
1660 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1662 * If the header could not be read in at all, the header is always unlinked.
1663 * If instead RecordHeader said the header was bad (that is, the header file
1664 * is mis-named), we only unlink if we are doing a partition salvage, as
1665 * opposed to salvaging a specific volume group.
1667 * @param[in] dp the disk partition
1668 * @param[in] name full path to the .vol header
1669 * @param[in] hdr header data, or NULL if the header could not be read
1670 * @param[in] rock actually a struct SalvageScanParams*, with some information
/*
 * Implementation notes for UnlinkHeader (only the statements visible in this
 * listing are described).
 *
 * - rock is a struct SalvageScanParams; only singleVolumeNumber is consulted.
 * - Two cases are distinguished: a header that could not be read at all,
 *   which is reported as "not a legitimate volume header file", and a
 *   readable header on a partition salvage (singleVolumeNumber == 0).
 * - The file is removed with unlink(name) only when the local flag dounlink
 *   is set; a failing unlink is logged together with errno.
 *
 * NOTE(review): the lines that set dounlink are not visible, so how Testing
 * mode influences the removal cannot be confirmed from here.
 */
1674 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1675 struct VolumeDiskHeader *hdr, void *rock)
1677 struct SalvageScanParams *params;
1680 params = (struct SalvageScanParams *)rock;
1683 /* no header; header is too bogus to read in at all */
1685 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1691 } else if (!params->singleVolumeNumber) {
1692 /* We were able to read in a header, but RecordHeader said something
1693 * was wrong with it. We only unlink those if we are doing a partition
1700 if (dounlink && unlink(name)) {
1701 Log("Error %d while trying to unlink %s\n", errno, name);
1706 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1707 * the fileserver for VG information, or by scanning the /vicepX partition.
1709 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1710 * are salvaging, or 0 if this is a partition
1713 * @return operation status
1715 * @retval -1 we raced with a fileserver restart; checking out and locking
1716 * volumes must be retried
/*
 * Implementation notes for GetVolumeSummary (only the statements visible in
 * this listing are described; several short lines are not shown).
 *
 * 1. AskVolumeSummary() is tried first; per the comments below, one outcome
 *    means the fileserver supplied everything and another means volume
 *    checkout has to be retried.
 * 2. Otherwise the table size is chosen: for a partition salvage
 *    (singleVolumeNumber == 0) the headers are counted with a
 *    VWalkVolumeHeaders()/CountHeader pass, and the salvage aborts if the
 *    directory cannot be read; for a single volume group VOL_VG_MAX_VOLS
 *    entries are used.
 * 3. salvinfo->volumeSummaryp is allocated zero-filled with that many
 *    entries (asserted non-NULL).
 * 4. A struct SalvageScanParams is filled in and passed BY ADDRESS as the
 *    rock of a second VWalkVolumeHeaders() pass, with RecordHeader recording
 *    each header and UnlinkHeader disposing of bad ones.  Both callbacks
 *    cast the rock back to struct SalvageScanParams.
 * 5. The number of recorded volumes is stored in salvinfo->nVolumes and the
 *    table is sorted with qsort().
 */
1719 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1721 afs_int32 nvols = 0;
1722 struct SalvageScanParams params;
1725 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1727 /* we successfully got the vol information from the fileserver; no
1728 * need to scan the partition */
1732 /* we need to retry volume checkout */
1736 if (!singleVolumeNumber) {
1737 /* Count how many volumes we have in /vicepX */
1738 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1741 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1746 nvols = VOL_VG_MAX_VOLS;
1749 salvinfo->volumeSummaryp = calloc(nvols, sizeof(struct VolumeSummary));
1750 osi_Assert(salvinfo->volumeSummaryp != NULL);
1752 params.singleVolumeNumber = singleVolumeNumber;
1753 params.vsp = salvinfo->volumeSummaryp;
1754 params.nVolumes = 0;
1755 params.totalVolumes = nvols;
1757 params.salvinfo = salvinfo;
1759 /* walk the partition directory of volume headers and record the info
1760 * about them; unlinking invalid headers */
1761 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1762 UnlinkHeader, &params);
1764 /* we apparently need to retry checking-out/locking volumes */
1768 Abort("Failed to get volume header summary\n");
1770 salvinfo->nVolumes = params.nVolumes;
1772 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1778 /* Find the link table. This should be associated with the RW volume or, if
1779 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
/*
 * Scans the special inodes of every volume in the group for the link table.
 *
 * isp       array of nVols inode summaries (one per volume of the group)
 * nVols     number of entries in isp
 * allInodes base pointer such that allInodes + isp[i].index is the first
 *           inode of volume i
 *
 * For each volume only the first isp[i].nSpecialInodes inodes are examined.
 * The inode number of the first one whose special type is VI_LINKTABLE is
 * returned.
 *
 * NOTE(review): the value returned when no link table inode is found is not
 * visible in this listing; the caller tests the result with VALID_INO().
 */
1782 FindLinkHandle(struct InodeSummary *isp, int nVols,
1783 struct ViceInodeInfo *allInodes)
1786 struct ViceInodeInfo *ip;
1788 for (i = 0; i < nVols; i++) {
1789 ip = allInodes + isp[i].index;
1790 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1791 if (ip[j].u.special.type == VI_LINKTABLE)
1792 return ip[j].inodeNumber;
/*
 * Creates (when needed) and initialises the link table of a volume group.
 *
 * salvinfo  salvage job; salvinfo->VGLinkH is (re)initialised to refer to
 *           the link table inode
 * isp       inode summary of the volume; supplies volumeId and RWvolumeId
 *           and, when a volume summary is attached, receives the inode
 *           number in header.linkTable
 * ino       existing link table inode, or a value for which VALID_INO() is
 *           false to have a new special inode created with IH_CREATE()
 *
 * Steps: create the inode if necessary, open it, truncate it to a version
 * stamp plus one short, write the version stamp (LINKTABLEMAGIC /
 * LINKTABLEVERSION) at offset 0, and close the descriptor for real.  Every
 * failure is reported through Abort() with the RW volume id and errno; each
 * message names the step that failed.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
1799 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1801 struct versionStamp version;
1804 if (!VALID_INO(ino))
1806 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
1807 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1808 if (!VALID_INO(ino))
1810 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1811 isp->RWvolumeId, errno);
1812 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1813 fdP = IH_OPEN(salvinfo->VGLinkH);
1815 Abort("Can't open link table for volume %u (error = %d)\n",
1816 isp->RWvolumeId, errno);
1818 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1819 Abort("Can't truncate link table for volume %u (error = %d)\n",
1820 isp->RWvolumeId, errno);
1822 version.magic = LINKTABLEMAGIC;
1823 version.version = LINKTABLEVERSION;
1825 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1827 Abort("Can't write link table for volume %u (error = %d)\n",
1828 isp->RWvolumeId, errno);
1830 FDH_REALLYCLOSE(fdP);
1832 /* If the volume summary exists (i.e., the V*.vol header file exists),
1833 * then set this inode there as well.
1835 if (isp->volSummary)
1836 isp->volSummary->header.linkTable = ino;
/*
 * Thread body: unpacks the SVGParms_t passed as the untyped argument and runs
 * DoSalvageVolumeGroup() with the salvage job, inode summary and volume count
 * stored in it.  SalvageVolumeGroup starts this routine (named nt_SVG in its
 * pthread_create call) and joins the thread afterwards.
 */
1845 SVGParms_t *parms = (SVGParms_t *) arg;
1846 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * Salvages one volume group by running DoSalvageVolumeGroup() on a separate,
 * joinable thread and waiting for it to finish.
 *
 * salvinfo  salvage job; its per-volume-group state (VolumeChanged, VGLinkH,
 *           VGLinkH_cnt and VolInfo) is reset before the work starts
 * isp       first inode summary of the volume group
 * nVols     number of volumes in the group
 *
 * The three arguments are packed into a local SVGParms_t, which stays valid
 * because the thread is joined before this function continues.  A failure of
 * pthread_attr_init(), pthread_attr_setdetachstate() or pthread_create() is
 * logged with the RW volume id.
 *
 * NOTE(review): the closing conditional refers to AFS_NT40_ENV, so this
 * threaded variant is presumably the Windows build; the opening conditional
 * and any alternative branch are not visible in this listing.
 */
1851 SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1854 pthread_attr_t tattr;
1858 /* Initialize per volume global variables, even if later code does so */
1859 salvinfo->VolumeChanged = 0;
1860 salvinfo->VGLinkH = NULL;
1861 salvinfo->VGLinkH_cnt = 0;
1862 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1864 parms.svgp_inodeSummaryp = isp;
1865 parms.svgp_count = nVols;
1866 parms.svgp_salvinfo = salvinfo;
1867 code = pthread_attr_init(&tattr);
1869 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1873 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1875 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1878 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1880 Log("Failed to create thread to salvage volume group %u\n",
1884 (void)pthread_join(tid, NULL);
1886 #endif /* AFS_NT40_ENV */
/*
 * Implementation notes for DoSalvageVolumeGroup (only the statements visible
 * in this listing are described; several short lines are not shown).
 *
 * isp points at the first of nVols inode summaries that make up one volume
 * group.
 *
 * 1. haveRWvolume is set when the first summary is the RW volume itself
 *    (volumeId == RWvolumeId) and it has at least one special inode.
 * 2. Unless ForceSalvage is set, QuickCheck() is consulted first (skipped
 *    when ShowMounts is set and an RW volume is present).
 * 3. When forking is allowed and debug is off, Fork() is called and the
 *    parent waits in Wait("Salvage volume group").
 * 4. The inodes of all volumes in the group are read from salvinfo->inodeFd
 *    into one malloc-ed array; the read is asserted to return the full
 *    size.  allInodes is biased by isp->index so that the partition-wide
 *    indexes stored in the summaries can be used directly.
 * 5. salvageTo is 0 when an RW volume is present and 1 otherwise, so the
 *    reverse loop over the volumes stops before a missing RW volume.
 * 6. Under AFS_NAMEI_ENV the link table inode is located with
 *    FindLinkHandle(), opened, and its version stamp is checked against
 *    LINKTABLEMAGIC.  If it is missing or bad, a replacement is announced;
 *    CreateLinkTable() builds it and namei_SetLinkCount() records a count
 *    of 1 for every non-special inode of the group.
 * 7. Volumes are processed from the last summary down to salvageTo, each
 *    with two passes (check == 1, then check == 0) of
 *    SalvageVolumeHeaderFile() and SalvageVnodes(); failures are passed to
 *    MaybeZapVolume().
 * 8. Remaining link count differences are applied with IH_DEC()/IH_INC();
 *    the link table inode itself is adjusted after all others using
 *    dec_VGLinkH.
 * 9. SalvageVolume() performs the directory checks on the RW volume and the
 *    link table handle is released with IH_RELEASE().
 *
 * NOTE(review): no check of the malloc() result is visible before the
 * buffer is handed to OS_READ().
 */
1889 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1891 struct ViceInodeInfo *inodes, *allInodes, *ip;
1892 int i, totalInodes, size, salvageTo;
1896 int dec_VGLinkH = 0;
1898 FdHandle_t *fdP = NULL;
1900 salvinfo->VGLinkH_cnt = 0;
1901 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1902 && isp->nSpecialInodes > 0);
1903 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1904 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1907 if (ShowMounts && !haveRWvolume)
1909 if (canfork && !debug && Fork() != 0) {
1910 (void)Wait("Salvage volume group");
1913 for (i = 0, totalInodes = 0; i < nVols; i++)
1914 totalInodes += isp[i].nInodes;
1915 size = totalInodes * sizeof(struct ViceInodeInfo);
1916 inodes = (struct ViceInodeInfo *)malloc(size);
1917 allInodes = inodes - isp->index; /* this would the base of all the inodes
1918 * for the partition, if all the inodes
1919 * had been read into memory */
1921 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1923 osi_Assert(OS_READ(salvinfo->inodeFd, inodes, size) == size);
1925 /* Don't try to salvage a read write volume if there isn't one on this
1927 salvageTo = haveRWvolume ? 0 : 1;
1929 #ifdef AFS_NAMEI_ENV
1930 ino = FindLinkHandle(isp, nVols, allInodes);
1931 if (VALID_INO(ino)) {
1932 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1933 fdP = IH_OPEN(salvinfo->VGLinkH);
1935 if (VALID_INO(ino) && fdP != NULL) {
1936 struct versionStamp header;
1937 afs_sfsize_t nBytes;
1939 nBytes = FDH_PREAD(fdP, (char *)&header, sizeof(struct versionStamp), 0);
1940 if (nBytes != sizeof(struct versionStamp)
1941 || header.magic != LINKTABLEMAGIC) {
1942 Log("Bad linktable header for volume %u.\n", isp->RWvolumeId);
1943 FDH_REALLYCLOSE(fdP);
1947 if (!VALID_INO(ino) || fdP == NULL) {
1948 Log("%s link table for volume %u.\n",
1949 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1951 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1954 struct ViceInodeInfo *ip;
1955 CreateLinkTable(salvinfo, isp, ino);
1956 fdP = IH_OPEN(salvinfo->VGLinkH);
1957 /* Sync fake 1 link counts to the link table, now that it exists */
1959 for (i = 0; i < nVols; i++) {
1960 ip = allInodes + isp[i].index;
1961 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1962 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1969 FDH_REALLYCLOSE(fdP);
1971 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1974 /* Salvage in reverse order--read/write volume last; this way any
1975 * Inodes not referenced by the time we salvage the read/write volume
1976 * can be picked up by the read/write volume */
1977 /* ACTUALLY, that's not done right now--the inodes just vanish */
1978 for (i = nVols - 1; i >= salvageTo; i--) {
1980 struct InodeSummary *lisp = &isp[i];
1981 #ifdef AFS_NAMEI_ENV
1982 /* If only the RO is present on this partition, the link table
1983 * shows up as a RW volume special file. Need to make sure the
1984 * salvager doesn't try to salvage the non-existent RW.
1986 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1987 /* If this only special inode is the link table, continue */
1988 if (inodes->u.special.type == VI_LINKTABLE) {
1995 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1996 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1997 /* Check inodes twice. The second time do things seriously. This
1998 * way the whole RO volume can be deleted, below, if anything goes wrong */
1999 for (check = 1; check >= 0; check--) {
2001 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
2003 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
2004 if (rw && deleteMe) {
2005 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
2006 * volume won't be called */
2012 if (rw && check == 1)
2014 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
2015 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
2021 /* Fix actual inode counts */
2024 Log("totalInodes %d\n",totalInodes);
2025 for (ip = inodes; totalInodes; ip++, totalInodes--) {
2026 static int TraceBadLinkCounts = 0;
2027 #ifdef AFS_NAMEI_ENV
2028 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
2029 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
2030 VGLinkH_p1 = ip->u.param[0];
2031 continue; /* Deal with this last. */
2034 if (ip->linkCount != 0 && TraceBadLinkCounts) {
2035 TraceBadLinkCounts--; /* Limit reports, per volume */
2036 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
2038 while (ip->linkCount > 0) {
2039 /* below used to assert, not break */
2041 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2042 Log("idec failed. inode %s errno %d\n",
2043 PrintInode(stmp, ip->inodeNumber), errno);
2049 while (ip->linkCount < 0) {
2050 /* these used to be asserts */
2052 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2053 Log("iinc failed. inode %s errno %d\n",
2054 PrintInode(stmp, ip->inodeNumber), errno);
2061 #ifdef AFS_NAMEI_ENV
2062 while (dec_VGLinkH > 0) {
2063 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2064 Log("idec failed on link table, errno = %d\n", errno);
2068 while (dec_VGLinkH < 0) {
2069 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2070 Log("iinc failed on link table, errno = %d\n", errno);
2077 /* Directory consistency checks on the rw volume */
2079 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2080 IH_RELEASE(salvinfo->VGLinkH);
2082 if (canfork && !debug) {
/*
 * Implementation notes for QuickCheck (only the statements visible in this
 * listing are described; several short lines are not shown).
 *
 * DoSalvageVolumeGroup calls this before forking, and only when ForceSalvage
 * is off, to decide from the volume info headers alone whether the group
 * needs a real salvage.
 *
 * For each of the nVols summaries the volume info special inode referenced
 * by the attached volume summary is read with IH_IREAD().  A volume passes
 * the visible test when the full header could be read, its stamp carries
 * VOLUMEINFOMAGIC, dontSalvage equals DONT_SALVAGE, and both needsSalvaged
 * and destroyMe are 0.  A passing volume that is still marked inUse has
 * inUse cleared and inService set, and the header is written back with
 * IH_IWRITE().
 *
 * The first summary is special-cased when it has no special inodes and the
 * group has more than one volume (a phantom RW volume, per the comment).
 *
 * NOTE(review): the return values and the handling of a missing volume
 * summary are on lines that are not visible here.
 */
2089 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2091 /* Check headers BEFORE forking */
2095 for (i = 0; i < nVols; i++) {
2096 struct VolumeSummary *vs = isp[i].volSummary;
2097 VolumeDiskData volHeader;
2099 /* Don't salvage just because phantom rw volume is there... */
2100 /* (If a read-only volume exists, read/write inodes must also exist) */
2101 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2105 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2106 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2107 == sizeof(volHeader)
2108 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2109 && volHeader.dontSalvage == DONT_SALVAGE
2110 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2111 if (volHeader.inUse != 0) {
2112 volHeader.inUse = 0;
2113 volHeader.inService = 1;
2115 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2116 != sizeof(volHeader)) {
2132 /* SalvageVolumeHeaderFile
2134 * Salvage the top level V*.vol header file. Make sure the special files
2135 * exist and that there are no duplicates.
2137 * Calls SalvageHeader for each possible type of volume special file.
/*
 * Implementation notes for SalvageVolumeHeaderFile (only the statements
 * visible in this listing are described; several short lines are not shown).
 *
 * salvinfo  salvage job (partition, device, link table handle)
 * isp       inode summary of the volume being salvaged
 * inodes    base pointer indexed with the partition-wide isp->index
 * RW        non-zero when the volume is the read-write volume
 * check     non-zero on the dry pass; 0 on the pass that changes things
 * deleteMe  passed through to SalvageHeader()
 *
 * Phases:
 * 1. A zeroed skip[] array with one slot per special inode is allocated.  If
 *    the allocation fails this is logged and the salvage continues without
 *    duplicate recovery.
 * 2. When a volume summary exists, its header is copied into tempHeader so
 *    that stuff[] can be used to find which special inodes the existing
 *    header references; those are remembered in goodspecial[].
 * 3. tempHeader is reset to a fresh header (magic, version, id, parent).
 * 4. Adjacent special inodes of the same type are duplicates.  If the
 *    existing header referenced one of them, that one is preferred;
 *    otherwise the salvage of the volume is abandoned with a log message.
 * 5. Every special inode is classified: out-of-range types are reported as
 *    rubbish; skipped duplicates are reported as orphans (removed only when
 *    orphans is ORPH_REMOVE); the rest are recorded in tempHeader through
 *    stuff[].inode.  Outside check mode the link count of every kept inode
 *    except the link table is decremented so that it survives.
 * 6. salvinfo->VGLinkH_cnt is incremented once per header, and a read-only
 *    volume has its in-use bit cleared on the real pass.
 * 7. SalvageHeader() runs for each special inode type; the link table slot
 *    is first pointed at the inode of salvinfo->VGLinkH when that is valid.
 * 8. With no volume summary, a new one is allocated (asserted non-NULL, in
 *    line with the other allocations of summaries in this file) and the
 *    header will be created; otherwise the stored header is compared with
 *    tempHeader and rewritten when they differ.  The header file name is
 *    copied into a 64-byte buffer with a bounded snprintf(), so an
 *    over-long name is truncated rather than overrunning the buffer.
 * 9. tempHeader becomes the in-memory header; unless Testing is set it is
 *    converted with VolumeHeaderToDisk() and written through writefunc.
 *    Finally the volume info handle is initialised.
 */
2141 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2142 struct ViceInodeInfo *inodes, int RW,
2143 int check, int *deleteMe)
2146 struct ViceInodeInfo *ip;
2147 int allinodesobsolete = 1;
2148 struct VolumeDiskHeader diskHeader;
2149 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2151 struct VolumeHeader tempHeader;
2152 struct afs_inode_info stuff[MAXINODETYPE];
2154 /* keeps track of special inodes that are probably 'good'; they are
2155 * referenced in the vol header, and are included in the given inodes
2160 } goodspecial[MAXINODETYPE];
2165 memset(goodspecial, 0, sizeof(goodspecial));
2167 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2169 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2171 Log("cannot allocate memory for inode skip array when salvaging "
2172 "volume %lu; not performing duplicate special inode recovery\n",
2173 afs_printable_uint32_lu(isp->volumeId));
2174 /* still try to perform the salvage; the skip array only does anything
2175 * if we detect duplicate special inodes */
2178 init_inode_info(&tempHeader, stuff);
2181 * First, look at the special inodes and see if any are referenced by
2182 * the existing volume header. If we find duplicate special inodes, we
2183 * can use this information to use the referenced inode (it's more
2184 * likely to be the 'good' one), and throw away the duplicates.
2186 if (isp->volSummary && skip) {
2187 /* use tempHeader, so we can use the stuff[] array to easily index
2188 * into the isp->volSummary special inodes */
2189 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2191 for (i = 0; i < isp->nSpecialInodes; i++) {
2192 ip = &inodes[isp->index + i];
2193 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2194 /* will get taken care of in a later loop */
2197 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2198 goodspecial[ip->u.special.type-1].valid = 1;
2199 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2204 memset(&tempHeader, 0, sizeof(tempHeader));
2205 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2206 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2207 tempHeader.id = isp->volumeId;
2208 tempHeader.parent = isp->RWvolumeId;
2210 /* Check for duplicates (inodes are sorted by type field) */
2211 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2212 ip = &inodes[isp->index + i];
2213 if (ip->u.special.type == (ip + 1)->u.special.type) {
2214 afs_ino_str_t stmp1, stmp2;
2216 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2217 /* Will be caught in the loop below */
2221 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2222 ip->u.special.type, isp->volumeId,
2223 PrintInode(stmp1, ip->inodeNumber),
2224 PrintInode(stmp2, (ip+1)->inodeNumber));
2226 if (skip && goodspecial[ip->u.special.type-1].valid) {
2227 Inode gi = goodspecial[ip->u.special.type-1].inode;
2230 Log("using special inode referenced by vol header (%s)\n",
2231 PrintInode(stmp1, gi));
2234 /* the volume header references some special inode of
2235 * this type in the inodes array; are we it? */
2236 if (ip->inodeNumber != gi) {
2238 } else if ((ip+1)->inodeNumber != gi) {
2239 /* in case this is the last iteration; we need to
2240 * make sure we check ip+1, too */
2245 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2253 for (i = 0; i < isp->nSpecialInodes; i++) {
2255 ip = &inodes[isp->index + i];
2256 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2258 Log("Rubbish header inode %s of type %d\n",
2259 PrintInode(stmp, ip->inodeNumber),
2260 ip->u.special.type);
2266 Log("Rubbish header inode %s of type %d; deleted\n",
2267 PrintInode(stmp, ip->inodeNumber),
2268 ip->u.special.type);
2269 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2270 if (skip && skip[i]) {
2271 if (orphans == ORPH_REMOVE) {
2272 Log("Removing orphan special inode %s of type %d\n",
2273 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2276 Log("Ignoring orphan special inode %s of type %d\n",
2277 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2278 /* fall through to the ip->linkCount--; line below */
2281 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2282 allinodesobsolete = 0;
2284 if (!check && ip->u.special.type != VI_LINKTABLE)
2285 ip->linkCount--; /* Keep the inode around */
2293 if (allinodesobsolete) {
2300 salvinfo->VGLinkH_cnt++; /* one for every header. */
2302 if (!RW && !check && isp->volSummary) {
2303 ClearROInUseBit(isp->volSummary);
2307 for (i = 0; i < MAXINODETYPE; i++) {
2308 if (stuff[i].inodeType == VI_LINKTABLE) {
2309 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2310 * And we may have recreated the link table earlier, so set the
2311 * RW header as well. The header magic was already checked.
2313 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2314 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2318 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2322 if (isp->volSummary == NULL) {
2324 char headerName[64];
2325 snprintf(headerName, sizeof headerName, VFORMAT,
2326 afs_printable_uint32_lu(isp->volumeId));
2327 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2328 salvinfo->fileSysPath, headerName);
2330 Log("No header file for volume %u\n", isp->volumeId);
2334 Log("No header file for volume %u; %screating %s\n",
2335 isp->volumeId, (Testing ? "it would have been " : ""),
2337 isp->volSummary = calloc(1, sizeof(struct VolumeSummary));
osi_Assert(isp->volSummary != NULL);
2338 isp->volSummary->fileName = ToString(headerName);
2340 writefunc = VCreateVolumeDiskHeader;
2343 char headerName[64];
2344 /* hack: these two fields are obsolete... */
2345 isp->volSummary->header.volumeAcl = 0;
2346 isp->volSummary->header.volumeMountTable = 0;
2349 (&isp->volSummary->header, &tempHeader,
2350 sizeof(struct VolumeHeader))) {
2351 /* We often remove the name before calling us, so we make a fake one up */
2352 if (isp->volSummary->fileName) {
2353 snprintf(headerName, sizeof headerName, "%s", isp->volSummary->fileName);
2355 snprintf(headerName, sizeof headerName, VFORMAT,
2356 afs_printable_uint32_lu(isp->volumeId));
2357 isp->volSummary->fileName = ToString(headerName);
2359 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2360 salvinfo->fileSysPath, headerName);
2362 Log("Header file %s is damaged or no longer valid%s\n", path,
2363 (check ? "" : "; repairing"));
2367 writefunc = VWriteVolumeDiskHeader;
2371 memcpy(&isp->volSummary->header, &tempHeader,
2372 sizeof(struct VolumeHeader));
2375 Log("It would have written a new header file for volume %u\n",
2379 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2380 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2382 Log("Error %ld writing volume header file for volume %lu\n",
2383 afs_printable_int32_ld(code),
2384 afs_printable_uint32_lu(diskHeader.id));
2389 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2390 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader: check one special header inode of a volume and, when
 * repairs are allowed, recreate it if it is missing or its contents are bad.
 *
 * salvinfo  state of the current salvage job; supplies the device and
 *           partition path, and receives a copy of the volume info in
 *           salvinfo->VolInfo when sp describes the VI_VOLINFO inode
 * sp        describes the special inode: its type, a printable description,
 *           the expected size and version stamp, and a pointer to the
 *           inode number stored in the volume header
 * isp       summary of the volume being salvaged (volumeId, RWvolumeId,
 *           maxUniquifier are read here)
 * check     non-zero when the caller only wants problems detected; the
 *           recreate path reports an internal error if reached in that mode
 * deleteMe  NOTE(review): not referenced on any line of this listing;
 *           presumably set on the destroyMe / "destroying volume" paths --
 *           confirm against the full source
 *
 * Visible behaviour: a zero inode number is logged as missing and a new
 * inode is requested with IH_CREATE; the inode is opened and sp->size bytes
 * are read and checked against sp->stamp.magic; on recreation the file is
 * truncated and either a minimal VolumeDiskData (name "bogus.<id>", out of
 * service, not blessed, uniquifier = maxUniquifier + 1 + 1000) or just the
 * version stamp is written at offset 0.
 *
 * Returns 0 early for VI_LINKTABLE when AFS_NAMEI_ENV is not defined.  The
 * other return statements are not visible here; the caller visible in this
 * file compares the result against -1.
 *
 * NOTE(review): the sprintf() into header.volumeInfo.name and the strcpy()
 * calls into `update` are unbounded; the destination sizes are declared
 * outside this listing -- confirm they are large enough or switch to
 * snprintf().
 */
2395 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2396 struct InodeSummary *isp, int check, int *deleteMe)
2399 VolumeDiskData volumeInfo;
2400 struct versionStamp fileHeader;
2409 #ifndef AFS_NAMEI_ENV
2410 if (sp->inodeType == VI_LINKTABLE)
2411 return 0; /* header magic was already checked */
2413 if (*(sp->inode) == 0) {
2415 Log("Missing inode in volume header (%s)\n", sp->description);
2419 Log("Missing inode in volume header (%s); %s\n", sp->description,
2420 (Testing ? "it would have recreated it" : "recreating"));
2423 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2424 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2425 if (!VALID_INO(*(sp->inode)))
2427 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2428 sp->description, errno);
2433 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2434 fdP = IH_OPEN(specH);
2435 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2436 /* bail out early and destroy the volume */
2438 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2445 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2446 sp->description, errno);
2449 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2450 || header.fileHeader.magic != sp->stamp.magic)) {
2452 Log("Part of the header (%s) is corrupted\n", sp->description);
2453 FDH_REALLYCLOSE(fdP);
2457 Log("Part of the header (%s) is corrupted; recreating\n",
2460 /* header can be garbage; make sure we don't read garbage data from
2462 memset(&header, 0, sizeof(header));
2464 if (sp->inodeType == VI_VOLINFO
2465 && header.volumeInfo.destroyMe == DESTROY_ME) {
2468 FDH_REALLYCLOSE(fdP);
2472 if (recreate && !Testing) {
2475 ("Internal error: recreating volume header (%s) in check mode\n",
2477 nBytes = FDH_TRUNC(fdP, 0);
2479 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2480 sp->description, errno);
2482 /* The following code should be moved into vutil.c */
2483 if (sp->inodeType == VI_VOLINFO) {
2485 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2486 header.volumeInfo.stamp = sp->stamp;
2487 header.volumeInfo.id = isp->volumeId;
2488 header.volumeInfo.parentId = isp->RWvolumeId;
2489 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2490 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2491 isp->volumeId, isp->volumeId);
2492 header.volumeInfo.inService = 0;
2493 header.volumeInfo.blessed = 0;
2494 /* The + 1000 is a hack in case there are any files out in venus caches */
2495 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2496 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2497 header.volumeInfo.needsCallback = 0;
2498 gettimeofday(&tp, NULL);
2499 header.volumeInfo.creationDate = tp.tv_sec;
2501 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2502 sizeof(header.volumeInfo), 0);
2503 if (nBytes != sizeof(header.volumeInfo)) {
2506 ("Unable to write volume header file (%s) (errno = %d)\n",
2507 sp->description, errno);
2508 Abort("Unable to write entire volume header file (%s)\n",
/* Here only the version stamp is rewritten at the start of the file
 * (presumably the branch for special inodes other than VI_VOLINFO). */
2512 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2513 if (nBytes != sizeof(sp->stamp)) {
2516 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2517 sp->description, errno);
2519 ("Unable to write entire version stamp in volume header file (%s)\n",
2524 FDH_REALLYCLOSE(fdP);
/* Keep a copy of the volume info in the salvage state and log when the
 * volume was last updated (or created, if it has no update date). */
2526 if (sp->inodeType == VI_VOLINFO) {
2527 salvinfo->VolInfo = header.volumeInfo;
2531 if (salvinfo->VolInfo.updateDate) {
2532 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2534 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2535 salvinfo->VolInfo.id,
2536 (Testing ? "it would have been " : ""), update);
2538 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2540 Log("%s (%u) not updated (created %s)\n",
2541 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
/*
 * SalvageVnodes: salvage both vnode indexes of one volume in a volume group.
 *
 * salvinfo  state of the current salvage job
 * rwIsp     inode summary of the read/write volume; its index, nInodes and
 *           nSpecialInodes locate the non-special inodes inside `inodes`
 * thisIsp   inode summary of the volume whose indexes are salvaged; RW is
 *           true when it is the same object as rwIsp
 * inodes    array of inode records for the whole volume group
 * check     passed through to SalvageIndex
 *
 * SalvageIndex is called first for the small vnode index and then for the
 * large one, both over the same slice of `inodes`.  The assignments of
 * ismall / ilarge and the statement guarded by "check && ismall == -1" are
 * on lines missing from this listing (presumably an early return).
 *
 * Returns 0 when both SalvageIndex results are 0, otherwise -1.
 */
2551 SalvageVnodes(struct SalvInfo *salvinfo,
2552 struct InodeSummary *rwIsp,
2553 struct InodeSummary *thisIsp,
2554 struct ViceInodeInfo *inodes, int check)
2556 int ilarge, ismall, ioffset, RW, nInodes;
2557 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2560 RW = (rwIsp == thisIsp);
2561 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2563 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2564 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2565 if (check && ismall == -1)
2568 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2569 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2570 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex: walk one vnode index file and reconcile every in-use vnode
 * with the list of data inodes found on disk.
 *
 * salvinfo    state of the current salvage job; VolumeChanged is set when a
 *             vnode is rewritten
 * ino         inode of the vnode index file to scan
 * class       vnode class of that index (selects the VnodeClassInfo entry
 *             giving diskSize and magic)
 * RW          non-zero for the read/write volume; per the comment in the
 *             body, an inode whose number matches the vnode is preferred
 *             in that case
 * ip, nInodes inode records to match against; the loops compare
 *             ip->u.vnode.vnodeNumber with the current vnode number, so
 *             the list is presumably sorted by vnode number -- confirm
 * volSummary  volume summary; header.parent is used to open the index
 * check       detect-only mode; the function asserts that no vnode was
 *             changed while check is set
 *
 * For each vnode whose type is not vNull the visible code:
 *   - zeroes the vnode if its inode number is 0 or its magic is wrong;
 *   - when an inode with the same vnode number exists, repairs the vnode's
 *     uniquifier, data version, inode number and length from that inode
 *     and decrements ip->linkCount so the inode is kept;
 *   - otherwise zeroes vnodes that name a missing inode, and directory
 *     vnodes that list no inode;
 *   - writes a changed vnode back with IH_IWRITE unless Testing is set.
 *
 * Failure to open or size the index file is handled with osi_Assert.  The
 * return statements are on lines missing from this listing; the caller
 * visible in this file treats 0 as success and tests for -1.
 */
2574 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2575 struct ViceInodeInfo *ip, int nInodes,
2576 struct VolumeSummary *volSummary, int check)
2578 char buf[SIZEOF_LARGEDISKVNODE];
2579 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2581 StreamHandle_t *file;
2582 struct VnodeClassInfo *vcp;
2584 afs_sfsize_t nVnodes;
2585 afs_fsize_t vnodeLength;
2587 afs_ino_str_t stmp1, stmp2;
2591 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2592 fdP = IH_OPEN(handle);
2593 osi_Assert(fdP != NULL);
2594 file = FDH_FDOPEN(fdP, "r+");
2595 osi_Assert(file != NULL);
2596 vcp = &VnodeClassInfo[class];
2597 size = OS_SIZE(fdP->fd_fd);
2598 osi_Assert(size != -1);
2599 nVnodes = (size / vcp->diskSize) - 1;
2601 osi_Assert((nVnodes + 1) * vcp->diskSize == size);
2602 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
/* The stream now sits one diskSize past the start of the file, and nVnodes
 * was computed as (size / diskSize) - 1, so the loop reads every remaining
 * record exactly once. */
2606 for (vnodeIndex = 0;
2607 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2608 nVnodes--, vnodeIndex++) {
2609 if (vnode->type != vNull) {
2610 int vnodeChanged = 0;
2611 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2612 if (VNDISK_GET_INO(vnode) == 0) {
2614 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2615 memset(vnode, 0, vcp->diskSize);
2619 if (vcp->magic != vnode->vnodeMagic) {
2620 /* bad magic #, probably partially created vnode */
2622 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2623 vnodeNumber, afs_printable_uint32_lu(vnode->vnodeMagic),
2624 afs_printable_uint32_lu(vcp->magic));
2625 memset(vnode, 0, vcp->diskSize);
2629 Log("Partially allocated vnode %d deleted.\n",
2631 memset(vnode, 0, vcp->diskSize);
2635 /* ****** Should do a bit more salvage here: e.g. make sure
2636 * vnode type matches what it should be given the index */
2637 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2638 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2639 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2640 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2647 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2648 /* The following doesn't work, because the version number
2649 * is not maintained correctly by the file server */
2650 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2651 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2653 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2659 /* For RW volume, look for vnode with matching inode number;
2660 * if no such match, take the first determined by our sort
2662 struct ViceInodeInfo *lip = ip;
2663 int lnInodes = nInodes;
2665 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2666 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2675 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2676 /* "Matching" inode */
2680 vu = vnode->uniquifier;
2681 iu = ip->u.vnode.vnodeUniquifier;
2682 vd = vnode->dataVersion;
2683 id = ip->u.vnode.inodeDataVersion;
2685 * Because of the possibility of the uniquifier overflows (> 4M)
2686 * we compare them modulo the low 22-bits; we shouldn't worry
2687 * about mismatching since they shouldn't to many old
2688 * uniquifiers of the same vnode...
2690 if (IUnique(vu) != IUnique(iu)) {
2692 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2695 vnode->uniquifier = iu;
2696 #ifdef AFS_3DISPARES
2697 vnode->dataVersion = (id >= vd ?
2700 1887437 ? vd : id) :
2703 1887437 ? id : vd));
2705 #if defined(AFS_SGI_EXMAG)
2706 vnode->dataVersion = (id >= vd ?
2709 15099494 ? vd : id) :
2712 15099494 ? id : vd));
2714 vnode->dataVersion = (id > vd ? id : vd);
2715 #endif /* AFS_SGI_EXMAG */
2716 #endif /* AFS_3DISPARES */
2719 /* don't bother checking for vd > id any more, since
2720 * partial file transfers always result in this state,
2721 * and you can't do much else anyway (you've already
2722 * found the best data you can) */
2723 #ifdef AFS_3DISPARES
2724 if (!vnodeIsDirectory(vnodeNumber)
2725 && ((vd < id && (id - vd) < 1887437)
2726 || ((vd > id && (vd - id) > 1887437)))) {
2728 #if defined(AFS_SGI_EXMAG)
2729 if (!vnodeIsDirectory(vnodeNumber)
2730 && ((vd < id && (id - vd) < 15099494)
2731 || ((vd > id && (vd - id) > 15099494)))) {
2733 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2734 #endif /* AFS_SGI_EXMAG */
2737 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2738 vnode->dataVersion = id;
2743 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2746 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2748 VNDISK_SET_INO(vnode, ip->inodeNumber);
2753 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2755 VNDISK_SET_INO(vnode, ip->inodeNumber);
2758 VNDISK_GET_LEN(vnodeLength, vnode);
2759 if (ip->byteCount != vnodeLength) {
2762 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2767 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2768 VNDISK_SET_LEN(vnode, ip->byteCount);
2772 ip->linkCount--; /* Keep the inode around */
2775 } else { /* no matching inode */
2777 if (VNDISK_GET_INO(vnode) != 0
2778 || vnode->type == vDirectory) {
2779 /* No matching inode--get rid of the vnode */
2781 if (VNDISK_GET_INO(vnode)) {
2783 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2787 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2792 if (VNDISK_GET_INO(vnode)) {
2794 time_t serverModifyTime = vnode->serverModifyTime;
2795 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2799 time_t serverModifyTime = vnode->serverModifyTime;
2800 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2803 memset(vnode, 0, vcp->diskSize);
2806 /* Should not reach here because we checked for
2807 * (inodeNumber == 0) above. And where we zero the vnode,
2808 * we also goto vnodeDone.
2812 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2816 } /* VNDISK_GET_INO(vnode) != 0 */
2818 osi_Assert(!(vnodeChanged && check));
2819 if (vnodeChanged && !Testing) {
2820 osi_Assert(IH_IWRITE
2821 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2822 (char *)vnode, vcp->diskSize)
2824 salvinfo->VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber: look up the in-memory VnodeEssence for a vnode number.
 *
 * The vnode number is split into its class (vnodeIdToClass) and its slot
 * within that class (vnodeIdToBitNumber); the slot indexes the vnodes
 * array of salvinfo->vnodeInfo[class].
 *
 * Returns a pointer into that array, or NULL when the slot is at or beyond
 * vip->nVnodes.  Callers in this file test the result against NULL before
 * using it.
 */
2835 struct VnodeEssence *
2836 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2839 struct VnodeInfo *vip;
2842 class = vnodeIdToClass(vnodeNumber);
2843 vip = &salvinfo->vnodeInfo[class];
2844 offset = vnodeIdToBitNumber(vnodeNumber);
2845 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite: give directory `dir` a fresh copy of its data inode before
 * the caller modifies the directory.
 *
 * salvinfo  state of the current salvage job (device, partition path and
 *           the handle of the large vnode index are read from it)
 * dir       summary of the directory about to be changed
 *
 * The dir->copied / Testing test guards the copy; the guarded statement is
 * on a line missing from this listing (presumably an early return).  The
 * rest of the visible code reads the directory's vnode from the large
 * vnode index, creates a new inode with IH_CREATE while bumping
 * vnode.dataVersion, copies the old inode's contents with CopyInode,
 * stores the new inode number in the vnode, writes the vnode back and
 * points dir->dirHandle at the new inode.  The old inode is deliberately
 * not released here.
 *
 * Every step is checked with osi_Assert rather than reported to the caller.
 */
2849 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2851 /* Copy the directory unconditionally if we are going to change it:
2852 * not just if was cloned.
2854 struct VnodeDiskObject vnode;
2855 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2856 Inode oldinode, newinode;
2859 if (dir->copied || Testing)
2861 DFlush(); /* Well justified paranoia... */
2864 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2865 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2867 osi_Assert(code == sizeof(vnode));
2868 oldinode = VNDISK_GET_INO(&vnode);
2869 /* Increment the version number by a whole lot to avoid problems with
2870 * clients that were promised new version numbers--but the file server
2871 * crashed before the versions were written to disk.
2874 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2875 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2877 osi_Assert(VALID_INO(newinode));
2878 osi_Assert(CopyInode(salvinfo->fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2880 VNDISK_SET_INO(&vnode, newinode);
2882 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2883 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2885 osi_Assert(code == sizeof(vnode));
2887 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2888 salvinfo->fileSysDevice, newinode,
2889 &salvinfo->VolumeChanged);
2890 /* Don't delete the original inode right away, because the directory is
2891 * still being scanned.
2897 * This function should either successfully create a new dir, or give up
2898 * and leave things the way they were. In particular, if it fails to write
2899 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage: rebuild a damaged directory into a new inode and switch
 * the directory's vnode over to it.
 *
 * salvinfo  state of the current salvage job
 * dir       summary of the directory to rebuild; on success dir->dirHandle
 *           is replaced by the handle of the rebuilt directory
 *
 * Visible steps: read the directory vnode from the large vnode index,
 * create a new inode (bumping vnode.dataVersion), run DirSalvage from the
 * old directory into the new one, and verify the result with DirOK.  If
 * the salvage or the verification fails, the new inode's link count is
 * dropped with IH_DEC and the old directory is kept.  On success the vnode
 * gets the new inode number and length and is written back, the partition
 * is synced, and the old inode's link count is dropped.
 *
 * parentUnique defaults to 1 and is replaced by the parent vnode's unique
 * value when the parent can be looked up and that value is non-zero.
 *
 * NOTE(review): near the end, the result of IH_OPEN() is passed straight to
 * FDH_REALLYCLOSE() on the next source line with no NULL check in between
 * -- confirm FDH_REALLYCLOSE tolerates NULL.
 * NOTE(review): the Log() string "also failed to decrement link count on
 * new inode" has no trailing newline, unlike the other messages here.
 */
2903 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2905 struct VnodeDiskObject vnode;
2906 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2907 Inode oldinode, newinode;
2912 afs_int32 parentUnique = 1;
2913 struct VnodeEssence *vnodeEssence;
2918 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2920 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2921 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2923 osi_Assert(lcode == sizeof(vnode));
2924 oldinode = VNDISK_GET_INO(&vnode);
2925 /* Increment the version number by a whole lot to avoid problems with
2926 * clients that were promised new version numbers--but the file server
2927 * crashed before the versions were written to disk.
2930 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2931 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2933 osi_Assert(VALID_INO(newinode));
2934 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2935 &salvinfo->VolumeChanged);
2937 /* Assign . and .. vnode numbers from dir and vnode.parent.
2938 * The uniquifier for . is in the vnode.
2939 * The uniquifier for .. might be set to a bogus value of 1 and
2940 * the salvager will later clean it up.
2942 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2943 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2946 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2948 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2953 /* didn't really build the new directory properly, let's just give up. */
2954 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2955 Log("Directory salvage returned code %d, continuing.\n", code);
2957 Log("also failed to decrement link count on new inode");
2961 Log("Checking the results of the directory salvage...\n");
2962 if (!DirOK(&newdir)) {
2963 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2964 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2965 osi_Assert(code == 0);
2969 VNDISK_SET_INO(&vnode, newinode);
2970 length = afs_dir_Length(&newdir);
2971 VNDISK_SET_LEN(&vnode, length);
2973 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2974 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2976 osi_Assert(lcode == sizeof(vnode));
2979 nt_sync(salvinfo->fileSysDevice);
2981 sync(); /* this is slow, but hopefully rarely called. We don't have
2982 * an open FD on the file itself to fsync.
2986 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
2988 /* make sure old directory file is really closed */
2989 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2990 FDH_REALLYCLOSE(fdP);
2992 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2993 osi_Assert(code == 0);
2994 dir->dirHandle = newdir;
2998 * arguments for JudgeEntry.
/* Bundle handed to JudgeEntry through its untyped first argument; SalvageDir
 * fills in both members before enumerating a directory. */
3000 struct judgeEntry_params {
3001 struct DirSummary *dir; /**< directory we're examining entries in */
3002 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/*
 * JudgeEntry: per-entry callback used while enumerating a directory; decides
 * whether one directory entry is valid and repairs or deletes it if not.
 *
 * arock        pointer to a struct judgeEntry_params (directory summary and
 *              salvage state)
 * name         name of the directory entry
 * vnodeNumber  vnode number recorded in the entry
 * (The entry's uniquifier, `unique`, is a further parameter declared on a
 * line missing from this listing.)
 *
 * Checks made by the visible code, in order:
 *   - entries whose vnode number has no VnodeEssence are deleted;
 *   - without AFS_NAMEI_ENV, entries whose vnode has inode number 0 are
 *     deleted;
 *   - even-numbered vnodes with zero count, unique and modeBits are
 *     reported as invalid (and deleted under conditions partly hidden
 *     here);
 *   - a uniquifier mismatch deletes the entry and, unless the vnode's
 *     unique is zero or the directory is orphaned, recreates it with the
 *     vnode's uniquifier; "." and ".." of a vnode with zero unique are
 *     left alone;
 *   - "." and ".." are rewritten when they do not name this directory and
 *     its parent respectively;
 *   - names starting with ".__afs" are deleted and their vnode is marked
 *     unclaimed and todelete;
 *   - other entries are optionally reported (suid/sgid, root-owned, mount
 *     points), have invalid mount points converted to symbolic links, and
 *     are claimed by this directory or deleted if the vnode belongs to
 *     another directory.
 * Every directory modification is preceded by CopyOnWrite().  At the end
 * the vnode is marked claimed and its count is decremented.
 *
 * The only return visible here is "return 0" after a uniquifier repair.
 *
 * NOTE(review): the non-NAMEI branch calls Delete() while every other path
 * calls afs_dir_Delete() -- confirm Delete() is still defined.
 */
3006 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
3009 struct judgeEntry_params *params = arock;
3010 struct DirSummary *dir = params->dir;
3011 struct SalvInfo *salvinfo = params->salvinfo;
3012 struct VnodeEssence *vnodeEssence;
3013 afs_int32 dirOrphaned, todelete;
3015 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
3017 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3018 if (vnodeEssence == NULL) {
3020 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
3023 CopyOnWrite(salvinfo, dir);
3024 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3029 #ifndef AFS_NAMEI_ENV
3030 /* On AIX machines, don't allow entries to point to inode 0. That is a special
3031 * mount inode for the partition. If this inode were deleted, it would crash
3034 if (vnodeEssence->InodeNumber == 0) {
3035 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
3037 CopyOnWrite(salvinfo, dir);
3038 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3045 if (!(vnodeNumber & 1) && !Showmode
3046 && !(vnodeEssence->count || vnodeEssence->unique
3047 || vnodeEssence->modeBits)) {
3048 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
3049 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3050 vnodeNumber, unique,
3051 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3055 CopyOnWrite(salvinfo, dir);
3056 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3062 /* Check if the Uniquifiers match. If not, change the directory entry
3063 * so its unique matches the vnode unique. Delete if the unique is zero
3064 * or if the directory is orphaned.
3066 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3067 if (!vnodeEssence->unique
3068 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3069 /* This is an orphaned directory. Don't delete the . or ..
3070 * entry. Otherwise, it will get created in the next
3071 * salvage and deleted again here. So Just skip it.
3076 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3079 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3083 fid.Vnode = vnodeNumber;
3084 fid.Unique = vnodeEssence->unique;
3085 CopyOnWrite(salvinfo, dir);
3086 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3088 osi_Assert(afs_dir_Create(&dir->dirHandle, name, &fid) == 0);
3091 return 0; /* no need to continue */
3094 if (strcmp(name, ".") == 0) {
3095 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3098 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3100 CopyOnWrite(salvinfo, dir);
3101 osi_Assert(afs_dir_Delete(&dir->dirHandle, ".") == 0);
3102 fid.Vnode = dir->vnodeNumber;
3103 fid.Unique = dir->unique;
3104 osi_Assert(afs_dir_Create(&dir->dirHandle, ".", &fid) == 0);
3107 vnodeNumber = fid.Vnode; /* Get the new Essence */
3108 unique = fid.Unique;
3109 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3112 } else if (strcmp(name, "..") == 0) {
3115 struct VnodeEssence *dotdot;
3116 pa.Vnode = dir->parent;
3117 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3118 osi_Assert(dotdot != NULL); /* XXX Should not be assert */
3119 pa.Unique = dotdot->unique;
3121 pa.Vnode = dir->vnodeNumber;
3122 pa.Unique = dir->unique;
3124 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3126 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3128 CopyOnWrite(salvinfo, dir);
3129 osi_Assert(afs_dir_Delete(&dir->dirHandle, "..") == 0);
3130 osi_Assert(afs_dir_Create(&dir->dirHandle, "..", &pa) == 0);
3133 vnodeNumber = pa.Vnode; /* Get the new Essence */
3135 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3137 dir->haveDotDot = 1;
3138 } else if (strncmp(name, ".__afs", 6) == 0) {
3140 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3143 CopyOnWrite(salvinfo, dir);
3144 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3146 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3147 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3150 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3151 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/* A symlink vnode with no execute bits is handled as a mount point: its
 * contents must start with '#' or '%' and end with '.'; otherwise the
 * execute bits are set in modeBits and the vnode is flagged as changed. */
3152 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3153 && !(vnodeEssence->modeBits & 0111)) {
3154 afs_sfsize_t nBytes;
3160 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3161 vnodeEssence->InodeNumber);
3164 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3168 size = FDH_SIZE(fdP);
3170 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3171 FDH_REALLYCLOSE(fdP);
3178 nBytes = FDH_PREAD(fdP, buf, size, 0);
3179 if (nBytes == size) {
3181 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3182 Log("Volume %u (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3183 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3184 Testing ? "would convert" : "converted");
3185 vnodeEssence->modeBits |= 0111;
3186 vnodeEssence->changed = 1;
3187 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3188 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3189 dir->name ? dir->name : "??", name, buf);
3191 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3192 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3194 FDH_REALLYCLOSE(fdP);
3197 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3198 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3199 if (vnodeIdToClass(vnodeNumber) == vLarge
3200 && vnodeEssence->name == NULL) {
3202 if ((n = (char *)malloc(strlen(name) + 1)))
3204 vnodeEssence->name = n;
3207 /* The directory entry points to the vnode. Check to see if the
3208 * vnode points back to the directory. If not, then let the
3209 * directory claim it (else it might end up orphaned). Vnodes
3210 * already claimed by another directory are deleted from this
3211 * directory: hardlinks to the same vnode are not allowed
3212 * from different directories.
3214 if (vnodeEssence->parent != dir->vnodeNumber) {
3215 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3216 /* Vnode does not point back to this directory.
3217 * Orphaned dirs cannot claim a file (it may belong to
3218 * another non-orphaned dir).
3221 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3223 vnodeEssence->parent = dir->vnodeNumber;
3224 vnodeEssence->changed = 1;
3226 /* Vnode was claimed by another directory */
3229 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3230 } else if (vnodeNumber == 1) {
3231 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3233 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3237 CopyOnWrite(salvinfo, dir);
3238 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3243 /* This directory claims the vnode */
3244 vnodeEssence->claimed = 1;
3246 vnodeEssence->count--;
/*
 * DistilVnodeEssence: read one vnode index file and build the in-memory
 * summary (array of struct VnodeEssence) used by the directory checks.
 *
 * salvinfo  state of the current salvage job; salvinfo->vnodeInfo[class]
 *           is filled in (handle, nVnodes, vnodes, inodes, block and
 *           allocation counters)
 * rwVId     volume id used when initialising the index file handle
 * class     vnode class of the index being read
 * ino       inode of the index file
 * maxu      in/out: raised to the largest uniquifier seen in this index
 *
 * For every vnode whose type is not vNull the link count, size in blocks,
 * parent, uniquifier, mode bits, inode number, type, author, owner and
 * group are copied into the matching VnodeEssence.  For the large class
 * the directory's inode number is also stored in vip->inodes[].  A vnode
 * of type vDirectory found in an index that is not vLarge is cleared and
 * written back.
 *
 * Open, size and allocation failures are handled with osi_Assert.
 */
3251 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3252 VnodeClass class, Inode ino, Unique * maxu)
3254 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3255 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3256 char buf[SIZEOF_LARGEDISKVNODE];
3257 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3259 StreamHandle_t *file;
3264 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3265 fdP = IH_OPEN(vip->handle);
3266 osi_Assert(fdP != NULL);
3267 file = FDH_FDOPEN(fdP, "r+");
3268 osi_Assert(file != NULL);
3269 size = OS_SIZE(fdP->fd_fd);
3270 osi_Assert(size != -1);
3271 vip->nVnodes = (size / vcp->diskSize) - 1;
3272 if (vip->nVnodes > 0) {
3273 osi_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3274 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
3275 osi_Assert((vip->vnodes = (struct VnodeEssence *)
3276 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3277 if (class == vLarge) {
3278 osi_Assert((vip->inodes = (Inode *)
3279 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3288 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3289 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3290 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3291 nVnodes--, vnodeIndex++) {
3292 if (vnode->type != vNull) {
3293 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3294 afs_fsize_t vnodeLength;
3295 vip->nAllocatedVnodes++;
3296 vep->count = vnode->linkCount;
3297 VNDISK_GET_LEN(vnodeLength, vnode);
3298 vep->blockCount = nBlocks(vnodeLength);
3299 vip->volumeBlockCount += vep->blockCount;
3300 vep->parent = vnode->parent;
3301 vep->unique = vnode->uniquifier;
3302 if (*maxu < vnode->uniquifier)
3303 *maxu = vnode->uniquifier;
3304 vep->modeBits = vnode->modeBits;
3305 vep->InodeNumber = VNDISK_GET_INO(vnode);
3306 vep->type = vnode->type;
3307 vep->author = vnode->author;
3308 vep->owner = vnode->owner;
3309 vep->group = vnode->group;
3310 if (vnode->type == vDirectory) {
3311 if (class != vLarge) {
3312 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3313 vip->nAllocatedVnodes--;
/* NOTE(review): `vnode` is declared above as a pointer into buf, so
 * sizeof(vnode) is the size of a pointer and &vnode is the address of the
 * pointer variable.  As written, only pointer-size bytes of the record are
 * cleared and the bytes of the pointer itself are written to the index
 * file.  The intent looks like clearing and writing the vnode record
 * (vcp->diskSize bytes starting at vnode) -- confirm and fix. */
3314 memset(vnode, 0, sizeof(vnode));
3315 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3316 vnodeIndexOffset(vcp, vnodeNumber),
3317 (char *)&vnode, sizeof(vnode));
3318 salvinfo->VolumeChanged = 1;
3320 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName: build the path name of a directory vnode by walking up its
 * parent chain.
 *
 * salvinfo  state of the current salvage job (used to look up parents)
 * vnode     vnode number of the directory
 * vp        VnodeEssence of that directory; vp->parent and vp->name are
 *           read
 * (A further parameter, the output buffer `path`, is declared on a line
 * missing from this listing.)
 *
 * When the vnode has a parent and a recorded name, the parent's path is
 * produced by a recursive call and OS_DIRSEP plus vp->name are appended.
 * The other cases and the return value are on lines not shown here; the
 * recursive call is used as a truth value.
 *
 * NOTE(review): the strcat() calls are unbounded; the caller visible in
 * this file passes a MAXPATHLEN buffer -- confirm long name chains cannot
 * overflow it.
 */
3329 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3332 struct VnodeEssence *parentvp;
3338 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3339 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3340 strcat(path, OS_DIRSEP);
3341 strcat(path, vp->name);
/*
 * IsVnodeOrphaned(salvinfo, vnode): returns 1 when `vnode` is orphaned and
 * 0 when it is not.  A vnode with no VnodeEssence, or whose claimed flag is
 * clear, is orphaned; otherwise the answer is that of its parent, found by
 * recursion.  The conditions guarding the first two returns are on lines
 * missing from this listing; per their comments they cover vnode zero and
 * the root directory vnode.
 */
3347 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3348 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
3351 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3353 struct VnodeEssence *vep;
3356 return (1); /* Vnode zero does not exist */
3358 return (0); /* The root dir vnode is always claimed */
3359 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3360 if (!vep || !vep->claimed)
3361 return (1); /* Vnode is not claimed - it is orphaned */
3363 return (IsVnodeOrphaned(salvinfo, vep->parent));
/*
 * SalvageDir: salvage the directory stored in slot `i` of the large vnode
 * index, after first salvaging its parent directory.
 *
 * salvinfo      state of the current salvage job
 * name          passed through unchanged to the recursive call
 * rwVid         id of the read/write volume; used when releasing the old
 *               inode of a copied directory
 * dirVnodeInfo  large-vnode summary; its vnodes[] and inodes[] arrays are
 *               indexed by `i`
 * alinkH        link handle stored in the directory summary (ds_linkH)
 * i             slot (bit number) of the directory in the large index
 * rootdir       out: receives a copy of the directory summary when the
 *               directory is vnode 1 with unique 1
 * rootdirfound  NOTE(review): not referenced on any line of this listing
 *               apart from the recursive call -- presumably set next to
 *               the rootdir copy; confirm
 *
 * Visible flow: return if the slot is already marked salvaged or has no
 * inode; clear a non-zero parent on vnode 1; recurse into the parent if it
 * has not been salvaged; fill the static DirSummary; check the directory
 * with DirOK (forced bad when RebuildDirs is set and Testing is not) and
 * rebuild it with CopyAndSalvage when needed; compute the path name; run
 * JudgeEntry over every entry; finally release the pre-copy inode if the
 * directory was copied during judging.
 *
 * `dir`, `dirHandle` and `path` are function-level statics.  The recursive
 * call for the parent happens before they are filled in for this
 * directory.
 */
3367 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3368 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3369 struct DirSummary *rootdir, int *rootdirfound)
3371 static struct DirSummary dir;
3372 static struct DirHandle dirHandle;
3373 struct VnodeEssence *parent;
3374 static char path[MAXPATHLEN];
3377 if (dirVnodeInfo->vnodes[i].salvaged)
3378 return; /* already salvaged */
3381 dirVnodeInfo->vnodes[i].salvaged = 1;
3383 if (dirVnodeInfo->inodes[i] == 0)
3384 return; /* Not allocated to a directory */
3386 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3387 if (dirVnodeInfo->vnodes[i].parent) {
3388 Log("Bad parent, vnode 1; %s...\n",
3389 (Testing ? "skipping" : "salvaging"));
3390 dirVnodeInfo->vnodes[i].parent = 0;
3391 dirVnodeInfo->vnodes[i].changed = 1;
/* Salvage the parent directory first if it has not been done yet. */
3394 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3395 if (parent && parent->salvaged == 0)
3396 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3397 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3398 rootdir, rootdirfound);
3401 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3402 dir.unique = dirVnodeInfo->vnodes[i].unique;
3405 dir.parent = dirVnodeInfo->vnodes[i].parent;
3406 dir.haveDot = dir.haveDotDot = 0;
3407 dir.ds_linkH = alinkH;
3408 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3409 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
3411 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3414 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3415 (Testing ? "skipping" : "salvaging"));
3418 CopyAndSalvage(salvinfo, &dir);
3420 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3423 dirHandle = dir.dirHandle;
3426 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3427 &dirVnodeInfo->vnodes[i], path);
3430 /* If enumeration failed for random reasons, we will probably delete
3431 * too much stuff, so we guard against this instead.
3433 struct judgeEntry_params judge_params;
3434 judge_params.salvinfo = salvinfo;
3435 judge_params.dir = &dir;
3437 osi_Assert(afs_dir_EnumerateDir(&dirHandle, JudgeEntry,
3438 &judge_params) == 0);
3441 /* Delete the old directory if it was copied in order to salvage.
3442 * CopyOnWrite has written the new inode # to the disk, but we still
3443 * have the old one in our local structure here. Thus, we idec the
3447 if (dir.copied && !Testing) {
3448 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3449 osi_Assert(code == 0);
3450 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3453 /* Remember rootdir DirSummary _after_ it has been judged */
3454 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3455 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3463 * Get a new FID that can be used to create a new file.
3465 * @param[in] volHeader vol header for the volume
3466 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3467 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3468 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3469 * updated to the new max unique if we create a new
/*
 * GetNewFID: choose a vnode number and uniquifier for a vnode that is about
 * to be created in the given vnode class.
 *
 * NOTE(review): this listing omits some original lines, so only the visible
 * statements are described.
 *
 * Visible behaviour:
 *   - the vnodes array of the class is scanned for an entry whose type is
 *     vNull;
 *   - when the index reaches nVnodes, nVnodes is incremented, the vnodes
 *     array is grown to i+1 entries with realloc, and the type of the new
 *     entry is set to vNull;
 *   - afid->Vnode is derived from the index with bitNumberToVnodeNumber;
 *   - when volHeader->uniquifier is below *maxunique + 1, afid->Unique is
 *     set to *maxunique + 1 + 2000; on the other visible path afid->Unique
 *     and *maxunique both take volHeader->uniquifier, which is then
 *     post-incremented.
 *
 * NOTE(review): the realloc result is assigned straight back to the vnodes
 * pointer and no NULL check is visible; on failure the old array would be
 * lost. Only the type field of the new entry is initialised in the visible
 * lines, and no matching growth of the inodes array is visible. Confirm
 * against the full source.
 */
3473 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3474 VnodeClass class, AFSFid *afid, Unique *maxunique)
3477 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3478 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
3482 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3483 /* no free vnodes; make a new one */
3484 salvinfo->vnodeInfo[class].nVnodes++;
3485 salvinfo->vnodeInfo[class].vnodes =
3486 realloc(salvinfo->vnodeInfo[class].vnodes,
3487 sizeof(struct VnodeEssence) * (i+1));
3489 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
3492 afid->Vnode = bitNumberToVnodeNumber(i, class);
3494 if (volHeader->uniquifier < (*maxunique + 1)) {
3495 /* header uniq is bad; it will get bumped by 2000 later */
3496 afid->Unique = *maxunique + 1 + 2000;
3499 /* header uniq seems okay; just use that */
3500 afid->Unique = *maxunique = volHeader->uniquifier++;
3505 * Create a vnode for a README file explaining not to use a recreated-root vol.
3507 * @param[in] volHeader vol header for the volume
3508 * @param[in] alinkH ihandle for i/o for the volume
3509 * @param[in] vid volume id
3510 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3511 * updated to the new max unique if we create a new
3513 * @param[out] afid FID for the new readme vnode
3514 * @param[out] ainode the inode for the new readme file
3516 * @return operation status
/*
 * CreateReadme: create the small vnode and data inode for the explanatory
 * README that is placed in a recreated root directory.
 *
 * NOTE(review): this listing omits some original lines (short declarations,
 * braces, error-path jumps), so only the visible statements are described.
 *
 * Visible behaviour:
 *   - a FID is obtained from GetNewFID for class vSmall;
 *   - an inode is created with IH_CREATE and the readme text, without its
 *     trailing NUL, is written to it with IH_IWRITE;
 *   - a zeroed small disk vnode is filled in (type vFile, mode 0777, link
 *     count 1, data version 1, current time) and written to the small vnode
 *     index at the offset for afid->Vnode;
 *   - the in-memory VnodeEssence and the per-class counters are updated;
 *   - *ainode receives the new inode number;
 *   - the recovery path releases the inode with IH_DEC and logs on failure.
 *
 * Log messages carry the CreateReadme tag throughout, and the short-write
 * message reports the byte count that was actually requested (length).
 */
3521 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3522 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3526 struct VnodeDiskObject *rvnode = NULL;
3528 IHandle_t *readmeH = NULL;
3529 struct VnodeEssence *vep;
3531 time_t now = time(NULL);
3533 /* Try to make the note brief, but informative. Only administrators should
3534 * be able to read this file at first, so we can hopefully assume they
3535 * know what AFS is, what a volume is, etc. */
3537 "This volume has been salvaged, but has lost its original root directory.\n"
3538 "The root directory that exists now has been recreated from orphan files\n"
3539 "from the rest of the volume. This recreated root directory may interfere\n"
3540 "with old cached data on clients, and there is no way the salvager can\n"
3541 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3542 "use this volume, but only copy the salvaged data to a new volume.\n"
3543 "Continuing to use this volume as it exists now may cause some clients to\n"
3544 "behave oddly when accessing this volume.\n"
3545 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3546 /* ^ the person reading this probably just lost some data, so they could
3547 * use some cheering up. */
3549 /* -1 for the trailing NUL */
3550 length = sizeof(readme) - 1;
3552 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
3554 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3556 /* create the inode and write the contents */
3557 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3558 salvinfo->fileSysPath, 0, vid,
3559 afid->Vnode, afid->Unique, 1);
3560 if (!VALID_INO(readmeinode)) {
3561 Log("CreateReadme: readme IH_CREATE failed\n");
3565 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3566 bytes = IH_IWRITE(readmeH, 0, readme, length);
3567 IH_RELEASE(readmeH);
3569 if (bytes != length) {
3570 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3571 (int)length);
3575 /* create the vnode and write it out */
3576 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
3578 Log("CreateReadme: error alloc'ing memory\n");
3582 rvnode->type = vFile;
3584 rvnode->modeBits = 0777;
3585 rvnode->linkCount = 1;
3586 VNDISK_SET_LEN(rvnode, length);
3587 rvnode->uniquifier = afid->Unique;
3588 rvnode->dataVersion = 1;
3589 VNDISK_SET_INO(rvnode, readmeinode);
3590 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3595 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
3597 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3598 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3599 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3601 if (bytes != SIZEOF_SMALLDISKVNODE) {
3602 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3603 (int)SIZEOF_SMALLDISKVNODE);
3607 /* update VnodeEssence for new readme vnode */
3608 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
3610 vep->blockCount = nBlocks(length);
3611 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3612 vep->parent = rvnode->parent;
3613 vep->unique = rvnode->uniquifier;
3614 vep->modeBits = rvnode->modeBits;
3615 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3616 vep->type = rvnode->type;
3617 vep->author = rvnode->author;
3618 vep->owner = rvnode->owner;
3619 vep->group = rvnode->group;
3629 *ainode = readmeinode;
3634 if (IH_DEC(alinkH, readmeinode, vid)) {
3635 Log("CreateReadme (recovery): IH_DEC failed\n");
3647 * create a root dir for a volume that lacks one.
3649 * @param[in] volHeader vol header for the volume
3650 * @param[in] alinkH ihandle for disk access for this volume group
3651 * @param[in] vid volume id we're dealing with
3652 * @param[out] rootdir populated with info about the new root dir
3653 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3654 * updated to the new max unique if we create a new
3657 * @return operation status
/*
 * CreateRootDir: build a replacement root directory (vnode 1) for a volume
 * that has none, containing a single README.ROOTDIR entry.
 *
 * NOTE(review): this listing omits some original lines (short declarations,
 * braces, error-path jumps, return statements), so only the visible
 * statements are described.
 *
 * Visible behaviour:
 *   - logs and gives up when neither the large nor the small vnode array
 *     exists;
 *   - when the large vnode array is missing, single-entry vnodes and inodes
 *     arrays are allocated with calloc and asserted non-NULL;
 *   - logs and gives up when the existing vnode 1 entry is not vNull;
 *   - creates the readme with CreateReadme;
 *   - creates the root inode with IH_CREATE, sets up rootdir->dirHandle,
 *     builds the directory with afs_dir_MakeDir, adds README.ROOTDIR with
 *     afs_dir_Create, records the directory length and calls DZap;
 *   - allocates a zeroed large disk vnode, gives it an ACL whose entry 0
 *     grants PRSFS_READ and PRSFS_LOOKUP to id -204, fills in the vnode
 *     fields and writes it to the large vnode index at the slot for vnode 1;
 *   - updates the in-memory VnodeEssence, the per-class counters and the
 *     caller-supplied DirSummary (rootdir->name is a strdup copy);
 *   - on the recovery path, releases the root and readme inodes with IH_DEC
 *     when decroot and decreadme are set, logging any failure.
 */
3662 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3663 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
3667 int decroot = 0, decreadme = 0;
3668 AFSFid did, readmeid;
3671 struct VnodeDiskObject *rootvnode = NULL;
3672 struct acl_accessList *ACL;
3675 struct VnodeEssence *vep;
3677 time_t now = time(NULL);
3679 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3680 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3684 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3685 /* We don't have any large vnodes in the volume; allocate room
3686 * for one so we can recreate the root dir */
3687 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3688 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3689 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3691 osi_Assert(salvinfo->vnodeInfo[vLarge].vnodes);
3692 osi_Assert(salvinfo->vnodeInfo[vLarge].inodes);
3695 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3696 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3697 if (vep->type != vNull) {
3698 Log("Not creating new root dir; existing vnode 1 is non-null\n");
3702 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3703 &readmeinode) != 0) {
3708 /* set the DV to a very high number, so it is unlikely that we collide
3709 * with a cached DV */
3712 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3714 if (!VALID_INO(rootinode)) {
3715 Log("CreateRootDir: IH_CREATE failed\n");
3720 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3721 rootinode, &salvinfo->VolumeChanged);
3725 if (afs_dir_MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3726 Log("CreateRootDir: MakeDir failed\n");
3729 if (afs_dir_Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3730 Log("CreateRootDir: Create failed\n");
3734 length = afs_dir_Length(&rootdir->dirHandle);
3735 DZap(&rootdir->dirHandle);
3737 /* create the new root dir vnode */
3738 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
3740 Log("CreateRootDir: malloc failed\n");
3744 /* only give 'rl' permissions to 'system:administrators'. We do this to
3745 * try to catch the attention of an administrator, that they should not
3746 * be writing to this directory or continue to use it. */
3747 ACL = VVnodeDiskACL(rootvnode);
3748 ACL->size = sizeof(struct acl_accessList);
3749 ACL->version = ACL_ACLVERSION;
3753 ACL->entries[0].id = -204; /* system:administrators */
3754 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
3756 rootvnode->type = vDirectory;
3757 rootvnode->cloned = 0;
3758 rootvnode->modeBits = 0777;
3759 rootvnode->linkCount = 2;
3760 VNDISK_SET_LEN(rootvnode, length);
3761 rootvnode->uniquifier = 1;
3762 rootvnode->dataVersion = dv;
3763 VNDISK_SET_INO(rootvnode, rootinode);
3764 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3765 rootvnode->author = 0;
3766 rootvnode->owner = 0;
3767 rootvnode->parent = 0;
3768 rootvnode->group = 0;
3769 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3771 /* write it out to disk */
3772 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3773 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3774 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3776 if (bytes != SIZEOF_LARGEDISKVNODE) {
3777 /* just cast to int and don't worry about printing real 64-bit ints;
3778 * a large disk vnode isn't anywhere near the 32-bit limit */
3779 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3780 (int)SIZEOF_LARGEDISKVNODE);
3784 /* update VnodeEssence for the new root vnode */
3785 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3787 vep->blockCount = nBlocks(length);
3788 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3789 vep->parent = rootvnode->parent;
3790 vep->unique = rootvnode->uniquifier;
3791 vep->modeBits = rootvnode->modeBits;
3792 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3793 vep->type = rootvnode->type;
3794 vep->author = rootvnode->author;
3795 vep->owner = rootvnode->owner;
3796 vep->group = rootvnode->group;
3806 /* update DirSummary for the new root vnode */
3807 rootdir->vnodeNumber = 1;
3808 rootdir->unique = 1;
3809 rootdir->haveDot = 1;
3810 rootdir->haveDotDot = 1;
3811 rootdir->rwVid = vid;
3812 rootdir->copied = 0;
3813 rootdir->parent = 0;
3814 rootdir->name = strdup(".");
3815 rootdir->vname = volHeader->name;
3816 rootdir->ds_linkH = alinkH;
3823 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3824 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3826 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3827 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3837 * salvage a volume group.
3839 * @param[in] salvinfo information for the current salvage job
3840 * @param[in] rwIsp inode summary for rw volume
3841 * @param[in] alinkH link table inode handle
3843 * @return operation status
/*
 * SalvageVolume: salvage one read-write volume and write back its repaired
 * vnodes and volume header.
 *
 * NOTE(review): this listing omits some original lines (short declarations,
 * braces, short statements, preprocessor lines), so only the visible
 * statements are described and some guards cannot be stated exactly.
 *
 * Visible phases, in order:
 *   1. Read the volume header from the volumeInfo inode; the read size, the
 *      magic number and destroyMe != DESTROY_ME are asserted.
 *   2. Call DistilVnodeEssence for the large and the small vnode index; both
 *      calls receive the address of maxunique.
 *   3. Call SalvageDir for every index in the large vnode table, then sync
 *      (nt_sync or sync; the selecting preprocessor lines are not shown).
 *   4. When no root directory was found, orphans equals ORPH_ATTACH and
 *      Testing is clear, log and call CreateRootDir.
 *   5. Walk every vnode of every class, skipping entries that are unused,
 *      claimed, or vnode 1. Link-count deltas are adjusted, and when
 *      attaching is requested the vnode is re-parented to the root dir, the
 *      dot-dot entry of an orphaned directory is deleted and recreated, and
 *      an entry named __ORPHANDIR__ or __ORPHANFILE__ followed by vnode and
 *      unique is added to the root dir, retrying up to 10 times with the
 *      unique bumped by 50 each time.
 *   6. Release the old root inode with IH_DEC when the root dir was found
 *      and was copied during this pass but not before it.
 *   7. DFlush, then downgrade orphans to ORPH_IGNORE when attaching was
 *      requested but no root directory exists. The assignment targets
 *      orphans, which is not declared in the visible lines.
 *   8. For each vnode with changed or count set, read the disk vnode, apply
 *      parent and link count, delete orphans that are to be removed
 *      (releasing their inode with IH_DEC), and write the vnode back.
 *   9. Log the orphan summary, free every vnodes[i].name, and set filecount,
 *      diskused and, if too low, the uniquifier (maxunique + 1 + 2000).
 *  10. When the volume changed and Testing is clear, ask the fileserver to
 *      break callbacks through FSYNC_VolOp, or unlink the fileserver state
 *      file when FSYNC is not in use (both under conditional compilation).
 *  11. Clear inUse and needsSalvaged, set inService and dontSalvage, record
 *      needsCallback, write the header (asserted), log the result and
 *      release the two vnode index handles.
 */
3847 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3849 /* This routine, for now, will only be called for read-write volumes */
3851 int BlocksInVolume = 0, FilesInVolume = 0;
3853 struct DirSummary rootdir, oldrootdir;
3854 struct VnodeInfo *dirVnodeInfo;
3855 struct VnodeDiskObject vnode;
3856 VolumeDiskData volHeader;
3858 int orphaned, rootdirfound = 0;
3859 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3860 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3861 struct VnodeEssence *vep;
3864 afs_sfsize_t nBytes;
3866 VnodeId LFVnode, ThisVnode;
3867 Unique LFUnique, ThisUnique;
3871 vid = rwIsp->volSummary->header.id;
3872 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3873 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3874 osi_Assert(nBytes == sizeof(volHeader));
3875 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3876 osi_Assert(volHeader.destroyMe != DESTROY_ME);
3877 /* (should not have gotten this far with DESTROY_ME flag still set!) */
3879 DistilVnodeEssence(salvinfo, vid, vLarge,
3880 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
3881 DistilVnodeEssence(salvinfo, vid, vSmall,
3882 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
3884 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
3885 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3886 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
3887 &rootdir, &rootdirfound);
3890 nt_sync(salvinfo->fileSysDevice);
3892 sync(); /* This used to be done lower level, for every dir */
3899 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
3901 Log("Cannot find root directory for volume %lu; attempting to create "
3902 "a new one\n", afs_printable_uint32_lu(vid));
3904 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
3909 salvinfo->VolumeChanged = 1;
3913 /* Parse each vnode looking for orphaned vnodes and
3914 * connect them to the tree as orphaned (if requested).
3916 oldrootdir = rootdir;
3917 for (class = 0; class < nVNODECLASSES; class++) {
3918 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
3919 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
3920 ThisVnode = bitNumberToVnodeNumber(v, class);
3921 ThisUnique = vep->unique;
3923 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3924 continue; /* Ignore unused, claimed, and root vnodes */
3926 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3927 * entry in this vnode had incremented the parent link count (In
3928 * JudgeEntry()). We need to go to the parent and decrement that
3929 * link count. But if the parent's unique is zero, then the parent
3930 * link count was not incremented in JudgeEntry().
3932 if (class == vLarge) { /* directory vnode */
3933 pv = vnodeIdToBitNumber(vep->parent);
3934 if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
3935 if (vep->parent == 1 && newrootdir) {
3936 /* this vnode's parent was the volume root, and
3937 * we just created the volume root. So, the parent
3938 * dir didn't exist during JudgeEntry, so the link
3939 * count was not inc'd there, so don't dec it here.
3945 salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
3951 continue; /* If no rootdir, can't attach orphaned files */
3953 /* Here we attach orphaned files and directories into the
3954 * root directory, LVVnode, making sure link counts stay correct.
3956 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3957 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3958 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3960 /* Update this orphaned vnode's info. Its parent info and
3961 * link count (do for orphaned directories and files).
3963 vep->parent = LFVnode; /* Parent is the root dir */
3964 vep->unique = LFUnique;
3967 vep->count--; /* Inc link count (root dir will pt to it) */
3969 /* If this orphaned vnode is a directory, change '..'.
3970 * The name of the orphaned dir/file is unknown, so we
3971 * build a unique name. No need to CopyOnWrite the directory
3972 * since it is not connected to tree in BK or RO volume and
3973 * won't be visible there.
3975 if (class == vLarge) {
3979 /* Remove and recreate the ".." entry in this orphaned directory */
3980 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
3981 salvinfo->vnodeInfo[class].inodes[v],
3982 &salvinfo->VolumeChanged);
3984 pa.Unique = LFUnique;
3985 osi_Assert(afs_dir_Delete(&dh, "..") == 0);
3986 osi_Assert(afs_dir_Create(&dh, "..", &pa) == 0);
3988 /* The original parent's link count was decremented above.
3989 * Here we increment the new parent's link count.
3991 pv = vnodeIdToBitNumber(LFVnode);
3992 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
3996 /* Go to the root dir and add this entry. The link count of the
3997 * root dir was incremented when ".." was created. Try 10 times.
3999 for (j = 0; j < 10; j++) {
4000 pa.Vnode = ThisVnode;
4001 pa.Unique = ThisUnique;
4003 snprintf(npath, sizeof npath, "%s.%u.%u",
4004 ((class == vLarge) ? "__ORPHANDIR__"
4005 : "__ORPHANFILE__"),
4006 ThisVnode, ThisUnique);
4008 CopyOnWrite(salvinfo, &rootdir);
4009 code = afs_dir_Create(&rootdir.dirHandle, npath, &pa);
4013 ThisUnique += 50; /* Try creating a different file */
4015 osi_Assert(code == 0);
4016 Log("Attaching orphaned %s to volume's root dir as %s\n",
4017 ((class == vLarge) ? "directory" : "file"), npath);
4019 } /* for each vnode in the class */
4020 } /* for each class of vnode */
4022 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
4024 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
4026 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
4028 osi_Assert(code == 0);
4029 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
4032 DFlush(); /* Flush the changes */
4033 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
4034 Log("Cannot attach orphaned files and directories: Root directory not found\n");
4035 orphans = ORPH_IGNORE;
4038 /* Write out all changed vnodes. Orphaned files and directories
4039 * will get removed here also (if requested).
4041 for (class = 0; class < nVNODECLASSES; class++) {
4042 afs_sfsize_t nVnodes = salvinfo->vnodeInfo[class].nVnodes;
4043 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
4044 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
4045 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
4046 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
4047 for (i = 0; i < nVnodes; i++) {
4048 struct VnodeEssence *vnp = &vnodes[i];
4049 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
4051 /* If the vnode is good but is unclaimed (not listed in
4052 * any directory entries), then it is orphaned.
4055 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
4056 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
4060 if (vnp->changed || vnp->count) {
4063 IH_IREAD(salvinfo->vnodeInfo[class].handle,
4064 vnodeIndexOffset(vcp, vnodeNumber),
4065 (char *)&vnode, sizeof(vnode));
4066 osi_Assert(nBytes == sizeof(vnode));
4068 vnode.parent = vnp->parent;
4069 oldCount = vnode.linkCount;
4070 vnode.linkCount = vnode.linkCount - vnp->count;
4073 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4075 if (!vnp->todelete) {
4076 /* Orphans should have already been attached (if requested) */
4077 osi_Assert(orphans != ORPH_ATTACH);
4078 oblocks += vnp->blockCount;
4081 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4083 BlocksInVolume -= vnp->blockCount;
4085 if (VNDISK_GET_INO(&vnode)) {
4087 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4088 osi_Assert(code == 0);
4090 memset(&vnode, 0, sizeof(vnode));
4092 } else if (vnp->count) {
4094 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4097 vnode.modeBits = vnp->modeBits;
4100 vnode.dataVersion++;
4103 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4104 vnodeIndexOffset(vcp, vnodeNumber),
4105 (char *)&vnode, sizeof(vnode));
4106 osi_Assert(nBytes == sizeof(vnode));
4108 salvinfo->VolumeChanged = 1;
4112 if (!Showmode && ofiles) {
4113 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4115 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
4119 for (class = 0; class < nVNODECLASSES; class++) {
4120 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4121 for (i = 0; i < vip->nVnodes; i++)
4122 if (vip->vnodes[i].name)
4123 free(vip->vnodes[i].name);
4130 /* Set correct resource utilization statistics */
4131 volHeader.filecount = FilesInVolume;
4132 volHeader.diskused = BlocksInVolume;
4134 /* Make sure the uniquifer is big enough: maxunique is the real maxUniquifier */
4135 if (volHeader.uniquifier < (maxunique + 1)) {
4137 Log("Volume uniquifier is too low; fixed\n");
4138 /* Plus 2,000 in case there are workstations out there with
4139 * cached vnodes that have since been deleted
4141 volHeader.uniquifier = (maxunique + 1 + 2000);
4145 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4146 "Only use this salvaged volume to copy data to another volume; "
4147 "do not continue to use this volume (%lu) as-is.\n",
4148 afs_printable_uint32_lu(vid));
4151 if (!Testing && salvinfo->VolumeChanged) {
4152 #ifdef FSSYNC_BUILD_CLIENT
4153 if (salvinfo->useFSYNC) {
4154 afs_int32 fsync_code;
4156 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4158 Log("Error trying to tell the fileserver to break callbacks for "
4159 "changed volume %lu; error code %ld\n",
4160 afs_printable_uint32_lu(vid),
4161 afs_printable_int32_ld(fsync_code));
4163 salvinfo->VolumeChanged = 0;
4166 #endif /* FSSYNC_BUILD_CLIENT */
4168 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4169 if (!salvinfo->useFSYNC) {
4170 /* A volume's contents have changed, but the fileserver will not
4171 * break callbacks on the volume until it tries to load the vol
4172 * header. So, to reduce the amount of time a client could have
4173 * stale data, remove fsstate.dat, so the fileserver will init
4174 * callback state with all clients. This is a very coarse hammer,
4175 * and in the future we should just record which volumes have
4177 code = unlink(AFSDIR_SERVER_FSSTATE_FILEPATH);
4178 if (code && errno != ENOENT) {
4179 Log("Error %d when trying to unlink FS state file %s\n", errno,
4180 AFSDIR_SERVER_FSSTATE_FILEPATH);
4186 /* Turn off the inUse bit; the volume's been salvaged! */
4187 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4188 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4189 volHeader.inService = 1; /* allow service again */
4190 volHeader.needsCallback = (salvinfo->VolumeChanged != 0);
4191 volHeader.dontSalvage = DONT_SALVAGE;
4192 salvinfo->VolumeChanged = 0;
4194 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4195 osi_Assert(nBytes == sizeof(volHeader));
4198 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
4199 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
4200 FilesInVolume, BlocksInVolume);
4203 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4204 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
/*
 * ClearROInUseBit: mark the volume described by summary as cleanly usable
 * again by rewriting flags in its on-disk volume header.
 *
 * The header is read through summary->volumeInfoHandle; the read size and
 * the magic number are asserted. inUse and needsSalvaged are cleared,
 * inService is set to 1 and dontSalvage is set to DONT_SALVAGE, then the
 * header is written back and the write size is asserted.
 *
 * NOTE(review): this listing omits some original lines; any guard around
 * the write is not visible here.
 */
4210 ClearROInUseBit(struct VolumeSummary *summary)
4212 IHandle_t *h = summary->volumeInfoHandle;
4213 afs_sfsize_t nBytes;
4215 VolumeDiskData volHeader;
4217 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4218 osi_Assert(nBytes == sizeof(volHeader));
4219 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
4220 volHeader.inUse = 0;
4221 volHeader.needsSalvaged = 0;
4222 volHeader.inService = 1;
4223 volHeader.dontSalvage = DONT_SALVAGE;
4225 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4226 osi_Assert(nBytes == sizeof(volHeader));
4231 * Possibly delete the volume.
4233 * deleteMe - Always do so, only a partial volume.
/*
 * MaybeZapVolume: delete a volume that cannot be salvaged, or abort.
 *
 * NOTE(review): this listing omits some original lines (declarations,
 * braces, some guards), so only the visible statements are described.
 *
 * Visible behaviour:
 *   - when the volume is read-only (readOnly(isp)) or deleteMe is set, and
 *     the volume summary has a file name, an explanation is logged, the
 *     header path is built from fileSysPath and the file name, the disk
 *     header is destroyed with VDestroyVolumeDiskHeader (failure is logged),
 *     the header file is unlinked (ENOENT is tolerated), the fileserver is
 *     told through AskDelete when FSYNC is in use, and the summary is
 *     flagged as deleted;
 *   - otherwise, when check is zero, the failure is logged and Abort is
 *     called.
 *
 * NOTE(review): the path is formatted with sprintf and the declaration of
 * path is not visible in this listing; confirm that the buffer is large
 * enough for fileSysPath plus the file name.
 */
4236 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4237 char *message, int deleteMe, int check)
4239 if (readOnly(isp) || deleteMe) {
4240 if (isp->volSummary && isp->volSummary->fileName) {
4243 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
4245 Log("It will be deleted on this server (you may find it elsewhere)\n");
4248 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
4250 Log("it will be deleted instead. It should be recloned.\n");
4255 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, isp->volSummary->fileName);
4257 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4259 Log("Error %ld destroying volume disk header for volume %lu\n",
4260 afs_printable_int32_ld(code),
4261 afs_printable_uint32_lu(isp->volumeId));
4264 /* make sure we actually delete the fileName file; ENOENT
4265 * is fine, since VDestroyVolumeDiskHeader probably already
4267 if (unlink(path) && errno != ENOENT) {
4268 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4270 if (salvinfo->useFSYNC) {
4271 AskDelete(salvinfo, isp->volumeId);
4273 isp->volSummary->deleted = 1;
4276 } else if (!check) {
4277 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
4279 Abort("Salvage of volume %u aborted\n", isp->volumeId);
4283 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4285 * Locks a volume on disk for salvaging.
4287 * @param[in] volumeId volume ID to lock
4289 * @return operation status
4291 * @retval -1 volume lock raced with a fileserver restart; all volumes must
4292 * be checked out and locked again
/*
 * LockVolume: take the on-disk lock for a volume that is about to be
 * salvaged and mark its header as in use by this program.
 *
 * NOTE(review): this listing omits some original lines (declarations,
 * braces, guards, return statements), so only the visible statements are
 * described.
 *
 * Visible behaviour:
 *   - the lock type comes from VVolLockType(V_VOLUPD, 1) and the lock is
 *     requested without blocking through VLockVolumeByIdNB;
 *   - EBUSY from the lock call leads to an Abort naming the volume; another
 *     Abort reports other lock errors (its guard is not visible);
 *   - FSYNC_VerifyCheckout is called; SYNC_DENIED is treated as a need to
 *     retry checking out volumes, and any other result than SYNC_OK aborts;
 *   - the volume disk header is read, converted with DiskToVolumeHeader and
 *     used to open the volume info inode; the handling of a failed or
 *     invalid read is on lines not shown;
 *   - inUse is set to programType and the header is written back, with the
 *     write size asserted.
 */
4297 LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
4302 /* should always be WRITE_LOCK, but keep the lock-type logic all
4303 * in one place, in VVolLockType. Params will be ignored, but
4304 * try to provide what we're logically doing. */
4305 locktype = VVolLockType(V_VOLUPD, 1);
4307 code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
4309 if (code == EBUSY) {
4310 Abort("Someone else appears to be using volume %lu; Aborted\n",
4311 afs_printable_uint32_lu(volumeId));
4313 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4314 afs_printable_int32_ld(code),
4315 afs_printable_uint32_lu(volumeId));
4318 code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPathName, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4319 if (code == SYNC_DENIED) {
4320 /* need to retry checking out volumes */
4323 if (code != SYNC_OK) {
4324 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4325 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4328 /* set inUse = programType in the volume header to ensure that nobody
4329 * tries to use this volume again without salvaging, if we somehow crash
4330 * or otherwise exit before finishing the salvage.
4334 struct VolumeHeader header;
4335 struct VolumeDiskHeader diskHeader;
4336 struct VolumeDiskData volHeader;
4338 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
4343 DiskToVolumeHeader(&header, &diskHeader);
4345 IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
4346 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4347 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4353 volHeader.inUse = programType;
4355 /* If we can't re-write the header, bail out and error. We don't
4356 * assert when reading the header, since it's possible the
4357 * header isn't really there (when there's no data associated
4358 * with the volume; we just delete the vol header file in that
4359 * case). But if it's there enough that we can read it, but
4360 * somehow we cannot write to it to signify we're salvaging it,
4361 * we've got a big problem and we cannot continue. */
4362 osi_Assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
4369 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
/*
 * AskError: ask the fileserver to force a volume into the error state.
 *
 * The request is only compiled in when AFS_DEMAND_ATTACH_FS or
 * AFS_DEMAND_ATTACH_UTIL is defined. It is sent once through FSYNC_VolOp
 * with FSYNC_VOL_FORCE_ERROR; any result other than SYNC_OK is logged
 * together with its SYNC_res2string text and is not retried in the visible
 * lines.
 */
4372 AskError(struct SalvInfo *salvinfo, VolumeId volumeId)
4374 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4376 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4377 FSYNC_VOL_FORCE_ERROR, FSYNC_WHATEVER, NULL);
4378 if (code != SYNC_OK) {
4379 Log("AskError: failed to force volume %lu into error state; "
4380 "SYNC error code %ld (%s)\n", (long unsigned)volumeId,
4381 (long)code, SYNC_res2string(code));
4383 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
/*
 * AskOffline: ask the fileserver to take a volume offline for salvaging.
 *
 * NOTE(review): this listing omits some original lines (declarations,
 * braces, break statements, preprocessor branches), so only the visible
 * statements are described.
 *
 * Visible behaviour:
 *   - the request (FSYNC_VOL_OFF with reason FSYNC_SALVAGE) is attempted up
 *     to three times;
 *   - SYNC_DENIED is logged with one of two messages (the selecting
 *     condition is not visible) and followed by an Abort;
 *   - SYNC_BAD_COMMAND is logged as a protocol mismatch, with hints that
 *     depend on the demand-attach build flags, and followed by an Abort;
 *   - other failures are logged, FSYNC_clientFinis is called and the loop
 *     tries again;
 *   - if the final code is not SYNC_OK the failure is logged and the
 *     salvage is aborted.
 */
4387 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
4392 memset(&res, 0, sizeof(res));
4394 for (i = 0; i < 3; i++) {
4395 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4396 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4398 if (code == SYNC_OK) {
4400 } else if (code == SYNC_DENIED) {
4402 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4404 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4405 Abort("Salvage aborted\n");
4406 } else if (code == SYNC_BAD_COMMAND) {
4407 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
4410 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4411 Log("AskOffline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4413 Log("AskOffline: fileserver is DAFS but we are not.\n");
4416 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4417 Log("AskOffline: fileserver is not DAFS but we are.\n");
4419 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4422 Abort("Salvage aborted\n");
4425 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4426 FSYNC_clientFinis();
4430 if (code != SYNC_OK) {
4431 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4432 Abort("Salvage aborted\n");
/*
 * isDAFS and the body of the fileserver-type query (the function signature
 * line is not present in this listing; the log tag below names it AskDAFS).
 *
 * NOTE(review): this listing omits most of the branch bodies, so the value
 * stored for each outcome cannot be stated from here.
 *
 * Visible behaviour:
 *   - isDAFS is a file-scope static initialised to -1;
 *   - the query is sent up to three times as FSYNC_VolOp on volume id 1 with
 *     FSYNC_VOL_QUERY_VOP and reason FSYNC_SALVAGE;
 *   - SYNC_OK, SYNC_DENIED, SYNC_BAD_COMMAND and SYNC_FAILED (with a check
 *     for reason FSYNC_UNKNOWN_VOLID) are each handled separately;
 *   - any other result is logged, FSYNC_clientFinis is called and the query
 *     is retried.
 */
4436 /* don't want to pass around state; remember it here */
4437 static int isDAFS = -1;
4441 afs_int32 code, i, ret = 0;
4444 /* we don't care if we race. the answer shouldn't change */
4448 memset(&res, 0, sizeof(res));
4450 for (i = 0; i < 3; i++) {
4451 code = FSYNC_VolOp(1, NULL,
4452 FSYNC_VOL_QUERY_VOP, FSYNC_SALVAGE, &res);
4454 if (code == SYNC_OK) {
4457 } else if (code == SYNC_DENIED) {
4460 } else if (code == SYNC_BAD_COMMAND) {
4463 } else if (code == SYNC_FAILED) {
4464 if (res.hdr.reason == FSYNC_UNKNOWN_VOLID)
4471 Log("AskDAFS: request to query fileserver failed; trying again...\n");
4472 FSYNC_clientFinis();
/*
 * MaybeAskOnline: bring a volume back online only if its disk header can
 * still be read.
 *
 * The volume disk header is read with VReadVolumeDiskHeader. According to
 * the existing comment, a failed read means the volume probably does not
 * exist and is left alone; the test itself is on a line not shown in this
 * listing. Otherwise AskOnline is called for the volume.
 */
4482 MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4484 struct VolumeDiskHeader diskHdr;
4486 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHdr);
4488 /* volume probably does not exist; no need to bring back online */
4491 AskOnline(salvinfo, volumeId);
/*
 * AskOnline: ask the fileserver to put a volume back online.
 *
 * NOTE(review): this listing omits some original lines (declarations,
 * braces, break statements), so only the visible statements are described.
 *
 * Visible behaviour:
 *   - the request (FSYNC_VOL_ON with reason FSYNC_WHATEVER) is attempted up
 *     to three times;
 *   - SYNC_DENIED is logged with the volume id and partition name;
 *   - SYNC_BAD_COMMAND is logged as a protocol mismatch with a version hint;
 *   - other failures are logged and FSYNC_clientFinis is called before the
 *     next attempt.
 * No Abort call is visible in this function.
 */
4495 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4499 for (i = 0; i < 3; i++) {
4500 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4501 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4503 if (code == SYNC_OK) {
4505 } else if (code == SYNC_DENIED) {
4506 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4507 } else if (code == SYNC_BAD_COMMAND) {
4508 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4510 Log("AskOnline: please make sure file server binaries are same version.\n");
4514 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
4515 FSYNC_clientFinis();
/*
 * AskDelete: tell the fileserver that a volume has been deleted by the
 * salvager (FSYNC_VOL_DONE with reason FSYNC_SALVAGE).
 *
 * NOTE(review): this listing omits some original lines (declarations,
 * braces, break statements, preprocessor branches), so only the visible
 * statements are described.
 *
 * Visible behaviour:
 *   - the request is attempted up to three times, with the response
 *     structure cleared before each attempt;
 *   - SYNC_DENIED is logged with the volume id and partition name;
 *   - SYNC_BAD_COMMAND is logged as a protocol mismatch, with hints that
 *     depend on the demand-attach build flags;
 *   - SYNC_FAILED with reason FSYNC_UNKNOWN_VOLID or FSYNC_WRONG_PART means
 *     the volume is already effectively deleted;
 *   - other failures are logged and FSYNC_clientFinis is called before the
 *     next attempt.
 *
 * Every log message is tagged AskDelete so that failures of this request
 * are not attributed to AskOnline in the salvage log.
 */
4522 AskDelete(struct SalvInfo *salvinfo, VolumeId volumeId)
4527 for (i = 0; i < 3; i++) {
4528 memset(&res, 0, sizeof(res));
4529 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4530 FSYNC_VOL_DONE, FSYNC_SALVAGE, &res);
4532 if (code == SYNC_OK) {
4534 } else if (code == SYNC_DENIED) {
4535 Log("AskDelete: file server denied DONE request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4536 } else if (code == SYNC_BAD_COMMAND) {
4537 Log("AskDelete: fssync protocol mismatch (bad command word '%d')\n",
4540 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4541 Log("AskDelete: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4543 Log("AskDelete: fileserver is DAFS but we are not.\n");
4546 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4547 Log("AskDelete: fileserver is not DAFS but we are.\n");
4549 Log("AskDelete: please make sure fileserver, volserver and salvager binaries are same version.\n");
4553 } else if (code == SYNC_FAILED &&
4554 (res.hdr.reason == FSYNC_UNKNOWN_VOLID ||
4555 res.hdr.reason == FSYNC_WRONG_PART)) {
4556 /* volume is already effectively 'deleted' */
4560 Log("AskDelete: request for fileserver to delete volume failed; trying again...\n");
4561 FSYNC_clientFinis();
4568 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4570 /* Volume parameter is passed in case iopen is upgraded in future to
4571 * require a volume Id to be passed
4574 IHandle_t *srcH, *destH;
4575 FdHandle_t *srcFdP, *destFdP;
4577 afs_foff_t size = 0;
4579 IH_INIT(srcH, device, rwvolume, inode1);
4580 srcFdP = IH_OPEN(srcH);
4581 osi_Assert(srcFdP != NULL);
4582 IH_INIT(destH, device, rwvolume, inode2);
4583 destFdP = IH_OPEN(destH);
4584 while ((nBytes = FDH_PREAD(srcFdP, buf, sizeof(buf), size)) > 0) {
4585 osi_Assert(FDH_PWRITE(destFdP, buf, nBytes, size) == nBytes);
4588 osi_Assert(nBytes == 0);
4589 FDH_REALLYCLOSE(srcFdP);
4590 FDH_REALLYCLOSE(destFdP);
/*
 * PrintInodeList: log one line for every ViceInodeInfo record stored in the
 * file referred to by salvinfo->inodeFd.
 *
 * salvinfo - salvage state; only salvinfo->inodeFd is read here.
 *
 * The whole file is read into a heap buffer sized by OS_SIZE(); the record
 * count is the file size divided by sizeof(struct ViceInodeInfo). A negative
 * size, a failed allocation, or a short read aborts through osi_Assert.
 *
 * NOTE(review): the declarations of 'nInodes' and 'stmp', and any release of
 * 'buf', are on lines that are not part of this listing. The read is
 * performed inside osi_Assert(); this relies on osi_Assert always evaluating
 * its argument -- confirm.
 */
4597 PrintInodeList(struct SalvInfo *salvinfo)
4599 struct ViceInodeInfo *ip;
4600 struct ViceInodeInfo *buf;
4603 afs_sfsize_t st_size;
/* Size of the inode file, in bytes. */
4605 st_size = OS_SIZE(salvinfo->inodeFd);
4606 osi_Assert(st_size >= 0);
4607 buf = (struct ViceInodeInfo *)malloc(st_size);
4608 osi_Assert(buf != NULL);
4609 nInodes = st_size / sizeof(struct ViceInodeInfo);
/* Read the entire file in a single call. */
4610 osi_Assert(OS_READ(salvinfo->inodeFd, buf, st_size) == st_size);
/* Walk the records in file order, counting nInodes down to zero. */
4611 for (ip = buf; nInodes--; ip++) {
4612 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
4613 PrintInode(stmp, ip->inodeNumber), ip->linkCount,
4614 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
4615 ip->u.param[2], ip->u.param[3]);
/*
 * PrintInodeSummary: log one line per entry of salvinfo->inodeSummary.
 *
 * salvinfo - salvage state; nVolumesInInodeFile gives the number of entries.
 *
 * Each line shows the volume id, RW volume id, index, inode counts and
 * maximum uniquifier of the entry. The trailing "volSummary" in the format
 * string is literal text; no argument is printed for it.
 */
4621 PrintInodeSummary(struct SalvInfo *salvinfo)
4624 struct InodeSummary *isp;
4626 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4627 isp = &salvinfo->inodeSummary[i];
4628 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/*
 * PrintVolumeSummary: log the header file name of every volume summary
 * entry, starting at salvinfo->volumeSummaryp.
 *
 * salvinfo - salvage state; nVolumes gives the number of entries.
 *
 * Only fileName is printed. "header" and "wouldNeedCallback" in the format
 * string are literal text; no arguments are printed for them.
 */
4633 PrintVolumeSummary(struct SalvInfo *salvinfo)
4636 struct VolumeSummary *vsp;
/* The index and the entry pointer advance together. */
4638 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; vsp++, i++) {
4639 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
/*
 * Fragment of the process-forking helper (called "Fork" by the comment
 * below); its header and the fork call itself are not part of this listing.
 *
 * On the NT code path the routine must never run, which the unconditional
 * assertion enforces. Elsewhere, when built with AFS_DEMAND_ATTACH_FS, a
 * salvageserver process that sees f == 0 re-establishes its fileserver sync
 * connection (under FSSYNC_BUILD_CLIENT).
 *
 * NOTE(review): the statement guarded by SALVSYNC_BUILD_CLIENT is not
 * visible here.
 */
4649 osi_Assert(0); /* Fork is never executed in the NT code path */
4653 #ifdef AFS_DEMAND_ATTACH_FS
4654 if ((f == 0) && (programType == salvageServer)) {
4655 /* we are a salvageserver child */
4656 #ifdef FSSYNC_BUILD_CLIENT
4657 VChildProcReconnectFS_r();
4659 #ifdef SALVSYNC_BUILD_CLIENT
4663 #endif /* AFS_DEMAND_ATTACH_FS */
4664 #endif /* !AFS_NT40_ENV */
/*
 * Fragment of an exit path; the enclosing function's header is not part of
 * this listing.
 *
 * When built with AFS_DEMAND_ATTACH_FS, a salvageserver process performs
 * extra steps guarded by SALVSYNC_BUILD_CLIENT and FSSYNC_BUILD_CLIENT (the
 * statements themselves are not visible here). A thread other than
 * main_thread then terminates only itself, passing 'code' as its exit value.
 *
 * NOTE(review): 'code' is cast directly to a pointer; if it is a plain int
 * this is a width-changing cast on LP64 platforms -- confirm it is intended.
 */
4674 #ifdef AFS_DEMAND_ATTACH_FS
4675 if (programType == salvageServer) {
4676 #ifdef SALVSYNC_BUILD_CLIENT
4679 #ifdef FSSYNC_BUILD_CLIENT
4683 #endif /* AFS_DEMAND_ATTACH_FS */
4686 if (main_thread != pthread_self())
4687 pthread_exit((void *)code);
/*
 * Fragment of a child-reaping helper; the enclosing function's header is
 * not part of this listing ('prog' is presumably its parameter naming the
 * child program -- confirm).
 *
 * Blocks in wait() for a child, treats a wait() failure as fatal, logs a
 * message if the child dumped core, and then tests whether the child was
 * killed by a signal or exited with a non-zero status. The action taken by
 * that last test is not visible here.
 */
4700 pid = wait(&status);
4701 osi_Assert(pid != -1);
4702 if (WCOREDUMP(status))
4703 Log("\"%s\" core dumped!\n", prog);
4704 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp: format a time value as local time into a static buffer.
 *
 * clock     - time to format.
 * precision - presumably selects between the two formats below (with or
 *             without seconds); the test itself is not part of this listing.
 *
 * The buffer is a function-static array of 20 bytes, so each call overwrites
 * the text produced by the previous one. The longer format,
 * "MM/DD/YYYY HH:MM:SS", is 19 characters and exactly fills the buffer
 * together with its terminating NUL.
 *
 * NOTE(review): the result of localtime() is passed to strftime() with no
 * NULL check visible here.
 */
4710 TimeStamp(time_t clock, int precision)
4713 static char timestamp[20];
4714 lt = localtime(&clock);
/* Date and time down to the second. */
4716 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M:%S", lt);
/* Date and time down to the minute. */
4718 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M", lt);
/*
 * CheckLogFile: rotate the existing salvage log to "<log_path>.old" and
 * open log_path for appending as the global logFile.
 *
 * log_path - path of the log file.
 *
 * If the log cannot be opened, the code falls back to another stream (stdout
 * according to the comment below; the assignment is not part of this
 * listing).
 *
 * NOTE(review): the ".old" name is built with strcpy()/strcat() into a
 * buffer of AFSDIR_PATH_MAX bytes with no length check visible here; a
 * log_path within 4 bytes of that size would overflow it. A bounded
 * snprintf() would be safer.
 * NOTE(review): the conditions on the lines between the #ifndef and the
 * strcpy() are not visible in this listing.
 */
4723 CheckLogFile(char * log_path)
4725 char oldSlvgLog[AFSDIR_PATH_MAX];
4727 #ifndef AFS_NT40_ENV
/* Build the name the current log is rotated to. */
4734 strcpy(oldSlvgLog, log_path);
4735 strcat(oldSlvgLog, ".old");
4737 renamefile(log_path, oldSlvgLog);
4738 logFile = afs_fopen(log_path, "a");
4740 if (!logFile) { /* still nothing, use stdout */
4744 #ifndef AFS_NAMEI_ENV
4745 AFS_DEBUG_IOPS_LOG(logFile);
4750 #ifndef AFS_NT40_ENV
/*
 * TimeStampLogFile: create a hard link to the log file under a name that
 * carries the current local date and time.
 *
 * log_path - path of the existing log file.
 *
 * The link name has the form "<log_path>.YYYY-MM-DD.HH:MM:SS" and is built
 * with a bounded snprintf(). The link() result is deliberately ignored, as
 * the comments below state.
 *
 * NOTE(review): how 'now' is obtained is not part of this listing, and the
 * result of localtime() is dereferenced with no NULL check visible here.
 */
4752 TimeStampLogFile(char * log_path)
4754 char stampSlvgLog[AFSDIR_PATH_MAX];
4759 lt = localtime(&now);
/* struct tm counts years from 1900 and months from 0, hence the offsets. */
4760 snprintf(stampSlvgLog, sizeof stampSlvgLog,
4761 "%s.%04d-%02d-%02d.%02d:%02d:%02d", log_path,
4762 lt->tm_year + 1900, lt->tm_mon + 1, lt->tm_mday, lt->tm_hour,
4763 lt->tm_min, lt->tm_sec);
4765 /* try to link the logfile to a timestamped filename */
4766 /* if it fails, oh well, nothing we can do */
4767 link(log_path, stampSlvgLog);
/*
 * Fragment of a routine that dumps the salvage log to standard output; the
 * enclosing function's header is not part of this listing.
 *
 * Outside NT, a message explains that the log cannot be shown when syslog is
 * in use (the test guarding it is not visible here). Otherwise the file
 * AFSDIR_SERVER_SLVGLOG_FILEPATH is opened for reading into the global
 * logFile and consumed line by line with fgets(); a message naming the file
 * is printed when it cannot be read.
 *
 * NOTE(review): the declaration of 'line' and the body of the read loop are
 * not visible here.
 */
4776 #ifndef AFS_NT40_ENV
4778 printf("Can't show log since using syslog.\n");
4789 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
4792 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
4795 while (fgets(line, sizeof(line), logFile))
/*
 * Log: printf-style logging entry point.
 *
 * format, ... - printf-style format string and its arguments.
 *
 * The message is first rendered into the local buffer 'tmp' with a bounded
 * vsnprintf(), so over-long messages are truncated to the size of 'tmp'.
 * Outside NT it may be sent to syslog at LOG_INFO; otherwise it is written
 * to logFile, prefixed with a TimeStamp() of the current time using
 * precision 1.
 *
 * NOTE(review): the declaration of 'tmp', the test that selects syslog, and
 * the matching va_end() are on lines that are not part of this listing.
 */
4802 Log(const char *format, ...)
4808 va_start(args, format);
4809 vsnprintf(tmp, sizeof tmp, format, args);
4811 #ifndef AFS_NT40_ENV
/* The message is passed as data ("%s"), never as a format string. */
4813 syslog(LOG_INFO, "%s", tmp);
4817 gettimeofday(&now, NULL);
4818 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort: printf-style reporting of a fatal condition.
 *
 * format, ... - printf-style format string and its arguments.
 *
 * The message is rendered into the local buffer 'tmp' with a bounded
 * vsnprintf(). Outside NT it may be sent to syslog at LOG_INFO; otherwise it
 * is written to logFile. Unlike Log(), no timestamp is prepended.
 *
 * NOTE(review): the declaration of 'tmp', the test that selects syslog, and
 * the steps that actually end the program are on lines that are not part of
 * this listing.
 */
4824 Abort(const char *format, ...)
4829 va_start(args, format);
4830 vsnprintf(tmp, sizeof tmp, format, args);
4832 #ifndef AFS_NT40_ENV
/* The message is passed as data ("%s"), never as a format string. */
4834 syslog(LOG_INFO, "%s", tmp);
4838 fprintf(logFile, "%s", tmp);
/*
 * ToString: allocate a heap buffer large enough for a copy of the string s,
 * including its terminating NUL.
 *
 * s - NUL-terminated source string; must not be NULL, since strlen() is
 *     applied to it.
 *
 * Allocation failure is fatal through osi_Assert.
 *
 * NOTE(review): the copy into 'p' and the return statement are on lines that
 * are not part of this listing; presumably the caller owns the result and
 * must free() it -- confirm.
 */
4850 ToString(const char *s)
4853 p = (char *)malloc(strlen(s) + 1);
4854 osi_Assert(p != NULL);
4859 /* Remove the FORCESALVAGE file */
/*
 * path - directory expected to contain the FORCESALVAGE file.
 *
 * The file "<path>/FORCESALVAGE" is unlinked only when Testing is off,
 * ForceSalvage is on, and afs_stat() finds the file. The unlink() result is
 * not checked.
 *
 * NOTE(review): 'target' is declared on a line that is not part of this
 * listing; it is filled with strcpy()/strcat() with no length check visible
 * here, so an over-long path could overflow it.
 */
4861 RemoveTheForce(char *path)
4864 struct afs_stat_st force; /* so we can use afs_stat to find it */
4865 strcpy(target,path);
4866 strcat(target,"/FORCESALVAGE");
4867 if (!Testing && ForceSalvage) {
4868 if (afs_stat(target,&force) == 0) unlink(target);
4872 #ifndef AFS_AIX32_ENV
/*
 * Variant built when AFS_AIX32_ENV is not defined.
 *
 * path - directory expected to contain the FORCESALVAGE file.
 *
 * Returns non-zero when afs_stat() succeeds on "<path>/FORCESALVAGE", and 0
 * otherwise.
 *
 * NOTE(review): 'target' is declared on a line that is not part of this
 * listing; it is filled with strcpy()/strcat() with no length check visible
 * here, so an over-long path could overflow it.
 */
4874 * UseTheForceLuke - see if we can use the force
4877 UseTheForceLuke(char *path)
4879 struct afs_stat_st force;
4881 strcpy(target,path);
4882 strcat(target,"/FORCESALVAGE");
4884 return (afs_stat(target, &force) == 0);
/*
 * Alternative definition of UseTheForceLuke; it follows the variant guarded
 * by "#ifndef AFS_AIX32_ENV", so it is presumably the AFS_AIX32_ENV build --
 * the #else itself is not part of this listing.
 *
 * path - unused by the one statement visible here.
 *
 * The visible body returns 0. As the original comment below explains, this
 * platform does not rely on a FORCESALVAGE file; ListViceInodes() sets
 * ForceSalvage itself.
 *
 * NOTE(review): lines between the header and the return are not part of
 * this listing.
 */
4888 * UseTheForceLuke - see if we can use the force
4891 * The VRMIX fsck will not muck with the filesystem it is supposedly
4892 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4893 * muck directly with the root inode, which is within the normal
4895 * ListViceInodes() has a side effect of setting ForceSalvage if
4896 * it detects a need, based on root inode examination.
4899 UseTheForceLuke(char *path)
4902 return 0; /* sorry OB1 */
4907 /* NT support routines */
/* Path of the running executable; filled in on first use and then reused. */
4909 static char execpathname[MAX_PATH];
/*
 * nt_SalvagePartition: spawn a child copy of this executable to salvage one
 * partition (NT only).
 *
 * partName - partition name, copied into the job record handed to the child.
 * jobn     - job number, stored in the job record.
 *
 * The executable path is looked up once with GetModuleFileName() and cached
 * in execpathname. The job record (magic, number, partition name) is passed
 * to the child through spawnprocveb() together with save_args.
 *
 * GetModuleFileName() returns the buffer size it was given when the path had
 * to be truncated, so the truncation test compares against MAX_PATH - 1, the
 * size actually passed, rather than an unrelated constant.
 *
 * NOTE(review): the declarations of 'job', 'pid' and 'n', the action taken
 * when the lookup fails, and the return statement are on lines that are not
 * part of this listing. The strcpy() into job.cj_part has no length check
 * visible here.
 */
4911 nt_SalvagePartition(char *partName, int jobn)
4916 if (!*execpathname) {
4917 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
4918 if (!n || n == MAX_PATH - 1)
4921 job.cj_magic = SALVAGER_MAGIC;
4922 job.cj_number = jobn;
4923 (void)strcpy(job.cj_part, partName);
4924 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage: child-side setup for a per-partition salvage
 * job (NT only).
 *
 * datap - buffer expected to hold a childJob_t.
 * len   - size of that buffer in bytes.
 *
 * The buffer is validated by size and by its SALVAGER_MAGIC marker before
 * use. A log file named "<AFSDIR_SERVER_SLVGLOG_FILEPATH>.<number>" is then
 * opened for writing as the global logFile.
 *
 * NOTE(review): the actions taken when validation fails, the final sprintf()
 * argument, and everything after the fopen are on lines that are not part of
 * this listing. The log name is built with an unbounded sprintf().
 */
4929 nt_SetupPartitionSalvage(void *datap, int len)
4931 childJob_t *jobp = (childJob_t *) datap;
4932 char logname[AFSDIR_PATH_MAX];
/* Reject a record of the wrong size. */
4934 if (len != sizeof(childJob_t))
/* Reject a record that does not carry the expected magic value. */
4936 if (jobp->cj_magic != SALVAGER_MAGIC)
4941 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
4943 logFile = afs_fopen(logname, "w");
4951 #endif /* AFS_NT40_ENV */