2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
89 #include <afs/procmgmt.h>
92 #ifdef HAVE_SYS_FILE_H
93 # include <sys/file.h>
97 #include <WINNT/afsevent.h>
100 #define WCOREDUMP(x) ((x) & 0200)
103 #include <afs/afsint.h>
104 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
105 #if defined(AFS_VFSINCL_ENV)
106 #include <sys/vnode.h>
108 #include <sys/fs/ufs_inode.h>
110 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
111 #include <ufs/ufs/dinode.h>
112 #include <ufs/ffs/fs.h>
114 #include <ufs/inode.h>
117 #else /* AFS_VFSINCL_ENV */
119 #include <ufs/inode.h>
120 #else /* AFS_OSF_ENV */
121 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_DARWIN_ENV)
122 #include <sys/inode.h>
125 #endif /* AFS_VFSINCL_ENV */
126 #endif /* AFS_SGI_ENV */
129 #include <sys/lockf.h>
132 #include <checklist.h>
134 #if defined(AFS_SGI_ENV)
137 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
139 #include <sys/mnttab.h>
140 #include <sys/mntent.h>
145 #endif /* AFS_SGI_ENV */
146 #endif /* AFS_HPUX_ENV */
150 #include <afs/osi_inode.h>
154 #include <afs/afsutil.h>
155 #include <afs/fileutil.h>
160 #include <afs/afssyscalls.h>
164 #include "partition.h"
165 #include "daemon_com.h"
166 #include "daemon_com_inline.h"
168 #include "fssync_inline.h"
169 #include "volume_inline.h"
170 #include "salvsync.h"
171 #include "viceinode.h"
173 #include "volinodes.h" /* header magic number, etc. stuff */
174 #include "vol-salvage.h"
176 #include "vol_internal.h"
178 #include <afs/prs_fs.h>
180 #ifdef FSSYNC_BUILD_CLIENT
181 #include "vg_cache.h"
189 extern void *calloc();
191 static char *TimeStamp(time_t clock, int precision);
194 int debug; /* -d flag */
195 extern int Testing; /* -n flag */
196 int ListInodeOption; /* -i flag */
197 int ShowRootFiles; /* -r flag */
198 int RebuildDirs; /* -sal flag */
199 int Parallel = 4; /* -para X flag */
200 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
201 int forceR = 0; /* -b flag */
202 int ShowLog = 0; /* -showlog flag */
203 int ShowSuid = 0; /* -showsuid flag */
204 int ShowMounts = 0; /* -showmounts flag */
205 int orphans = ORPH_IGNORE; /* -orphans option */
210 int useSyslog = 0; /* -syslog flag */
211 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
220 #define MAXPARALLEL 32
222 int OKToZap; /* -o flag */
223 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
224 * in the volume header */
226 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
228 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
231 * information that is 'global' to a particular salvage job.
234 Device fileSysDevice; /**< The device number of the current partition
236 char fileSysPath[9]; /**< The path of the mounted partition currently
237 * being salvaged, i.e. the directory containing
238 * the volume headers */
239 char *fileSysPathName; /**< NT needs this to make name pretty log. */
240 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
241 int VGLinkH_cnt; /**< # of references to lnk handle. */
242 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
245 char *fileSysDeviceName; /**< The block device where the file system being
246 * salvaged was mounted */
247 char *filesysfulldev;
249 int VolumeChanged; /**< Set by any routine which would change the
250 * volume in a way which would require callbacks
251 * to be broken if the volume was put back on
252 * line by an active file server */
254 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
255 * header dealt with */
257 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
258 FD_t inodeFd; /**< File descriptor for inode file */
260 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
261 int nVolumes; /**< Number of volumes (read-write and read-only)
262 * in volume summary */
263 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
266 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
267 * vnodes in the volume that
268 * we are currently looking
270 int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
271 * to contact the fileserver over FSYNC */
278 /* Forward declarations */
279 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
280 static int AskVolumeSummary(struct SalvInfo *salvinfo,
281 VolumeId singleVolumeNumber);
282 static void MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId);
283 static void AskError(struct SalvInfo *salvinfo, VolumeId volumeId);
285 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
286 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
287 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
289 /* Uniquifier stored in the Inode */
294 return (u & 0x3fffff);
296 #if defined(AFS_SGI_EXMAG)
297 return (u & SGI_UNIQMASK);
300 #endif /* AFS_SGI_EXMAG */
307 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
309 return 0; /* otherwise may be transient, e.g. EMFILE */
314 char *save_args[MAX_ARGS];
316 extern pthread_t main_thread;
317 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
321 * Get the salvage lock if not already held. Hold until process exits.
323 * @param[in] locktype READ_LOCK or WRITE_LOCK
/*
 * Initialise a VLockFile for AFSDIR_SERVER_SLVGLOCK_FILEPATH and try to lock
 * it with the caller-supplied locktype through VLockFileLock().
 * The two message strings below cover the failure cases: one for another
 * salvager apparently running, one for any other error code.
 * NOTE(review): the statements that emit those messages, and the values
 * given to offset and nonblock, are not visible in this excerpt -- confirm
 * against the full file.
 */
326 _ObtainSalvageLock(int locktype)
328 struct VLockFile salvageLock;
333 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
335 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
338 "salvager: There appears to be another salvager running! "
343 "salvager: Error %d trying to acquire salvage lock! "
/* Take the salvage lock by forwarding WRITE_LOCK to _ObtainSalvageLock(). */
349 ObtainSalvageLock(void)
351 _ObtainSalvageLock(WRITE_LOCK);
/* Take the salvage lock by forwarding READ_LOCK to _ObtainSalvageLock(). */
354 ObtainSharedSalvageLock(void)
356 _ObtainSalvageLock(READ_LOCK);
360 #ifdef AFS_SGI_XFS_IOPS_ENV
361 /* Check if the given partition is mounted. For XFS, the root inode is not a
362 * constant. So we check the hard way.
/*
 * Walk the mount table opened with setmntent(MOUNTED, "r") and compare the
 * mnt_dir of each entry returned by getmntent() against part.
 */
365 IsPartitionMounted(char *part)
368 struct mntent *mntent;
/*
 * NOTE(review): the setmntent() call is the argument of osi_Assert(), so the
 * table is only opened if that macro always evaluates its argument --
 * confirm it is never compiled out.
 */
370 osi_Assert(mntfp = setmntent(MOUNTED, "r"));
371 while (mntent = getmntent(mntfp)) {
372 if (!strcmp(part, mntent->mnt_dir))
/*
 * NOTE(review): both arms of this conditional are 1, so the returned value
 * does not depend on whether a matching entry was found -- confirm whether
 * the second arm was meant to be 0.
 */
377 return mntent ? 1 : 1;
380 /* Check if the given inode is the root of the filesystem. */
381 #ifndef AFS_SGI_XFS_IOPS_ENV
/*
 * Report whether status->st_ino equals ROOTINODE.
 * This definition sits under the preceding #ifndef AFS_SGI_XFS_IOPS_ENV.
 */
383 IsRootInode(struct afs_stat_st *status)
386 * The root inode is not a fixed value in XFS partitions. So we need to
387 * see if the partition is in the list of mounted partitions. This only
388 * affects the SalvageFileSys path, so we check there.
390 return (status->st_ino == ROOTINODE);
395 #ifndef AFS_NAMEI_ENV
396 /* We don't want to salvage big files filesystems, since we can't put volumes on
/*
 * Build a device path in name (prefixing "/dev/" when devName does not
 * already start with it), read the superblock with ReadSuper(), and log a
 * message when the read fails or when IsBigFilesFileSystem() reports a
 * big-files filesystem.
 * NOTE(review): the declaration and size of name, and the return statements,
 * are not visible in this excerpt. sprintf() and strcpy() are unbounded, so
 * confirm name is large enough for any devName.
 */
400 CheckIfBigFilesFS(char *mountPoint, char *devName)
402 struct superblock fs;
405 if (strncmp(devName, "/dev/", 5)) {
406 (void)sprintf(name, "/dev/%s", devName);
408 (void)strcpy(name, devName);
411 if (ReadSuper(&fs, name) < 0) {
412 Log("Unable to read superblock. Not salvaging partition %s.\n",
416 if (IsBigFilesFileSystem(&fs)) {
417 Log("Partition %s is a big files filesystem, not salvaging.\n",
427 #define HDSTR "\\Device\\Harddisk"
428 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
/*
 * Resolve the devName of both partitions through QueryDosDevice() and report
 * whether the two resulting strings compare equal, ignoring case, over at
 * most RES_LEN - 1 characters.
 * A warning is logged when a resolved name does not start with HDSTR.
 * NOTE(review): the early-return values and the use of dowarn are not
 * visible in this excerpt.
 */
430 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
436 static int dowarn = 1;
438 if (!QueryDosDevice(p1->devName, res1, RES_LEN - 1))
440 if (strncmp(res1, HDSTR, HDLEN)) {
443 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
444 res1, HDSTR, p1->devName);
447 if (!QueryDosDevice(p2->devName, res2, RES_LEN - 1))
449 if (strncmp(res2, HDSTR, HDLEN)) {
452 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
453 res2, HDSTR, p2->devName);
457 return (0 == _strnicmp(res1, res2, RES_LEN - 1));
460 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
463 /* This assumes that two partitions with the same device number divided by
464 * PartsPerDisk are on the same disk.
467 SalvageFileSysParallel(struct DiskPartition64 *partP)
470 struct DiskPartition64 *partP;
471 int pid; /* Pid for this job */
472 int jobnumb; /* Log file job number */
473 struct job *nextjob; /* Next partition on disk to salvage */
475 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
476 struct job *thisjob = 0;
477 static int numjobs = 0;
478 static int jobcount = 0;
484 char logFileName[256];
488 /* We have a partition to salvage. Copy it into thisjob */
489 thisjob = (struct job *)malloc(sizeof(struct job));
491 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
494 memset(thisjob, 0, sizeof(struct job));
495 thisjob->partP = partP;
496 thisjob->jobnumb = jobcount;
498 } else if (jobcount == 0) {
499 /* We are asking to wait for all jobs (partp == 0), yet we never
502 Log("No file system partitions named %s* found; not salvaged\n",
503 VICE_PARTITION_PREFIX);
507 if (debug || Parallel == 1) {
509 SalvageFileSys(thisjob->partP, 0);
516 /* Check to see if thisjob is for a disk that we are already
517 * salvaging. If it is, link it in as the next job to do. The
518 * jobs array has 1 entry per disk being salvaged. numjobs is
519 * the total number of disks currently being salvaged. In
520 * order to keep the jobs array compact, when a disk is
521 * completed, the highest element in the jobs array is moved
522 * down to the now open slot.
524 for (j = 0; j < numjobs; j++) {
525 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
526 /* On same disk, add it to this list and return */
527 thisjob->nextjob = jobs[j]->nextjob;
528 jobs[j]->nextjob = thisjob;
535 /* Loop until we start thisjob or until all existing jobs are finished */
536 while (thisjob || (!partP && (numjobs > 0))) {
537 startjob = -1; /* No new job to start */
539 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
540 /* Either the max jobs are running or we have to wait for all
541 * the jobs to finish. In either case, we wait for at least one
542 * job to finish. When it's done, clean up after it.
544 pid = wait(&wstatus);
545 osi_Assert(pid != -1);
546 for (j = 0; j < numjobs; j++) { /* Find which job it is */
547 if (pid == jobs[j]->pid)
550 osi_Assert(j < numjobs);
551 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
552 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
555 numjobs--; /* job no longer running */
556 oldjob = jobs[j]; /* remember */
557 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
558 free(oldjob); /* free the old job */
560 /* If there is another partition on the disk to salvage, then
561 * say we will start it (startjob). If not, then put thisjob there
562 * and say we will start it.
564 if (jobs[j]) { /* Another partitions to salvage */
565 startjob = j; /* Will start it */
566 } else { /* There is not another partition to salvage */
568 jobs[j] = thisjob; /* Add thisjob */
570 startjob = j; /* Will start it */
572 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
573 startjob = -1; /* Don't start it - already running */
577 /* We don't have to wait for a job to complete */
579 jobs[numjobs] = thisjob; /* Add this job */
581 startjob = numjobs; /* Will start it */
585 /* Start up a new salvage job on a partition in job slot "startjob" */
586 if (startjob != -1) {
588 Log("Starting salvage of file system partition %s\n",
589 jobs[startjob]->partP->name);
591 /* For NT, we not only fork, but re-exec the salvager. Pass in the
592 * commands and pass the child job number via the data path.
595 nt_SalvagePartition(jobs[startjob]->partP->name,
596 jobs[startjob]->jobnumb);
597 jobs[startjob]->pid = pid;
602 jobs[startjob]->pid = pid;
608 for (fd = 0; fd < 16; fd++)
615 openlog("salvager", LOG_PID, useSyslogFacility);
619 snprintf(logFileName, sizeof logFileName, "%s.%d",
620 AFSDIR_SERVER_SLVGLOG_FILEPATH,
621 jobs[startjob]->jobnumb);
622 logFile = afs_fopen(logFileName, "w");
627 SalvageFileSys1(jobs[startjob]->partP, 0);
632 } /* while ( thisjob || (!partP && numjobs > 0) ) */
634 /* If waited for all jobs to complete, now collect log files and return */
636 if (!useSyslog) /* if syslogging - no need to collect */
639 for (i = 0; i < jobcount; i++) {
640 snprintf(logFileName, sizeof logFileName, "%s.%d",
641 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
642 if ((passLog = afs_fopen(logFileName, "r"))) {
643 while (fgets(buf, sizeof(buf), passLog)) {
648 (void)unlink(logFileName);
/*
 * Run SalvageFileSys1() for partP and singleVolumeNumber.
 * The work runs in the current process when canfork is clear or debug is
 * set; otherwise it runs where Fork() returns 0.
 * NOTE(review): the Wait("SalvageFileSys") call is presumably the other
 * branch of the Fork() test -- the else line and the body of the inner if
 * are not visible in this excerpt.
 */
657 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
659 if (!canfork || debug || Fork() == 0) {
660 SalvageFileSys1(partP, singleVolumeNumber);
661 if (canfork && !debug) {
666 Wait("SalvageFileSys");
/*
 * Split a device path at its last OS_DIRSEPC. pbuffer is copied into the
 * local pbuf, and pbuffer itself is then overwritten with the text that
 * follows its last separator.
 * NOTE(review): strcpy() into the 128-byte pbuf is unbounded. The final
 * strcpy(pbuffer, ptr + 1) has source and destination inside the same
 * buffer, which strcpy() does not define for overlapping objects -- confirm
 * and consider memmove(). How wpath is filled and what is returned are not
 * visible in this excerpt.
 */
670 get_DevName(char *pbuffer, char *wpath)
672 char pbuf[128], *ptr;
673 strcpy(pbuf, pbuffer);
674 ptr = (char *)strrchr(pbuf, OS_DIRSEPC);
680 ptr = (char *)strrchr(pbuffer, OS_DIRSEPC);
682 strcpy(pbuffer, ptr + 1);
689 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
692 char inodeListPath[256];
693 FD_t inodeFile = INVALID_FD;
694 static char tmpDevName[100];
695 static char wpath[100];
696 struct VolumeSummary *vsp, *esp;
700 struct SalvInfo l_salvinfo;
701 struct SalvInfo *salvinfo = &l_salvinfo;
704 memset(salvinfo, 0, sizeof(*salvinfo));
707 if (inodeFile != INVALID_FD) {
709 inodeFile = INVALID_FD;
711 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
712 Abort("Raced too many times with fileserver restarts while trying to "
713 "checkout/lock volumes; Aborted\n");
715 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
717 /* unlock all previous volume locks, since we're about to lock them
719 VLockFileReinit(&partP->volLockFile);
721 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
723 salvinfo->fileSysPartition = partP;
724 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
725 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
728 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
729 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
730 name = partP->devName;
732 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
733 strcpy(tmpDevName, partP->devName);
734 name = get_DevName(tmpDevName, wpath);
735 salvinfo->fileSysDeviceName = name;
736 salvinfo->filesysfulldev = wpath;
739 if (singleVolumeNumber) {
740 #if !(defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL))
741 /* only non-DAFS locks the partition when salvaging a single volume;
742 * DAFS will lock the individual volumes in the VG */
743 VLockPartition(partP->name);
744 #endif /* !(AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL) */
748 /* salvageserver already setup fssync conn for us */
749 if ((programType != salvageServer) && !VConnectFS()) {
750 Abort("Couldn't connect to file server\n");
753 salvinfo->useFSYNC = 1;
754 AskOffline(salvinfo, singleVolumeNumber);
755 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
756 if (LockVolume(salvinfo, singleVolumeNumber)) {
759 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
762 salvinfo->useFSYNC = 0;
763 VLockPartition(partP->name);
767 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
770 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
771 partP->name, name, (Testing ? "(READONLY mode)" : ""));
773 Log("***Forced salvage of all volumes on this partition***\n");
778 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
785 osi_Assert((dirp = opendir(salvinfo->fileSysPath)) != NULL);
786 while ((dp = readdir(dirp))) {
787 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
788 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
790 Log("Removing old salvager temp files %s\n", dp->d_name);
791 strcpy(npath, salvinfo->fileSysPath);
792 strcat(npath, OS_DIRSEP);
793 strcat(npath, dp->d_name);
799 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
801 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
802 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
804 snprintf(inodeListPath, 255, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
808 inodeFile = OS_OPEN(inodeListPath, O_RDWR|O_TRUNC|O_CREAT, 0666);
809 if (inodeFile == INVALID_FD) {
810 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
813 /* Using nt_unlink here since we're really using the delete on close
814 * semantics of unlink. In most places in the salvager, we really do
815 * mean to unlink the file at that point. Those places have been
816 * modified to actually do that so that the NT crt can be used there.
818 * jaltman - On NT delete on close cannot be applied to a file while the
819 * process has an open file handle that does not have DELETE file
820 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
821 * delete privileges. As a result the nt_unlink() call will always
824 code = nt_unlink(inodeListPath);
826 code = unlink(inodeListPath);
829 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
832 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
836 salvinfo->inodeFd = inodeFile;
837 if (salvinfo->inodeFd == INVALID_FD)
838 Abort("Temporary file %s is missing...\n", inodeListPath);
839 OS_SEEK(salvinfo->inodeFd, 0L, SEEK_SET);
840 if (ListInodeOption) {
841 PrintInodeList(salvinfo);
842 if (singleVolumeNumber) {
843 /* We've checked out the volume from the fileserver, and we need
844 * to give it back. We don't know if the volume exists or not,
845 * so we don't know whether to AskOnline or not. Try to determine
846 * if the volume exists by trying to read the volume header, and
847 * AskOnline if it is readable. */
848 MaybeAskOnline(salvinfo, singleVolumeNumber);
852 /* enumerate volumes in the partition.
853 * figure out sets of read-only + rw volumes.
854 * salvage each set, read-only volumes first, then read-write.
855 * Fix up inodes on last volume in set (whether it is read-write
858 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
862 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
863 i < salvinfo->nVolumesInInodeFile; i = j) {
864 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
866 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
868 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
869 struct VolumeSummary *tsp;
870 /* Scan volume list (from partition root directory) looking for the
871 * current rw volume number in the volume list from the inode scan.
872 * If there is one here that is not in the inode volume list,
874 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
876 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
878 /* Now match up the volume summary info from the root directory with the
879 * entry in the volume list obtained from scanning inodes */
880 salvinfo->inodeSummary[j].volSummary = NULL;
881 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
882 if (tsp->header.id == vid) {
883 salvinfo->inodeSummary[j].volSummary = tsp;
889 /* Salvage the group of volumes (several read-only + 1 read/write)
890 * starting with the current read-only volume we're looking at.
893 nt_SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
895 DoSalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
896 #endif /* AFS_NT40_ENV */
900 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
901 for (; vsp < esp; vsp++) {
903 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
906 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
907 RemoveTheForce(salvinfo->fileSysPath);
909 if (!Testing && singleVolumeNumber) {
911 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
912 /* unlock vol headers so the fs can attach them when we AskOnline */
913 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
914 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
916 /* Step through the volumeSummary list and set all volumes on-line.
917 * Most volumes were taken off-line in GetVolumeSummary.
918 * If a volume was deleted, don't tell the fileserver anything, since
919 * we already told the fileserver the volume was deleted back when we
920 * destroyed the volume header.
921 * Also, make sure we bring the singleVolumeNumber back online first.
924 for (j = 0; j < salvinfo->nVolumes; j++) {
925 if (salvinfo->volumeSummaryp[j].header.id == singleVolumeNumber) {
927 if (!salvinfo->volumeSummaryp[j].deleted) {
928 AskOnline(salvinfo, singleVolumeNumber);
934 /* If singleVolumeNumber is not in our volumeSummary, it means that
935 * at least one other volume in the VG is on the partition, but the
936 * RW volume is not. We've already AskOffline'd it by now, though,
937 * so make sure we don't still have the volume checked out. */
938 AskDelete(salvinfo, singleVolumeNumber);
941 for (j = 0; j < salvinfo->nVolumes; j++) {
942 if (salvinfo->volumeSummaryp[j].header.id != singleVolumeNumber) {
943 if (!salvinfo->volumeSummaryp[j].deleted) {
944 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
950 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
951 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
954 OS_CLOSE(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
/*
 * Handle a volume header file that has no matching inode data.
 * The visible steps are: build the header path from salvinfo->fileSysPath
 * and vsp->fileName, log it, destroy the disk header with
 * VDestroyVolumeDiskHeader(), unlink the path (ENOENT is tolerated), and
 * call AskDelete() for the volume id when salvinfo->useFSYNC is set.
 * NOTE(review): the conditions guarding these steps (for example on Testing)
 * and the declaration of path are not visible in this excerpt. sprintf()
 * into path is unbounded.
 */
958 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
961 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, vsp->fileName);
964 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
967 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
969 Log("Error %ld destroying volume disk header for volume %lu\n",
970 afs_printable_int32_ld(code),
971 afs_printable_uint32_lu(vsp->header.id));
974 /* make sure we actually delete the fileName file; ENOENT
975 * is fine, since VDestroyVolumeDiskHeader probably already
977 if (unlink(path) && errno != ENOENT) {
978 Log("Unable to unlink %s (errno = %d)\n", path, errno);
980 if (salvinfo->useFSYNC) {
981 AskDelete(salvinfo, vsp->header.id);
/*
 * Comparator over struct ViceInodeInfo records; GetInodeSummary passes it
 * to qsort().
 * When either record is an INODESPECIAL entry, the records are first
 * compared by read-write id (parentId for a special entry, volumeId
 * otherwise), then by the special-case rules in that branch.
 * Ordinary records are compared by volumeId, then vnodeNumber, then
 * vnodeUniquifier, then inodeDataVersion.
 * Under AFS_3DISPARES / AFS_SGI_EXMAG the uniquifier and data-version tests
 * also check for one value above 90% and the other below 10% of the stated
 * range.
 * NOTE(review): most return statements are not visible in this excerpt, so
 * the sign produced by each branch must be checked against the full file.
 */
989 CompareInodes(const void *_p1, const void *_p2)
991 const struct ViceInodeInfo *p1 = _p1;
992 const struct ViceInodeInfo *p2 = _p2;
993 if (p1->u.vnode.vnodeNumber == INODESPECIAL
994 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
995 VolumeId p1rwid, p2rwid;
997 (p1->u.vnode.vnodeNumber ==
998 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
1000 (p2->u.vnode.vnodeNumber ==
1001 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1002 if (p1rwid < p2rwid)
1004 if (p1rwid > p2rwid)
1006 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1007 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
1008 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1009 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1010 if (p1->u.vnode.volumeId == p1rwid)
1012 if (p2->u.vnode.volumeId == p2rwid)
1014 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1016 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1017 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1018 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1020 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1022 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1024 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1026 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1028 /* The following tests are reversed, so that the most desirable
1029 * of several similar inodes comes first */
1030 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1031 #ifdef AFS_3DISPARES
1032 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1033 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1036 #ifdef AFS_SGI_EXMAG
1037 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1038 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1043 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1044 #ifdef AFS_3DISPARES
1045 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1046 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1049 #ifdef AFS_SGI_EXMAG
1050 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1051 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1056 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1057 #ifdef AFS_3DISPARES
1058 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1059 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1062 #ifdef AFS_SGI_EXMAG
1063 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1064 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1069 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1070 #ifdef AFS_3DISPARES
1071 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1072 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1075 #ifdef AFS_SGI_EXMAG
1076 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1077 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/*
 * Summarise the leading run of entries in ip that share the volumeId of the
 * first entry, examining at most maxInodes entries.
 * Fields written to summary: volumeId, RWvolumeId (the parentId of a special
 * inode when one is seen, otherwise the volumeId), nInodes, nSpecialInodes
 * and maxUniquifier (the largest vnodeUniquifier the loop tracks).
 * NOTE(review): the statements that advance ip and update n and nSpecial
 * are not visible in this excerpt.
 */
1086 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1087 struct InodeSummary *summary)
1089 VolumeId volume = ip->u.vnode.volumeId;
1090 VolumeId rwvolume = volume;
1095 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1097 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1099 rwvolume = ip->u.special.parentId;
1100 /* This isn't quite right, as there could (in error) be different
1101 * parent inodes in different special vnodes */
1103 if (maxunique < ip->u.vnode.vnodeUniquifier)
1104 maxunique = ip->u.vnode.vnodeUniquifier;
1108 summary->volumeId = volume;
1109 summary->RWvolumeId = rwvolume;
1110 summary->nInodes = n;
1111 summary->nSpecialInodes = nSpecial;
1112 summary->maxUniquifier = maxunique;
/*
 * Inode filter that GetInodeSummary hands to ListViceInodes() when
 * singleVolumeNumber is non-zero.
 * A special inode matches when its parentId equals singleVolumeNumber; any
 * other inode matches when its volumeId does.
 * rock is not referenced in the visible lines.
 */
1116 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1118 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1119 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1120 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1125 * Collect list of inodes in file named by path. If a truly fatal error,
1126 * unlink the file and abort. For lesser errors, return -1. The file will
1127 * be unlinked by the caller.
1130 GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolumeNumber)
1134 struct ViceInodeInfo *ip, *ip_save;
1135 struct InodeSummary summary;
1136 char summaryFileName[50];
1137 FD_t summaryFile = INVALID_FD;
1139 char *dev = salvinfo->fileSysPath;
1140 char *wpath = salvinfo->fileSysPath;
1142 char *dev = salvinfo->fileSysDeviceName;
1143 char *wpath = salvinfo->filesysfulldev;
1145 char *part = salvinfo->fileSysPath;
1150 afs_sfsize_t st_size;
1152 /* This file used to come from vfsck; cobble it up ourselves now... */
1154 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1155 singleVolumeNumber ? OnlyOneVolume : 0,
1156 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1158 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1162 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1164 if (forceSal && !ForceSalvage) {
1165 Log("***Forced salvage of all volumes on this partition***\n");
1168 OS_SEEK(inodeFile, 0L, SEEK_SET);
1169 salvinfo->inodeFd = inodeFile;
1170 if (salvinfo->inodeFd == INVALID_FD ||
1171 (st_size = OS_SIZE(salvinfo->inodeFd)) == -1) {
1172 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1174 tdir = (tmpdir ? tmpdir : part);
1176 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1177 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp."));
1179 snprintf(summaryFileName, sizeof summaryFileName,
1180 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1182 summaryFile = OS_OPEN(summaryFileName, O_RDWR|O_APPEND|O_CREAT, 0666);
1183 if (summaryFile == INVALID_FD) {
1184 Abort("Unable to create inode summary file\n");
1188 /* Using nt_unlink here since we're really using the delete on close
1189 * semantics of unlink. In most places in the salvager, we really do
1190 * mean to unlink the file at that point. Those places have been
1191 * modified to actually do that so that the NT crt can be used there.
1193 * jaltman - As commented elsewhere, this cannot work because fopen()
1194 * does not open files with DELETE and FILE_SHARE_DELETE.
1196 code = nt_unlink(summaryFileName);
1198 code = unlink(summaryFileName);
1201 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1204 if (!canfork || debug || Fork() == 0) {
1205 int nInodes = st_size / sizeof(struct ViceInodeInfo);
1207 OS_CLOSE(summaryFile);
1208 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1209 RemoveTheForce(salvinfo->fileSysPath);
1211 struct VolumeSummary *vsp;
1215 GetVolumeSummary(salvinfo, singleVolumeNumber);
1217 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1218 if (vsp->fileName) {
1219 if (vsp->header.id == singleVolumeNumber) {
1222 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1228 MaybeAskOnline(salvinfo, singleVolumeNumber);
1230 /* make sure we get rid of stray .vol headers, even if
1231 * they're not in our volume summary (might happen if
1232 * e.g. something else created them and they're not in the
1233 * fileserver VGC) */
1234 VDestroyVolumeDiskHeader(salvinfo->fileSysPartition,
1235 singleVolumeNumber, 0 /*parent*/);
1236 AskDelete(salvinfo, singleVolumeNumber);
1240 Log("%s vice inodes on %s; not salvaged\n",
1241 singleVolumeNumber ? "No applicable" : "No", dev);
1246 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1248 OS_CLOSE(summaryFile);
1250 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1253 if (OS_READ(salvinfo->inodeFd, ip, st_size) != st_size) {
1254 OS_CLOSE(summaryFile);
1255 Abort("Unable to read inode table; %s not salvaged\n", dev);
1257 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1258 if (OS_SEEK(salvinfo->inodeFd, 0, SEEK_SET) == -1
1259 || OS_WRITE(salvinfo->inodeFd, ip, st_size) != st_size) {
1260 OS_CLOSE(summaryFile);
1261 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1266 CountVolumeInodes(ip, nInodes, &summary);
1267 if (OS_WRITE(summaryFile, &summary, sizeof(summary)) != sizeof(summary)) {
1268 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1269 OS_CLOSE(summaryFile);
1273 summary.index += (summary.nInodes);
1274 nInodes -= summary.nInodes;
1275 ip += summary.nInodes;
1278 ip = ip_save = NULL;
1279 /* The following is a sync rather than a close, because if it were a close, debug mode would not work */
1280 if (OS_SYNC(summaryFile) == -1) {
1281 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1282 OS_CLOSE(summaryFile);
1286 if (canfork && !debug) {
1291 if (Wait("Inode summary") == -1) {
1292 OS_CLOSE(summaryFile);
1293 Exit(1); /* salvage of this partition aborted */
1297 st_size = OS_SIZE(summaryFile);
1298 osi_Assert(st_size >= 0);
1301 salvinfo->inodeSummary = (struct InodeSummary *)malloc(st_size);
1302 osi_Assert(salvinfo->inodeSummary != NULL);
1303 /* For GNU we need to do lseek to get the file pointer moved. */
1304 osi_Assert(OS_SEEK(summaryFile, 0, SEEK_SET) == 0);
1305 ret = OS_READ(summaryFile, salvinfo->inodeSummary, st_size);
1306 osi_Assert(ret == st_size);
1308 salvinfo->nVolumesInInodeFile = st_size / sizeof(struct InodeSummary);
1309 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1310 salvinfo->inodeSummary[i].volSummary = NULL;
1312 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)st_size);
1313 OS_CLOSE(summaryFile);
1316 if (retcode && singleVolumeNumber && !deleted) {
1317 AskError(salvinfo, singleVolumeNumber);
1323 /* Comparison routine for volume sort.
1324 This is set up so that a read-write volume comes immediately before
1325 any read-only clones of that volume */
1327 CompareVolumes(const void *_p1, const void *_p2)
1329 const struct VolumeSummary *p1 = _p1;
1330 const struct VolumeSummary *p2 = _p2;
1331 if (p1->header.parent != p2->header.parent)
1332 return p1->header.parent < p2->header.parent ? -1 : 1;
1333 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1335 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1337 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1341 * Gleans volumeSummary information by asking the fileserver
1343 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1344 * salvaging a whole partition
1346 * @return whether we obtained the volume summary information or not
1347 * @retval 0 success; we obtained the volume summary information
1348 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1350 * @retval 1 we did not get the volume summary information; either the
1351 * fileserver responded with an error, or we are not supposed to
1352 * ask the fileserver for the information (e.g. we are salvaging
1353 * the entire partition or we are not the salvageserver)
1355 * @note for non-DAFS, always returns 1
1358 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1361 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1362 if (programType == salvageServer) {
1363 if (singleVolumeNumber) {
1364 FSSYNC_VGQry_response_t q_res;
1366 struct VolumeSummary *vsp;
1368 struct VolumeDiskHeader diskHdr;
1370 memset(&res, 0, sizeof(res));
1372 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1375 * We must wait for the partition to finish scanning before
1376 * we can continue, since we will not know if we got the entire
1377 * VG membership unless the partition is fully scanned.
1378 * We could, in theory, just scan the partition ourselves if
1379 * the VG cache is not ready, but we would be doing the exact
1380 * same scan the fileserver is doing; it will almost always
1381 * be faster to wait for the fileserver. The only exceptions
1382 * are if the partition does not take very long to scan, and
1383 * in that case it's fast either way, so who cares?
1385 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1386 Log("waiting for fileserver to finish scanning partition %s...\n",
1387 salvinfo->fileSysPartition->name);
1389 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1390 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1391 * just so small partitions don't need to wait over 10
1392 * seconds every time, and large partitions are generally
1393 * polled only once every ten seconds. */
1394 sleep((i > 10) ? (i = 10) : i);
1396 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1400 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1401 /* This can happen if there's no header for the volume
1402 * we're salvaging, or no headers exist for the VG (if
1403 * we're salvaging an RW). Act as if we got a response
1404 * with no VG members. The headers may be created during
1405 * salvaging, if there are inodes in this VG. */
1407 memset(&q_res, 0, sizeof(q_res));
1408 q_res.rw = singleVolumeNumber;
1412 Log("fileserver refused VGCQuery request for volume %lu on "
1413 "partition %s, code %ld reason %ld\n",
1414 afs_printable_uint32_lu(singleVolumeNumber),
1415 salvinfo->fileSysPartition->name,
1416 afs_printable_int32_ld(code),
1417 afs_printable_int32_ld(res.hdr.reason));
1421 if (q_res.rw != singleVolumeNumber) {
1422 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1423 afs_printable_uint32_lu(singleVolumeNumber),
1424 afs_printable_uint32_lu(q_res.rw));
1425 #ifdef SALVSYNC_BUILD_CLIENT
1426 if (SALVSYNC_LinkVolume(q_res.rw,
1428 salvinfo->fileSysPartition->name,
1430 Log("schedule request failed\n");
1432 #endif /* SALVSYNC_BUILD_CLIENT */
1433 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1436 salvinfo->volumeSummaryp = calloc(VOL_VG_MAX_VOLS, sizeof(struct VolumeSummary));
1437 osi_Assert(salvinfo->volumeSummaryp != NULL);
1439 salvinfo->nVolumes = 0;
1440 vsp = salvinfo->volumeSummaryp;
1442 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1443 char name[VMAXPATHLEN];
1445 if (!q_res.children[i]) {
1449 /* AskOffline for singleVolumeNumber was called much earlier */
1450 if (q_res.children[i] != singleVolumeNumber) {
1451 AskOffline(salvinfo, q_res.children[i]);
1452 if (LockVolume(salvinfo, q_res.children[i])) {
1458 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1460 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1461 afs_printable_uint32_lu(q_res.children[i]));
1466 DiskToVolumeHeader(&vsp->header, &diskHdr);
1467 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1468 vsp->fileName = ToString(name);
1469 salvinfo->nVolumes++;
1473 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1478 Log("Cannot get volume summary from fileserver; falling back to scanning "
1479 "entire partition\n");
1482 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
1487 * count how many volume headers are found by VWalkVolumeHeaders.
1489 * @param[in] dp the disk partition (unused)
1490 * @param[in] name full path to the .vol header (unused)
1491 * @param[in] hdr the header data (unused)
1492 * @param[in] last whether this is the last try or not (unused)
1493 * @param[in] rock actually an afs_int32*; the running count of how many
1494 * volumes we have found
1499 CountHeader(struct DiskPartition64 *dp, const char *name,
1500 struct VolumeDiskHeader *hdr, int last, void *rock)
1502 afs_int32 *nvols = (afs_int32 *)rock;
1508 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
1511 struct SalvageScanParams {
1512 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1513 * vol id of the VG we're salvaging */
1514 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1515 * we're filling in */
1516 afs_int32 nVolumes; /**< # of vols we've encountered */
1517 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1518 * # of vols we've alloc'd memory for) */
1519 int retry; /**< do we need to retry vol lock/checkout? */
1520 struct SalvInfo *salvinfo; /**< salvage job info */
1524 * records volume summary info found from VWalkVolumeHeaders.
1526 * Found volumes are also taken offline if they are in the specific volume
1527 * group we are looking for.
1529 * @param[in] dp the disk partition
1530 * @param[in] name full path to the .vol header
1531 * @param[in] hdr the header data
1532 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1533 * @param[in] rock actually a struct SalvageScanParams*, containing the
1534 * information needed to record the volume summary data
1536 * @return operation status
1538 * @retval -1 volume locking raced with fileserver restart; checking out
1539 * and locking volumes needs to be retried
1540 * @retval 1 volume header is mis-named and should be deleted
1543 RecordHeader(struct DiskPartition64 *dp, const char *name,
1544 struct VolumeDiskHeader *hdr, int last, void *rock)
1546 char nameShouldBe[64];
1547 struct SalvageScanParams *params;
1548 struct VolumeSummary summary;
1549 VolumeId singleVolumeNumber;
1550 struct SalvInfo *salvinfo;
1552 params = (struct SalvageScanParams *)rock;
1554 memset(&summary, 0, sizeof(summary));
1556 singleVolumeNumber = params->singleVolumeNumber;
1557 salvinfo = params->salvinfo;
1559 DiskToVolumeHeader(&summary.header, hdr);
1561 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1562 && summary.header.parent != singleVolumeNumber) {
1564 if (programType == salvageServer) {
1565 #ifdef SALVSYNC_BUILD_CLIENT
1566 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1567 summary.header.id, summary.header.parent);
1568 if (SALVSYNC_LinkVolume(summary.header.parent,
1572 Log("schedule request failed\n");
1575 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1578 Log("%u is a read-only volume; not salvaged\n",
1579 singleVolumeNumber);
1584 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1585 || summary.header.parent == singleVolumeNumber) {
1587 /* check if the header file is incorrectly named */
1589 const char *base = strrchr(name, OS_DIRSEPC);
1596 snprintf(nameShouldBe, sizeof nameShouldBe,
1597 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1600 if (strcmp(nameShouldBe, base)) {
1601 /* .vol file has wrong name; retry/delete */
1605 if (!badname || last) {
1606 /* only offline the volume if the header is good, or if this is
1607 * the last try looking at it; avoid AskOffline'ing the same vol
1610 if (singleVolumeNumber
1611 && summary.header.id != singleVolumeNumber) {
1612 /* don't offline singleVolumeNumber; we already did that
1615 AskOffline(salvinfo, summary.header.id);
1617 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
1619 /* don't lock the volume if the header is bad, since we're
1620 * about to delete it anyway. */
1621 if (LockVolume(salvinfo, summary.header.id)) {
1626 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
1630 if (last && !Showmode) {
1631 Log("Volume header file %s is incorrectly named (should be %s "
1632 "not %s); %sdeleted (it will be recreated later, if "
1633 "necessary)\n", name, nameShouldBe, base,
1634 (Testing ? "it would have been " : ""));
1639 summary.fileName = ToString(base);
1642 if (params->nVolumes > params->totalVolumes) {
1643 /* We found more volumes than we found on the first partition walk;
1644 * apparently something created a volume while we were
1645 * partition-salvaging, or we found more than 20 vols when salvaging a
1646 * particular volume. Abort if we detect this, since other programs
1647 * are supposed to not touch the partition while it is partition-salvaging,
1648 * and we shouldn't find more than 20 vols in a VG.
1650 Abort("Found %ld vol headers, but should have found at most %ld! "
1651 "Make sure the volserver/fileserver are not running at the "
1652 "same time as a partition salvage\n",
1653 afs_printable_int32_ld(params->nVolumes),
1654 afs_printable_int32_ld(params->totalVolumes));
1657 memcpy(params->vsp, &summary, sizeof(summary));
1665 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1667 * If the header could not be read in at all, the header is always unlinked.
1668 * If instead RecordHeader said the header was bad (that is, the header file
1669 * is mis-named), we only unlink if we are doing a partition salvage, as
1670 * opposed to salvaging a specific volume group.
1672 * @param[in] dp the disk partition
1673 * @param[in] name full path to the .vol header
1674 * @param[in] hdr header data, or NULL if the header could not be read
1675 * @param[in] rock actually a struct SalvageScanParams*, with some information
1679 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1680 struct VolumeDiskHeader *hdr, void *rock)
1682 struct SalvageScanParams *params;
1685 params = (struct SalvageScanParams *)rock;
1688 /* no header; header is too bogus to read in at all */
1690 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1696 } else if (!params->singleVolumeNumber) {
1697 /* We were able to read in a header, but RecordHeader said something
1698 * was wrong with it. We only unlink those if we are doing a partition
1705 if (dounlink && unlink(name)) {
1706 Log("Error %d while trying to unlink %s\n", errno, name);
1711 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1712 * the fileserver for VG information, or by scanning the /vicepX partition.
1714 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1715 * are salvaging, or 0 if this is a partition
1718 * @return operation status
1720 * @retval -1 we raced with a fileserver restart; checking out and locking
1721 * volumes must be retried
1724 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1726 afs_int32 nvols = 0;
1727 struct SalvageScanParams params;
1730 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1732 /* we successfully got the vol information from the fileserver; no
1733 * need to scan the partition */
1737 /* we need to retry volume checkout */
1741 if (!singleVolumeNumber) {
1742 /* Count how many volumes we have in /vicepX */
1743 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1746 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1751 nvols = VOL_VG_MAX_VOLS;
1754 salvinfo->volumeSummaryp = calloc(nvols, sizeof(struct VolumeSummary));
1755 osi_Assert(salvinfo->volumeSummaryp != NULL);
1757 params.singleVolumeNumber = singleVolumeNumber;
1758 params.vsp = salvinfo->volumeSummaryp;
1759 params.nVolumes = 0;
1760 params.totalVolumes = nvols;
1762 params.salvinfo = salvinfo;
1764 /* walk the partition directory of volume headers and record the info
1765 * about them; unlinking invalid headers */
1766 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1767 UnlinkHeader, ¶ms);
1769 /* we apparently need to retry checking-out/locking volumes */
1773 Abort("Failed to get volume header summary\n");
1775 salvinfo->nVolumes = params.nVolumes;
1777 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1783 /* Find the link table. This should be associated with the RW volume or, if
1784 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
1787 FindLinkHandle(struct InodeSummary *isp, int nVols,
1788 struct ViceInodeInfo *allInodes)
1791 struct ViceInodeInfo *ip;
1793 for (i = 0; i < nVols; i++) {
1794 ip = allInodes + isp[i].index;
1795 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1796 if (ip[j].u.special.type == VI_LINKTABLE)
1797 return ip[j].inodeNumber;
1804 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1806 struct versionStamp version;
1809 if (!VALID_INO(ino))
1811 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->RWvolumeId,
1812 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1813 if (!VALID_INO(ino))
1815 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1816 isp->RWvolumeId, errno);
1817 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1818 fdP = IH_OPEN(salvinfo->VGLinkH);
1820 Abort("Can't open link table for volume %u (error = %d)\n",
1821 isp->RWvolumeId, errno);
1823 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1824 Abort("Can't truncate link table for volume %u (error = %d)\n",
1825 isp->RWvolumeId, errno);
1827 version.magic = LINKTABLEMAGIC;
1828 version.version = LINKTABLEVERSION;
1830 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1832 Abort("Can't truncate link table for volume %u (error = %d)\n",
1833 isp->RWvolumeId, errno);
1835 FDH_REALLYCLOSE(fdP);
1837 /* If the volume summary exists (i.e., the V*.vol header file exists),
1838 * then set this inode there as well.
1840 if (isp->volSummary)
1841 isp->volSummary->header.linkTable = ino;
1850 SVGParms_t *parms = (SVGParms_t *) arg;
1851 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
1856 nt_SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1859 pthread_attr_t tattr;
1863 /* Initialize per volume global variables, even if later code does so */
1864 salvinfo->VolumeChanged = 0;
1865 salvinfo->VGLinkH = NULL;
1866 salvinfo->VGLinkH_cnt = 0;
1867 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1869 parms.svgp_inodeSummaryp = isp;
1870 parms.svgp_count = nVols;
1871 parms.svgp_salvinfo = salvinfo;
1872 code = pthread_attr_init(&tattr);
1874 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1878 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1880 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1883 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1885 Log("Failed to create thread to salvage volume group %u\n",
1889 (void)pthread_join(tid, NULL);
1891 #endif /* AFS_NT40_ENV */
1894 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1896 struct ViceInodeInfo *inodes, *allInodes, *ip;
1897 int i, totalInodes, size, salvageTo;
1901 int dec_VGLinkH = 0;
1903 FdHandle_t *fdP = NULL;
1905 salvinfo->VGLinkH_cnt = 0;
1906 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1907 && isp->nSpecialInodes > 0);
1908 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1909 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1912 if (ShowMounts && !haveRWvolume)
1914 if (canfork && !debug && Fork() != 0) {
1915 (void)Wait("Salvage volume group");
1918 for (i = 0, totalInodes = 0; i < nVols; i++)
1919 totalInodes += isp[i].nInodes;
1920 size = totalInodes * sizeof(struct ViceInodeInfo);
1921 inodes = (struct ViceInodeInfo *)malloc(size);
1922 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1923 * for the partition, if all the inodes
1924 * had been read into memory */
1926 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1928 osi_Assert(OS_READ(salvinfo->inodeFd, inodes, size) == size);
1930 /* Don't try to salvage a read write volume if there isn't one on this
1932 salvageTo = haveRWvolume ? 0 : 1;
1934 #ifdef AFS_NAMEI_ENV
1935 ino = FindLinkHandle(isp, nVols, allInodes);
1936 if (VALID_INO(ino)) {
1937 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1938 fdP = IH_OPEN(salvinfo->VGLinkH);
1940 if (VALID_INO(ino) && fdP != NULL) {
1941 struct versionStamp header;
1942 afs_sfsize_t nBytes;
1944 nBytes = FDH_PREAD(fdP, (char *)&header, sizeof(struct versionStamp), 0);
1945 if (nBytes != sizeof(struct versionStamp)
1946 || header.magic != LINKTABLEMAGIC) {
1947 Log("Bad linktable header for volume %u.\n", isp->RWvolumeId);
1948 FDH_REALLYCLOSE(fdP);
1952 if (!VALID_INO(ino) || fdP == NULL) {
1953 Log("%s link table for volume %u.\n",
1954 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1956 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1959 struct ViceInodeInfo *ip;
1960 CreateLinkTable(salvinfo, isp, ino);
1961 fdP = IH_OPEN(salvinfo->VGLinkH);
1962 /* Sync fake 1 link counts to the link table, now that it exists */
1964 for (i = 0; i < nVols; i++) {
1965 ip = allInodes + isp[i].index;
1966 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1967 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1974 FDH_REALLYCLOSE(fdP);
1976 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1979 /* Salvage in reverse order--read/write volume last; this way any
1980 * Inodes not referenced by the time we salvage the read/write volume
1981 * can be picked up by the read/write volume */
1982 /* ACTUALLY, that's not done right now--the inodes just vanish */
1983 for (i = nVols - 1; i >= salvageTo; i--) {
1985 struct InodeSummary *lisp = &isp[i];
1986 #ifdef AFS_NAMEI_ENV
1987 /* If only the RO is present on this partition, the link table
1988 * shows up as a RW volume special file. Need to make sure the
1989 * salvager doesn't try to salvage the non-existent RW.
1991 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1992 /* If this only special inode is the link table, continue */
1993 if (inodes->u.special.type == VI_LINKTABLE) {
2000 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
2001 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
2002 /* Check inodes twice. The second time do things seriously. This
2003 * way the whole RO volume can be deleted, below, if anything goes wrong */
2004 for (check = 1; check >= 0; check--) {
2006 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
2008 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
2009 if (rw && deleteMe) {
2010 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
2011 * volume won't be called */
2017 if (rw && check == 1)
2019 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
2020 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
2026 /* Fix actual inode counts */
2029 Log("totalInodes %d\n",totalInodes);
2030 for (ip = inodes; totalInodes; ip++, totalInodes--) {
2031 static int TraceBadLinkCounts = 0;
2032 #ifdef AFS_NAMEI_ENV
2033 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
2034 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
2035 VGLinkH_p1 = ip->u.param[0];
2036 continue; /* Deal with this last. */
2039 if (ip->linkCount != 0 && TraceBadLinkCounts) {
2040 TraceBadLinkCounts--; /* Limit reports, per volume */
2041 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
2043 while (ip->linkCount > 0) {
2044 /* below used to assert, not break */
2046 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2047 Log("idec failed. inode %s errno %d\n",
2048 PrintInode(stmp, ip->inodeNumber), errno);
2054 while (ip->linkCount < 0) {
2055 /* these used to be asserts */
2057 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2058 Log("iinc failed. inode %s errno %d\n",
2059 PrintInode(stmp, ip->inodeNumber), errno);
2066 #ifdef AFS_NAMEI_ENV
2067 while (dec_VGLinkH > 0) {
2068 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2069 Log("idec failed on link table, errno = %d\n", errno);
2073 while (dec_VGLinkH < 0) {
2074 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2075 Log("iinc failed on link table, errno = %d\n", errno);
2082 /* Directory consistency checks on the rw volume */
2084 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2085 IH_RELEASE(salvinfo->VGLinkH);
2087 if (canfork && !debug) {
2094 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2096 /* Check headers BEFORE forking */
2100 for (i = 0; i < nVols; i++) {
2101 struct VolumeSummary *vs = isp[i].volSummary;
2102 VolumeDiskData volHeader;
2104 /* Don't salvage just because phantom rw volume is there... */
2105 /* (If a read-only volume exists, read/write inodes must also exist) */
2106 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2110 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2111 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2112 == sizeof(volHeader)
2113 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2114 && volHeader.dontSalvage == DONT_SALVAGE
2115 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2116 if (volHeader.inUse != 0) {
2117 volHeader.inUse = 0;
2118 volHeader.inService = 1;
2120 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2121 != sizeof(volHeader)) {
2137 /* SalvageVolumeHeaderFile
2139 * Salvage the top level V*.vol header file. Make sure the special files
2140 * exist and that there are no duplicates.
2142 * Calls SalvageHeader for each possible type of volume special file.
2146 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2147 struct ViceInodeInfo *inodes, int RW,
2148 int check, int *deleteMe)
2151 struct ViceInodeInfo *ip;
2152 int allinodesobsolete = 1;
2153 struct VolumeDiskHeader diskHeader;
2154 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2156 struct VolumeHeader tempHeader;
2157 struct afs_inode_info stuff[MAXINODETYPE];
2159 /* keeps track of special inodes that are probably 'good'; they are
2160 * referenced in the vol header, and are included in the given inodes
2165 } goodspecial[MAXINODETYPE];
2170 memset(goodspecial, 0, sizeof(goodspecial));
2172 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2174 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2176 Log("cannot allocate memory for inode skip array when salvaging "
2177 "volume %lu; not performing duplicate special inode recovery\n",
2178 afs_printable_uint32_lu(isp->volumeId));
2179 /* still try to perform the salvage; the skip array only does anything
2180 * if we detect duplicate special inodes */
2183 init_inode_info(&tempHeader, stuff);
2186 * First, look at the special inodes and see if any are referenced by
2187 * the existing volume header. If we find duplicate special inodes, we
2188 * can use this information to use the referenced inode (it's more
2189 * likely to be the 'good' one), and throw away the duplicates.
2191 if (isp->volSummary && skip) {
2192 /* use tempHeader, so we can use the stuff[] array to easily index
2193 * into the isp->volSummary special inodes */
2194 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2196 for (i = 0; i < isp->nSpecialInodes; i++) {
2197 ip = &inodes[isp->index + i];
2198 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2199 /* will get taken care of in a later loop */
2202 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2203 goodspecial[ip->u.special.type-1].valid = 1;
2204 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2209 memset(&tempHeader, 0, sizeof(tempHeader));
2210 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2211 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2212 tempHeader.id = isp->volumeId;
2213 tempHeader.parent = isp->RWvolumeId;
2215 /* Check for duplicates (inodes are sorted by type field) */
2216 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2217 ip = &inodes[isp->index + i];
2218 if (ip->u.special.type == (ip + 1)->u.special.type) {
2219 afs_ino_str_t stmp1, stmp2;
2221 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2222 /* Will be caught in the loop below */
2226 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2227 ip->u.special.type, isp->volumeId,
2228 PrintInode(stmp1, ip->inodeNumber),
2229 PrintInode(stmp2, (ip+1)->inodeNumber));
2231 if (skip && goodspecial[ip->u.special.type-1].valid) {
2232 Inode gi = goodspecial[ip->u.special.type-1].inode;
2235 Log("using special inode referenced by vol header (%s)\n",
2236 PrintInode(stmp1, gi));
2239 /* the volume header references some special inode of
2240 * this type in the inodes array; are we it? */
2241 if (ip->inodeNumber != gi) {
2243 } else if ((ip+1)->inodeNumber != gi) {
2244 /* in case this is the last iteration; we need to
2245 * make sure we check ip+1, too */
2250 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2258 for (i = 0; i < isp->nSpecialInodes; i++) {
2260 ip = &inodes[isp->index + i];
2261 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2263 Log("Rubbish header inode %s of type %d\n",
2264 PrintInode(stmp, ip->inodeNumber),
2265 ip->u.special.type);
2271 Log("Rubbish header inode %s of type %d; deleted\n",
2272 PrintInode(stmp, ip->inodeNumber),
2273 ip->u.special.type);
2274 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2275 if (skip && skip[i]) {
2276 if (orphans == ORPH_REMOVE) {
2277 Log("Removing orphan special inode %s of type %d\n",
2278 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2281 Log("Ignoring orphan special inode %s of type %d\n",
2282 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2283 /* fall through to the ip->linkCount--; line below */
2286 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2287 allinodesobsolete = 0;
2289 if (!check && ip->u.special.type != VI_LINKTABLE)
2290 ip->linkCount--; /* Keep the inode around */
2298 if (allinodesobsolete) {
2305 salvinfo->VGLinkH_cnt++; /* one for every header. */
2307 if (!RW && !check && isp->volSummary) {
2308 ClearROInUseBit(isp->volSummary);
2312 for (i = 0; i < MAXINODETYPE; i++) {
2313 if (stuff[i].inodeType == VI_LINKTABLE) {
2314 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2315 * And we may have recreated the link table earlier, so set the
2316 * RW header as well. The header magic was already checked.
2318 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2319 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2323 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2327 if (isp->volSummary == NULL) {
2329 char headerName[64];
2330 snprintf(headerName, sizeof headerName, VFORMAT,
2331 afs_printable_uint32_lu(isp->volumeId));
2332 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2333 salvinfo->fileSysPath, headerName);
2335 Log("No header file for volume %u\n", isp->volumeId);
2339 Log("No header file for volume %u; %screating %s\n",
2340 isp->volumeId, (Testing ? "it would have been " : ""),
2342 isp->volSummary = calloc(1, sizeof(struct VolumeSummary));
2343 isp->volSummary->fileName = ToString(headerName);
2345 writefunc = VCreateVolumeDiskHeader;
2348 char headerName[64];
2349 /* hack: these two fields are obsolete... */
2350 isp->volSummary->header.volumeAcl = 0;
2351 isp->volSummary->header.volumeMountTable = 0;
2354 (&isp->volSummary->header, &tempHeader,
2355 sizeof(struct VolumeHeader))) {
2356 /* We often remove the name before calling us, so we make a fake one up */
2357 if (isp->volSummary->fileName) {
2358 strcpy(headerName, isp->volSummary->fileName);
2360 snprintf(headerName, sizeof headerName, VFORMAT,
2361 afs_printable_uint32_lu(isp->volumeId));
2362 isp->volSummary->fileName = ToString(headerName);
2364 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2365 salvinfo->fileSysPath, headerName);
2367 Log("Header file %s is damaged or no longer valid%s\n", path,
2368 (check ? "" : "; repairing"));
2372 writefunc = VWriteVolumeDiskHeader;
2376 memcpy(&isp->volSummary->header, &tempHeader,
2377 sizeof(struct VolumeHeader));
2380 Log("It would have written a new header file for volume %u\n",
2384 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2385 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2387 Log("Error %ld writing volume header file for volume %lu\n",
2388 afs_printable_int32_ld(code),
2389 afs_printable_uint32_lu(diskHeader.id));
2394 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2395 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader: verify, and if needed recreate, one special header inode.
 *
 * sp describes the special inode being examined (its type, expected size,
 * version stamp, description, and where its inode number is stored); isp is
 * the summary of the volume that owns it.
 *
 * Behaviour visible in this listing:
 *  - on non-namei builds the link table is skipped (returns 0);
 *  - a zero inode number is logged as missing and a new special inode is
 *    requested with IH_CREATE;
 *  - the inode is opened and sp->size bytes are read; a short read or a
 *    magic number different from sp->stamp.magic is logged as corruption;
 *  - for VI_VOLINFO a destroyMe value of DESTROY_ME is detected after the
 *    read (presumably reported through *deleteMe -- TODO confirm);
 *  - when the header must be recreated and Testing is off, the file is
 *    truncated and rewritten: a VI_VOLINFO header gets a fresh volume info
 *    record named bogus.<volumeId>, out of service and not blessed; the
 *    other write path stores only sp->stamp;
 *  - for VI_VOLINFO the resulting volume info is copied to salvinfo->VolInfo
 *    and the update (or creation) time is logged.
 *
 * The caller in this file compares the result against -1 when check is set;
 * the only return statement visible here is the 0 for the skipped link table.
 */
2400 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2401 struct InodeSummary *isp, int check, int *deleteMe)
2404 VolumeDiskData volumeInfo;
2405 struct versionStamp fileHeader;
2414 #ifndef AFS_NAMEI_ENV
2415 if (sp->inodeType == VI_LINKTABLE)
2416 return 0; /* header magic was already checked */
2418 if (*(sp->inode) == 0) {
2420 Log("Missing inode in volume header (%s)\n", sp->description);
2424 Log("Missing inode in volume header (%s); %s\n", sp->description,
2425 (Testing ? "it would have recreated it" : "recreating"));
2428 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2429 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2430 if (!VALID_INO(*(sp->inode)))
2432 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2433 sp->description, errno);
2438 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2439 fdP = IH_OPEN(specH);
2440 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2441 /* bail out early and destroy the volume */
2443 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2450 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2451 sp->description, errno);
2454 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2455 || header.fileHeader.magic != sp->stamp.magic)) {
2457 Log("Part of the header (%s) is corrupted\n", sp->description);
2458 FDH_REALLYCLOSE(fdP);
2462 Log("Part of the header (%s) is corrupted; recreating\n",
2465 /* header can be garbage; make sure we don't read garbage data from
2467 memset(&header, 0, sizeof(header));
2469 if (sp->inodeType == VI_VOLINFO
2470 && header.volumeInfo.destroyMe == DESTROY_ME) {
2473 FDH_REALLYCLOSE(fdP);
2477 if (recreate && !Testing) {
2480 ("Internal error: recreating volume header (%s) in check mode\n",
2482 nBytes = FDH_TRUNC(fdP, 0);
2484 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2485 sp->description, errno);
2487 /* The following code should be moved into vutil.c */
2488 if (sp->inodeType == VI_VOLINFO) {
2490 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2491 header.volumeInfo.stamp = sp->stamp;
2492 header.volumeInfo.id = isp->volumeId;
2493 header.volumeInfo.parentId = isp->RWvolumeId;
2494 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2495 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2496 isp->volumeId, isp->volumeId);
2497 header.volumeInfo.inService = 0;
2498 header.volumeInfo.blessed = 0;
2499 /* The + 1000 is a hack in case there are any files out in venus caches */
2500 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2501 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2502 header.volumeInfo.needsCallback = 0;
2503 gettimeofday(&tp, NULL);
2504 header.volumeInfo.creationDate = tp.tv_sec;
2506 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2507 sizeof(header.volumeInfo), 0);
2508 if (nBytes != sizeof(header.volumeInfo)) {
2511 ("Unable to write volume header file (%s) (errno = %d)\n",
2512 sp->description, errno);
2513 Abort("Unable to write entire volume header file (%s)\n",
2517 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2518 if (nBytes != sizeof(sp->stamp)) {
2521 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2522 sp->description, errno);
2524 ("Unable to write entire version stamp in volume header file (%s)\n",
2529 FDH_REALLYCLOSE(fdP);
/* Keep the volume info for the rest of the salvage and log when the
 * volume was last updated, or created if it was never updated. */
2531 if (sp->inodeType == VI_VOLINFO) {
2532 salvinfo->VolInfo = header.volumeInfo;
2536 if (salvinfo->VolInfo.updateDate) {
2537 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2539 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2540 salvinfo->VolInfo.id,
2541 (Testing ? "it would have been " : ""), update);
2543 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2545 Log("%s (%u) not updated (created %s)\n",
2546 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
/*
 * SalvageVnodes: salvage the small and then the large vnode index of thisIsp.
 *
 * inodes is the inode array shared by the volume group; the ordinary inodes
 * handed to SalvageIndex are those of rwIsp that follow its special inodes.
 * RW is set when thisIsp is the read-write volume itself (rwIsp == thisIsp).
 *
 * Returns 0 when both SalvageIndex calls returned 0 and -1 otherwise.  In
 * check mode a small-index result of -1 is acted on before the large index
 * is examined (the statement under that test is not visible in this
 * listing).
 */
2556 SalvageVnodes(struct SalvInfo *salvinfo,
2557 struct InodeSummary *rwIsp,
2558 struct InodeSummary *thisIsp,
2559 struct ViceInodeInfo *inodes, int check)
2561 int ilarge, ismall, ioffset, RW, nInodes;
2562 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2565 RW = (rwIsp == thisIsp);
2566 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2568 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2569 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2570 if (check && ismall == -1)
2573 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2574 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2575 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex: reconcile one vnode index file with the inodes found on disk.
 *
 * ino is the index inode for the given vnode class; ip/nInodes is the list
 * of ordinary inodes for the volume.  The loops below step through that
 * list in order of increasing vnode number, and an existing comment refers
 * to "our sort order", so the list is expected to arrive sorted.
 *
 * The first diskSize-sized record of the index is skipped, then each
 * non-null vnode is examined:
 *  - a vnode with inode number 0, or whose magic differs from vcp->magic,
 *    is zeroed;
 *  - inodes for lower vnode numbers are stepped over, and among the inodes
 *    for this vnode number the one whose inode number equals the one stored
 *    in the vnode is preferred;
 *  - with a matching inode, the uniquifier (compared through IUnique), data
 *    version, inode number and length recorded in the vnode are brought
 *    into line with the inode, and the inode link count is decremented so
 *    that the inode is kept;
 *  - without a matching inode, a vnode that names an inode, or that is a
 *    directory, is logged and zeroed.
 *
 * A changed vnode is written back with IH_IWRITE unless Testing is set, and
 * salvinfo->VolumeChanged is raised.
 *
 * SalvageVnodes compares the result with 0 and, in check mode, with -1.
 */
2579 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2580 struct ViceInodeInfo *ip, int nInodes,
2581 struct VolumeSummary *volSummary, int check)
2583 char buf[SIZEOF_LARGEDISKVNODE];
2584 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2586 StreamHandle_t *file;
2587 struct VnodeClassInfo *vcp;
2589 afs_sfsize_t nVnodes;
2590 afs_fsize_t vnodeLength;
2592 afs_ino_str_t stmp1, stmp2;
2596 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2597 fdP = IH_OPEN(handle);
2598 osi_Assert(fdP != NULL);
2599 file = FDH_FDOPEN(fdP, "r+");
2600 osi_Assert(file != NULL);
2601 vcp = &VnodeClassInfo[class];
2602 size = OS_SIZE(fdP->fd_fd);
2603 osi_Assert(size != -1);
2604 nVnodes = (size / vcp->diskSize) - 1;
2606 osi_Assert((nVnodes + 1) * vcp->diskSize == size);
2607 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
/* Visit each vnode slot that follows the index header record. */
2611 for (vnodeIndex = 0;
2612 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2613 nVnodes--, vnodeIndex++) {
2614 if (vnode->type != vNull) {
2615 int vnodeChanged = 0;
2616 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2617 if (VNDISK_GET_INO(vnode) == 0) {
2619 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2620 memset(vnode, 0, vcp->diskSize);
2624 if (vcp->magic != vnode->vnodeMagic) {
2625 /* bad magic #, probably partially created vnode */
2627 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2628 vnodeNumber, afs_printable_uint32_lu(vnode->vnodeMagic),
2629 afs_printable_uint32_lu(vcp->magic));
2630 memset(vnode, 0, vcp->diskSize);
2634 Log("Partially allocated vnode %d deleted.\n",
2636 memset(vnode, 0, vcp->diskSize);
2640 /* ****** Should do a bit more salvage here: e.g. make sure
2641 * vnode type matches what it should be given the index */
2642 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2643 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2644 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2645 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2652 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2653 /* The following doesn't work, because the version number
2654 * is not maintained correctly by the file server */
2655 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2656 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2658 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2664 /* For RW volume, look for vnode with matching inode number;
2665 * if no such match, take the first determined by our sort
2667 struct ViceInodeInfo *lip = ip;
2668 int lnInodes = nInodes;
2670 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2671 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2680 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2681 /* "Matching" inode */
2685 vu = vnode->uniquifier;
2686 iu = ip->u.vnode.vnodeUniquifier;
2687 vd = vnode->dataVersion;
2688 id = ip->u.vnode.inodeDataVersion;
2690 * Because of the possibility of the uniquifier overflows (> 4M)
2691 * we compare them modulo the low 22-bits; we shouldn't worry
2692 * about mismatching since they shouldn't to many old
2693 * uniquifiers of the same vnode...
2695 if (IUnique(vu) != IUnique(iu)) {
2697 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2700 vnode->uniquifier = iu;
2701 #ifdef AFS_3DISPARES
2702 vnode->dataVersion = (id >= vd ?
2705 1887437 ? vd : id) :
2708 1887437 ? id : vd));
2710 #if defined(AFS_SGI_EXMAG)
2711 vnode->dataVersion = (id >= vd ?
2714 15099494 ? vd : id) :
2717 15099494 ? id : vd));
2719 vnode->dataVersion = (id > vd ? id : vd);
2720 #endif /* AFS_SGI_EXMAG */
2721 #endif /* AFS_3DISPARES */
2724 /* don't bother checking for vd > id any more, since
2725 * partial file transfers always result in this state,
2726 * and you can't do much else anyway (you've already
2727 * found the best data you can) */
2728 #ifdef AFS_3DISPARES
2729 if (!vnodeIsDirectory(vnodeNumber)
2730 && ((vd < id && (id - vd) < 1887437)
2731 || ((vd > id && (vd - id) > 1887437)))) {
2733 #if defined(AFS_SGI_EXMAG)
2734 if (!vnodeIsDirectory(vnodeNumber)
2735 && ((vd < id && (id - vd) < 15099494)
2736 || ((vd > id && (vd - id) > 15099494)))) {
2738 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2739 #endif /* AFS_SGI_EXMAG */
2742 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2743 vnode->dataVersion = id;
2748 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2751 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2753 VNDISK_SET_INO(vnode, ip->inodeNumber);
2758 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2760 VNDISK_SET_INO(vnode, ip->inodeNumber);
2763 VNDISK_GET_LEN(vnodeLength, vnode);
2764 if (ip->byteCount != vnodeLength) {
2767 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2772 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2773 VNDISK_SET_LEN(vnode, ip->byteCount);
2777 ip->linkCount--; /* Keep the inode around */
2780 } else { /* no matching inode */
2782 if (VNDISK_GET_INO(vnode) != 0
2783 || vnode->type == vDirectory) {
2784 /* No matching inode--get rid of the vnode */
2786 if (VNDISK_GET_INO(vnode)) {
2788 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2792 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2797 if (VNDISK_GET_INO(vnode)) {
2799 time_t serverModifyTime = vnode->serverModifyTime;
2800 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2804 time_t serverModifyTime = vnode->serverModifyTime;
2805 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2808 memset(vnode, 0, vcp->diskSize);
2811 /* Should not reach here because we checked for
2812 * (inodeNumber == 0) above. And where we zero the vnode,
2813 * we also goto vnodeDone.
2817 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2821 } /* VNDISK_GET_INO(vnode) != 0 */
/* Nothing may have been modified while only checking. */
2823 osi_Assert(!(vnodeChanged && check));
2824 if (vnodeChanged && !Testing) {
2825 osi_Assert(IH_IWRITE
2826 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2827 (char *)vnode, vcp->diskSize)
2829 salvinfo->VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber: map a vnode number to its in-memory VnodeEssence.
 * The vnode class and bit number are derived from vnodeNumber; NULL is
 * returned when the bit number is not below the nVnodes recorded for that
 * class, otherwise a pointer into that class's vnodes array.
 */
2840 struct VnodeEssence *
2841 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2844 struct VnodeInfo *vip;
2847 class = vnodeIdToClass(vnodeNumber);
2848 vip = &salvinfo->vnodeInfo[class];
2849 offset = vnodeIdToBitNumber(vnodeNumber);
2850 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite: give dir a private copy of its directory inode before the
 * salvager modifies it.
 *
 * Nothing is done when dir->copied is already set or when Testing is on.
 * Otherwise the directory vnode is read from the large vnode index, a new
 * inode is created with IH_CREATE (the data version is advanced as part of
 * that call), the old inode contents are copied with CopyInode, the vnode
 * is rewritten to name the new inode, and dir->dirHandle is re-pointed at
 * the new inode.  The original inode is left in place here; SalvageDir
 * releases it once enumeration is finished.
 *
 * Failures of the read, create, copy and write steps are fatal assertions.
 */
2854 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2856 /* Copy the directory unconditionally if we are going to change it:
2857 * not just if was cloned.
2859 struct VnodeDiskObject vnode;
2860 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2861 Inode oldinode, newinode;
2864 if (dir->copied || Testing)
2866 DFlush(); /* Well justified paranoia... */
2869 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2870 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2872 osi_Assert(code == sizeof(vnode));
2873 oldinode = VNDISK_GET_INO(&vnode);
2874 /* Increment the version number by a whole lot to avoid problems with
2875 * clients that were promised new version numbers--but the file server
2876 * crashed before the versions were written to disk.
2879 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2880 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2882 osi_Assert(VALID_INO(newinode));
2883 osi_Assert(CopyInode(salvinfo->fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2885 VNDISK_SET_INO(&vnode, newinode);
2887 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2888 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2890 osi_Assert(code == sizeof(vnode));
2892 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2893 salvinfo->fileSysDevice, newinode,
2894 &salvinfo->VolumeChanged);
2895 /* Don't delete the original inode right away, because the directory is
2896 * still being scanned.
/*
 * CopyAndSalvage: rebuild a damaged directory into a freshly created inode.
 *
 * The directory vnode is read from the large vnode index, a new inode is
 * created (advancing the data version), and DirSalvage copies what it can
 * from the old directory into the new one, using dir and vnode.parent for
 * the . and .. entries.  If DirSalvage fails, or the rebuilt directory does
 * not pass DirOK, the new inode is released with IH_DEC and the old
 * directory stays in use.  On success the vnode is rewritten with the new
 * inode number and length, the change is synced, the old inode is released
 * and dir->dirHandle is switched to the new directory.
 */
2902 * This function should either successfully create a new dir, or give up
2903 * and leave things the way they were. In particular, if it fails to write
2904 * the new dir properly, it should return w/o changing the reference to the
2908 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2910 struct VnodeDiskObject vnode;
2911 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2912 Inode oldinode, newinode;
2917 afs_int32 parentUnique = 1;
2918 struct VnodeEssence *vnodeEssence;
2923 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2925 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2926 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2928 osi_Assert(lcode == sizeof(vnode));
2929 oldinode = VNDISK_GET_INO(&vnode);
2930 /* Increment the version number by a whole lot to avoid problems with
2931 * clients that were promised new version numbers--but the file server
2932 * crashed before the versions were written to disk.
2935 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2936 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2938 osi_Assert(VALID_INO(newinode));
2939 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2940 &salvinfo->VolumeChanged);
2942 /* Assign . and .. vnode numbers from dir and vnode.parent.
2943 * The uniquifier for . is in the vnode.
2944 * The uniquifier for .. might be set to a bogus value of 1 and
2945 * the salvager will later clean it up.
2947 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2948 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2951 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2953 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2958 /* didn't really build the new directory properly, let's just give up. */
2959 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2960 Log("Directory salvage returned code %d, continuing.\n", code);
2962 Log("also failed to decrement link count on new inode\n");
2966 Log("Checking the results of the directory salvage...\n");
2967 if (!DirOK(&newdir)) {
2968 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2969 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2970 osi_Assert(code == 0);
2974 VNDISK_SET_INO(&vnode, newinode);
2975 length = afs_dir_Length(&newdir);
2976 VNDISK_SET_LEN(&vnode, length);
2978 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2979 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2981 osi_Assert(lcode == sizeof(vnode));
2984 nt_sync(salvinfo->fileSysDevice);
2986 sync(); /* this is slow, but hopefully rarely called. We don't have
2987 * an open FD on the file itself to fsync.
2991 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
2993 /* make sure old directory file is really closed */
2994 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2995 FDH_REALLYCLOSE(fdP);
2997 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2998 osi_Assert(code == 0);
2999 dir->dirHandle = newdir;
/*
 * Bundle handed to JudgeEntry through its opaque first argument; SalvageDir
 * fills one in and passes its address to afs_dir_EnumerateDir.
 */
3003 * arguments for JudgeEntry.
3005 struct judgeEntry_params {
3006 struct DirSummary *dir; /**< directory we're examining entries in */
3007 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/*
 * JudgeEntry: callback run for each entry of a directory being salvaged.
 *
 * arock points at a struct judgeEntry_params naming the directory (dir) and
 * the salvage job (salvinfo); name is the entry being judged and vnodeNumber
 * the vnode it refers to.  The uniquifier of the entry is used below as
 * unique; its declaration is not visible in this listing.
 *
 * Checks applied, in order:
 *  - an entry whose vnode number has no VnodeEssence is deleted;
 *  - (non-namei builds) an entry whose vnode has inode number 0 is deleted;
 *  - an entry for an even vnode number whose essence has no count, unique
 *    or mode bits is reported as invalid and then deleted (check skipped
 *    when Showmode is set);
 *  - a uniquifier mismatch is repaired by re-creating the entry with the
 *    uniquifier of the vnode, or the entry is deleted when the vnode unique
 *    is 0 or the directory is orphaned; the . and .. entries of a vnode with
 *    unique 0 are left alone;
 *  - the . entry must name this directory and the .. entry its parent (or
 *    the directory itself, presumably when it has no parent); bad ones are
 *    replaced;
 *  - names starting with .__afs are deleted and their vnodes flagged
 *    todelete;
 *  - otherwise optional reporting is done (suid/sgid files, files owned by
 *    id 0, mount points), a symlink without execute bits whose contents do
 *    not look like a mount point gets mode bits 0111 added, and the entry
 *    either claims the vnode or is deleted when the vnode is already
 *    claimed elsewhere or is vnode 1.
 *
 * Every visible deletion or re-creation is preceded by
 * CopyOnWrite(salvinfo, dir).
 *
 * NOTE(review): the non-namei branch deletes with Delete() while every
 * other deletion here uses afs_dir_Delete(); confirm Delete() is still
 * declared.
 * NOTE(review): the mount point test reads buf[strlen(buf)-1]; confirm that
 * an empty link body cannot reach it.
 */
3011 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
3014 struct judgeEntry_params *params = arock;
3015 struct DirSummary *dir = params->dir;
3016 struct SalvInfo *salvinfo = params->salvinfo;
3017 struct VnodeEssence *vnodeEssence;
3018 afs_int32 dirOrphaned, todelete;
3020 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
3022 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3023 if (vnodeEssence == NULL) {
3025 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
3028 CopyOnWrite(salvinfo, dir);
3029 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3034 #ifndef AFS_NAMEI_ENV
3035 /* On AIX machines, don't allow entries to point to inode 0. That is a special
3036 * mount inode for the partition. If this inode were deleted, it would crash
3039 if (vnodeEssence->InodeNumber == 0) {
3040 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
3042 CopyOnWrite(salvinfo, dir);
3043 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3050 if (!(vnodeNumber & 1) && !Showmode
3051 && !(vnodeEssence->count || vnodeEssence->unique
3052 || vnodeEssence->modeBits)) {
3053 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
3054 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3055 vnodeNumber, unique,
3056 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3060 CopyOnWrite(salvinfo, dir);
3061 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3067 /* Check if the Uniquifiers match. If not, change the directory entry
3068 * so its unique matches the vnode unique. Delete if the unique is zero
3069 * or if the directory is orphaned.
3071 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3072 if (!vnodeEssence->unique
3073 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3074 /* This is an orphaned directory. Don't delete the . or ..
3075 * entry. Otherwise, it will get created in the next
3076 * salvage and deleted again here. So Just skip it.
3081 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3084 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3088 fid.Vnode = vnodeNumber;
3089 fid.Unique = vnodeEssence->unique;
3090 CopyOnWrite(salvinfo, dir);
3091 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3093 osi_Assert(afs_dir_Create(&dir->dirHandle, name, &fid) == 0);
3096 return 0; /* no need to continue */
3099 if (strcmp(name, ".") == 0) {
3100 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3103 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3105 CopyOnWrite(salvinfo, dir);
3106 osi_Assert(afs_dir_Delete(&dir->dirHandle, ".") == 0);
3107 fid.Vnode = dir->vnodeNumber;
3108 fid.Unique = dir->unique;
3109 osi_Assert(afs_dir_Create(&dir->dirHandle, ".", &fid) == 0);
3112 vnodeNumber = fid.Vnode; /* Get the new Essence */
3113 unique = fid.Unique;
3114 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3117 } else if (strcmp(name, "..") == 0) {
3120 struct VnodeEssence *dotdot;
3121 pa.Vnode = dir->parent;
3122 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3123 osi_Assert(dotdot != NULL); /* XXX Should not be assert */
3124 pa.Unique = dotdot->unique;
3126 pa.Vnode = dir->vnodeNumber;
3127 pa.Unique = dir->unique;
3129 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3131 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3133 CopyOnWrite(salvinfo, dir);
3134 osi_Assert(afs_dir_Delete(&dir->dirHandle, "..") == 0);
3135 osi_Assert(afs_dir_Create(&dir->dirHandle, "..", &pa) == 0);
3138 vnodeNumber = pa.Vnode; /* Get the new Essence */
3140 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3142 dir->haveDotDot = 1;
3143 } else if (strncmp(name, ".__afs", 6) == 0) {
3145 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3148 CopyOnWrite(salvinfo, dir);
3149 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3151 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3152 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3155 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3156 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3157 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3158 && !(vnodeEssence->modeBits & 0111)) {
3159 afs_sfsize_t nBytes;
3165 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3166 vnodeEssence->InodeNumber);
3169 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3173 size = FDH_SIZE(fdP);
3175 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3176 FDH_REALLYCLOSE(fdP);
3183 nBytes = FDH_PREAD(fdP, buf, size, 0);
3184 if (nBytes == size) {
3186 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3187 Log("Volume %u (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3188 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3189 Testing ? "would convert" : "converted");
3190 vnodeEssence->modeBits |= 0111;
3191 vnodeEssence->changed = 1;
3192 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3193 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3194 dir->name ? dir->name : "??", name, buf);
3196 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3197 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3199 FDH_REALLYCLOSE(fdP);
3202 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3203 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3204 if (vnodeIdToClass(vnodeNumber) == vLarge
3205 && vnodeEssence->name == NULL) {
3207 if ((n = (char *)malloc(strlen(name) + 1)))
3209 vnodeEssence->name = n;
3212 /* The directory entry points to the vnode. Check to see if the
3213 * vnode points back to the directory. If not, then let the
3214 * directory claim it (else it might end up orphaned). Vnodes
3215 * already claimed by another directory are deleted from this
3216 * directory: hardlinks to the same vnode are not allowed
3217 * from different directories.
3219 if (vnodeEssence->parent != dir->vnodeNumber) {
3220 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3221 /* Vnode does not point back to this directory.
3222 * Orphaned dirs cannot claim a file (it may belong to
3223 * another non-orphaned dir).
3226 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3228 vnodeEssence->parent = dir->vnodeNumber;
3229 vnodeEssence->changed = 1;
3231 /* Vnode was claimed by another directory */
3234 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3235 } else if (vnodeNumber == 1) {
3236 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3238 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3242 CopyOnWrite(salvinfo, dir);
3243 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3248 /* This directory claims the vnode */
3249 vnodeEssence->claimed = 1;
3251 vnodeEssence->count--;
/*
 * DistilVnodeEssence: load the vnode index ino of the given class and keep
 * a compact VnodeEssence for every allocated vnode in
 * salvinfo->vnodeInfo[class].
 *
 * The vnode count is derived from the file length (one diskSize record is
 * the index header), the vnodes array -- and for vLarge the inodes array --
 * is allocated, and each non-null vnode contributes its link count, block
 * count, parent, uniquifier, mode bits, inode number, type, author, owner
 * and group.  *maxu is raised to the largest uniquifier seen.
 *
 * A directory vnode found in a non-large index is invalid: it is dropped
 * from the allocated count and its slot is cleared on disk.  For vLarge the
 * inode number of each directory is recorded in vip->inodes.
 */
3256 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3257 VnodeClass class, Inode ino, Unique * maxu)
3259 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3260 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3261 char buf[SIZEOF_LARGEDISKVNODE];
3262 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3264 StreamHandle_t *file;
3269 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3270 fdP = IH_OPEN(vip->handle);
3271 osi_Assert(fdP != NULL);
3272 file = FDH_FDOPEN(fdP, "r+");
3273 osi_Assert(file != NULL);
3274 size = OS_SIZE(fdP->fd_fd);
3275 osi_Assert(size != -1);
3276 vip->nVnodes = (size / vcp->diskSize) - 1;
3277 if (vip->nVnodes > 0) {
3278 osi_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3279 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
3280 osi_Assert((vip->vnodes = (struct VnodeEssence *)
3281 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3282 if (class == vLarge) {
3283 osi_Assert((vip->inodes = (Inode *)
3284 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3293 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3294 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3295 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3296 nVnodes--, vnodeIndex++) {
3297 if (vnode->type != vNull) {
3298 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3299 afs_fsize_t vnodeLength;
3300 vip->nAllocatedVnodes++;
3301 vep->count = vnode->linkCount;
3302 VNDISK_GET_LEN(vnodeLength, vnode);
3303 vep->blockCount = nBlocks(vnodeLength);
3304 vip->volumeBlockCount += vep->blockCount;
3305 vep->parent = vnode->parent;
3306 vep->unique = vnode->uniquifier;
3307 if (*maxu < vnode->uniquifier)
3308 *maxu = vnode->uniquifier;
3309 vep->modeBits = vnode->modeBits;
3310 vep->InodeNumber = VNDISK_GET_INO(vnode);
3311 vep->type = vnode->type;
3312 vep->author = vnode->author;
3313 vep->owner = vnode->owner;
3314 vep->group = vnode->group;
3315 if (vnode->type == vDirectory) {
3316 if (class != vLarge) {
3317 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3318 vip->nAllocatedVnodes--;
/* Clear and rewrite the whole on-disk record.  vnode is a pointer into
 * buf, so sizeof(vnode) is only the size of a pointer and &vnode is the
 * address of the pointer variable, not of the record. */
3319 memset(vnode, 0, vcp->diskSize);
3320 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3321 vnodeIndexOffset(vcp, vnodeNumber),
3322 (char *)vnode, vcp->diskSize);
3323 salvinfo->VolumeChanged = 1;
3325 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName: build the path name of a vnode into path.
 * Visible here is the recursive case: when vp has a parent and a recorded
 * name, and the parent resolves through CheckVnodeNumber, the parent path is
 * built first and then OS_DIRSEP and vp->name are appended with strcat.
 * NOTE(review): no length check is visible; SalvageDir passes a MAXPATHLEN
 * buffer -- confirm that deep trees cannot overflow it.
 */
3334 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3337 struct VnodeEssence *parentvp;
3343 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3344 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3345 strcat(path, OS_DIRSEP);
3346 strcat(path, vp->name);
3352 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3353 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
 * Returns 1 for an orphaned vnode and 0 otherwise; once the visible
 * checks pass, the answer is that of the parent vnode, found by recursion.
3356 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3358 struct VnodeEssence *vep;
3361 return (1); /* Vnode zero does not exist */
3363 return (0); /* The root dir vnode is always claimed */
3364 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3365 if (!vep || !vep->claimed)
3366 return (1); /* Vnode is not claimed - it is orphaned */
3368 return (IsVnodeOrphaned(salvinfo, vep->parent));
/*
 * SalvageDir: salvage the directory held in slot i of dirVnodeInfo.
 *
 * Order of work visible here:
 *  - return at once if the slot is already marked salvaged, otherwise mark
 *    it; return if no inode is recorded for the slot;
 *  - for vnode 1 a non-zero parent is cleared; for other directories the
 *    parent directory is salvaged first, by recursion, if it has not been;
 *  - a DirSummary is filled in and the directory is checked with DirOK
 *    (treated as bad without checking when RebuildDirs is set and Testing
 *    is not); a bad directory goes through CopyAndSalvage;
 *  - every entry is passed to JudgeEntry through afs_dir_EnumerateDir;
 *  - if the directory was copied while being judged, the old inode is
 *    released with IH_DEC and the new inode number is stored in
 *    dirVnodeInfo;
 *  - the summary for vnode 1 with unique 1 is copied to *rootdir.
 *
 * dir, dirHandle and path are static, so their contents persist between
 * calls; the recursive call for the parent is made before they are filled
 * in for this directory.
 */
3372 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3373 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3374 struct DirSummary *rootdir, int *rootdirfound)
3376 static struct DirSummary dir;
3377 static struct DirHandle dirHandle;
3378 struct VnodeEssence *parent;
3379 static char path[MAXPATHLEN];
3382 if (dirVnodeInfo->vnodes[i].salvaged)
3383 return; /* already salvaged */
3386 dirVnodeInfo->vnodes[i].salvaged = 1;
3388 if (dirVnodeInfo->inodes[i] == 0)
3389 return; /* Not allocated to a directory */
3391 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3392 if (dirVnodeInfo->vnodes[i].parent) {
3393 Log("Bad parent, vnode 1; %s...\n",
3394 (Testing ? "skipping" : "salvaging"));
3395 dirVnodeInfo->vnodes[i].parent = 0;
3396 dirVnodeInfo->vnodes[i].changed = 1;
3399 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3400 if (parent && parent->salvaged == 0)
3401 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3402 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3403 rootdir, rootdirfound);
3406 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3407 dir.unique = dirVnodeInfo->vnodes[i].unique;
3410 dir.parent = dirVnodeInfo->vnodes[i].parent;
3411 dir.haveDot = dir.haveDotDot = 0;
3412 dir.ds_linkH = alinkH;
3413 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3414 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
3416 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3419 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3420 (Testing ? "skipping" : "salvaging"));
3423 CopyAndSalvage(salvinfo, &dir);
3425 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3428 dirHandle = dir.dirHandle;
3431 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3432 &dirVnodeInfo->vnodes[i], path);
3435 /* If enumeration failed for random reasons, we will probably delete
3436 * too much stuff, so we guard against this instead.
3438 struct judgeEntry_params judge_params;
3439 judge_params.salvinfo = salvinfo;
3440 judge_params.dir = &dir;
3442 osi_Assert(afs_dir_EnumerateDir(&dirHandle, JudgeEntry,
3443 &judge_params) == 0);
3446 /* Delete the old directory if it was copied in order to salvage.
3447 * CopyOnWrite has written the new inode # to the disk, but we still
3448 * have the old one in our local structure here. Thus, we idec the
3452 if (dir.copied && !Testing) {
3453 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3454 osi_Assert(code == 0);
3455 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3458 /* Remember rootdir DirSummary _after_ it has been judged */
3459 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3460 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3468 * Get a new FID that can be used to create a new file.
3470 * @param[in] volHeader vol header for the volume
3471 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3472 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3473 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3474 * updated to the new max unique if we create a new
/*
 * Implementation notes:
 *  - the vnode slot is the first entry of the class whose type is vNull; if
 *    there is none, the vnodes array grows by one entry.
 *  - the uniquifier comes from the volume header when the header value is
 *    above *maxunique (the header value is post-incremented and also stored
 *    in *maxunique); otherwise *maxunique + 1 + 2000 is used.
 */
3478 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3479 VnodeClass class, AFSFid *afid, Unique *maxunique)
/* Scan for the first unused (vNull) slot in this vnode class. */
3482 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3483 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
3487 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3488 /* no free vnodes; make a new one */
3489 salvinfo->vnodeInfo[class].nVnodes++;
/* NOTE(review): the realloc result is stored straight into vnodes and no
 * failure check is visible in this listing.  Only the type field of the new
 * entry is set below, and only the vnodes array is grown here; confirm the
 * other fields and the parallel inodes array are handled elsewhere. */
3490 salvinfo->vnodeInfo[class].vnodes =
3491 realloc(salvinfo->vnodeInfo[class].vnodes,
3492 sizeof(struct VnodeEssence) * (i+1));
3494 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
/* Slot index i is converted to the vnode number returned in afid. */
3497 afid->Vnode = bitNumberToVnodeNumber(i, class);
3499 if (volHeader->uniquifier < (*maxunique + 1)) {
3500 /* header uniq is bad; it will get bumped by 2000 later */
3501 afid->Unique = *maxunique + 1 + 2000;
3504 /* header uniq seems okay; just use that */
3505 afid->Unique = *maxunique = volHeader->uniquifier++;
3510 * Create a vnode for a README file explaining not to use a recreated-root vol.
3512 * @param[in] volHeader vol header for the volume
3513 * @param[in] alinkH ihandle for i/o for the volume
3514 * @param[in] vid volume id
3515 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3516 * updated to the new max unique if we create a new
3518 * @param[out] afid FID for the new readme vnode
3519 * @param[out] ainode the inode for the new readme file
3521 * @return operation status
/*
 * Implementation overview:
 *  1. GetNewFID() reserves a small-vnode slot and FID for the readme.
 *  2. A data inode is created (IH_CREATE) and the fixed readme text is
 *     written to it; the trailing NUL is not written.
 *  3. A zeroed small disk vnode is filled in (plain file, mode 0777, link
 *     count 1, data version 1) and written at the index offset of the vnode.
 *  4. The in-memory VnodeEssence and the per-class counters are updated and
 *     the inode number is handed back through *ainode.
 * The tail of the function is the recovery path: it IH_DECs the readme inode
 * again.  How control reaches it is not visible in this listing.
 *
 * Fixes relative to the previous text: the allocation-failure message now
 * names CreateReadme (it said CreateRootDir), and the short-write message
 * prints the byte count that was actually requested (length).
 */
3526 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3527 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3531 struct VnodeDiskObject *rvnode = NULL;
3533 IHandle_t *readmeH = NULL;
3534 struct VnodeEssence *vep;
3536 time_t now = time(NULL);
3538 /* Try to make the note brief, but informative. Only administrators should
3539 * be able to read this file at first, so we can hopefully assume they
3540 * know what AFS is, what a volume is, etc. */
3542 "This volume has been salvaged, but has lost its original root directory.\n"
3543 "The root directory that exists now has been recreated from orphan files\n"
3544 "from the rest of the volume. This recreated root directory may interfere\n"
3545 "with old cached data on clients, and there is no way the salvager can\n"
3546 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3547 "use this volume, but only copy the salvaged data to a new volume.\n"
3548 "Continuing to use this volume as it exists now may cause some clients to\n"
3549 "behave oddly when accessing this volume.\n"
3550 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3551 /* ^ the person reading this probably just lost some data, so they could
3552 * use some cheering up. */
3554 /* -1 for the trailing NUL */
3555 length = sizeof(readme) - 1;
3557 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
/* Taken after GetNewFID(), which may realloc the small-vnode array. */
3559 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3561 /* create the inode and write the contents */
3562 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3563 salvinfo->fileSysPath, 0, vid,
3564 afid->Vnode, afid->Unique, 1);
3565 if (!VALID_INO(readmeinode)) {
3566 Log("CreateReadme: readme IH_CREATE failed\n");
3570 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3571 bytes = IH_IWRITE(readmeH, 0, readme, length);
3572 IH_RELEASE(readmeH);
3574 if (bytes != length) {
/* Report written/requested; length is what was passed to IH_IWRITE. */
3575 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3576 (int)length);
3580 /* create the vnode and write it out */
3581 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
3583 Log("CreateReadme: error alloc'ing memory\n");
3587 rvnode->type = vFile;
3589 rvnode->modeBits = 0777;
3590 rvnode->linkCount = 1;
3591 VNDISK_SET_LEN(rvnode, length);
3592 rvnode->uniquifier = afid->Unique;
3593 rvnode->dataVersion = 1;
3594 VNDISK_SET_INO(rvnode, readmeinode);
3595 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3600 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
3602 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3603 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3604 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3606 if (bytes != SIZEOF_SMALLDISKVNODE) {
3607 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3608 (int)SIZEOF_SMALLDISKVNODE);
3612 /* update VnodeEssence for new readme vnode */
3613 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
3615 vep->blockCount = nBlocks(length);
3616 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3617 vep->parent = rvnode->parent;
3618 vep->unique = rvnode->uniquifier;
3619 vep->modeBits = rvnode->modeBits;
3620 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3621 vep->type = rvnode->type;
3622 vep->author = rvnode->author;
3623 vep->owner = rvnode->owner;
3624 vep->group = rvnode->group;
3634 *ainode = readmeinode;
/* Recovery: give back the data inode created above. */
3639 if (IH_DEC(alinkH, readmeinode, vid)) {
3640 Log("CreateReadme (recovery): IH_DEC failed\n");
3652 * create a root dir for a volume that lacks one.
3654 * @param[in] volHeader vol header for the volume
3655 * @param[in] alinkH ihandle for disk access for this volume group
3656 * @param[in] vid volume id we're dealing with
3657 * @param[out] rootdir populated with info about the new root dir
3658 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3659 * updated to the new max unique if we create a new
3662 * @return operation status
/*
 * Implementation overview:
 *  1. Refuse when the volume has neither large nor small vnode tables; when
 *     only the large table is missing, allocate room for one entry.
 *  2. Refuse when the slot of vnode 1 is already in use (type != vNull).
 *  3. Create the readme file, then a new directory inode; initialise the
 *     directory with afs_dir_MakeDir and enter README.ROOTDIR into it.
 *  4. Build a zeroed large disk vnode for vnode 1 (directory, mode 0777,
 *     link count 2, uniquifier 1) whose ACL grants read and lookup to
 *     system:administrators, and write it at the index offset of vnode 1.
 *  5. Update the in-memory VnodeEssence, the per-class counters and the
 *     caller-supplied DirSummary.
 * The tail of the function is the recovery path: it IH_DECs the root and
 * readme inodes when decroot and decreadme are set.  Where those flags are
 * set is not visible in this listing.
 */
3667 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3668 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
3672 int decroot = 0, decreadme = 0;
3673 AFSFid did, readmeid;
3676 struct VnodeDiskObject *rootvnode = NULL;
3677 struct acl_accessList *ACL;
3680 struct VnodeEssence *vep;
3682 time_t now = time(NULL);
3684 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3685 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3689 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3690 /* We don't have any large vnodes in the volume; allocate room
3691 * for one so we can recreate the root dir */
3692 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3693 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3694 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3696 osi_Assert(salvinfo->vnodeInfo[vLarge].vnodes);
3697 osi_Assert(salvinfo->vnodeInfo[vLarge].inodes);
/* Entries for vnode 1 in the large-vnode tables. */
3700 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3701 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3702 if (vep->type != vNull) {
3703 Log("Not creating new root dir; existing vnode 1 is non-null\n");
/* The readme is created first so that its FID can be entered into the new
 * directory below. */
3707 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3708 &readmeinode) != 0) {
3713 /* set the DV to a very high number, so it is unlikely that we collide
3714 * with a cached DV */
3717 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3719 if (!VALID_INO(rootinode)) {
3720 Log("CreateRootDir: IH_CREATE failed\n");
3725 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3726 rootinode, &salvinfo->VolumeChanged);
/* did is passed as both FID arguments of afs_dir_MakeDir.
 * NOTE(review): the lines that fill in did are not visible in this listing. */
3730 if (afs_dir_MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3731 Log("CreateRootDir: MakeDir failed\n");
3734 if (afs_dir_Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3735 Log("CreateRootDir: Create failed\n");
3739 length = afs_dir_Length(&rootdir->dirHandle);
3740 DZap(&rootdir->dirHandle);
3742 /* create the new root dir vnode */
3743 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
3745 Log("CreateRootDir: malloc failed\n");
3749 /* only give 'rl' permissions to 'system:administrators'. We do this to
3750 * try to catch the attention of an administrator, that they should not
3751 * be writing to this directory or continue to use it. */
3752 ACL = VVnodeDiskACL(rootvnode);
3753 ACL->size = sizeof(struct acl_accessList);
3754 ACL->version = ACL_ACLVERSION;
3758 ACL->entries[0].id = -204; /* system:administrators */
3759 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
3761 rootvnode->type = vDirectory;
3762 rootvnode->cloned = 0;
3763 rootvnode->modeBits = 0777;
3764 rootvnode->linkCount = 2;
3765 VNDISK_SET_LEN(rootvnode, length);
3766 rootvnode->uniquifier = 1;
3767 rootvnode->dataVersion = dv;
3768 VNDISK_SET_INO(rootvnode, rootinode);
3769 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3770 rootvnode->author = 0;
3771 rootvnode->owner = 0;
3772 rootvnode->parent = 0;
3773 rootvnode->group = 0;
3774 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3776 /* write it out to disk */
3777 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3778 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3779 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3781 if (bytes != SIZEOF_LARGEDISKVNODE) {
3782 /* just cast to int and don't worry about printing real 64-bit ints;
3783 * a large disk vnode isn't anywhere near the 32-bit limit */
3784 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3785 (int)SIZEOF_LARGEDISKVNODE);
3789 /* update VnodeEssence for the new root vnode */
3790 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3792 vep->blockCount = nBlocks(length);
3793 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3794 vep->parent = rootvnode->parent;
3795 vep->unique = rootvnode->uniquifier;
3796 vep->modeBits = rootvnode->modeBits;
3797 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3798 vep->type = rootvnode->type;
3799 vep->author = rootvnode->author;
3800 vep->owner = rootvnode->owner;
3801 vep->group = rootvnode->group;
3811 /* update DirSummary for the new root vnode */
3812 rootdir->vnodeNumber = 1;
3813 rootdir->unique = 1;
3814 rootdir->haveDot = 1;
3815 rootdir->haveDotDot = 1;
3816 rootdir->rwVid = vid;
3817 rootdir->copied = 0;
3818 rootdir->parent = 0;
/* NOTE(review): strdup allocates and its result is stored unchecked; who
 * frees rootdir->name is not visible in this listing. */
3819 rootdir->name = strdup(".");
3820 rootdir->vname = volHeader->name;
3821 rootdir->ds_linkH = alinkH;
/* Recovery: give back only the inodes whose dec flag is set. */
3828 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3829 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3831 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3832 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3842 * salvage a volume group.
3844 * @param[in] salvinfo information for the current salvage job
3845 * @param[in] rwIsp inode summary for rw volume
3846 * @param[in] alinkH link table inode handle
3848 * @return operation status
/*
 * Implementation overview (in the order the visible code runs):
 *  1. Read the RW volume header and assert its size, its magic number and
 *     that destroyMe is not DESTROY_ME.
 *  2. DistilVnodeEssence() for the large and the small vnode index; both
 *     calls are handed maxunique.
 *  3. SalvageDir() for every large vnode, then sync.
 *  4. If no root directory was found, orphans == ORPH_ATTACH and Testing is
 *     off, try CreateRootDir().
 *  5. Walk every vnode of every class; unused, claimed and root vnodes are
 *     skipped.  For the rest, adjust the count of the old parent and, when
 *     attaching is requested, re-parent the vnode under the root directory
 *     with an __ORPHANDIR__ or __ORPHANFILE__ name (up to 10 attempts).
 *  6. IH_DEC the old root directory inode if the root directory was copied
 *     while attaching orphans.
 *  7. DFlush(), then rewrite every vnode that is changed or has a nonzero
 *     count.  Orphaned vnodes are zeroed, and their inode IH_DECed, under
 *     ORPH_REMOVE or when marked todelete; further conditions on that branch
 *     are not visible in this listing.
 *  8. Free cached names, store the file and block totals, raise a too-low
 *     uniquifier, notify the fileserver (FSYNC callback break, or removal of
 *     the FS state file when FSYNC is not in use), write the header back and
 *     release the vnode index handles.
 *
 * VnodeEssence.count is a pending correction: step 7 stores
 * linkCount - count, so count-- raises a link count and count++ lowers it.
 */
3852 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3854 /* This routine, for now, will only be called for read-write volumes */
3856 int BlocksInVolume = 0, FilesInVolume = 0;
3858 struct DirSummary rootdir, oldrootdir;
3859 struct VnodeInfo *dirVnodeInfo;
3860 struct VnodeDiskObject vnode;
3861 VolumeDiskData volHeader;
3863 int orphaned, rootdirfound = 0;
3864 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3865 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3866 struct VnodeEssence *vep;
3869 afs_sfsize_t nBytes;
3871 VnodeId LFVnode, ThisVnode;
3872 Unique LFUnique, ThisUnique;
3876 vid = rwIsp->volSummary->header.id;
3877 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3878 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3879 osi_Assert(nBytes == sizeof(volHeader));
3880 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3881 osi_Assert(volHeader.destroyMe != DESTROY_ME);
3882 /* (should not have gotten this far with DESTROY_ME flag still set!) */
3884 DistilVnodeEssence(salvinfo, vid, vLarge,
3885 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
3886 DistilVnodeEssence(salvinfo, vid, vSmall,
3887 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
3889 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
3890 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3891 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
3892 &rootdir, &rootdirfound);
3895 nt_sync(salvinfo->fileSysDevice);
3897 sync(); /* This used to be done lower level, for every dir */
/* A missing root directory is only recreated when orphans are to be
 * attached and this is not a Testing run. */
3904 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
3906 Log("Cannot find root directory for volume %lu; attempting to create "
3907 "a new one\n", afs_printable_uint32_lu(vid));
3909 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
3914 salvinfo->VolumeChanged = 1;
3918 /* Parse each vnode looking for orphaned vnodes and
3919 * connect them to the tree as orphaned (if requested).
3921 oldrootdir = rootdir;
3922 for (class = 0; class < nVNODECLASSES; class++) {
3923 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
3924 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
3925 ThisVnode = bitNumberToVnodeNumber(v, class);
3926 ThisUnique = vep->unique;
3928 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3929 continue; /* Ignore unused, claimed, and root vnodes */
3931 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3932 * entry in this vnode had incremented the parent link count (In
3933 * JudgeEntry()). We need to go to the parent and decrement that
3934 * link count. But if the parent's unique is zero, then the parent
3935 * link count was not incremented in JudgeEntry().
3937 if (class == vLarge) { /* directory vnode */
/* NOTE(review): pv comes from vep->parent and indexes the large-vnode
 * table; no range check is visible in this listing. */
3938 pv = vnodeIdToBitNumber(vep->parent);
3939 if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
3940 if (vep->parent == 1 && newrootdir) {
3941 /* this vnode's parent was the volume root, and
3942 * we just created the volume root. So, the parent
3943 * dir didn't exist during JudgeEntry, so the link
3944 * count was not inc'd there, so don't dec it here.
3950 salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
3956 continue; /* If no rootdir, can't attach orphaned files */
3958 /* Here we attach orphaned files and directories into the
3959 * root directory, LFVnode, making sure link counts stay correct.
3961 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3962 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3963 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3965 /* Update this orphaned vnode's info. Its parent info and
3966 * link count (do for orphaned directories and files).
3968 vep->parent = LFVnode; /* Parent is the root dir */
3969 vep->unique = LFUnique;
3972 vep->count--; /* Raises the link count on write-back (root dir will point to it) */
3974 /* If this orphaned vnode is a directory, change '..'.
3975 * The name of the orphaned dir/file is unknown, so we
3976 * build a unique name. No need to CopyOnWrite the directory
3977 * since it is not connected to tree in BK or RO volume and
3978 * won't be visible there.
3980 if (class == vLarge) {
3984 /* Remove and recreate the ".." entry in this orphaned directory */
3985 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
3986 salvinfo->vnodeInfo[class].inodes[v],
3987 &salvinfo->VolumeChanged);
3989 pa.Unique = LFUnique;
3990 osi_Assert(afs_dir_Delete(&dh, "..") == 0);
3991 osi_Assert(afs_dir_Create(&dh, "..", &pa) == 0);
3993 /* The original parent's link count was decremented above.
3994 * Here we increment the new parent's link count.
3996 pv = vnodeIdToBitNumber(LFVnode);
3997 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
4001 /* Go to the root dir and add this entry. The link count of the
4002 * root dir was incremented when ".." was created. Try 10 times.
4004 for (j = 0; j < 10; j++) {
4005 pa.Vnode = ThisVnode;
4006 pa.Unique = ThisUnique;
4008 snprintf(npath, sizeof npath, "%s.%u.%u",
4009 ((class == vLarge) ? "__ORPHANDIR__"
4010 : "__ORPHANFILE__"),
4011 ThisVnode, ThisUnique);
4013 CopyOnWrite(salvinfo, &rootdir);
4014 code = afs_dir_Create(&rootdir.dirHandle, npath, &pa);
4018 ThisUnique += 50; /* Try creating a different file */
4020 osi_Assert(code == 0);
4021 Log("Attaching orphaned %s to volume's root dir as %s\n",
4022 ((class == vLarge) ? "directory" : "file"), npath);
4024 } /* for each vnode in the class */
4025 } /* for each class of vnode */
4027 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
/* oldrootdir is the snapshot taken before the orphan walk, so a change in
 * the copied flag means the copy happened during that walk. */
4029 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
4031 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
4033 osi_Assert(code == 0);
4034 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
4037 DFlush(); /* Flush the changes */
4038 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
4039 Log("Cannot attach orphaned files and directories: Root directory not found\n");
4040 orphans = ORPH_IGNORE;
4043 /* Write out all changed vnodes. Orphaned files and directories
4044 * will get removed here also (if requested).
4046 for (class = 0; class < nVNODECLASSES; class++) {
4047 afs_sfsize_t nVnodes = salvinfo->vnodeInfo[class].nVnodes;
4048 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
4049 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
4050 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
4051 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
4052 for (i = 0; i < nVnodes; i++) {
4053 struct VnodeEssence *vnp = &vnodes[i];
4054 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
4056 /* If the vnode is good but is unclaimed (not listed in
4057 * any directory entries), then it is orphaned.
4060 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
4061 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
4065 if (vnp->changed || vnp->count) {
4068 IH_IREAD(salvinfo->vnodeInfo[class].handle,
4069 vnodeIndexOffset(vcp, vnodeNumber),
4070 (char *)&vnode, sizeof(vnode));
4071 osi_Assert(nBytes == sizeof(vnode));
4073 vnode.parent = vnp->parent;
4074 oldCount = vnode.linkCount;
/* vnp->count is the accumulated correction; it is subtracted here. */
4075 vnode.linkCount = vnode.linkCount - vnp->count;
4078 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4080 if (!vnp->todelete) {
4081 /* Orphans should have already been attached (if requested) */
4082 osi_Assert(orphans != ORPH_ATTACH);
4083 oblocks += vnp->blockCount;
4086 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4088 BlocksInVolume -= vnp->blockCount;
4090 if (VNDISK_GET_INO(&vnode)) {
4092 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4093 osi_Assert(code == 0);
4095 memset(&vnode, 0, sizeof(vnode));
4097 } else if (vnp->count) {
4099 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4102 vnode.modeBits = vnp->modeBits;
4105 vnode.dataVersion++;
4108 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4109 vnodeIndexOffset(vcp, vnodeNumber),
4110 (char *)&vnode, sizeof(vnode));
4111 osi_Assert(nBytes == sizeof(vnode));
4113 salvinfo->VolumeChanged = 1;
4117 if (!Showmode && ofiles) {
4118 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4120 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
4124 for (class = 0; class < nVNODECLASSES; class++) {
4125 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4126 for (i = 0; i < vip->nVnodes; i++)
4127 if (vip->vnodes[i].name)
4128 free(vip->vnodes[i].name);
4135 /* Set correct resource utilization statistics */
4136 volHeader.filecount = FilesInVolume;
4137 volHeader.diskused = BlocksInVolume;
4139 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
4140 if (volHeader.uniquifier < (maxunique + 1)) {
4142 Log("Volume uniquifier is too low; fixed\n");
4143 /* Plus 2,000 in case there are workstations out there with
4144 * cached vnodes that have since been deleted
4146 volHeader.uniquifier = (maxunique + 1 + 2000);
4150 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4151 "Only use this salvaged volume to copy data to another volume; "
4152 "do not continue to use this volume (%lu) as-is.\n",
4153 afs_printable_uint32_lu(vid));
4156 if (!Testing && salvinfo->VolumeChanged) {
4157 #ifdef FSSYNC_BUILD_CLIENT
4158 if (salvinfo->useFSYNC) {
4159 afs_int32 fsync_code;
4161 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4163 Log("Error trying to tell the fileserver to break callbacks for "
4164 "changed volume %lu; error code %ld\n",
4165 afs_printable_uint32_lu(vid),
4166 afs_printable_int32_ld(fsync_code));
4168 salvinfo->VolumeChanged = 0;
4171 #endif /* FSSYNC_BUILD_CLIENT */
4173 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4174 if (!salvinfo->useFSYNC) {
4175 /* A volume's contents have changed, but the fileserver will not
4176 * break callbacks on the volume until it tries to load the vol
4177 * header. So, to reduce the amount of time a client could have
4178 * stale data, remove fsstate.dat, so the fileserver will init
4179 * callback state with all clients. This is a very coarse hammer,
4180 * and in the future we should just record which volumes have
4182 code = unlink(AFSDIR_SERVER_FSSTATE_FILEPATH);
4183 if (code && errno != ENOENT) {
4184 Log("Error %d when trying to unlink FS state file %s\n", errno,
4185 AFSDIR_SERVER_FSSTATE_FILEPATH);
4191 /* Turn off the inUse bit; the volume's been salvaged! */
4192 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4193 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4194 volHeader.inService = 1; /* allow service again */
4195 volHeader.needsCallback = (salvinfo->VolumeChanged != 0);
4196 volHeader.dontSalvage = DONT_SALVAGE;
4197 salvinfo->VolumeChanged = 0;
4199 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4200 osi_Assert(nBytes == sizeof(volHeader));
4203 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
4204 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
4205 FilesInVolume, BlocksInVolume);
4208 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4209 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
/*
 * Rewrite the volume header reached through summary->volumeInfoHandle so
 * that inUse and needsSalvaged are cleared, inService is set to 1 and
 * dontSalvage is DONT_SALVAGE.
 *
 * The header is read and written as one VolumeDiskData at offset 0.  A short
 * read, a wrong magic number or a short write trips osi_Assert; no error is
 * reported in any other way in the visible lines.
 */
4215 ClearROInUseBit(struct VolumeSummary *summary)
4217 IHandle_t *h = summary->volumeInfoHandle;
4218 afs_sfsize_t nBytes;
4220 VolumeDiskData volHeader;
4222 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4223 osi_Assert(nBytes == sizeof(volHeader));
4224 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
4225 volHeader.inUse = 0;
4226 volHeader.needsSalvaged = 0;
4227 volHeader.inService = 1;
4228 volHeader.dontSalvage = DONT_SALVAGE;
4230 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4231 osi_Assert(nBytes == sizeof(volHeader));
4236 * Possibly delete the volume.
4238 * deleteMe - Always do so, only a partial volume.
/*
 * Implementation overview:
 *  - a read-only volume, or any volume when deleteMe is set, is deleted
 *    provided it has a volume summary with a file name: the reason is
 *    logged, the volume disk header is destroyed, the header file is
 *    unlinked (ENOENT is tolerated), the fileserver is told through
 *    AskDelete() when FSYNC is in use, and the summary is marked deleted.
 *  - otherwise, unless check is set, the failure is logged with the caller
 *    supplied message and the salvage of this volume is aborted.
 * The conditions that choose between the log messages, and any Testing
 * guard around the deletion, are not visible in this listing.
 */
4241 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4242 char *message, int deleteMe, int check)
4244 if (readOnly(isp) || deleteMe) {
4245 if (isp->volSummary && isp->volSummary->fileName) {
4248 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
4250 Log("It will be deleted on this server (you may find it elsewhere)\n");
4253 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
4255 Log("it will be deleted instead. It should be recloned.\n");
/* NOTE(review): sprintf is unbounded and the declaration of path is not
 * visible in this listing; confirm the buffer can hold the partition path,
 * the separator and the file name. */
4260 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, isp->volSummary->fileName);
4262 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4264 Log("Error %ld destroying volume disk header for volume %lu\n",
4265 afs_printable_int32_ld(code),
4266 afs_printable_uint32_lu(isp->volumeId));
4269 /* make sure we actually delete the fileName file; ENOENT
4270 * is fine, since VDestroyVolumeDiskHeader probably already
4272 if (unlink(path) && errno != ENOENT) {
4273 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4275 if (salvinfo->useFSYNC) {
4276 AskDelete(salvinfo, isp->volumeId);
4278 isp->volSummary->deleted = 1;
4281 } else if (!check) {
4282 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
4284 Abort("Salvage of volume %u aborted\n", isp->volumeId);
4288 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4290 * Locks a volume on disk for salvaging.
4292 * @param[in] volumeId volume ID to lock
4294 * @return operation status
4296 * @retval -1 volume lock raced with a fileserver restart; all volumes must
4297 * be checked out and locked again
4302 LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
4307 /* should always be WRITE_LOCK, but keep the lock-type logic all
4308 * in one place, in VVolLockType. Params will be ignored, but
4309 * try to provide what we're logically doing. */
4310 locktype = VVolLockType(V_VOLUPD, 1);
/* Take the on-disk lock.  EBUSY is reported as the volume being used by
 * someone else; any other failure is reported with its code.  Both paths
 * end in Abort(). */
4312 code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
4314 if (code == EBUSY) {
4315 Abort("Someone else appears to be using volume %lu; Aborted\n",
4316 afs_printable_uint32_lu(volumeId));
4318 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4319 afs_printable_int32_ld(code),
4320 afs_printable_uint32_lu(volumeId));
/* Check the checkout state with the fileserver.  SYNC_DENIED is the one
 * non-fatal failure: the caller is expected to retry.  Any other code that
 * is not SYNC_OK aborts. */
4323 code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPartition->name, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4324 if (code == SYNC_DENIED) {
4325 /* need to retry checking out volumes */
4328 if (code != SYNC_OK) {
4329 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4330 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4333 /* set inUse = programType in the volume header to ensure that nobody
4334 * tries to use this volume again without salvaging, if we somehow crash
4335 * or otherwise exit before finishing the salvage.
4339 struct VolumeHeader header;
4340 struct VolumeDiskHeader diskHeader;
4341 struct VolumeDiskData volHeader;
4343 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
4348 DiskToVolumeHeader(&header, &diskHeader);
4350 IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
4351 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4352 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4358 volHeader.inUse = programType;
4360 /* If we can't re-write the header, bail out and error. We don't
4361 * assert when reading the header, since it's possible the
4362 * header isn't really there (when there's no data associated
4363 * with the volume; we just delete the vol header file in that
4364 * case). But if it's there enough that we can read it, but
4365 * somehow we cannot write to it to signify we're salvaging it,
4366 * we've got a big problem and we cannot continue. */
4367 osi_Assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
4374 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
/*
 * Ask the fileserver, with an FSYNC_VOL_FORCE_ERROR request, to put the
 * given volume of the current partition into an error state.
 *
 * The request is compiled in only when AFS_DEMAND_ATTACH_FS or
 * AFS_DEMAND_ATTACH_UTIL is defined.  A result other than SYNC_OK is logged
 * together with its SYNC_res2string() text; it is not retried.
 */
4377 AskError(struct SalvInfo *salvinfo, VolumeId volumeId)
4379 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4381 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4382 FSYNC_VOL_FORCE_ERROR, FSYNC_WHATEVER, NULL);
4383 if (code != SYNC_OK) {
4384 Log("AskError: failed to force volume %lu into error state; "
4385 "SYNC error code %ld (%s)\n", (long unsigned)volumeId,
4386 (long)code, SYNC_res2string(code));
4388 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
/*
 * Ask the fileserver, with an FSYNC_VOL_OFF request, to take the volume
 * offline for salvaging.  Up to three attempts are made.
 *
 * Per attempt:
 *  - SYNC_OK ends the loop (the exit statement is not visible here).
 *  - SYNC_DENIED is logged; the second message is followed by Abort().  The
 *    condition choosing between the two messages is not visible here.
 *  - SYNC_BAD_COMMAND is logged as a protocol mismatch, with a hint that
 *    depends on whether this is a demand-attach build, and aborts.
 *  - any other result is logged and the FSYNC client connection is closed
 *    with FSYNC_clientFinis() before the next attempt.
 * If the final code is still not SYNC_OK the salvage is aborted.
 */
4392 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
4397 memset(&res, 0, sizeof(res));
4399 for (i = 0; i < 3; i++) {
4400 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4401 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4403 if (code == SYNC_OK) {
4405 } else if (code == SYNC_DENIED) {
4407 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4409 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4410 Abort("Salvage aborted\n");
4411 } else if (code == SYNC_BAD_COMMAND) {
4412 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
4415 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4416 Log("AskOffline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4418 Log("AskOffline: fileserver is DAFS but we are not.\n");
4421 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4422 Log("AskOffline: fileserver is not DAFS but we are.\n");
4424 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4427 Abort("Salvage aborted\n");
4430 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4431 FSYNC_clientFinis();
4435 if (code != SYNC_OK) {
4436 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4437 Abort("Salvage aborted\n");
/*
 * Determine whether the fileserver is a demand-attach (DAFS) fileserver.
 *
 * An FSYNC_VOL_LISTVOLUMES request is sent up to three times, stopping at
 * the first zero result.  Each failure is logged with the reason from the
 * response header and the FSYNC client connection is closed.  If no attempt
 * succeeds, the function logs that it assumes a non-DAFS server.  Otherwise
 * the SYNC_FLAG_DAFS_EXTENSIONS bit of the response flags decides.
 *
 * NOTE(review): the function signature, the use of the static isDAFS below
 * (initially -1) and the return statements are not visible in this listing;
 * presumably isDAFS caches the answer between calls.
 */
4441 /* don't want to pass around state; remember it here */
4442 static int isDAFS = -1;
4447 afs_int32 code = 1, i;
4449 /* we don't care if we race. the answer shouldn't change */
4453 memset(&res, 0, sizeof(res));
4455 for (i = 0; code && i < 3; i++) {
4456 code = FSYNC_VolOp(0, NULL, FSYNC_VOL_LISTVOLUMES, FSYNC_SALVAGE, &res);
4458 Log("AskDAFS: FSYNC_VOL_LISTVOLUMES failed with code %ld reason "
4459 "%ld (%s); trying again...\n", (long)code, (long)res.hdr.reason,
4460 FSYNC_reason2string(res.hdr.reason));
4461 FSYNC_clientFinis();
4467 Log("AskDAFS: could not determine DAFS-ness, assuming not DAFS\n");
4471 if ((res.hdr.flags & SYNC_FLAG_DAFS_EXTENSIONS)) {
/*
 * Request that the volume be brought back online, via AskOnline(), after
 * first trying to read its disk header from the current partition.
 *
 * Per the comment below, a volume whose header cannot be read is taken not
 * to exist any more and is not brought online.  The branch that tests code
 * is not visible in this listing.
 */
4481 MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4483 struct VolumeDiskHeader diskHdr;
4485 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHdr);
4487 /* volume probably does not exist; no need to bring back online */
4490 AskOnline(salvinfo, volumeId);
/*
 * Ask the fileserver, with an FSYNC_VOL_ON request, to put the volume back
 * online.  Up to three attempts are made.
 *
 * Per attempt:
 *  - SYNC_OK ends the loop (the exit statement is not visible here).
 *  - SYNC_DENIED is logged with the volume id and partition name.
 *  - SYNC_BAD_COMMAND is logged as a protocol mismatch.
 *  - any other result is logged and the FSYNC client connection is closed
 *    with FSYNC_clientFinis() before the next attempt.
 * No Abort() call appears in the visible lines of this function.
 */
4494 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4498 for (i = 0; i < 3; i++) {
4499 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4500 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4502 if (code == SYNC_OK) {
4504 } else if (code == SYNC_DENIED) {
4505 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4506 } else if (code == SYNC_BAD_COMMAND) {
4507 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4509 Log("AskOnline: please make sure file server binaries are same version.\n");
4513 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
4514 FSYNC_clientFinis();
/*
 * Tell the fileserver, with an FSYNC_VOL_DONE request, that the volume has
 * been deleted.  Up to three attempts are made; the response buffer is
 * cleared before each one.
 *
 * Per attempt:
 *  - SYNC_OK ends the loop (the exit statement is not visible here).
 *  - SYNC_DENIED is logged with the volume id and partition name.
 *  - SYNC_BAD_COMMAND is logged as a protocol mismatch, with a hint that
 *    depends on whether this is a demand-attach build.
 *  - SYNC_FAILED with reason FSYNC_UNKNOWN_VOLID or FSYNC_WRONG_PART means
 *    the fileserver no longer has the volume, which is what was wanted.
 *  - any other result is logged and the FSYNC client connection is closed
 *    with FSYNC_clientFinis() before the next attempt.
 *
 * Every message is prefixed AskDelete so that log readers can tell these
 * failures apart from those of AskOnline (the prefix used to be AskOnline).
 */
4521 AskDelete(struct SalvInfo *salvinfo, VolumeId volumeId)
4526 for (i = 0; i < 3; i++) {
4527 memset(&res, 0, sizeof(res));
4528 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4529 FSYNC_VOL_DONE, FSYNC_SALVAGE, &res);
4531 if (code == SYNC_OK) {
4533 } else if (code == SYNC_DENIED) {
4534 Log("AskDelete: file server denied DONE request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4535 } else if (code == SYNC_BAD_COMMAND) {
4536 Log("AskDelete: fssync protocol mismatch (bad command word '%d')\n",
4539 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4540 Log("AskDelete: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4542 Log("AskDelete: fileserver is DAFS but we are not.\n");
4545 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4546 Log("AskDelete: fileserver is not DAFS but we are.\n");
4548 Log("AskDelete: please make sure fileserver, volserver and salvager binaries are same version.\n");
4552 } else if (code == SYNC_FAILED &&
4553 (res.hdr.reason == FSYNC_UNKNOWN_VOLID ||
4554 res.hdr.reason == FSYNC_WRONG_PART)) {
4555 /* volume is already effectively 'deleted' */
4559 Log("AskDelete: request for fileserver to delete volume failed; trying again...\n");
4560 FSYNC_clientFinis();
4567 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4569 /* Volume parameter is passed in case iopen is upgraded in future to
4570 * require a volume Id to be passed
4573 IHandle_t *srcH, *destH;
4574 FdHandle_t *srcFdP, *destFdP;
4576 afs_foff_t size = 0;
4578 IH_INIT(srcH, device, rwvolume, inode1);
4579 srcFdP = IH_OPEN(srcH);
4580 osi_Assert(srcFdP != NULL);
4581 IH_INIT(destH, device, rwvolume, inode2);
4582 destFdP = IH_OPEN(destH);
4583 while ((nBytes = FDH_PREAD(srcFdP, buf, sizeof(buf), size)) > 0) {
4584 osi_Assert(FDH_PWRITE(destFdP, buf, nBytes, size) == nBytes);
4587 osi_Assert(nBytes == 0);
4588 FDH_REALLYCLOSE(srcFdP);
4589 FDH_REALLYCLOSE(destFdP);
/*
 * PrintInodeList - log every entry of the salvager's inode file.
 *
 * The whole file behind salvinfo->inodeFd is read into a heap buffer and one
 * log line is written per struct ViceInodeInfo record (inode, link count,
 * byte count and the four u.param values).
 *
 * NOTE(review): only part of this routine is visible here; whether buf is
 * freed afterwards cannot be confirmed from these lines.
 */
4596 PrintInodeList(struct SalvInfo *salvinfo)
4598 struct ViceInodeInfo *ip;
4599 struct ViceInodeInfo *buf;
4602 afs_sfsize_t st_size;
/* size the buffer from the current size of the inode file */
4604 st_size = OS_SIZE(salvinfo->inodeFd);
4605 osi_Assert(st_size >= 0);
/* NOTE(review): when st_size is 0, malloc(0) may legally return NULL and the
 * assertion below would then fire on an empty file -- confirm */
4606 buf = (struct ViceInodeInfo *)malloc(st_size);
4607 osi_Assert(buf != NULL);
/* any trailing partial record is ignored by the integer division */
4608 nInodes = st_size / sizeof(struct ViceInodeInfo);
/* a short read is treated as fatal */
4609 osi_Assert(OS_READ(salvinfo->inodeFd, buf, st_size) == st_size);
4610 for (ip = buf; nInodes--; ip++) {
4611 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
4612 PrintInode(stmp, ip->inodeNumber), ip->linkCount,
4613 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
4614 ip->u.param[2], ip->u.param[3]);
4620 PrintInodeSummary(struct SalvInfo *salvinfo)
4623 struct InodeSummary *isp;
4625 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4626 isp = &salvinfo->inodeSummary[i];
4627 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
4632 PrintVolumeSummary(struct SalvInfo *salvinfo)
4635 struct VolumeSummary *vsp;
4637 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; vsp++, i++) {
4638 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
/*
 * Process-forking wrapper (only part of the routine is visible here).
 *
 * On NT the routine must never be reached, hence the unconditional
 * assertion.  In demand-attach builds, a salvageserver child (f == 0)
 * re-establishes its own fileserver sync connection.
 * NOTE(review): f is presumably the fork() result -- confirm against the
 * full routine.
 */
4648 osi_Assert(0); /* Fork is never executed in the NT code path */
4652 #ifdef AFS_DEMAND_ATTACH_FS
4653 if ((f == 0) && (programType == salvageServer)) {
4654 /* we are a salvageserver child */
4655 #ifdef FSSYNC_BUILD_CLIENT
4656 VChildProcReconnectFS_r();
/* the SALVSYNC client section follows; its body is not visible here */
4658 #ifdef SALVSYNC_BUILD_CLIENT
4662 #endif /* AFS_DEMAND_ATTACH_FS */
4663 #endif /* !AFS_NT40_ENV */
/*
 * Process-exit wrapper (only part of the routine is visible here).
 *
 * In demand-attach builds a salvageserver has SALVSYNC- and FSSYNC-client
 * specific sections here; their bodies are not visible in this view.
 */
4673 #ifdef AFS_DEMAND_ATTACH_FS
4674 if (programType == salvageServer) {
4675 #ifdef SALVSYNC_BUILD_CLIENT
4678 #ifdef FSSYNC_BUILD_CLIENT
4682 #endif /* AFS_DEMAND_ATTACH_FS */
/* a thread other than the main thread ends only itself, passing code as
 * its thread exit value */
4685 if (main_thread != pthread_self())
4686 pthread_exit((void *)code);
/*
 * Child-reaping wrapper (only part of the routine is visible here).
 *
 * Blocks until any child terminates; a wait() failure is fatal.  A core
 * dump is logged using the program name in prog.
 */
4699 pid = wait(&status);
4700 osi_Assert(pid != -1);
4701 if (WCOREDUMP(status))
4702 Log("\"%s\" core dumped!\n", prog);
/* death by signal or a non-zero exit status is handled specially; the
 * action taken is not visible in this view */
4703 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp - format a time as a local-time string for log output.
 *
 * clock      time to format
 * precision  non-zero for "MM/DD/YYYY HH:MM:SS", zero for "MM/DD/YYYY HH:MM"
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call, so the result must be consumed (or copied) before calling again and
 * the routine is not safe for concurrent use.  If the time cannot be
 * converted or does not fit the buffer, the placeholder "(unknown time)" is
 * returned instead of an indeterminate string.
 */
static char *
TimeStamp(time_t clock, int precision)
{
    static char timestamp[20];	/* 19 characters plus the terminating NUL */
    const char *fmt = precision ? "%m/%d/%Y %H:%M:%S" : "%m/%d/%Y %H:%M";
    struct tm *lt = localtime(&clock);

    /* localtime() returns NULL for unrepresentable times, and strftime()
     * returns 0 (leaving the buffer indeterminate) when the result does not
     * fit, e.g. for years beyond four digits */
    if (lt == NULL || strftime(timestamp, sizeof(timestamp), fmt, lt) == 0) {
        snprintf(timestamp, sizeof(timestamp), "%s", "(unknown time)");
    }
    return timestamp;
}
/*
 * CheckLogFile - rotate the salvager log and open a fresh one.
 *
 * The existing file at log_path is renamed to "<log_path>.old" and log_path
 * is then opened for appending as logFile.  Per the comment below, stdout is
 * used when the log still cannot be opened.  Only part of this routine is
 * visible here (the non-NT section at the top is not shown).
 */
4722 CheckLogFile(char * log_path)
4724 char oldSlvgLog[AFSDIR_PATH_MAX];
4726 #ifndef AFS_NT40_ENV
/* NOTE(review): strcpy/strcat are unbounded; a log_path of
 * AFSDIR_PATH_MAX - 4 characters or more overflows oldSlvgLog.  A
 * length-checked snprintf() would be safer. */
4733 strcpy(oldSlvgLog, log_path);
4734 strcat(oldSlvgLog, ".old");
/* the rename result is not checked */
4736 renamefile(log_path, oldSlvgLog);
4737 logFile = afs_fopen(log_path, "a");
4739 if (!logFile) { /* still nothing, use stdout */
4743 #ifndef AFS_NAMEI_ENV
4744 AFS_DEBUG_IOPS_LOG(logFile);
4749 #ifndef AFS_NT40_ENV
/*
 * TimeStampLogFile - give the log file a second, timestamped name.
 *
 * Builds "<log_path>.YYYY-MM-DD.HH:MM:SS" from the local time in `now` and
 * hard-links log_path to it.  Not built on NT.
 * NOTE(review): `now` is presumably the current time; its assignment is not
 * visible here.
 */
4751 TimeStampLogFile(char * log_path)
4753 char stampSlvgLog[AFSDIR_PATH_MAX];
/* NOTE(review): localtime() may return NULL; lt is dereferenced unchecked */
4758 lt = localtime(&now);
/* snprintf bounds the name; an over-long log_path is silently truncated */
4759 snprintf(stampSlvgLog, sizeof stampSlvgLog,
4760 "%s.%04d-%02d-%02d.%02d:%02d:%02d", log_path,
4761 lt->tm_year + 1900, lt->tm_mon + 1, lt->tm_mday, lt->tm_hour,
4762 lt->tm_min, lt->tm_sec);
4764 /* try to link the logfile to a timestamped filename */
4765 /* if it fails, oh well, nothing we can do */
4766 link(log_path, stampSlvgLog);
/*
 * Log-display routine (only part of it is visible here).
 *
 * Re-opens the salvager log file for reading and reads it line by line.
 * On non-NT builds there is a syslog case in which the log cannot be shown
 * and a notice is printed instead.
 */
4775 #ifndef AFS_NT40_ENV
4777 printf("Can't show log since using syslog.\n");
/* logFile is replaced with a read-only handle on the log file */
4788 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
4791 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
/* lines longer than the buffer are simply read in several pieces */
4794 while (fgets(line, sizeof(line), logFile))
/*
 * Log - printf-style logging for the salvager.
 *
 * The message is formatted into a bounded local buffer (over-long messages
 * are truncated by vsnprintf).  On non-NT builds it may be sent to syslog at
 * LOG_INFO; otherwise it is written to logFile prefixed with a
 * seconds-precision timestamp.  The conditions selecting between the two
 * destinations are not visible in this view.
 */
4801 Log(const char *format, ...)
4807 va_start(args, format);
4808 vsnprintf(tmp, sizeof tmp, format, args);
4810 #ifndef AFS_NT40_ENV
/* "%s" keeps the already-formatted text from being re-interpreted as a
 * format string */
4812 syslog(LOG_INFO, "%s", tmp);
4816 gettimeofday(&now, NULL);
4817 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort - printf-style reporting of a fatal condition.
 *
 * The message is formatted into a bounded local buffer and sent either to
 * syslog at LOG_INFO (non-NT builds) or to logFile; unlike Log(), no
 * timestamp is prepended.  What happens after the message is written is not
 * visible in this view.
 */
4823 Abort(const char *format, ...)
4828 va_start(args, format);
4829 vsnprintf(tmp, sizeof tmp, format, args);
4831 #ifndef AFS_NT40_ENV
4833 syslog(LOG_INFO, "%s", tmp);
4837 fprintf(logFile, "%s", tmp);
/*
 * ToString - return a freshly allocated copy of the NUL-terminated string s.
 *
 * Allocation failure trips an assertion, so the result is never NULL.  The
 * caller owns the returned memory.
 */
char *
ToString(const char *s)
{
    size_t nbytes = strlen(s) + 1;	/* include the terminating NUL */
    char *copy = (char *)malloc(nbytes);

    osi_Assert(copy != NULL);
    memcpy(copy, s, nbytes);
    return copy;
}
4858 /* Remove the FORCESALVAGE file */
4860 RemoveTheForce(char *path)
4863 struct afs_stat_st force; /* so we can use afs_stat to find it */
4864 strcpy(target,path);
4865 strcat(target,"/FORCESALVAGE");
4866 if (!Testing && ForceSalvage) {
4867 if (afs_stat(target,&force) == 0) unlink(target);
4871 #ifndef AFS_AIX32_ENV
4873 * UseTheForceLuke - see if we can use the force
4876 UseTheForceLuke(char *path)
4878 struct afs_stat_st force;
4880 strcpy(target,path);
4881 strcat(target,"/FORCESALVAGE");
4883 return (afs_stat(target, &force) == 0);
4887 * UseTheForceLuke - see if we can use the force
4890 * The VRMIX fsck will not muck with the filesystem it is supposedly
4891 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4892 * muck directly with the root inode, which is within the normal
4894 * ListViceInodes() has a side effect of setting ForceSalvage if
4895 * it detects a need, based on root inode examination.
4898 UseTheForceLuke(char *path)
4901 return 0; /* sorry OB1 */
4906 /* NT support routines */
4908 static char execpathname[MAX_PATH];
4910 nt_SalvagePartition(char *partName, int jobn)
4915 if (!*execpathname) {
4916 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
4917 if (!n || n == 1023)
4920 job.cj_magic = SALVAGER_MAGIC;
4921 job.cj_number = jobn;
4922 (void)strcpy(job.cj_part, partName);
4923 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage - child-side setup from a passed-in job block.
 *
 * datap/len describe the data block received by this process; it is treated
 * as a childJob_t.  A per-job log named "<salvage log path>.<number>" is
 * opened for writing as logFile.  Only part of this routine is visible here;
 * the actions taken when validation fails are not shown.
 */
4928 nt_SetupPartitionSalvage(void *datap, int len)
4930 childJob_t *jobp = (childJob_t *) datap;
4931 char logname[AFSDIR_PATH_MAX];
/* reject a block of the wrong size or without the expected magic */
4933 if (len != sizeof(childJob_t))
4935 if (jobp->cj_magic != SALVAGER_MAGIC)
/* NOTE(review): sprintf is unbounded; snprintf(logname, sizeof logname, ...)
 * would guard against an over-long log path */
4940 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
4942 logFile = afs_fopen(logname, "w");
4950 #endif /* AFS_NT40_ENV */