2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
89 #include <afs/procmgmt.h>
92 #ifdef HAVE_SYS_FILE_H
93 # include <sys/file.h>
97 #include <WINNT/afsevent.h>
100 #define WCOREDUMP(x) ((x) & 0200)
103 #include <afs/afsint.h>
104 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
105 #if defined(AFS_VFSINCL_ENV)
106 #include <sys/vnode.h>
108 #include <sys/fs/ufs_inode.h>
110 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
111 #include <ufs/ufs/dinode.h>
112 #include <ufs/ffs/fs.h>
114 #include <ufs/inode.h>
117 #else /* AFS_VFSINCL_ENV */
119 #include <ufs/inode.h>
120 #else /* AFS_OSF_ENV */
121 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_DARWIN_ENV)
122 #include <sys/inode.h>
125 #endif /* AFS_VFSINCL_ENV */
126 #endif /* AFS_SGI_ENV */
129 #include <sys/lockf.h>
132 #include <checklist.h>
134 #if defined(AFS_SGI_ENV)
137 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
139 #include <sys/mnttab.h>
140 #include <sys/mntent.h>
145 #endif /* AFS_SGI_ENV */
146 #endif /* AFS_HPUX_ENV */
150 #include <afs/osi_inode.h>
154 #include <afs/afsutil.h>
155 #include <afs/fileutil.h>
160 #include <afs/afssyscalls.h>
164 #include "partition.h"
165 #include "daemon_com.h"
166 #include "daemon_com_inline.h"
168 #include "fssync_inline.h"
169 #include "volume_inline.h"
170 #include "salvsync.h"
171 #include "viceinode.h"
173 #include "volinodes.h" /* header magic number, etc. stuff */
174 #include "vol-salvage.h"
176 #include "vol_internal.h"
178 #include <afs/prs_fs.h>
180 #ifdef FSSYNC_BUILD_CLIENT
181 #include "vg_cache.h"
189 extern void *calloc();
191 static char *TimeStamp(time_t clock, int precision);
194 int debug; /* -d flag */
195 extern int Testing; /* -n flag */
196 int ListInodeOption; /* -i flag */
197 int ShowRootFiles; /* -r flag */
198 int RebuildDirs; /* -sal flag */
199 int Parallel = 4; /* -para X flag */
200 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
201 int forceR = 0; /* -b flag */
202 int ShowLog = 0; /* -showlog flag */
203 int ShowSuid = 0; /* -showsuid flag */
204 int ShowMounts = 0; /* -showmounts flag */
205 int orphans = ORPH_IGNORE; /* -orphans option */
210 int useSyslog = 0; /* -syslog flag */
211 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
220 #define MAXPARALLEL 32
222 int OKToZap; /* -o flag */
223 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
224 * in the volume header */
226 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
228 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
231 * information that is 'global' to a particular salvage job.
234 Device fileSysDevice; /**< The device number of the current partition
236 char fileSysPath[9]; /**< The path of the mounted partition currently
237 * being salvaged, i.e. the directory containing
238 * the volume headers */
239 char *fileSysPathName; /**< NT needs this to make the name pretty in the log. */
240 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
241 int VGLinkH_cnt; /**< # of references to link handle. */
242 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
245 char *fileSysDeviceName; /**< The block device where the file system being
246 * salvaged was mounted */
247 char *filesysfulldev; /**< Assigned from the wpath buffer in SalvageFileSys1 */
249 int VolumeChanged; /**< Set by any routine which would change the
250 * volume in a way which would require callbacks
251 * to be broken if the volume was put back on
252 * line by an active file server */
254 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
255 * header dealt with */
257 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
258 FD_t inodeFd; /**< File descriptor for inode file */
260 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
261 int nVolumes; /**< Number of volumes (read-write and read-only)
262 * in volume summary */
263 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
266 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
267 * vnodes in the volume that
268 * we are currently looking
270 int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
271 * to contact the fileserver over FSYNC */
278 /* Forward declarations */
279 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
280 static int AskVolumeSummary(struct SalvInfo *salvinfo,
281 VolumeId singleVolumeNumber);
282 static void MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId);
283 static void AskError(struct SalvInfo *salvinfo, VolumeId volumeId);
285 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
286 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
287 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
289 /* Uniquifier stored in the Inode */
294 return (u & 0x3fffff);
296 #if defined(AFS_SGI_EXMAG)
297 return (u & SGI_UNIQMASK);
300 #endif /* AFS_SGI_EXMAG */
307 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
309 return 0; /* otherwise may be transient, e.g. EMFILE */
314 char *save_args[MAX_ARGS];
316 extern pthread_t main_thread;
317 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
/* _ObtainSalvageLock: acquire the salvage lock in the mode given by locktype.
 * Visible steps: initialise a VLockFile on AFSDIR_SERVER_SLVGLOCK_FILEPATH and
 * call VLockFileLock() on it. Two failure messages are prepared: one for
 * another salvager apparently running, one reporting a numeric error code.
 * The conditions selecting between them are on lines not visible here. */
321 * Get the salvage lock if not already held. Hold until process exits.
323 * @param[in] locktype READ_LOCK or WRITE_LOCK
326 _ObtainSalvageLock(int locktype)
328 struct VLockFile salvageLock;
333 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
335 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
338 "salvager: There appears to be another salvager running! "
343 "salvager: Error %d trying to acquire salvage lock! "
/* ObtainSalvageLock: take the salvage lock in write mode by forwarding
 * WRITE_LOCK to _ObtainSalvageLock(). */
349 ObtainSalvageLock(void)
351 _ObtainSalvageLock(WRITE_LOCK);
/* ObtainSharedSalvageLock: take the salvage lock in read mode by forwarding
 * READ_LOCK to _ObtainSalvageLock(). */
354 ObtainSharedSalvageLock(void)
356 _ObtainSalvageLock(READ_LOCK);
/* IsPartitionMounted: scan the MOUNTED table (setmntent/getmntent) for an
 * entry whose mnt_dir equals part.
 * NOTE(review): the final return evaluates to 1 in both arms of its
 * conditional, so the visible result never depends on whether an entry
 * matched -- confirm whether the second arm was meant to be 0. */
360 #ifdef AFS_SGI_XFS_IOPS_ENV
361 /* Check if the given partition is mounted. For XFS, the root inode is not a
362 * constant. So we check the hard way.
365 IsPartitionMounted(char *part)
368 struct mntent *mntent;
370 osi_Assert(mntfp = setmntent(MOUNTED, "r"));
371 while (mntent = getmntent(mntfp)) {
372 if (!strcmp(part, mntent->mnt_dir))
377 return mntent ? 1 : 1;
380 /* Check if the given inode is the root of the filesystem: the visible
 * return compares status->st_ino with ROOTINODE. */
381 #ifndef AFS_SGI_XFS_IOPS_ENV
383 IsRootInode(struct afs_stat_st *status)
386 * The root inode is not a fixed value in XFS partitions. So we need to
387 * see if the partition is in the list of mounted partitions. This only
388 * affects the SalvageFileSys path, so we check there.
390 return (status->st_ino == ROOTINODE);
/* CheckIfBigFilesFS: inspect the superblock of the device backing mountPoint.
 * The device path is devName itself when it already starts with the /dev/
 * prefix, otherwise that prefix is prepended. A failed ReadSuper() and a
 * positive IsBigFilesFileSystem() each log a not-salvaging message.
 * NOTE(review): name is filled with sprintf()/strcpy() without a length
 * check; its declared size is not visible here -- confirm it can hold
 * devName plus the prefix. */
395 #ifndef AFS_NAMEI_ENV
396 /* We don't want to salvage big files filesystems, since we can't put volumes on
400 CheckIfBigFilesFS(char *mountPoint, char *devName)
402 struct superblock fs;
405 if (strncmp(devName, "/dev/", 5)) {
406 (void)sprintf(name, "/dev/%s", devName);
408 (void)strcpy(name, devName);
411 if (ReadSuper(&fs, name) < 0) {
412 Log("Unable to read superblock. Not salvaging partition %s.\n",
416 if (IsBigFilesFileSystem(&fs)) {
417 Log("Partition %s is a big files filesystem, not salvaging.\n",
/* SameDisk: report whether two partitions are on the same disk.
 * The function form resolves each devName with QueryDosDevice(), logs a
 * WARNING when a result does not begin with HDSTR, and finally compares the
 * two results case-insensitively with _strnicmp(). The macro form on the
 * last line instead compares device numbers divided by PartsPerDisk; the
 * preprocessor lines choosing between the two forms are not visible here. */
427 #define HDSTR "\\Device\\Harddisk"
428 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
430 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
436 static int dowarn = 1;
438 if (!QueryDosDevice(p1->devName, res1, RES_LEN - 1))
440 if (strncmp(res1, HDSTR, HDLEN)) {
443 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
444 res1, HDSTR, p1->devName);
447 if (!QueryDosDevice(p2->devName, res2, RES_LEN - 1))
449 if (strncmp(res2, HDSTR, HDLEN)) {
452 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
453 res2, HDSTR, p2->devName);
457 return (0 == _strnicmp(res1, res2, RES_LEN - 1));
460 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
/* SalvageFileSysParallel: queue the partition partP for salvage, running
 * jobs for different disks concurrently (bounded by Parallel) and chaining
 * partitions of one disk through job->nextjob. A call with partP == 0 waits
 * for all outstanding jobs and then collects the per-job log files
 * (AFSDIR_SERVER_SLVGLOG_FILEPATH.<jobnumb>) unless useSyslog is set. */
463 /* This assumes that two partitions with the same device number divided by
464 * PartsPerDisk are on the same disk.
467 SalvageFileSysParallel(struct DiskPartition64 *partP)
470 struct DiskPartition64 *partP;
471 int pid; /* Pid for this job */
472 int jobnumb; /* Log file job number */
473 struct job *nextjob; /* Next partition on disk to salvage */
475 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
476 struct job *thisjob = 0;
477 static int numjobs = 0;
478 static int jobcount = 0;
484 char logFileName[256];
488 /* We have a partition to salvage. Copy it into thisjob */
489 thisjob = (struct job *)malloc(sizeof(struct job));
491 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
494 memset(thisjob, 0, sizeof(struct job));
495 thisjob->partP = partP;
496 thisjob->jobnumb = jobcount;
498 } else if (jobcount == 0) {
499 /* We are asking to wait for all jobs (partp == 0), yet we never
502 Log("No file system partitions named %s* found; not salvaged\n",
503 VICE_PARTITION_PREFIX);
507 if (debug || Parallel == 1) {
509 SalvageFileSys(thisjob->partP, 0);
516 /* Check to see if thisjob is for a disk that we are already
517 * salvaging. If it is, link it in as the next job to do. The
518 * jobs array has 1 entry per disk being salvaged. numjobs is
519 * the total number of disks currently being salvaged. In
520 * order to keep the jobs array compact, when a disk is
521 * completed, the highest element in the jobs array is moved
522 * down to the now open slot.
524 for (j = 0; j < numjobs; j++) {
525 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
526 /* On same disk, add it to this list and return */
527 thisjob->nextjob = jobs[j]->nextjob;
528 jobs[j]->nextjob = thisjob;
535 /* Loop until we start thisjob or until all existing jobs are finished */
536 while (thisjob || (!partP && (numjobs > 0))) {
537 startjob = -1; /* No new job to start */
539 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
540 /* Either the max jobs are running or we have to wait for all
541 * the jobs to finish. In either case, we wait for at least one
542 * job to finish. When it's done, clean up after it.
544 pid = wait(&wstatus);
545 osi_Assert(pid != -1);
546 for (j = 0; j < numjobs; j++) { /* Find which job it is */
547 if (pid == jobs[j]->pid)
550 osi_Assert(j < numjobs);
551 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
552 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
555 numjobs--; /* job no longer running */
556 oldjob = jobs[j]; /* remember */
557 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
558 free(oldjob); /* free the old job */
560 /* If there is another partition on the disk to salvage, then
561 * say we will start it (startjob). If not, then put thisjob there
562 * and say we will start it.
564 if (jobs[j]) { /* Another partition to salvage */
565 startjob = j; /* Will start it */
566 } else { /* There is not another partition to salvage */
568 jobs[j] = thisjob; /* Add thisjob */
570 startjob = j; /* Will start it */
572 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
573 startjob = -1; /* Don't start it - already running */
577 /* We don't have to wait for a job to complete */
579 jobs[numjobs] = thisjob; /* Add this job */
581 startjob = numjobs; /* Will start it */
585 /* Start up a new salvage job on a partition in job slot "startjob" */
586 if (startjob != -1) {
588 Log("Starting salvage of file system partition %s\n",
589 jobs[startjob]->partP->name);
591 /* For NT, we not only fork, but re-exec the salvager. Pass in the
592 * commands and pass the child job number via the data path.
595 nt_SalvagePartition(jobs[startjob]->partP->name,
596 jobs[startjob]->jobnumb);
597 jobs[startjob]->pid = pid;
602 jobs[startjob]->pid = pid;
608 for (fd = 0; fd < 16; fd++)
615 openlog("salvager", LOG_PID, useSyslogFacility);
619 snprintf(logFileName, sizeof logFileName, "%s.%d",
620 AFSDIR_SERVER_SLVGLOG_FILEPATH,
621 jobs[startjob]->jobnumb);
622 logFile = afs_fopen(logFileName, "w");
627 SalvageFileSys1(jobs[startjob]->partP, 0);
632 } /* while ( thisjob || (!partP && numjobs > 0) ) */
634 /* If waited for all jobs to complete, now collect log files and return */
636 if (!useSyslog) /* if syslogging - no need to collect */
639 for (i = 0; i < jobcount; i++) {
640 snprintf(logFileName, sizeof logFileName, "%s.%d",
641 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
642 if ((passLog = afs_fopen(logFileName, "r"))) {
643 while (fgets(buf, sizeof(buf), passLog)) {
648 (void)unlink(logFileName);
/* SalvageFileSys: salvage one partition (or a single volume on it) by calling
 * SalvageFileSys1(). That call runs in the current process when canfork is
 * false or debug is set, otherwise in the process where Fork() returns 0.
 * Wait() is also called here; the branch structure around it is not fully
 * visible in this view. */
657 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
659 if (!canfork || debug || Fork() == 0) {
660 SalvageFileSys1(partP, singleVolumeNumber);
661 if (canfork && !debug) {
666 Wait("SalvageFileSys");
/* get_DevName: split a device path at its last OS_DIRSEPC. pbuffer is
 * overwritten in place with the component after the separator; wpath is
 * presumably filled with the leading directory part (those lines are not
 * visible here -- confirm).
 * NOTE(review): pbuffer is copied into the 128-byte local pbuf with strcpy()
 * and no length check. */
670 get_DevName(char *pbuffer, char *wpath)
672 char pbuf[128], *ptr;
673 strcpy(pbuf, pbuffer);
674 ptr = (char *)strrchr(pbuf, OS_DIRSEPC);
680 ptr = (char *)strrchr(pbuffer, OS_DIRSEPC);
682 strcpy(pbuffer, ptr + 1);
/* SalvageFileSys1: salvage every volume group on partP, or only the group
 * containing singleVolumeNumber when that is nonzero.
 * Visible phases: fill in salvinfo for the partition; for a single volume,
 * connect to the fileserver (unless running as the salvageserver),
 * AskOffline the volume and lock it under DAFS, otherwise lock the whole
 * partition; remove leftover salvage.inodes.* and salvage.temp.* files;
 * build the inode summary (GetInodeSummary) and the volume summary
 * (GetVolumeSummary); match volume headers against inode groups, deleting
 * headers without data; salvage each volume group; finally, for a
 * single-volume salvage outside Testing mode, bring the volumes back online.
 * NOTE(review): partP->devName is copied into the 100-byte static tmpDevName
 * with strcpy(), and the NT path copies the _tempnam() result with
 * strncpy(..., 255), which by itself does not NUL-terminate on truncation. */
689 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
692 char inodeListPath[256];
693 FD_t inodeFile = INVALID_FD;
694 static char tmpDevName[100];
695 static char wpath[100];
696 struct VolumeSummary *vsp, *esp;
700 struct SalvInfo l_salvinfo;
701 struct SalvInfo *salvinfo = &l_salvinfo;
704 memset(salvinfo, 0, sizeof(*salvinfo));
707 if (inodeFile != INVALID_FD) {
709 inodeFile = INVALID_FD;
711 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
712 Abort("Raced too many times with fileserver restarts while trying to "
713 "checkout/lock volumes; Aborted\n");
715 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
717 /* unlock all previous volume locks, since we're about to lock them
719 VLockFileReinit(&partP->volLockFile);
721 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
723 salvinfo->fileSysPartition = partP;
724 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
725 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
728 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
729 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
730 name = partP->devName;
732 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
733 strcpy(tmpDevName, partP->devName);
734 name = get_DevName(tmpDevName, wpath);
735 salvinfo->fileSysDeviceName = name;
736 salvinfo->filesysfulldev = wpath;
739 if (singleVolumeNumber) {
740 #if !(defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL))
741 /* only non-DAFS locks the partition when salvaging a single volume;
742 * DAFS will lock the individual volumes in the VG */
743 VLockPartition(partP->name);
744 #endif /* !(AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL) */
748 /* salvageserver already setup fssync conn for us */
749 if ((programType != salvageServer) && !VConnectFS()) {
750 Abort("Couldn't connect to file server\n");
753 salvinfo->useFSYNC = 1;
754 AskOffline(salvinfo, singleVolumeNumber);
755 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
756 if (LockVolume(salvinfo, singleVolumeNumber)) {
759 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
762 salvinfo->useFSYNC = 0;
763 VLockPartition(partP->name);
767 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
770 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
771 partP->name, name, (Testing ? "(READONLY mode)" : ""));
773 Log("***Forced salvage of all volumes on this partition***\n");
778 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
785 osi_Assert((dirp = opendir(salvinfo->fileSysPath)) != NULL);
786 while ((dp = readdir(dirp))) {
787 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
788 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
790 Log("Removing old salvager temp files %s\n", dp->d_name);
791 strcpy(npath, salvinfo->fileSysPath);
792 strcat(npath, OS_DIRSEP);
793 strcat(npath, dp->d_name);
799 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
801 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
802 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
804 snprintf(inodeListPath, 255, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
808 inodeFile = OS_OPEN(inodeListPath, O_RDWR|O_TRUNC|O_CREAT, 0666);
809 if (inodeFile == INVALID_FD) {
810 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
813 /* Using nt_unlink here since we're really using the delete on close
814 * semantics of unlink. In most places in the salvager, we really do
815 * mean to unlink the file at that point. Those places have been
816 * modified to actually do that so that the NT crt can be used there.
818 * jaltman - On NT delete on close cannot be applied to a file while the
819 * process has an open file handle that does not have DELETE file
820 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
821 * delete privileges. As a result the nt_unlink() call will always
824 code = nt_unlink(inodeListPath);
826 code = unlink(inodeListPath);
829 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
832 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
836 salvinfo->inodeFd = inodeFile;
837 if (salvinfo->inodeFd == INVALID_FD)
838 Abort("Temporary file %s is missing...\n", inodeListPath);
839 OS_SEEK(salvinfo->inodeFd, 0L, SEEK_SET);
840 if (ListInodeOption) {
841 PrintInodeList(salvinfo);
842 if (singleVolumeNumber) {
843 /* We've checked out the volume from the fileserver, and we need
844 * to give it back. We don't know if the volume exists or not,
845 * so we don't know whether to AskOnline or not. Try to determine
846 * if the volume exists by trying to read the volume header, and
847 * AskOnline if it is readable. */
848 MaybeAskOnline(salvinfo, singleVolumeNumber);
852 /* enumerate volumes in the partition.
853 * figure out sets of read-only + rw volumes.
854 * salvage each set, read-only volumes first, then read-write.
855 * Fix up inodes on last volume in set (whether it is read-write
858 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
862 if (singleVolumeNumber) {
863 /* If we delete a volume during the salvage, we indicate as such by
864 * setting the volsummary->deleted field. We need to know if we
865 * deleted a volume or not in order to know which volumes to bring
866 * back online after the salvage. If we fork, we will lose this
867 * information, since volsummary->deleted will not get set in the
868 * parent. So, don't fork. */
872 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
873 i < salvinfo->nVolumesInInodeFile; i = j) {
874 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
876 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
878 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
879 struct VolumeSummary *tsp;
880 /* Scan volume list (from partition root directory) looking for the
881 * current rw volume number in the volume list from the inode scan.
882 * If there is one here that is not in the inode volume list,
884 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
886 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
888 /* Now match up the volume summary info from the root directory with the
889 * entry in the volume list obtained from scanning inodes */
890 salvinfo->inodeSummary[j].volSummary = NULL;
891 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
892 if (tsp->header.id == vid) {
893 salvinfo->inodeSummary[j].volSummary = tsp;
899 /* Salvage the group of volumes (several read-only + 1 read/write)
900 * starting with the current read-only volume we're looking at.
903 nt_SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
905 DoSalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
906 #endif /* AFS_NT40_ENV */
910 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
911 for (; vsp < esp; vsp++) {
913 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
916 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
917 RemoveTheForce(salvinfo->fileSysPath);
919 if (!Testing && singleVolumeNumber) {
921 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
922 /* unlock vol headers so the fs can attach them when we AskOnline */
923 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
924 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
926 /* Step through the volumeSummary list and set all volumes on-line.
927 * Most volumes were taken off-line in GetVolumeSummary.
928 * If a volume was deleted, don't tell the fileserver anything, since
929 * we already told the fileserver the volume was deleted back when we
930 * destroyed the volume header.
931 * Also, make sure we bring the singleVolumeNumber back online first.
934 for (j = 0; j < salvinfo->nVolumes; j++) {
935 if (salvinfo->volumeSummaryp[j].header.id == singleVolumeNumber) {
937 if (!salvinfo->volumeSummaryp[j].deleted) {
938 AskOnline(salvinfo, singleVolumeNumber);
944 /* If singleVolumeNumber is not in our volumeSummary, it means that
945 * at least one other volume in the VG is on the partition, but the
946 * RW volume is not. We've already AskOffline'd it by now, though,
947 * so make sure we don't still have the volume checked out. */
948 AskDelete(salvinfo, singleVolumeNumber);
951 for (j = 0; j < salvinfo->nVolumes; j++) {
952 if (salvinfo->volumeSummaryp[j].header.id != singleVolumeNumber) {
953 if (!salvinfo->volumeSummaryp[j].deleted) {
954 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
960 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
961 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
964 OS_CLOSE(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
/* DeleteExtraVolumeHeaderFile: handle a volume header that has no associated
 * data. Logs the header path, destroys the on-disk header through
 * VDestroyVolumeDiskHeader(), unlinks the path (ENOENT is tolerated), and
 * notifies the fileserver with AskDelete() when salvinfo->useFSYNC is set.
 * Under Testing the log message says the file would have been deleted; the
 * guards around the destructive steps are on lines not visible here.
 * NOTE(review): path is built with sprintf(); its declared size is not
 * visible here -- confirm it can hold fileSysPath plus the header name. */
968 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
971 char filename[VMAXPATHLEN];
977 VolumeExternalName_r(vsp->header.id, filename, sizeof(filename));
978 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, filename);
981 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
984 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
986 Log("Error %ld destroying volume disk header for volume %lu\n",
987 afs_printable_int32_ld(code),
988 afs_printable_uint32_lu(vsp->header.id));
991 /* make sure we actually delete the header file; ENOENT
992 * is fine, since VDestroyVolumeDiskHeader probably already
994 if (unlink(path) && errno != ENOENT) {
995 Log("Unable to unlink %s (errno = %d)\n", path, errno);
997 if (salvinfo->useFSYNC) {
998 AskDelete(salvinfo, vsp->header.id);
/* CompareInodes: qsort() comparator over struct ViceInodeInfo records.
 * Records are grouped by read-write volume id (special inodes use
 * u.special.parentId, ordinary ones u.vnode.volumeId), then by volume id,
 * vnode number, uniquifier and inode data version. The uniquifier and data
 * version tests are reversed so the most desirable duplicate sorts first,
 * with wrap-around windows under AFS_3DISPARES and AFS_SGI_EXMAG.
 * NOTE(review): for two special inodes of the same volume the visible return
 * is -1 or 1 only, never 0, even when both u.special.type values are equal;
 * confirm that equal types cannot occur or that the sort tolerates it. */
1005 CompareInodes(const void *_p1, const void *_p2)
1007 const struct ViceInodeInfo *p1 = _p1;
1008 const struct ViceInodeInfo *p2 = _p2;
1009 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1010 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
1011 VolumeId p1rwid, p2rwid;
1013 (p1->u.vnode.vnodeNumber ==
1014 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
1016 (p2->u.vnode.vnodeNumber ==
1017 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1018 if (p1rwid < p2rwid)
1020 if (p1rwid > p2rwid)
1022 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1023 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
1024 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1025 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1026 if (p1->u.vnode.volumeId == p1rwid)
1028 if (p2->u.vnode.volumeId == p2rwid)
1030 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1032 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1033 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1034 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1036 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1038 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1040 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1042 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1044 /* The following tests are reversed, so that the most desirable
1045 * of several similar inodes comes first */
1046 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1047 #ifdef AFS_3DISPARES
1048 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1049 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1052 #ifdef AFS_SGI_EXMAG
1053 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1054 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1059 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1060 #ifdef AFS_3DISPARES
1061 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1062 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1065 #ifdef AFS_SGI_EXMAG
1066 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1067 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1072 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1073 #ifdef AFS_3DISPARES
1074 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1075 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1078 #ifdef AFS_SGI_EXMAG
1079 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1080 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1085 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1086 #ifdef AFS_3DISPARES
1087 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1088 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1091 #ifdef AFS_SGI_EXMAG
1092 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1093 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/* CountVolumeInodes: walk at most maxInodes consecutive entries starting at
 * ip that share the volume id of the first entry, and record the totals in
 * summary (volumeId, RWvolumeId, nInodes, nSpecialInodes, maxUniquifier).
 * RWvolumeId defaults to the volume id and is replaced by u.special.parentId
 * when a special inode is seen; maxUniquifier tracks the largest
 * vnodeUniquifier compared in the loop. */
1102 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1103 struct InodeSummary *summary)
1105 VolumeId volume = ip->u.vnode.volumeId;
1106 VolumeId rwvolume = volume;
1111 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1113 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1115 rwvolume = ip->u.special.parentId;
1116 /* This isn't quite right, as there could (in error) be different
1117 * parent inodes in different special vnodes */
1119 if (maxunique < ip->u.vnode.vnodeUniquifier)
1120 maxunique = ip->u.vnode.vnodeUniquifier;
1124 summary->volumeId = volume;
1125 summary->RWvolumeId = rwvolume;
1126 summary->nInodes = n;
1127 summary->nSpecialInodes = nSpecial;
1128 summary->maxUniquifier = maxunique;
/* OnlyOneVolume: inode filter handed to ListViceInodes() when salvaging a
 * single volume. Returns nonzero when the inode belongs to
 * singleVolumeNumber: special inodes are matched on u.special.parentId, all
 * others on u.vnode.volumeId. rock is not used in the visible lines. */
1132 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1134 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1135 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1136 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1141 * Collect list of inodes in file named by path. If a truly fatal error,
1142 * unlink the file and abort. For lesser errors, return -1. The file will
1143 * be unlinked by the caller.
1146 GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolumeNumber)
1150 struct ViceInodeInfo *ip, *ip_save;
1151 struct InodeSummary summary;
1152 char summaryFileName[50];
1153 FD_t summaryFile = INVALID_FD;
1155 char *dev = salvinfo->fileSysPath;
1156 char *wpath = salvinfo->fileSysPath;
1158 char *dev = salvinfo->fileSysDeviceName;
1159 char *wpath = salvinfo->filesysfulldev;
1161 char *part = salvinfo->fileSysPath;
1166 afs_sfsize_t st_size;
1168 /* This file used to come from vfsck; cobble it up ourselves now... */
1170 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1171 singleVolumeNumber ? OnlyOneVolume : 0,
1172 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1174 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1178 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1180 if (forceSal && !ForceSalvage) {
1181 Log("***Forced salvage of all volumes on this partition***\n");
1184 OS_SEEK(inodeFile, 0L, SEEK_SET);
1185 salvinfo->inodeFd = inodeFile;
1186 if (salvinfo->inodeFd == INVALID_FD ||
1187 (st_size = OS_SIZE(salvinfo->inodeFd)) == -1) {
1188 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1190 tdir = (tmpdir ? tmpdir : part);
1192 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1193 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp."));
1195 snprintf(summaryFileName, sizeof summaryFileName,
1196 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1198 summaryFile = OS_OPEN(summaryFileName, O_RDWR|O_APPEND|O_CREAT, 0666);
1199 if (summaryFile == INVALID_FD) {
1200 Abort("Unable to create inode summary file\n");
1204 /* Using nt_unlink here since we're really using the delete on close
1205 * semantics of unlink. In most places in the salvager, we really do
1206 * mean to unlink the file at that point. Those places have been
1207 * modified to actually do that so that the NT crt can be used there.
1209 * jaltman - As commented elsewhere, this cannot work because fopen()
1210 * does not open files with DELETE and FILE_SHARE_DELETE.
1212 code = nt_unlink(summaryFileName);
1214 code = unlink(summaryFileName);
1217 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1220 if (!canfork || debug || Fork() == 0) {
1221 int nInodes = st_size / sizeof(struct ViceInodeInfo);
1223 OS_CLOSE(summaryFile);
1224 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1225 RemoveTheForce(salvinfo->fileSysPath);
1227 struct VolumeSummary *vsp;
1231 GetVolumeSummary(salvinfo, singleVolumeNumber);
1233 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1235 if (vsp->header.id == singleVolumeNumber) {
1238 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1244 MaybeAskOnline(salvinfo, singleVolumeNumber);
1246 /* make sure we get rid of stray .vol headers, even if
1247 * they're not in our volume summary (might happen if
1248 * e.g. something else created them and they're not in the
1249 * fileserver VGC) */
1250 VDestroyVolumeDiskHeader(salvinfo->fileSysPartition,
1251 singleVolumeNumber, 0 /*parent*/);
1252 AskDelete(salvinfo, singleVolumeNumber);
1256 Log("%s vice inodes on %s; not salvaged\n",
1257 singleVolumeNumber ? "No applicable" : "No", dev);
1262 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1264 OS_CLOSE(summaryFile);
1266 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1269 if (OS_READ(salvinfo->inodeFd, ip, st_size) != st_size) {
1270 OS_CLOSE(summaryFile);
1271 Abort("Unable to read inode table; %s not salvaged\n", dev);
1273 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1274 if (OS_SEEK(salvinfo->inodeFd, 0, SEEK_SET) == -1
1275 || OS_WRITE(salvinfo->inodeFd, ip, st_size) != st_size) {
1276 OS_CLOSE(summaryFile);
1277 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1282 CountVolumeInodes(ip, nInodes, &summary);
1283 if (OS_WRITE(summaryFile, &summary, sizeof(summary)) != sizeof(summary)) {
1284 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1285 OS_CLOSE(summaryFile);
1289 summary.index += (summary.nInodes);
1290 nInodes -= summary.nInodes;
1291 ip += summary.nInodes;
1294 ip = ip_save = NULL;
1295 /* Following fflush is not fclose, because if it was debug mode would not work */
1296 if (OS_SYNC(summaryFile) == -1) {
1297 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1298 OS_CLOSE(summaryFile);
1302 if (canfork && !debug) {
1307 if (Wait("Inode summary") == -1) {
1308 OS_CLOSE(summaryFile);
1309 Exit(1); /* salvage of this partition aborted */
1313 st_size = OS_SIZE(summaryFile);
1314 osi_Assert(st_size >= 0);
1317 salvinfo->inodeSummary = (struct InodeSummary *)malloc(st_size);
1318 osi_Assert(salvinfo->inodeSummary != NULL);
1319 /* For GNU we need to do lseek to get the file pointer moved. */
1320 osi_Assert(OS_SEEK(summaryFile, 0, SEEK_SET) == 0);
1321 ret = OS_READ(summaryFile, salvinfo->inodeSummary, st_size);
1322 osi_Assert(ret == st_size);
1324 salvinfo->nVolumesInInodeFile = st_size / sizeof(struct InodeSummary);
1325 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1326 salvinfo->inodeSummary[i].volSummary = NULL;
1328 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)st_size);
1329 OS_CLOSE(summaryFile);
1332 if (retcode && singleVolumeNumber && !deleted) {
1333 AskError(salvinfo, singleVolumeNumber);
1339 /* Comparison routine for volume sort.
1340 This is set up so that a read-write volume comes immediately before
1341 any read-only clones of that volume */
/*
 * Comparator over two struct VolumeSummary records, received as the
 * const void * pair that qsort-style callbacks take.
 *
 * Records with different header.parent values are ordered by parent id.
 * Records sharing a parent are then tested for header.id == header.parent
 * (a read-write volume), p1 first and then p2; if neither test matches,
 * both are read-only and the order falls back to header.id.
 *
 * NOTE(review): the statements guarded by the two rw-volume tests are not
 * visible in this listing; presumably they return -1 and 1 -- confirm.
 */
1343 CompareVolumes(const void *_p1, const void *_p2)
1345 const struct VolumeSummary *p1 = _p1;
1346 const struct VolumeSummary *p2 = _p2;
1347 if (p1->header.parent != p2->header.parent)
1348 return p1->header.parent < p2->header.parent ? -1 : 1;
1349 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1351 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1353 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1357 * Gleans volumeSummary information by asking the fileserver
1359 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1360 * salvaging a whole partition
1362 * @return whether we obtained the volume summary information or not
1363 * @retval 0 success; we obtained the volume summary information
1364 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1366 * @retval 1 we did not get the volume summary information; either the
1367 * fileserver responded with an error, or we are not supposed to
1368 * ask the fileserver for the information (e.g. we are salvaging
1369 * the entire partition or we are not the salvageserver)
1371 * @note for non-DAFS, always returns 1
/*
 * Tries to fill salvinfo->volumeSummaryp from the fileserver instead of
 * scanning the partition.
 *
 * The query path is compiled only when FSSYNC_BUILD_CLIENT and
 * AFS_DEMAND_ATTACH_FS are both defined, and is taken only when programType
 * is salvageServer and singleVolumeNumber is non-zero.  On that path it:
 *   - calls FSYNC_VGCQuery for singleVolumeNumber and keeps re-polling,
 *     sleeping 1 to 10 seconds between attempts, while the answer is
 *     SYNC_FAILED with reason FSYNC_PART_SCANNING;
 *   - on SYNC_FAILED with reason FSYNC_UNKNOWN_VOLID, zeroes q_res and sets
 *     q_res.rw to singleVolumeNumber (a group with no members);
 *   - logs the code and reason when the fileserver refuses the request;
 *   - if q_res.rw differs from singleVolumeNumber, logs that a clone was
 *     requested, calls SALVSYNC_LinkVolume for q_res.rw (only when
 *     SALVSYNC_BUILD_CLIENT is defined) and exits with
 *     SALSRV_EXIT_VOLGROUP_LINK;
 *   - otherwise callocs VOL_VG_MAX_VOLS summaries and walks
 *     q_res.children: each non-zero child other than singleVolumeNumber is
 *     passed to AskOffline and LockVolume, its disk header is read with
 *     VReadVolumeDiskHeader and converted with DiskToVolumeHeader, and
 *     salvinfo->nVolumes is incremented;
 *   - sorts the collected summaries with qsort.
 * A final log message announces the fallback to scanning the whole
 * partition when the summary cannot be obtained from the fileserver.
 *
 * NOTE(review): the return statements and several declarations (code, res,
 * i) are not visible in this listing; the return values are described only
 * by the doc lines that precede this function.
 */
1374 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1377 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1378 if (programType == salvageServer) {
1379 if (singleVolumeNumber) {
1380 FSSYNC_VGQry_response_t q_res;
1382 struct VolumeSummary *vsp;
1384 struct VolumeDiskHeader diskHdr;
1386 memset(&res, 0, sizeof(res));
1388 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1391 * We must wait for the partition to finish scanning before
1392 * can continue, since we will not know if we got the entire
1393 * VG membership unless the partition is fully scanned.
1394 * We could, in theory, just scan the partition ourselves if
1395 * the VG cache is not ready, but we would be doing the exact
1396 * same scan the fileserver is doing; it will almost always
1397 * be faster to wait for the fileserver. The only exceptions
1398 * are if the partition does not take very long to scan, and
1399 * in that case it's fast either way, so who cares?
1401 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1402 Log("waiting for fileserver to finish scanning partition %s...\n",
1403 salvinfo->fileSysPartition->name);
1405 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1406 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1407 * just so small partitions don't need to wait over 10
1408 * seconds every time, and large partitions are generally
1409 * polled only once every ten seconds. */
1410 sleep((i > 10) ? (i = 10) : i);
1412 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1416 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1417 /* This can happen if there's no header for the volume
1418 * we're salvaging, or no headers exist for the VG (if
1419 * we're salvaging an RW). Act as if we got a response
1420 * with no VG members. The headers may be created during
1421 * salvaging, if there are inodes in this VG. */
1423 memset(&q_res, 0, sizeof(q_res));
1424 q_res.rw = singleVolumeNumber;
1428 Log("fileserver refused VGCQuery request for volume %lu on "
1429 "partition %s, code %ld reason %ld\n",
1430 afs_printable_uint32_lu(singleVolumeNumber),
1431 salvinfo->fileSysPartition->name,
1432 afs_printable_int32_ld(code),
1433 afs_printable_int32_ld(res.hdr.reason));
1437 if (q_res.rw != singleVolumeNumber) {
1438 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1439 afs_printable_uint32_lu(singleVolumeNumber),
1440 afs_printable_uint32_lu(q_res.rw));
1441 #ifdef SALVSYNC_BUILD_CLIENT
1442 if (SALVSYNC_LinkVolume(q_res.rw,
1444 salvinfo->fileSysPartition->name,
1446 Log("schedule request failed\n");
1448 #endif /* SALVSYNC_BUILD_CLIENT */
1449 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1452 salvinfo->volumeSummaryp = calloc(VOL_VG_MAX_VOLS, sizeof(struct VolumeSummary));
1453 osi_Assert(salvinfo->volumeSummaryp != NULL);
1455 salvinfo->nVolumes = 0;
1456 vsp = salvinfo->volumeSummaryp;
1458 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1459 char name[VMAXPATHLEN];
1461 if (!q_res.children[i]) {
1465 /* AskOffline for singleVolumeNumber was called much earlier */
1466 if (q_res.children[i] != singleVolumeNumber) {
1467 AskOffline(salvinfo, q_res.children[i]);
1468 if (LockVolume(salvinfo, q_res.children[i])) {
1474 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1476 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1477 afs_printable_uint32_lu(q_res.children[i]));
1482 DiskToVolumeHeader(&vsp->header, &diskHdr);
1483 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1485 salvinfo->nVolumes++;
1489 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1494 Log("Cannot get volume summary from fileserver; falling back to scanning "
1495 "entire partition\n");
1498 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
1503 * count how many volume headers are found by VWalkVolumeHeaders.
1505 * @param[in] dp the disk partition (unused)
1506 * @param[in] name full path to the .vol header (unused)
1507 * @param[in] hdr the header data (unused)
1508 * @param[in] last whether this is the last try or not (unused)
1509 * @param[in] rock actually an afs_int32*; the running count of how many
1510 * volumes we have found
/*
 * Header-walk callback with the (dp, name, hdr, last, rock) signature used
 * by VWalkVolumeHeaders.  The only visible statement reinterprets rock as
 * a pointer to an afs_int32 counter.
 *
 * NOTE(review): the statement that updates *nvols and the return value are
 * not visible in this listing; presumably the counter is incremented once
 * per header -- confirm.
 */
1515 CountHeader(struct DiskPartition64 *dp, const char *name,
1516 struct VolumeDiskHeader *hdr, int last, void *rock)
1518 afs_int32 *nvols = (afs_int32 *)rock;
1524 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
/*
 * Bundle of state handed to the header-walk callbacks through their
 * void *rock argument: the callbacks in this file cast rock back to
 * struct SalvageScanParams * before use.  The caller that runs the walk
 * fills in singleVolumeNumber, vsp, nVolumes, totalVolumes and salvinfo
 * beforehand and reads nVolumes back afterwards.
 */
1527 struct SalvageScanParams {
1528 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1529 * vol id of the VG we're salvaging */
1530 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1531 * we're filling in */
1532 afs_int32 nVolumes; /**< # of vols we've encountered */
1533 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1534 * # of vols we've alloc'd memory for) */
1535 int retry; /**< do we need to retry vol lock/checkout? */
1536 struct SalvInfo *salvinfo; /**< salvage job info */
1540 * records volume summary info found from VWalkVolumeHeaders.
1542 * Found volumes are also taken offline if they are in the specific volume
1543 * group we are looking for.
1545 * @param[in] dp the disk partition
1546 * @param[in] name full path to the .vol header
1547 * @param[in] hdr the header data
1548 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1549 * @param[in] rock actually a struct SalvageScanParams*, containing the
1550 * information needed to record the volume summary data
1552 * @return operation status
1554 * @retval -1 volume locking raced with fileserver restart; checking out
1555 * and locking volumes needs to be retried
1556 * @retval 1 volume header is mis-named and should be deleted
/*
 * Header-walk callback that turns one on-disk volume header into a
 * VolumeSummary and stores it at params->vsp.
 *
 * Visible steps, in order:
 *   - rock is cast to struct SalvageScanParams *; a local summary is zeroed
 *     and filled from hdr with DiskToVolumeHeader.
 *   - If a single volume is being salvaged and this header has that id but
 *     a different parent, the volume is a clone.  Under salvageServer (with
 *     SALVSYNC_BUILD_CLIENT) the parent group is scheduled through
 *     SALVSYNC_LinkVolume and the process exits with
 *     SALSRV_EXIT_VOLGROUP_LINK; the other visible branch logs that a
 *     read-only volume is not salvaged.
 *   - The header is considered further only for a partition salvage, or
 *     when its id or parent equals singleVolumeNumber.
 *   - The last path component of name (after OS_DIRSEPC) is compared with
 *     the name generated from VFORMAT and the header id; a mismatch marks
 *     the file as badly named.
 *   - When the name is good, or this is the last try, volumes other than
 *     singleVolumeNumber are passed to AskOffline; LockVolume is called
 *     when built with AFS_DEMAND_ATTACH_FS or AFS_DEMAND_ATTACH_UTIL.
 *   - On the last try (and not in Showmode) a badly named header is
 *     reported in the log.
 *   - Abort is called if params->nVolumes exceeds params->totalVolumes;
 *     otherwise the summary is copied to params->vsp.
 *
 * NOTE(review): the return statements, the declaration of badname, and the
 * statements that advance params->vsp / params->nVolumes are not visible
 * in this listing; see the retval doc lines preceding this function.
 */
1559 RecordHeader(struct DiskPartition64 *dp, const char *name,
1560 struct VolumeDiskHeader *hdr, int last, void *rock)
1562 char nameShouldBe[64];
1563 struct SalvageScanParams *params;
1564 struct VolumeSummary summary;
1565 VolumeId singleVolumeNumber;
1566 struct SalvInfo *salvinfo;
1568 params = (struct SalvageScanParams *)rock;
1570 memset(&summary, 0, sizeof(summary));
1572 singleVolumeNumber = params->singleVolumeNumber;
1573 salvinfo = params->salvinfo;
1575 DiskToVolumeHeader(&summary.header, hdr);
1577 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1578 && summary.header.parent != singleVolumeNumber) {
1580 if (programType == salvageServer) {
1581 #ifdef SALVSYNC_BUILD_CLIENT
1582 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1583 summary.header.id, summary.header.parent);
1584 if (SALVSYNC_LinkVolume(summary.header.parent,
1588 Log("schedule request failed\n");
1591 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1594 Log("%u is a read-only volume; not salvaged\n",
1595 singleVolumeNumber);
1600 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1601 || summary.header.parent == singleVolumeNumber) {
1603 /* check if the header file is incorrectly named */
1605 const char *base = strrchr(name, OS_DIRSEPC);
1612 snprintf(nameShouldBe, sizeof nameShouldBe,
1613 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1616 if (strcmp(nameShouldBe, base)) {
1617 /* .vol file has wrong name; retry/delete */
1621 if (!badname || last) {
1622 /* only offline the volume if the header is good, or if this is
1623 * the last try looking at it; avoid AskOffline'ing the same vol
1626 if (singleVolumeNumber
1627 && summary.header.id != singleVolumeNumber) {
1628 /* don't offline singleVolumeNumber; we already did that
1631 AskOffline(salvinfo, summary.header.id);
1633 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
1635 /* don't lock the volume if the header is bad, since we're
1636 * about to delete it anyway. */
1637 if (LockVolume(salvinfo, summary.header.id)) {
1642 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
1646 if (last && !Showmode) {
1647 Log("Volume header file %s is incorrectly named (should be %s "
1648 "not %s); %sdeleted (it will be recreated later, if "
1649 "necessary)\n", name, nameShouldBe, base,
1650 (Testing ? "it would have been " : ""));
1658 if (params->nVolumes > params->totalVolumes) {
1659 /* We found more volumes than we found on the first partition walk;
1660 * apparently something created a volume while we were
1661 * partition-salvaging, or we found more than 20 vols when salvaging a
1662 * particular volume. Abort if we detect this, since other programs
1663 * supposed to not touch the partition while it is partition-salvaging,
1664 * and we shouldn't find more than 20 vols in a VG.
1666 Abort("Found %ld vol headers, but should have found at most %ld! "
1667 "Make sure the volserver/fileserver are not running at the "
1668 "same time as a partition salvage\n",
1669 afs_printable_int32_ld(params->nVolumes),
1670 afs_printable_int32_ld(params->totalVolumes));
1673 memcpy(params->vsp, &summary, sizeof(summary));
1681 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1683 * If the header could not be read in at all, the header is always unlinked.
1684 * If instead RecordHeader said the header was bad (that is, the header file
1685 * is mis-named), we only unlink if we are doing a partition salvage, as
1686 * opposed to salvaging a specific volume group.
1688 * @param[in] dp the disk partition
1689 * @param[in] name full path to the .vol header
1690 * @param[in] hdr header data, or NULL if the header could not be read
1691 * @param[in] rock actually a struct SalvageScanParams*, with some information
/*
 * Header-walk callback that decides whether a rejected volume header file
 * should be removed, and removes it with unlink(name) when dounlink is set.
 *
 * Two cases are visible: a header that could not be read at all (logged as
 * not a legitimate volume header file), and a header that was read but
 * rejected, which is handled only when params->singleVolumeNumber is zero
 * (a partition salvage).  A failing unlink is logged with errno.
 *
 * NOTE(review): the assignments to dounlink, including any Testing guard,
 * are not visible in this listing -- confirm against the full source.
 */
1695 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1696 struct VolumeDiskHeader *hdr, void *rock)
1698 struct SalvageScanParams *params;
1701 params = (struct SalvageScanParams *)rock;
1704 /* no header; header is too bogus to read in at all */
1706 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1712 } else if (!params->singleVolumeNumber) {
1713 /* We were able to read in a header, but RecordHeader said something
1714 * was wrong with it. We only unlink those if we are doing a partition
1721 if (dounlink && unlink(name)) {
1722 Log("Error %d while trying to unlink %s\n", errno, name);
1727 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1728 * the fileserver for VG information, or by scanning the /vicepX partition.
1730 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1731 * are salvaging, or 0 if this is a partition
1734 * @return operation status
1736 * @retval -1 we raced with a fileserver restart; checking out and locking
1737 * volumes must be retried
/*
 * Populates salvinfo->volumeSummaryp and salvinfo->nVolumes.
 *
 * AskVolumeSummary is tried first; per the comments below, one result means
 * the fileserver supplied the information and no scan is needed, and
 * another means volume checkout has to be retried.  Otherwise the partition
 * is scanned:
 *   - for a partition salvage (singleVolumeNumber == 0) the headers are
 *     first counted with VWalkVolumeHeaders and CountHeader, aborting if
 *     the directory cannot be read; for a single volume group the capacity
 *     is VOL_VG_MAX_VOLS;
 *   - that many VolumeSummary records are calloc'd;
 *   - a SalvageScanParams is filled in and its address is passed as the
 *     rock of a second VWalkVolumeHeaders call that uses RecordHeader and
 *     UnlinkHeader;
 *   - the number of recorded volumes is copied to salvinfo->nVolumes and
 *     the summaries are sorted with qsort.
 *
 * The rock argument of the second walk had been corrupted into a pilcrow
 * followed by "ms" by an entity-decoding accident; it is the address of
 * the local params structure.
 *
 * NOTE(review): the return statements and the declaration of code are not
 * visible in this listing.
 */
1740 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1742 afs_int32 nvols = 0;
1743 struct SalvageScanParams params;
1746 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1748 /* we successfully got the vol information from the fileserver; no
1749 * need to scan the partition */
1753 /* we need to retry volume checkout */
1757 if (!singleVolumeNumber) {
1758 /* Count how many volumes we have in /vicepX */
1759 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1762 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1767 nvols = VOL_VG_MAX_VOLS;
1770 salvinfo->volumeSummaryp = calloc(nvols, sizeof(struct VolumeSummary));
1771 osi_Assert(salvinfo->volumeSummaryp != NULL);
1773 params.singleVolumeNumber = singleVolumeNumber;
1774 params.vsp = salvinfo->volumeSummaryp;
1775 params.nVolumes = 0;
1776 params.totalVolumes = nvols;
1778 params.salvinfo = salvinfo;
1780 /* walk the partition directory of volume headers and record the info
1781 * about them; unlinking invalid headers */
1782 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1783 UnlinkHeader, &params);
1785 /* we apparently need to retry checking-out/locking volumes */
1789 Abort("Failed to get volume header summary\n");
1791 salvinfo->nVolumes = params.nVolumes;
1793 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1799 /* Find the link table. This should be associated with the RW volume or, if
1800 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
/*
 * Searches the special inodes of each of the nVols inode summaries for one
 * whose u.special.type is VI_LINKTABLE and returns its inodeNumber.
 *
 * isp[i].index is used as an offset from allInodes, and only the first
 * isp[i].nSpecialInodes entries of each volume are examined.
 *
 * NOTE(review): the value returned when no link table inode is found is
 * not visible in this listing.
 */
1803 FindLinkHandle(struct InodeSummary *isp, int nVols,
1804 struct ViceInodeInfo *allInodes)
1807 struct ViceInodeInfo *ip;
1809 for (i = 0; i < nVols; i++) {
1810 ip = allInodes + isp[i].index;
1811 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1812 if (ip[j].u.special.type == VI_LINKTABLE)
1813 return ip[j].inodeNumber;
/*
 * Creates (or reinitializes) the link table for the volume group described
 * by isp and leaves its handle in salvinfo->VGLinkH.
 *
 * If ino is not a valid inode, IH_CREATE is called for a special inode of
 * type VI_LINKTABLE owned by isp->RWvolumeId; Abort is reached if no valid
 * inode results.  The handle is initialized and opened, the file is
 * truncated to sizeof(version) + sizeof(short), and a versionStamp carrying
 * LINKTABLEMAGIC / LINKTABLEVERSION is written at offset 0.  Each failing
 * step aborts with a message naming that step; the message for the failed
 * write previously repeated the truncate wording and now says write.
 * Finally the inode is recorded in isp->volSummary->header.linkTable when a
 * volume summary exists.
 *
 * NOTE(review): the assignment of the IH_CREATE result and the tests around
 * the open and write calls are on lines not visible in this listing.
 */
1820 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1822 struct versionStamp version;
1825 if (!VALID_INO(ino))
1827 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->RWvolumeId,
1828 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1829 if (!VALID_INO(ino))
1831 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1832 isp->RWvolumeId, errno);
1833 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1834 fdP = IH_OPEN(salvinfo->VGLinkH);
1836 Abort("Can't open link table for volume %u (error = %d)\n",
1837 isp->RWvolumeId, errno);
1839 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1840 Abort("Can't truncate link table for volume %u (error = %d)\n",
1841 isp->RWvolumeId, errno);
1843 version.magic = LINKTABLEMAGIC;
1844 version.version = LINKTABLEVERSION;
1846 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1848 Abort("Can't write link table for volume %u (error = %d)\n",
1849 isp->RWvolumeId, errno);
1851 FDH_REALLYCLOSE(fdP);
1853 /* If the volume summary exists (i.e., the V*.vol header file exists),
1854 * then set this inode there as well.
1856 if (isp->volSummary)
1857 isp->volSummary->header.linkTable = ino;
/*
 * Thread body fragment: unpacks the SVGParms_t passed as arg and forwards
 * its three fields to DoSalvageVolumeGroup.
 * NOTE(review): the enclosing function header and its return statement are
 * not visible in this listing; pthread_create elsewhere in this file is
 * given nt_SVG together with a pointer to such a parms structure.
 */
1866 SVGParms_t *parms = (SVGParms_t *) arg;
1867 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * Runs the salvage of one volume group on a separate thread and waits for
 * it to finish.
 *
 * Per-volume state in salvinfo (VolumeChanged, VGLinkH, VGLinkH_cnt,
 * VolInfo) is reset, the arguments are packed into parms, and a thread
 * created with PTHREAD_CREATE_JOINABLE runs nt_SVG on them.  The caller
 * blocks in pthread_join, so parms (a local) stays alive for as long as
 * the thread uses it.  Failures of the pthread calls are logged with the
 * RW volume id.
 *
 * NOTE(review): what happens after each logged failure (early return or
 * otherwise) is on lines not visible in this listing.
 */
1872 nt_SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1875 pthread_attr_t tattr;
1879 /* Initialize per volume global variables, even if later code does so */
1880 salvinfo->VolumeChanged = 0;
1881 salvinfo->VGLinkH = NULL;
1882 salvinfo->VGLinkH_cnt = 0;
1883 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1885 parms.svgp_inodeSummaryp = isp;
1886 parms.svgp_count = nVols;
1887 parms.svgp_salvinfo = salvinfo;
1888 code = pthread_attr_init(&tattr);
1890 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1894 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1896 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1899 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1901 Log("Failed to create thread to salvage volume group %u\n",
1905 (void)pthread_join(tid, NULL);
1907 #endif /* AFS_NT40_ENV */
/*
 * Salvages one volume group: isp points at nVols consecutive inode
 * summaries, the first of which is checked for being the read-write volume.
 *
 * Visible phases, in order:
 *   1. salvinfo->VGLinkH_cnt is reset; haveRWvolume is set when the first
 *      summary has volumeId == RWvolumeId and at least one special inode.
 *   2. Unless ForceSalvage is set, QuickCheck is consulted (subject to the
 *      ShowMounts conditions) before any real work is done.
 *   3. When canfork is set and debug is not, the work happens in a forked
 *      child while the parent blocks in Wait.
 *   4. The nInodes of all summaries are totalled and that many
 *      ViceInodeInfo records are read from salvinfo->inodeFd into a
 *      malloc'd array.  allInodes is that array rebased by isp->index so
 *      that the index fields of the summaries can be applied to it directly.
 *   5. Under AFS_NAMEI_ENV the link table inode is located with
 *      FindLinkHandle, opened, and its versionStamp magic verified; if it
 *      is missing or cannot be opened it is recreated with CreateLinkTable
 *      and every non-special inode gets a link count of 1 through
 *      namei_SetLinkCount.
 *   6. Volumes are processed from index nVols - 1 down to salvageTo (0 when
 *      a read-write volume is present, 1 otherwise).  Each gets two passes,
 *      check = 1 then check = 0, of SalvageVolumeHeaderFile and
 *      SalvageVnodes; MaybeZapVolume is called when either reports failure.
 *   7. Remaining per-inode linkCount values are driven to zero with IH_DEC
 *      (positive) or IH_INC (negative).  Under AFS_NAMEI_ENV the link table
 *      inode itself is deferred and adjusted last.
 *   8. SalvageVolume performs the directory checks on the read-write
 *      volume, and the link table handle is released.
 *
 * NOTE(review): no NULL test of the malloc result is visible in this
 * listing, and size is an int product of totalInodes and the record size;
 * confirm both against the full source.  Return statements and several
 * declarations (rw, check, deleteMe, ino, stmp, j) are also not visible.
 */
1910 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1912 struct ViceInodeInfo *inodes, *allInodes, *ip;
1913 int i, totalInodes, size, salvageTo;
1917 int dec_VGLinkH = 0;
1919 FdHandle_t *fdP = NULL;
1921 salvinfo->VGLinkH_cnt = 0;
1922 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1923 && isp->nSpecialInodes > 0);
1924 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1925 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1928 if (ShowMounts && !haveRWvolume)
1930 if (canfork && !debug && Fork() != 0) {
1931 (void)Wait("Salvage volume group");
1934 for (i = 0, totalInodes = 0; i < nVols; i++)
1935 totalInodes += isp[i].nInodes;
1936 size = totalInodes * sizeof(struct ViceInodeInfo);
1937 inodes = (struct ViceInodeInfo *)malloc(size);
1938 allInodes = inodes - isp->index; /* this would the base of all the inodes
1939 * for the partition, if all the inodes
1940 * had been read into memory */
1942 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1944 osi_Assert(OS_READ(salvinfo->inodeFd, inodes, size) == size);
1946 /* Don't try to salvage a read write volume if there isn't one on this
1948 salvageTo = haveRWvolume ? 0 : 1;
1950 #ifdef AFS_NAMEI_ENV
1951 ino = FindLinkHandle(isp, nVols, allInodes);
1952 if (VALID_INO(ino)) {
1953 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1954 fdP = IH_OPEN(salvinfo->VGLinkH);
1956 if (VALID_INO(ino) && fdP != NULL) {
1957 struct versionStamp header;
1958 afs_sfsize_t nBytes;
1960 nBytes = FDH_PREAD(fdP, (char *)&header, sizeof(struct versionStamp), 0);
1961 if (nBytes != sizeof(struct versionStamp)
1962 || header.magic != LINKTABLEMAGIC) {
1963 Log("Bad linktable header for volume %u.\n", isp->RWvolumeId);
1964 FDH_REALLYCLOSE(fdP);
1968 if (!VALID_INO(ino) || fdP == NULL) {
1969 Log("%s link table for volume %u.\n",
1970 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1972 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1975 struct ViceInodeInfo *ip;
1976 CreateLinkTable(salvinfo, isp, ino);
1977 fdP = IH_OPEN(salvinfo->VGLinkH);
1978 /* Sync fake 1 link counts to the link table, now that it exists */
1980 for (i = 0; i < nVols; i++) {
1981 ip = allInodes + isp[i].index;
1982 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1983 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1990 FDH_REALLYCLOSE(fdP);
1992 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1995 /* Salvage in reverse order--read/write volume last; this way any
1996 * Inodes not referenced by the time we salvage the read/write volume
1997 * can be picked up by the read/write volume */
1998 /* ACTUALLY, that's not done right now--the inodes just vanish */
1999 for (i = nVols - 1; i >= salvageTo; i--) {
2001 struct InodeSummary *lisp = &isp[i];
2002 #ifdef AFS_NAMEI_ENV
2003 /* If only the RO is present on this partition, the link table
2004 * shows up as a RW volume special file. Need to make sure the
2005 * salvager doesn't try to salvage the non-existent RW.
2007 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
2008 /* If this only special inode is the link table, continue */
2009 if (inodes->u.special.type == VI_LINKTABLE) {
2016 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
2017 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
2018 /* Check inodes twice. The second time do things seriously. This
2019 * way the whole RO volume can be deleted, below, if anything goes wrong */
2020 for (check = 1; check >= 0; check--) {
2022 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
2024 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
2025 if (rw && deleteMe) {
2026 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
2027 * volume won't be called */
2033 if (rw && check == 1)
2035 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
2036 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
2042 /* Fix actual inode counts */
2045 Log("totalInodes %d\n",totalInodes);
2046 for (ip = inodes; totalInodes; ip++, totalInodes--) {
2047 static int TraceBadLinkCounts = 0;
2048 #ifdef AFS_NAMEI_ENV
2049 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
2050 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
2051 VGLinkH_p1 = ip->u.param[0];
2052 continue; /* Deal with this last. */
2055 if (ip->linkCount != 0 && TraceBadLinkCounts) {
2056 TraceBadLinkCounts--; /* Limit reports, per volume */
2057 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
2059 while (ip->linkCount > 0) {
2060 /* below used to assert, not break */
2062 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2063 Log("idec failed. inode %s errno %d\n",
2064 PrintInode(stmp, ip->inodeNumber), errno);
2070 while (ip->linkCount < 0) {
2071 /* these used to be asserts */
2073 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2074 Log("iinc failed. inode %s errno %d\n",
2075 PrintInode(stmp, ip->inodeNumber), errno);
2082 #ifdef AFS_NAMEI_ENV
2083 while (dec_VGLinkH > 0) {
2084 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2085 Log("idec failed on link table, errno = %d\n", errno);
2089 while (dec_VGLinkH < 0) {
2090 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2091 Log("iinc failed on link table, errno = %d\n", errno);
2098 /* Directory consistency checks on the rw volume */
2100 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2101 IH_RELEASE(salvinfo->VGLinkH);
2103 if (canfork && !debug) {
/*
 * Inspects the volume info header of each of the nVols volumes in the group
 * before any fork.  The caller tests the result together with ForceSalvage
 * ahead of doing the full salvage.
 *
 * For every summary (except a first entry with no special inodes when more
 * than one volume is present) the VolumeDiskData is read from the volume
 * info inode and accepted only if the whole structure was read, its magic
 * is VOLUMEINFOMAGIC, dontSalvage equals DONT_SALVAGE, and both
 * needsSalvaged and destroyMe are zero.  An accepted header that still has
 * inUse set is rewritten with inUse cleared and inService set.
 *
 * NOTE(review): the return statements and the handling of a failed read or
 * write are on lines not visible in this listing.
 */
2110 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2112 /* Check headers BEFORE forking */
2116 for (i = 0; i < nVols; i++) {
2117 struct VolumeSummary *vs = isp[i].volSummary;
2118 VolumeDiskData volHeader;
2120 /* Don't salvage just because phantom rw volume is there... */
2121 /* (If a read-only volume exists, read/write inodes must also exist) */
2122 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2126 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2127 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2128 == sizeof(volHeader)
2129 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2130 && volHeader.dontSalvage == DONT_SALVAGE
2131 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2132 if (volHeader.inUse != 0) {
2133 volHeader.inUse = 0;
2134 volHeader.inService = 1;
2136 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2137 != sizeof(volHeader)) {
2153 /* SalvageVolumeHeaderFile
2155 * Salvage the top level V*.vol header file. Make sure the special files
2156 * exist and that there are no duplicates.
2158 * Calls SalvageHeader for each possible type of volume special file.
/*
 * Rebuilds and validates the V*.vol header for the volume described by isp,
 * using the special inodes found for it in inodes.
 *
 * Parameters as used below:
 *   inodes    base array indexed with isp->index + i
 *   RW        non-zero for the read-write volume; for other volumes the
 *             in-use bit is cleared with ClearROInUseBit when check is 0
 *   check     non-zero on the dry pass; link counts are adjusted only when
 *             check is 0
 *   deleteMe  forwarded to SalvageHeader
 *
 * Visible phases, in order:
 *   1. A skip array with one entry per special inode is allocated; if that
 *      fails the salvage continues without duplicate recovery.
 *   2. When an existing header (isp->volSummary) is available, each special
 *      inode whose number matches the one the header references for its
 *      type is remembered in goodspecial[].
 *   3. tempHeader is reset and stamped with VOLUMEHEADERMAGIC,
 *      VOLUMEHEADERVERSION, the volume id and the parent id.
 *   4. Adjacent special inodes of the same type are reported as duplicates.
 *      If the existing header identified a good one, the others are marked
 *      in skip; otherwise the salvage of this volume is abandoned.
 *   5. Each special inode is classified: out-of-range types are logged as
 *      rubbish, skipped inodes are treated as orphans (removed or ignored
 *      according to the orphans setting), and the rest are entered into
 *      tempHeader through the stuff[] table.  When check is 0, inodes other
 *      than the link table get their linkCount decremented so they are kept.
 *   6. salvinfo->VGLinkH_cnt is incremented once per header.
 *   7. SalvageHeader is run for every inode type in stuff[]; for the link
 *      table slot the inode of salvinfo->VGLinkH is substituted first when
 *      it is valid.
 *   8. If no header file existed, one is created (VCreateVolumeDiskHeader);
 *      if the existing one differs from tempHeader it is rewritten
 *      (VWriteVolumeDiskHeader).  The in-memory summary is updated from
 *      tempHeader and the volume info handle is initialized.
 *
 * NOTE(review): the return statements, the declarations of i, code, skip,
 * path and stmp, and the Testing/check guards around several branches are
 * on lines not visible in this listing.  No NULL test of the calloc result
 * stored in isp->volSummary is visible either -- confirm against the full
 * source.
 */
2162 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2163 struct ViceInodeInfo *inodes, int RW,
2164 int check, int *deleteMe)
2167 struct ViceInodeInfo *ip;
2168 int allinodesobsolete = 1;
2169 struct VolumeDiskHeader diskHeader;
2170 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2172 struct VolumeHeader tempHeader;
2173 struct afs_inode_info stuff[MAXINODETYPE];
2175 /* keeps track of special inodes that are probably 'good'; they are
2176 * referenced in the vol header, and are included in the given inodes
2181 } goodspecial[MAXINODETYPE];
2186 memset(goodspecial, 0, sizeof(goodspecial));
2188 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2190 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2192 Log("cannot allocate memory for inode skip array when salvaging "
2193 "volume %lu; not performing duplicate special inode recovery\n",
2194 afs_printable_uint32_lu(isp->volumeId));
2195 /* still try to perform the salvage; the skip array only does anything
2196 * if we detect duplicate special inodes */
2199 init_inode_info(&tempHeader, stuff);
2202 * First, look at the special inodes and see if any are referenced by
2203 * the existing volume header. If we find duplicate special inodes, we
2204 * can use this information to use the referenced inode (it's more
2205 * likely to be the 'good' one), and throw away the duplicates.
2207 if (isp->volSummary && skip) {
2208 /* use tempHeader, so we can use the stuff[] array to easily index
2209 * into the isp->volSummary special inodes */
2210 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2212 for (i = 0; i < isp->nSpecialInodes; i++) {
2213 ip = &inodes[isp->index + i];
2214 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2215 /* will get taken care of in a later loop */
2218 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2219 goodspecial[ip->u.special.type-1].valid = 1;
2220 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2225 memset(&tempHeader, 0, sizeof(tempHeader));
2226 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2227 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2228 tempHeader.id = isp->volumeId;
2229 tempHeader.parent = isp->RWvolumeId;
2231 /* Check for duplicates (inodes are sorted by type field) */
2232 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2233 ip = &inodes[isp->index + i];
2234 if (ip->u.special.type == (ip + 1)->u.special.type) {
2235 afs_ino_str_t stmp1, stmp2;
2237 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2238 /* Will be caught in the loop below */
2242 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2243 ip->u.special.type, isp->volumeId,
2244 PrintInode(stmp1, ip->inodeNumber),
2245 PrintInode(stmp2, (ip+1)->inodeNumber));
2247 if (skip && goodspecial[ip->u.special.type-1].valid) {
2248 Inode gi = goodspecial[ip->u.special.type-1].inode;
2251 Log("using special inode referenced by vol header (%s)\n",
2252 PrintInode(stmp1, gi));
2255 /* the volume header references some special inode of
2256 * this type in the inodes array; are we it? */
2257 if (ip->inodeNumber != gi) {
2259 } else if ((ip+1)->inodeNumber != gi) {
2260 /* in case this is the last iteration; we need to
2261 * make sure we check ip+1, too */
2266 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2274 for (i = 0; i < isp->nSpecialInodes; i++) {
2276 ip = &inodes[isp->index + i];
2277 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2279 Log("Rubbish header inode %s of type %d\n",
2280 PrintInode(stmp, ip->inodeNumber),
2281 ip->u.special.type);
2287 Log("Rubbish header inode %s of type %d; deleted\n",
2288 PrintInode(stmp, ip->inodeNumber),
2289 ip->u.special.type);
2290 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2291 if (skip && skip[i]) {
2292 if (orphans == ORPH_REMOVE) {
2293 Log("Removing orphan special inode %s of type %d\n",
2294 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2297 Log("Ignoring orphan special inode %s of type %d\n",
2298 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2299 /* fall through to the ip->linkCount--; line below */
2302 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2303 allinodesobsolete = 0;
2305 if (!check && ip->u.special.type != VI_LINKTABLE)
2306 ip->linkCount--; /* Keep the inode around */
2314 if (allinodesobsolete) {
2321 salvinfo->VGLinkH_cnt++; /* one for every header. */
2323 if (!RW && !check && isp->volSummary) {
2324 ClearROInUseBit(isp->volSummary);
2328 for (i = 0; i < MAXINODETYPE; i++) {
2329 if (stuff[i].inodeType == VI_LINKTABLE) {
2330 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2331 * And we may have recreated the link table earlier, so set the
2332 * RW header as well. The header magic was already checked.
2334 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2335 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2339 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2343 if (isp->volSummary == NULL) {
2345 char headerName[64];
2346 snprintf(headerName, sizeof headerName, VFORMAT,
2347 afs_printable_uint32_lu(isp->volumeId));
2348 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2349 salvinfo->fileSysPath, headerName);
2351 Log("No header file for volume %u\n", isp->volumeId);
2355 Log("No header file for volume %u; %screating %s\n",
2356 isp->volumeId, (Testing ? "it would have been " : ""),
2358 isp->volSummary = calloc(1, sizeof(struct VolumeSummary));
2360 writefunc = VCreateVolumeDiskHeader;
2363 char headerName[64];
2364 /* hack: these two fields are obsolete... */
2365 isp->volSummary->header.volumeAcl = 0;
2366 isp->volSummary->header.volumeMountTable = 0;
2369 (&isp->volSummary->header, &tempHeader,
2370 sizeof(struct VolumeHeader))) {
2371 VolumeExternalName_r(isp->volumeId, headerName, sizeof(headerName));
2372 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2373 salvinfo->fileSysPath, headerName);
2375 Log("Header file %s is damaged or no longer valid%s\n", path,
2376 (check ? "" : "; repairing"));
2380 writefunc = VWriteVolumeDiskHeader;
2384 memcpy(&isp->volSummary->header, &tempHeader,
2385 sizeof(struct VolumeHeader));
2388 Log("It would have written a new header file for volume %u\n",
2392 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2393 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2395 Log("Error %ld writing volume header file for volume %lu\n",
2396 afs_printable_int32_ld(code),
2397 afs_printable_uint32_lu(diskHeader.id));
2402 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2403 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader: verify one special (header) inode of a volume and, where
 * permitted, recreate it.
 *
 * salvinfo  salvage job state; supplies the partition device and path and
 *           receives a copy of the volume info in salvinfo->VolInfo.
 * sp        describes the special inode being examined: its type, a
 *           printable description, the expected version stamp, the number
 *           of bytes to read, and where its inode number is stored.
 * isp       summary of the volume; provides volumeId, RWvolumeId and
 *           maxUniquifier.
 * check     check-only mode; the code treats recreating a header while
 *           check is set as an internal error.
 * deleteMe  out-parameter; NOTE(review): appears to be tied to the
 *           DESTROY_ME test on the volume info -- confirm.
 *
 * What the code below does, in order:
 *  - without AFS_NAMEI_ENV, a VI_LINKTABLE entry returns 0 at once;
 *  - an inode number of 0 is logged as missing and a new special inode is
 *    requested with IH_CREATE;
 *  - the inode is opened and sp->size bytes are read; a short read or a
 *    magic number different from sp->stamp.magic is reported as
 *    corruption and the in-memory header is zeroed;
 *  - with AFS_NAMEI_ENV, namei_FixSpecialOGM() is run on the open handle;
 *  - when recreating (and not Testing) the file is truncated and either a
 *    fresh VolumeDiskData named "bogus.<volumeId>" (VI_VOLINFO) or just
 *    the version stamp (other types) is written at offset 0;
 *  - for VI_VOLINFO the header is copied to salvinfo->VolInfo and the
 *    update (or creation) time is logged.
 *
 * The caller in this file compares the result against -1.
 */
2408 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2409 struct InodeSummary *isp, int check, int *deleteMe)
2412 VolumeDiskData volumeInfo;
2413 struct versionStamp fileHeader;
2422 #ifndef AFS_NAMEI_ENV
2423 if (sp->inodeType == VI_LINKTABLE)
2424 return 0; /* header magic was already checked */
2426 if (*(sp->inode) == 0) {
2428 Log("Missing inode in volume header (%s)\n", sp->description);
2432 Log("Missing inode in volume header (%s); %s\n", sp->description,
2433 (Testing ? "it would have recreated it" : "recreating"));
2436 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2437 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2438 if (!VALID_INO(*(sp->inode)))
2440 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2441 sp->description, errno);
2446 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2447 fdP = IH_OPEN(specH);
2448 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2449 /* bail out early and destroy the volume */
2451 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2458 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2459 sp->description, errno);
2462 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2463 || header.fileHeader.magic != sp->stamp.magic)) {
2465 Log("Part of the header (%s) is corrupted\n", sp->description);
2466 FDH_REALLYCLOSE(fdP);
2470 Log("Part of the header (%s) is corrupted; recreating\n",
2473 /* header can be garbage; make sure we don't read garbage data from
2475 memset(&header, 0, sizeof(header));
2477 #ifdef AFS_NAMEI_ENV
2478 if (namei_FixSpecialOGM(fdP, check)) {
2479 Log("Error with namei header OGM data (%s)\n", sp->description);
2480 FDH_REALLYCLOSE(fdP);
2485 if (sp->inodeType == VI_VOLINFO
2486 && header.volumeInfo.destroyMe == DESTROY_ME) {
2489 FDH_REALLYCLOSE(fdP);
2493 if (recreate && !Testing) {
2496 ("Internal error: recreating volume header (%s) in check mode\n",
2498 nBytes = FDH_TRUNC(fdP, 0);
2500 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2501 sp->description, errno);
2503 /* The following code should be moved into vutil.c */
2504 if (sp->inodeType == VI_VOLINFO) {
2506 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2507 header.volumeInfo.stamp = sp->stamp;
2508 header.volumeInfo.id = isp->volumeId;
2509 header.volumeInfo.parentId = isp->RWvolumeId;
2510 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2511 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2512 isp->volumeId, isp->volumeId);
2513 header.volumeInfo.inService = 0;
2514 header.volumeInfo.blessed = 0;
2515 /* The + 1000 is a hack in case there are any files out in venus caches */
2516 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2517 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2518 header.volumeInfo.needsCallback = 0;
2519 gettimeofday(&tp, NULL);
2520 header.volumeInfo.creationDate = tp.tv_sec;
2522 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2523 sizeof(header.volumeInfo), 0);
2524 if (nBytes != sizeof(header.volumeInfo)) {
2527 ("Unable to write volume header file (%s) (errno = %d)\n",
2528 sp->description, errno);
2529 Abort("Unable to write entire volume header file (%s)\n",
2533 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2534 if (nBytes != sizeof(sp->stamp)) {
2537 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2538 sp->description, errno);
2540 ("Unable to write entire version stamp in volume header file (%s)\n",
2545 FDH_REALLYCLOSE(fdP);
2547 if (sp->inodeType == VI_VOLINFO) {
2548 salvinfo->VolInfo = header.volumeInfo;
2552 if (salvinfo->VolInfo.updateDate) {
2553 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2555 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2556 salvinfo->VolInfo.id,
2557 (Testing ? "it would have been " : ""), update);
2559 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2561 Log("%s (%u) not updated (created %s)\n",
2562 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
/*
 * SalvageVnodes: salvage both vnode indexes of one volume.
 *
 * rwIsp    inode summary of the read/write volume; its index and
 *          nSpecialInodes locate the first non-special inode, and
 *          nInodes - nSpecialInodes gives how many there are.
 * thisIsp  inode summary of the volume whose indexes are being salvaged;
 *          RW is set when this is the read/write volume itself.
 * inodes   the inode array that rwIsp->index refers into.
 * check    passed through to SalvageIndex; a small-index result of -1 is
 *          tested in check mode before the large index is examined.
 *
 * SalvageIndex is run for the small vnode index and then for the large
 * one.  Returns 0 only when both runs returned 0, otherwise -1.
 */
2572 SalvageVnodes(struct SalvInfo *salvinfo,
2573 struct InodeSummary *rwIsp,
2574 struct InodeSummary *thisIsp,
2575 struct ViceInodeInfo *inodes, int check)
2577 int ilarge, ismall, ioffset, RW, nInodes;
2578 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2581 RW = (rwIsp == thisIsp);
2582 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2584 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2585 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2586 if (check && ismall == -1)
2589 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2590 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2591 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex: reconcile one vnode index file with the inodes found on
 * disk for the volume.
 *
 * ino         inode of the vnode index file to walk.
 * class       vnode class of that index; selects the VnodeClassInfo entry
 *             (record size and magic number).
 * RW          non-zero when the volume being salvaged is the read/write
 *             volume.
 * ip, nInodes the inode records to match against; the loops below consume
 *             them in increasing vnode-number order.
 * volSummary  volume summary; header.parent is used to open the index.
 * check       check-only mode; the function asserts that no vnode was
 *             changed while check is set.
 *
 * The index is opened, its size is asserted to be a whole number of
 * records, and the first record is skipped.  For every record whose type
 * is not vNull the code:
 *  - zeroes the vnode when its inode number is 0 or its magic number does
 *    not match the class magic;
 *  - looks for the inode record with the same vnode number, preferring
 *    one whose inode number equals the one stored in the vnode;
 *  - on a match, repairs the uniquifier, data version, inode number and
 *    length from the inode record and decrements ip->linkCount so the
 *    inode is kept;
 *  - with no match, logs the problem and zeroes the vnode.
 * A changed vnode is written back with IH_IWRITE unless Testing is set,
 * and salvinfo->VolumeChanged is raised.
 */
2595 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2596 struct ViceInodeInfo *ip, int nInodes,
2597 struct VolumeSummary *volSummary, int check)
2599 char buf[SIZEOF_LARGEDISKVNODE];
2600 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2602 StreamHandle_t *file;
2603 struct VnodeClassInfo *vcp;
2605 afs_sfsize_t nVnodes;
2606 afs_fsize_t vnodeLength;
2608 afs_ino_str_t stmp1, stmp2;
2612 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2613 fdP = IH_OPEN(handle);
2614 osi_Assert(fdP != NULL);
2615 file = FDH_FDOPEN(fdP, "r+");
2616 osi_Assert(file != NULL);
2617 vcp = &VnodeClassInfo[class];
2618 size = OS_SIZE(fdP->fd_fd);
2619 osi_Assert(size != -1);
2620 nVnodes = (size / vcp->diskSize) - 1;
2622 osi_Assert((nVnodes + 1) * vcp->diskSize == size);
2623 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
2627 for (vnodeIndex = 0;
2628 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2629 nVnodes--, vnodeIndex++) {
2630 if (vnode->type != vNull) {
2631 int vnodeChanged = 0;
2632 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2633 if (VNDISK_GET_INO(vnode) == 0) {
2635 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2636 memset(vnode, 0, vcp->diskSize);
2640 if (vcp->magic != vnode->vnodeMagic) {
2641 /* bad magic #, probably partially created vnode */
2643 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2644 vnodeNumber, afs_printable_uint32_lu(vnode->vnodeMagic),
2645 afs_printable_uint32_lu(vcp->magic));
2646 memset(vnode, 0, vcp->diskSize);
2650 Log("Partially allocated vnode %d deleted.\n",
2652 memset(vnode, 0, vcp->diskSize);
2656 /* ****** Should do a bit more salvage here: e.g. make sure
2657 * vnode type matches what it should be given the index */
2658 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2659 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2660 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2661 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2668 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2669 /* The following doesn't work, because the version number
2670 * is not maintained correctly by the file server */
2671 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2672 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2674 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2680 /* For RW volume, look for vnode with matching inode number;
2681 * if no such match, take the first determined by our sort
2683 struct ViceInodeInfo *lip = ip;
2684 int lnInodes = nInodes;
2686 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2687 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2696 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2697 /* "Matching" inode */
2701 vu = vnode->uniquifier;
2702 iu = ip->u.vnode.vnodeUniquifier;
2703 vd = vnode->dataVersion;
2704 id = ip->u.vnode.inodeDataVersion;
2706 * Because of the possibility of the uniquifier overflows (> 4M)
2707 * we compare them modulo the low 22-bits; we shouldn't worry
2708 * about mismatching since they shouldn't to many old
2709 * uniquifiers of the same vnode...
2711 if (IUnique(vu) != IUnique(iu)) {
2713 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2716 vnode->uniquifier = iu;
2717 #ifdef AFS_3DISPARES
2718 vnode->dataVersion = (id >= vd ?
2721 1887437 ? vd : id) :
2724 1887437 ? id : vd));
2726 #if defined(AFS_SGI_EXMAG)
2727 vnode->dataVersion = (id >= vd ?
2730 15099494 ? vd : id) :
2733 15099494 ? id : vd));
2735 vnode->dataVersion = (id > vd ? id : vd);
2736 #endif /* AFS_SGI_EXMAG */
2737 #endif /* AFS_3DISPARES */
2740 /* don't bother checking for vd > id any more, since
2741 * partial file transfers always result in this state,
2742 * and you can't do much else anyway (you've already
2743 * found the best data you can) */
2744 #ifdef AFS_3DISPARES
2745 if (!vnodeIsDirectory(vnodeNumber)
2746 && ((vd < id && (id - vd) < 1887437)
2747 || ((vd > id && (vd - id) > 1887437)))) {
2749 #if defined(AFS_SGI_EXMAG)
2750 if (!vnodeIsDirectory(vnodeNumber)
2751 && ((vd < id && (id - vd) < 15099494)
2752 || ((vd > id && (vd - id) > 15099494)))) {
2754 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2755 #endif /* AFS_SGI_EXMAG */
2758 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2759 vnode->dataVersion = id;
2764 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2767 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2769 VNDISK_SET_INO(vnode, ip->inodeNumber);
2774 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2776 VNDISK_SET_INO(vnode, ip->inodeNumber);
2779 VNDISK_GET_LEN(vnodeLength, vnode);
2780 if (ip->byteCount != vnodeLength) {
2783 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2788 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2789 VNDISK_SET_LEN(vnode, ip->byteCount);
2793 ip->linkCount--; /* Keep the inode around */
2796 } else { /* no matching inode */
2798 if (VNDISK_GET_INO(vnode) != 0
2799 || vnode->type == vDirectory) {
2800 /* No matching inode--get rid of the vnode */
2802 if (VNDISK_GET_INO(vnode)) {
2804 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2808 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2813 if (VNDISK_GET_INO(vnode)) {
2815 time_t serverModifyTime = vnode->serverModifyTime;
2816 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2820 time_t serverModifyTime = vnode->serverModifyTime;
2821 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2824 memset(vnode, 0, vcp->diskSize);
2827 /* Should not reach here because we checked for
2828 * (inodeNumber == 0) above. And where we zero the vnode,
2829 * we also goto vnodeDone.
2833 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2837 } /* VNDISK_GET_INO(vnode) != 0 */
2839 osi_Assert(!(vnodeChanged && check));
2840 if (vnodeChanged && !Testing) {
2841 osi_Assert(IH_IWRITE
2842 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2843 (char *)vnode, vcp->diskSize)
2845 salvinfo->VolumeChanged = 1; /* For break call back */
2856 struct VnodeEssence *
2857 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2860 struct VnodeInfo *vip;
2863 class = vnodeIdToClass(vnodeNumber);
2864 vip = &salvinfo->vnodeInfo[class];
2865 offset = vnodeIdToBitNumber(vnodeNumber);
2866 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite: give a directory its own fresh inode before it is modified.
 *
 * salvinfo  salvage job state; supplies the large vnode index handle, the
 *           partition device and path, and the VolumeChanged flag.
 * dir       directory about to be changed; dir->copied and the global
 *           Testing flag are tested first, so an already copied directory
 *           (or a Testing run) is presumably left untouched -- confirm.
 *
 * After flushing the directory buffers, the directory's vnode is read from
 * the large vnode index, a new inode is created with a bumped data
 * version, the old inode's contents are copied into it, the vnode is
 * pointed at the new inode and written back, and dir->dirHandle is
 * re-targeted at the new inode.  Every step is asserted to succeed.
 */
2870 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2872 /* Copy the directory unconditionally if we are going to change it:
2873 * not just if was cloned.
2875 struct VnodeDiskObject vnode;
2876 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2877 Inode oldinode, newinode;
2880 if (dir->copied || Testing)
2882 DFlush(); /* Well justified paranoia... */
2885 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2886 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2888 osi_Assert(code == sizeof(vnode));
2889 oldinode = VNDISK_GET_INO(&vnode);
2890 /* Increment the version number by a whole lot to avoid problems with
2891 * clients that were promised new version numbers--but the file server
2892 * crashed before the versions were written to disk.
2895 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2896 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2898 osi_Assert(VALID_INO(newinode));
2899 osi_Assert(CopyInode(salvinfo->fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2901 VNDISK_SET_INO(&vnode, newinode);
2903 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2904 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2906 osi_Assert(code == sizeof(vnode));
2908 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2909 salvinfo->fileSysDevice, newinode,
2910 &salvinfo->VolumeChanged);
2911 /* Don't delete the original inode right away, because the directory is
2912 * still being scanned.
2918 * This function should either successfully create a new dir, or give up
2919 * and leave things the way they were. In particular, if it fails to write
2920 * the new dir properly, it should return w/o changing the reference to the
2924 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2926 struct VnodeDiskObject vnode;
2927 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2928 Inode oldinode, newinode;
2933 afs_int32 parentUnique = 1;
2934 struct VnodeEssence *vnodeEssence;
2939 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2941 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2942 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2944 osi_Assert(lcode == sizeof(vnode));
2945 oldinode = VNDISK_GET_INO(&vnode);
2946 /* Increment the version number by a whole lot to avoid problems with
2947 * clients that were promised new version numbers--but the file server
2948 * crashed before the versions were written to disk.
2951 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2952 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2954 osi_Assert(VALID_INO(newinode));
2955 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2956 &salvinfo->VolumeChanged);
2958 /* Assign . and .. vnode numbers from dir and vnode.parent.
2959 * The uniquifier for . is in the vnode.
2960 * The uniquifier for .. might be set to a bogus value of 1 and
2961 * the salvager will later clean it up.
2963 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2964 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2967 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2969 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2974 /* didn't really build the new directory properly, let's just give up. */
2975 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2976 Log("Directory salvage returned code %d, continuing.\n", code);
2978 Log("also failed to decrement link count on new inode");
2982 Log("Checking the results of the directory salvage...\n");
2983 if (!DirOK(&newdir)) {
2984 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2985 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2986 osi_Assert(code == 0);
2990 VNDISK_SET_INO(&vnode, newinode);
2991 length = afs_dir_Length(&newdir);
2992 VNDISK_SET_LEN(&vnode, length);
2994 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2995 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2997 osi_Assert(lcode == sizeof(vnode));
3000 nt_sync(salvinfo->fileSysDevice);
3002 sync(); /* this is slow, but hopefully rarely called. We don't have
3003 * an open FD on the file itself to fsync.
3007 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
3009 /* make sure old directory file is really closed */
3010 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
3011 FDH_REALLYCLOSE(fdP);
3013 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
3014 osi_Assert(code == 0);
3015 dir->dirHandle = newdir;
/**
 * Context handed to JudgeEntry() through its opaque rock argument.
 */
struct judgeEntry_params {
    struct DirSummary *dir;    /**< the directory whose entries are being judged */
    struct SalvInfo *salvinfo; /**< state of the salvage job in progress */
};
/*
 * JudgeEntry: per-entry callback used while enumerating a directory
 * (SalvageDir passes it to afs_dir_EnumerateDir).
 *
 * arock        a struct judgeEntry_params carrying the directory summary
 *              and the salvage job state.
 * name         name of the directory entry.
 * vnodeNumber  vnode number the entry refers to.
 * unique       uniquifier stored in the entry (used throughout the body).
 *
 * Checks made on the entry, in order:
 *  - a vnode number with no VnodeEssence: the entry is invalid and is
 *    deleted;
 *  - without AFS_NAMEI_ENV, an entry whose vnode has inode number 0 is
 *    deleted;
 *  - an even vnode number whose essence has zero count, unique and
 *    modeBits is reported as invalid;
 *  - a uniquifier mismatch is repaired by re-creating the entry with the
 *    vnode's uniquifier, or the entry is deleted when the vnode's unique
 *    is 0 or the directory is orphaned ("." and ".." are skipped when the
 *    vnode's unique is 0);
 *  - "." and ".." are rewritten when they do not point where they should;
 *  - names starting with ".__afs" are deleted and the vnode is marked
 *    unclaimed and todelete;
 *  - any other entry: suid/sgid and root-owned files are reported on
 *    request, symlinks without execute bits are checked as mount points,
 *    the name is remembered for large vnodes, and the parent pointer is
 *    either adopted by this directory or the entry is deleted because the
 *    vnode already belongs elsewhere.
 * Finally the vnode is marked claimed and its link count is decremented.
 * Every directory change goes through CopyOnWrite() first.
 *
 * NOTE(review): the non-namei branch calls Delete() while every other
 * deletion here uses afs_dir_Delete() -- confirm Delete still exists.
 * NOTE(review): buf[strlen(buf)-1] indexes before buf when the mount
 * point contents are an empty string -- confirm a zero size cannot reach
 * that test.
 */
3027 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
3030 struct judgeEntry_params *params = arock;
3031 struct DirSummary *dir = params->dir;
3032 struct SalvInfo *salvinfo = params->salvinfo;
3033 struct VnodeEssence *vnodeEssence;
3034 afs_int32 dirOrphaned, todelete;
3036 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
3038 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3039 if (vnodeEssence == NULL) {
3041 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
3044 CopyOnWrite(salvinfo, dir);
3045 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3050 #ifndef AFS_NAMEI_ENV
3051 /* On AIX machines, don't allow entries to point to inode 0. That is a special
3052 * mount inode for the partition. If this inode were deleted, it would crash
3055 if (vnodeEssence->InodeNumber == 0) {
3056 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
3058 CopyOnWrite(salvinfo, dir);
3059 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3066 if (!(vnodeNumber & 1) && !Showmode
3067 && !(vnodeEssence->count || vnodeEssence->unique
3068 || vnodeEssence->modeBits)) {
3069 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
3070 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3071 vnodeNumber, unique,
3072 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3076 CopyOnWrite(salvinfo, dir);
3077 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3083 /* Check if the Uniquifiers match. If not, change the directory entry
3084 * so its unique matches the vnode unique. Delete if the unique is zero
3085 * or if the directory is orphaned.
3087 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3088 if (!vnodeEssence->unique
3089 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3090 /* This is an orphaned directory. Don't delete the . or ..
3091 * entry. Otherwise, it will get created in the next
3092 * salvage and deleted again here. So Just skip it.
3097 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3100 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3104 fid.Vnode = vnodeNumber;
3105 fid.Unique = vnodeEssence->unique;
3106 CopyOnWrite(salvinfo, dir);
3107 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3109 osi_Assert(afs_dir_Create(&dir->dirHandle, name, &fid) == 0);
3112 return 0; /* no need to continue */
3115 if (strcmp(name, ".") == 0) {
3116 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3118 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3121 CopyOnWrite(salvinfo, dir);
3122 osi_Assert(afs_dir_Delete(&dir->dirHandle, ".") == 0);
3123 fid.Vnode = dir->vnodeNumber;
3124 fid.Unique = dir->unique;
3125 osi_Assert(afs_dir_Create(&dir->dirHandle, ".", &fid) == 0);
3126 vnodeNumber = fid.Vnode; /* Get the new Essence */
3127 unique = fid.Unique;
3128 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3132 } else if (strcmp(name, "..") == 0) {
3135 struct VnodeEssence *dotdot;
3136 pa.Vnode = dir->parent;
3137 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3138 osi_Assert(dotdot != NULL); /* XXX Should not be assert */
3139 pa.Unique = dotdot->unique;
3141 pa.Vnode = dir->vnodeNumber;
3142 pa.Unique = dir->unique;
3144 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3146 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3148 CopyOnWrite(salvinfo, dir);
3149 osi_Assert(afs_dir_Delete(&dir->dirHandle, "..") == 0);
3150 osi_Assert(afs_dir_Create(&dir->dirHandle, "..", &pa) == 0);
3153 vnodeNumber = pa.Vnode; /* Get the new Essence */
3155 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3157 dir->haveDotDot = 1;
3158 } else if (strncmp(name, ".__afs", 6) == 0) {
3160 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3163 CopyOnWrite(salvinfo, dir);
3164 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3166 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3167 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3170 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3171 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3172 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3173 && !(vnodeEssence->modeBits & 0111)) {
3174 afs_sfsize_t nBytes;
3180 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3181 vnodeEssence->InodeNumber);
3184 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3188 size = FDH_SIZE(fdP);
3190 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3191 FDH_REALLYCLOSE(fdP);
3198 nBytes = FDH_PREAD(fdP, buf, size, 0);
3199 if (nBytes == size) {
3201 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3202 Log("Volume %u (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3203 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3204 Testing ? "would convert" : "converted");
3205 vnodeEssence->modeBits |= 0111;
3206 vnodeEssence->changed = 1;
3207 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3208 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3209 dir->name ? dir->name : "??", name, buf);
3211 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3212 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3214 FDH_REALLYCLOSE(fdP);
3217 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3218 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3219 if (vnodeIdToClass(vnodeNumber) == vLarge
3220 && vnodeEssence->name == NULL) {
3222 if ((n = (char *)malloc(strlen(name) + 1)))
3224 vnodeEssence->name = n;
3227 /* The directory entry points to the vnode. Check to see if the
3228 * vnode points back to the directory. If not, then let the
3229 * directory claim it (else it might end up orphaned). Vnodes
3230 * already claimed by another directory are deleted from this
3231 * directory: hardlinks to the same vnode are not allowed
3232 * from different directories.
3234 if (vnodeEssence->parent != dir->vnodeNumber) {
3235 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3236 /* Vnode does not point back to this directory.
3237 * Orphaned dirs cannot claim a file (it may belong to
3238 * another non-orphaned dir).
3241 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3243 vnodeEssence->parent = dir->vnodeNumber;
3244 vnodeEssence->changed = 1;
3246 /* Vnode was claimed by another directory */
3249 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3250 } else if (vnodeNumber == 1) {
3251 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3253 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3257 CopyOnWrite(salvinfo, dir);
3258 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3263 /* This directory claims the vnode */
3264 vnodeEssence->claimed = 1;
3266 vnodeEssence->count--;
3271 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3272 VnodeClass class, Inode ino, Unique * maxu)
3274 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3275 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3276 char buf[SIZEOF_LARGEDISKVNODE];
3277 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3279 StreamHandle_t *file;
3284 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3285 fdP = IH_OPEN(vip->handle);
3286 osi_Assert(fdP != NULL);
3287 file = FDH_FDOPEN(fdP, "r+");
3288 osi_Assert(file != NULL);
3289 size = OS_SIZE(fdP->fd_fd);
3290 osi_Assert(size != -1);
3291 vip->nVnodes = (size / vcp->diskSize) - 1;
3292 if (vip->nVnodes > 0) {
3293 osi_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3294 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
3295 osi_Assert((vip->vnodes = (struct VnodeEssence *)
3296 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3297 if (class == vLarge) {
3298 osi_Assert((vip->inodes = (Inode *)
3299 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3308 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3309 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3310 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3311 nVnodes--, vnodeIndex++) {
3312 if (vnode->type != vNull) {
3313 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3314 afs_fsize_t vnodeLength;
3315 vip->nAllocatedVnodes++;
3316 vep->count = vnode->linkCount;
3317 VNDISK_GET_LEN(vnodeLength, vnode);
3318 vep->blockCount = nBlocks(vnodeLength);
3319 vip->volumeBlockCount += vep->blockCount;
3320 vep->parent = vnode->parent;
3321 vep->unique = vnode->uniquifier;
3322 if (*maxu < vnode->uniquifier)
3323 *maxu = vnode->uniquifier;
3324 vep->modeBits = vnode->modeBits;
3325 vep->InodeNumber = VNDISK_GET_INO(vnode);
3326 vep->type = vnode->type;
3327 vep->author = vnode->author;
3328 vep->owner = vnode->owner;
3329 vep->group = vnode->group;
3330 if (vnode->type == vDirectory) {
3331 if (class != vLarge) {
3332 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3333 vip->nAllocatedVnodes--;
3334 memset(vnode, 0, sizeof(*vnode));
3335 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3336 vnodeIndexOffset(vcp, vnodeNumber),
3337 (char *)&vnode, sizeof(vnode));
3338 salvinfo->VolumeChanged = 1;
3340 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
3349 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3352 struct VnodeEssence *parentvp;
3358 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3359 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3360 strcat(path, OS_DIRSEP);
3361 strcat(path, vp->name);
3367 /* To determine if a vnode is orhpaned or not, the vnode and all its parent
3368 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
3371 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3373 struct VnodeEssence *vep;
3376 return (1); /* Vnode zero does not exist */
3378 return (0); /* The root dir vnode is always claimed */
3379 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3380 if (!vep || !vep->claimed)
3381 return (1); /* Vnode is not claimed - it is orphaned */
3383 return (IsVnodeOrphaned(salvinfo, vep->parent));
/*
 * SalvageDir: salvage the directory held in slot i of the large vnode
 * table, after first salvaging its parent directory.
 *
 * salvinfo      salvage job state.
 * name          volume name; handed on unchanged to the recursive call.
 * rwVid         id of the read/write volume, used when releasing inodes.
 * dirVnodeInfo  large vnode table; vnodes[i] and inodes[i] describe the
 *               directory.
 * alinkH        link table handle stored in the directory summary.
 * i             slot of the directory in dirVnodeInfo.
 * rootdir       receives a copy of the directory summary when the
 *               directory is vnode 1 with unique 1.
 * rootdirfound  out-parameter; NOTE(review): presumably set alongside
 *               rootdir -- confirm.
 *
 * A slot already marked salvaged, or one with no directory inode, is
 * skipped.  The root directory (vnode 1) has a non-zero parent reset to
 * 0; any other directory whose parent is not yet salvaged triggers a
 * recursive call for that parent first.  The directory is then checked
 * with DirOK() (skipped when RebuildDirs is set and Testing is not),
 * rebuilt with CopyAndSalvage() when needed, and every entry is passed to
 * JudgeEntry().  If the directory was copied during judging (and Testing
 * is off) the old inode is released and the new inode number recorded.
 *
 * The directory summary, handle and path buffer are static, so this
 * function is not reentrant beyond its own parent-first recursion.
 */
3387 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3388 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3389 struct DirSummary *rootdir, int *rootdirfound)
3391 static struct DirSummary dir;
3392 static struct DirHandle dirHandle;
3393 struct VnodeEssence *parent;
3394 static char path[MAXPATHLEN];
3397 if (dirVnodeInfo->vnodes[i].salvaged)
3398 return; /* already salvaged */
3401 dirVnodeInfo->vnodes[i].salvaged = 1;
3403 if (dirVnodeInfo->inodes[i] == 0)
3404 return; /* Not allocated to a directory */
3406 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3407 if (dirVnodeInfo->vnodes[i].parent) {
3408 Log("Bad parent, vnode 1; %s...\n",
3409 (Testing ? "skipping" : "salvaging"));
3410 dirVnodeInfo->vnodes[i].parent = 0;
3411 dirVnodeInfo->vnodes[i].changed = 1;
3414 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3415 if (parent && parent->salvaged == 0)
3416 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3417 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3418 rootdir, rootdirfound);
3421 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3422 dir.unique = dirVnodeInfo->vnodes[i].unique;
3425 dir.parent = dirVnodeInfo->vnodes[i].parent;
3426 dir.haveDot = dir.haveDotDot = 0;
3427 dir.ds_linkH = alinkH;
3428 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3429 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
3431 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3434 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3435 (Testing ? "skipping" : "salvaging"));
3438 CopyAndSalvage(salvinfo, &dir);
3440 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3443 dirHandle = dir.dirHandle;
3446 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3447 &dirVnodeInfo->vnodes[i], path);
3450 /* If enumeration failed for random reasons, we will probably delete
3451 * too much stuff, so we guard against this instead.
3453 struct judgeEntry_params judge_params;
3454 judge_params.salvinfo = salvinfo;
3455 judge_params.dir = &dir;
3457 osi_Assert(afs_dir_EnumerateDir(&dirHandle, JudgeEntry,
3458 &judge_params) == 0);
3461 /* Delete the old directory if it was copied in order to salvage.
3462 * CopyOnWrite has written the new inode # to the disk, but we still
3463 * have the old one in our local structure here. Thus, we idec the
3467 if (dir.copied && !Testing) {
3468 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3469 osi_Assert(code == 0);
3470 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3473 /* Remember rootdir DirSummary _after_ it has been judged */
3474 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3475 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3483 * Get a new FID that can be used to create a new file.
3485 * @param[in] volHeader vol header for the volume
3486 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3487 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3488 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3489 * updated to the new max unique if we create a new
/* Pick a vnode slot of the given class for a new file and fill in afid->Vnode
 * and afid->Unique. The slot table is scanned for an entry whose type is
 * vNull; when the scan index reaches nVnodes the table is grown by one entry. */
3493 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3494 VnodeClass class, AFSFid *afid, Unique *maxunique)
3497 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3498 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
3502 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3503 /* no free vnodes; make a new one */
3504 salvinfo->vnodeInfo[class].nVnodes++;
3505 salvinfo->vnodeInfo[class].vnodes =
3506 realloc(salvinfo->vnodeInfo[class].vnodes,
3507 sizeof(struct VnodeEssence) * (i+1));
/* NOTE(review): the realloc() result above is dereferenced here without a
 * NULL check, and only .type of the new entry is initialised on the lines
 * shown -- confirm the remaining fields are set by the caller. */
3509 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
3512 afid->Vnode = bitNumberToVnodeNumber(i, class);
/* Choose the uniquifier: when the header value is not above *maxunique, jump
 * 2000 past *maxunique + 1; otherwise take the header value and advance it. */
3514 if (volHeader->uniquifier < (*maxunique + 1)) {
3515 /* header uniq is bad; it will get bumped by 2000 later */
3516 afid->Unique = *maxunique + 1 + 2000;
3519 /* header uniq seems okay; just use that */
3520 afid->Unique = *maxunique = volHeader->uniquifier++;
3525 * Create a vnode for a README file explaining not to use a recreated-root vol.
3527 * @param[in] volHeader vol header for the volume
3528 * @param[in] alinkH ihandle for i/o for the volume
3529 * @param[in] vid volume id
3530 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3531 * updated to the new max unique if we create a new
3533 * @param[out] afid FID for the new readme vnode
3534 * @param[out] ainode the inode for the new readme file
3536 * @return operation status
/* Create the readme file for a volume whose root directory was recreated.
 * A small-vnode FID is obtained from GetNewFID(), an inode is created and the
 * readme text written to it, and a matching disk vnode is written to the small
 * vnode index at that FID's offset. The in-memory VnodeEssence for the new
 * vnode is then filled in and the inode number is stored through ainode.
 * Every log message emitted here carries the "CreateReadme" prefix, and the
 * short-write message reports `length`, the byte count actually requested
 * (sizeof(readme) minus the trailing NUL). */
3541 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3542 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3546 struct VnodeDiskObject *rvnode = NULL;
3548 IHandle_t *readmeH = NULL;
3549 struct VnodeEssence *vep;
3551 time_t now = time(NULL);
3553 /* Try to make the note brief, but informative. Only administrators should
3554 * be able to read this file at first, so we can hopefully assume they
3555 * know what AFS is, what a volume is, etc. */
3557 "This volume has been salvaged, but has lost its original root directory.\n"
3558 "The root directory that exists now has been recreated from orphan files\n"
3559 "from the rest of the volume. This recreated root directory may interfere\n"
3560 "with old cached data on clients, and there is no way the salvager can\n"
3561 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3562 "use this volume, but only copy the salvaged data to a new volume.\n"
3563 "Continuing to use this volume as it exists now may cause some clients to\n"
3564 "behave oddly when accessing this volume.\n"
3565 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3566 /* ^ the person reading this probably just lost some data, so they could
3567 * use some cheering up. */
3569 /* -1 for the trailing NUL */
3570 length = sizeof(readme) - 1;
3572 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
3574 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3576 /* create the inode and write the contents */
3577 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3578 salvinfo->fileSysPath, 0, vid,
3579 afid->Vnode, afid->Unique, 1);
3580 if (!VALID_INO(readmeinode)) {
3581 Log("CreateReadme: readme IH_CREATE failed\n");
3585 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3586 bytes = IH_IWRITE(readmeH, 0, readme, length);
3587 IH_RELEASE(readmeH);
3589 if (bytes != length) {
3590 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3591 (int)length);
3595 /* create the vnode and write it out */
3596 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
3598 Log("CreateReadme: error alloc'ing memory\n");
3602 rvnode->type = vFile;
3604 rvnode->modeBits = 0777;
3605 rvnode->linkCount = 1;
3606 VNDISK_SET_LEN(rvnode, length);
3607 rvnode->uniquifier = afid->Unique;
3608 rvnode->dataVersion = 1;
3609 VNDISK_SET_INO(rvnode, readmeinode);
3610 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3615 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
3617 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3618 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3619 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3621 if (bytes != SIZEOF_SMALLDISKVNODE) {
3622 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3623 (int)SIZEOF_SMALLDISKVNODE);
3627 /* update VnodeEssence for new readme vnode */
3628 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
3630 vep->blockCount = nBlocks(length);
3631 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3632 vep->parent = rvnode->parent;
3633 vep->unique = rvnode->uniquifier;
3634 vep->modeBits = rvnode->modeBits;
3635 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3636 vep->type = rvnode->type;
3637 vep->author = rvnode->author;
3638 vep->owner = rvnode->owner;
3639 vep->group = rvnode->group;
3649 *ainode = readmeinode;
3654 if (IH_DEC(alinkH, readmeinode, vid)) {
3655 Log("CreateReadme (recovery): IH_DEC failed\n");
3667 * create a root dir for a volume that lacks one.
3669 * @param[in] volHeader vol header for the volume
3670 * @param[in] alinkH ihandle for disk access for this volume group
3671 * @param[in] vid volume id we're dealing with
3672 * @param[out] rootdir populated with info about the new root dir
3673 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3674 * updated to the new max unique if we create a new
3677 * @return operation status
/* Build a new root directory (vnode 1, unique 1) for a volume that has none.
 * Nothing is created when the volume has no vnodes of either class or when
 * vnode 1 is already in use. Otherwise the readme is created through
 * CreateReadme(), a directory inode is created and given a README.ROOTDIR
 * entry, a large disk vnode is written at the vnode-1 offset of the large
 * vnode index, and both the VnodeEssence and the caller's rootdir summary are
 * filled in. */
3682 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3683 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
3687 int decroot = 0, decreadme = 0;
3688 AFSFid did, readmeid;
3691 struct VnodeDiskObject *rootvnode = NULL;
3692 struct acl_accessList *ACL;
3695 struct VnodeEssence *vep;
3696 Inode readmeinode = 0;
3697 time_t now = time(NULL);
3699 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3700 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3704 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3705 /* We don't have any large vnodes in the volume; allocate room
3706 * for one so we can recreate the root dir */
3707 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3708 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3709 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3711 osi_Assert(salvinfo->vnodeInfo[vLarge].vnodes);
3712 osi_Assert(salvinfo->vnodeInfo[vLarge].inodes);
/* vep and ip address the essence and inode slots that belong to vnode 1. */
3715 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3716 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3717 if (vep->type != vNull) {
3718 Log("Not creating new root dir; existing vnode 1 is non-null\n");
3722 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3723 &readmeinode) != 0) {
3728 /* set the DV to a very high number, so it is unlikely that we collide
3729 * with a cached DV */
3732 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3734 if (!VALID_INO(rootinode)) {
3735 Log("CreateRootDir: IH_CREATE failed\n");
3740 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3741 rootinode, &salvinfo->VolumeChanged);
/* The same FID (did) is passed for both FID arguments of afs_dir_MakeDir(). */
3745 if (afs_dir_MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3746 Log("CreateRootDir: MakeDir failed\n");
3749 if (afs_dir_Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3750 Log("CreateRootDir: Create failed\n");
3754 length = afs_dir_Length(&rootdir->dirHandle);
3755 DZap(&rootdir->dirHandle);
3757 /* create the new root dir vnode */
3758 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
/* NOTE(review): the message says "malloc" although the buffer above comes
 * from calloc(). */
3760 Log("CreateRootDir: malloc failed\n");
3764 /* only give 'rl' permissions to 'system:administrators'. We do this to
3765 * try to catch the attention of an administrator, that they should not
3766 * be writing to this directory or continue to use it. */
3767 ACL = VVnodeDiskACL(rootvnode);
3768 ACL->size = sizeof(struct acl_accessList);
3769 ACL->version = ACL_ACLVERSION;
3773 ACL->entries[0].id = -204; /* system:administrators */
3774 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
3776 rootvnode->type = vDirectory;
3777 rootvnode->cloned = 0;
3778 rootvnode->modeBits = 0777;
3779 rootvnode->linkCount = 2;
3780 VNDISK_SET_LEN(rootvnode, length);
3781 rootvnode->uniquifier = 1;
3782 rootvnode->dataVersion = dv;
3783 VNDISK_SET_INO(rootvnode, rootinode);
3784 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3785 rootvnode->author = 0;
3786 rootvnode->owner = 0;
3787 rootvnode->parent = 0;
3788 rootvnode->group = 0;
3789 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3791 /* write it out to disk */
3792 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3793 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3794 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3796 if (bytes != SIZEOF_LARGEDISKVNODE) {
3797 /* just cast to int and don't worry about printing real 64-bit ints;
3798 * a large disk vnode isn't anywhere near the 32-bit limit */
3799 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3800 (int)SIZEOF_LARGEDISKVNODE);
3804 /* update VnodeEssence for the new root vnode */
3805 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3807 vep->blockCount = nBlocks(length);
3808 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3809 vep->parent = rootvnode->parent;
3810 vep->unique = rootvnode->uniquifier;
3811 vep->modeBits = rootvnode->modeBits;
3812 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3813 vep->type = rootvnode->type;
3814 vep->author = rootvnode->author;
3815 vep->owner = rootvnode->owner;
3816 vep->group = rootvnode->group;
3826 /* update DirSummary for the new root vnode */
3827 rootdir->vnodeNumber = 1;
3828 rootdir->unique = 1;
3829 rootdir->haveDot = 1;
3830 rootdir->haveDotDot = 1;
3831 rootdir->rwVid = vid;
3832 rootdir->copied = 0;
3833 rootdir->parent = 0;
/* NOTE(review): the strdup() result is stored without a NULL check. */
3834 rootdir->name = strdup(".");
3835 rootdir->vname = volHeader->name;
3836 rootdir->ds_linkH = alinkH;
/* Recovery: each inode is released only when its flag (decroot / decreadme)
 * is set; a failing IH_DEC is logged. */
3843 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3844 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3846 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3847 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3857 * salvage a volume group.
3859 * @param[in] salvinfo information for the current salvage job
3860 * @param[in] rwIsp inode summary for rw volume
3861 * @param[in] alinkH link table inode handle
3863 * @return operation status
/* Salvage one read-write volume: read and sanity-check its header, distil both
 * vnode indexes, salvage every directory vnode, deal with orphaned vnodes,
 * rewrite the vnodes that changed, and write the corrected volume header back.
 * The fileserver is asked to break callbacks (or fsstate.dat is unlinked) when
 * the volume changed and Testing is off. */
3867 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3869 /* This routine, for now, will only be called for read-write volumes */
3871 int BlocksInVolume = 0, FilesInVolume = 0;
3873 struct DirSummary rootdir, oldrootdir;
3874 struct VnodeInfo *dirVnodeInfo;
3875 struct VnodeDiskObject vnode;
3876 VolumeDiskData volHeader;
3878 int orphaned, rootdirfound = 0;
3879 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3880 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3881 struct VnodeEssence *vep;
3884 afs_sfsize_t nBytes;
3886 VnodeId LFVnode, ThisVnode;
3887 Unique LFUnique, ThisUnique;
/* Read the volume header; a short read, a bad magic number or a header still
 * flagged DESTROY_ME trips an osi_Assert. */
3891 vid = rwIsp->volSummary->header.id;
3892 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3893 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3894 osi_Assert(nBytes == sizeof(volHeader));
3895 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3896 osi_Assert(volHeader.destroyMe != DESTROY_ME);
3897 /* (should not have gotten this far with DESTROY_ME flag still set!) */
/* Distil the large vnode index first, then the small one; maxunique is passed
 * by address to both calls. */
3899 DistilVnodeEssence(salvinfo, vid, vLarge,
3900 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
3901 DistilVnodeEssence(salvinfo, vid, vSmall,
3902 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
/* Salvage every large (directory) vnode; SalvageDir() copies the root
 * directory's summary into rootdir when it reaches vnode 1. */
3904 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
3905 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3906 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
3907 &rootdir, &rootdirfound);
3910 nt_sync(salvinfo->fileSysDevice);
3912 sync(); /* This used to be done lower level, for every dir */
/* No root directory was found: when orphans are to be attached and Testing is
 * off, try to create a replacement root. */
3919 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
3921 Log("Cannot find root directory for volume %lu; attempting to create "
3922 "a new one\n", afs_printable_uint32_lu(vid));
3924 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
3929 salvinfo->VolumeChanged = 1;
3933 /* Parse each vnode looking for orphaned vnodes and
3934 * connect them to the tree as orphaned (if requested).
3936 oldrootdir = rootdir;
3937 for (class = 0; class < nVNODECLASSES; class++) {
3938 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
3939 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
3940 ThisVnode = bitNumberToVnodeNumber(v, class);
3941 ThisUnique = vep->unique;
3943 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3944 continue; /* Ignore unused, claimed, and root vnodes */
3946 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3947 * entry in this vnode had incremented the parent link count (In
3948 * JudgeEntry()). We need to go to the parent and decrement that
3949 * link count. But if the parent's unique is zero, then the parent
3950 * link count was not incremented in JudgeEntry().
3952 if (class == vLarge) { /* directory vnode */
/* NOTE(review): pv indexes vnodes[] below without a range check on the lines
 * shown -- confirm vep->parent has been validated before this point. */
3953 pv = vnodeIdToBitNumber(vep->parent);
3954 if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
3955 if (vep->parent == 1 && newrootdir) {
3956 /* this vnode's parent was the volume root, and
3957 * we just created the volume root. So, the parent
3958 * dir didn't exist during JudgeEntry, so the link
3959 * count was not inc'd there, so don't dec it here.
3965 salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
3971 continue; /* If no rootdir, can't attach orphaned files */
3973 /* Here we attach orphaned files and directories into the
3974 * root directory, LFVnode, making sure link counts stay correct.
3976 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3977 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3978 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3980 /* Update this orphaned vnode's info. Its parent info and
3981 * link count (do for orphaned directories and files).
3983 vep->parent = LFVnode; /* Parent is the root dir */
3984 vep->unique = LFUnique;
3987 vep->count--; /* Inc link count (root dir will pt to it) */
3989 /* If this orphaned vnode is a directory, change '..'.
3990 * The name of the orphaned dir/file is unknown, so we
3991 * build a unique name. No need to CopyOnWrite the directory
3992 * since it is not connected to tree in BK or RO volume and
3993 * won't be visible there.
3995 if (class == vLarge) {
3999 /* Remove and recreate the ".." entry in this orphaned directory */
4000 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
4001 salvinfo->vnodeInfo[class].inodes[v],
4002 &salvinfo->VolumeChanged);
4004 pa.Unique = LFUnique;
4005 osi_Assert(afs_dir_Delete(&dh, "..") == 0);
4006 osi_Assert(afs_dir_Create(&dh, "..", &pa) == 0);
4008 /* The original parent's link count was decremented above.
4009 * Here we increment the new parent's link count.
4011 pv = vnodeIdToBitNumber(LFVnode);
4012 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
4016 /* Go to the root dir and add this entry. The link count of the
4017 * root dir was incremented when ".." was created. Try 10 times.
4019 for (j = 0; j < 10; j++) {
4020 pa.Vnode = ThisVnode;
4021 pa.Unique = ThisUnique;
4023 snprintf(npath, sizeof npath, "%s.%u.%u",
4024 ((class == vLarge) ? "__ORPHANDIR__"
4025 : "__ORPHANFILE__"),
4026 ThisVnode, ThisUnique);
4028 CopyOnWrite(salvinfo, &rootdir);
4029 code = afs_dir_Create(&rootdir.dirHandle, npath, &pa);
4033 ThisUnique += 50; /* Try creating a different file */
4035 osi_Assert(code == 0);
4036 Log("Attaching orphaned %s to volume's root dir as %s\n",
4037 ((class == vLarge) ? "directory" : "file"), npath);
4039 } /* for each vnode in the class */
4040 } /* for each class of vnode */
4042 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
4044 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
4046 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
4048 osi_Assert(code == 0);
4049 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
4052 DFlush(); /* Flush the changes */
4053 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
4054 Log("Cannot attach orphaned files and directories: Root directory not found\n");
4055 orphans = ORPH_IGNORE;
4058 /* Write out all changed vnodes. Orphaned files and directories
4059 * will get removed here also (if requested).
4061 for (class = 0; class < nVNODECLASSES; class++) {
4062 afs_sfsize_t nVnodes = salvinfo->vnodeInfo[class].nVnodes;
4063 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
4064 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
4065 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
4066 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
4067 for (i = 0; i < nVnodes; i++) {
4068 struct VnodeEssence *vnp = &vnodes[i];
4069 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
4071 /* If the vnode is good but is unclaimed (not listed in
4072 * any directory entries), then it is orphaned.
4075 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
4076 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
/* The on-disk vnode is re-read and rewritten only when the essence was
 * changed or its link-count adjustment is non-zero. */
4080 if (vnp->changed || vnp->count) {
4083 IH_IREAD(salvinfo->vnodeInfo[class].handle,
4084 vnodeIndexOffset(vcp, vnodeNumber),
4085 (char *)&vnode, sizeof(vnode));
4086 osi_Assert(nBytes == sizeof(vnode));
4088 vnode.parent = vnp->parent;
4089 oldCount = vnode.linkCount;
/* vnp->count is subtracted from the stored link count; a non-zero value is
 * what the "link count incorrect" message below reports. */
4090 vnode.linkCount = vnode.linkCount - vnp->count;
4093 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4095 if (!vnp->todelete) {
4096 /* Orphans should have already been attached (if requested) */
4097 osi_Assert(orphans != ORPH_ATTACH);
4098 oblocks += vnp->blockCount;
4101 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4103 BlocksInVolume -= vnp->blockCount;
4105 if (VNDISK_GET_INO(&vnode)) {
4107 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4108 osi_Assert(code == 0);
4110 memset(&vnode, 0, sizeof(vnode));
4112 } else if (vnp->count) {
4114 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4117 vnode.modeBits = vnp->modeBits;
4120 vnode.dataVersion++;
4123 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4124 vnodeIndexOffset(vcp, vnodeNumber),
4125 (char *)&vnode, sizeof(vnode));
4126 osi_Assert(nBytes == sizeof(vnode));
4128 salvinfo->VolumeChanged = 1;
4132 if (!Showmode && ofiles) {
4133 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4135 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
/* Free the name string attached to each VnodeEssence. */
4139 for (class = 0; class < nVNODECLASSES; class++) {
4140 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4141 for (i = 0; i < vip->nVnodes; i++)
4142 if (vip->vnodes[i].name)
4143 free(vip->vnodes[i].name);
4150 /* Set correct resource utilization statistics */
4151 volHeader.filecount = FilesInVolume;
4152 volHeader.diskused = BlocksInVolume;
4154 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
4155 if (volHeader.uniquifier < (maxunique + 1)) {
4157 Log("Volume uniquifier is too low; fixed\n");
4158 /* Plus 2,000 in case there are workstations out there with
4159 * cached vnodes that have since been deleted
4161 volHeader.uniquifier = (maxunique + 1 + 2000);
4165 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4166 "Only use this salvaged volume to copy data to another volume; "
4167 "do not continue to use this volume (%lu) as-is.\n",
4168 afs_printable_uint32_lu(vid));
4171 if (!Testing && salvinfo->VolumeChanged) {
4172 #ifdef FSSYNC_BUILD_CLIENT
4173 if (salvinfo->useFSYNC) {
4174 afs_int32 fsync_code;
4176 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4178 Log("Error trying to tell the fileserver to break callbacks for "
4179 "changed volume %lu; error code %ld\n",
4180 afs_printable_uint32_lu(vid),
4181 afs_printable_int32_ld(fsync_code));
4183 salvinfo->VolumeChanged = 0;
4186 #endif /* FSSYNC_BUILD_CLIENT */
4188 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4189 if (!salvinfo->useFSYNC) {
4190 /* A volume's contents have changed, but the fileserver will not
4191 * break callbacks on the volume until it tries to load the vol
4192 * header. So, to reduce the amount of time a client could have
4193 * stale data, remove fsstate.dat, so the fileserver will init
4194 * callback state with all clients. This is a very coarse hammer,
4195 * and in the future we should just record which volumes have
4197 code = unlink(AFSDIR_SERVER_FSSTATE_FILEPATH);
4198 if (code && errno != ENOENT) {
4199 Log("Error %d when trying to unlink FS state file %s\n", errno,
4200 AFSDIR_SERVER_FSSTATE_FILEPATH);
4206 /* Turn off the inUse bit; the volume's been salvaged! */
4207 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4208 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4209 volHeader.inService = 1; /* allow service again */
/* needsCallback mirrors VolumeChanged as it stands here. NOTE(review): the
 * FSYNC branch above can reset VolumeChanged to 0; the condition guarding that
 * reset is not among the lines shown. */
4210 volHeader.needsCallback = (salvinfo->VolumeChanged != 0);
4211 volHeader.dontSalvage = DONT_SALVAGE;
4212 salvinfo->VolumeChanged = 0;
4214 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4215 osi_Assert(nBytes == sizeof(volHeader));
4218 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
4219 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
4220 FilesInVolume, BlocksInVolume);
/* Release the handles used above for the small and large vnode indexes. */
4223 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4224 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
/* Read the volume header through summary->volumeInfoHandle, clear inUse and
 * needsSalvaged, set inService and dontSalvage, and write the header back.
 * A short read or write, or a wrong magic number, trips an osi_Assert. */
4230 ClearROInUseBit(struct VolumeSummary *summary)
4232 IHandle_t *h = summary->volumeInfoHandle;
4233 afs_sfsize_t nBytes;
4235 VolumeDiskData volHeader;
4237 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4238 osi_Assert(nBytes == sizeof(volHeader));
4239 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
4240 volHeader.inUse = 0;
4241 volHeader.needsSalvaged = 0;
4242 volHeader.inService = 1;
4243 volHeader.dontSalvage = DONT_SALVAGE;
4245 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4246 osi_Assert(nBytes == sizeof(volHeader));
4251 * Possibly delete the volume.
4253 * deleteMe - Always do so, only a partial volume.
/* Delete a volume that cannot be kept. For a read-only volume, or any volume
 * when deleteMe is set, and only if its summary is not already marked deleted:
 * the reason is logged, the volume disk header is destroyed, the header file
 * is unlinked, the fileserver is told through AskDelete() when FSYNC is in
 * use, and volSummary->deleted is set. The `else if (!check)` branch logs the
 * failed salvage of a read-write volume and calls Abort(). */
4256 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4257 char *message, int deleteMe, int check)
4259 if (readOnly(isp) || deleteMe) {
4260 if (isp->volSummary && !isp->volSummary->deleted) {
4263 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
4265 Log("It will be deleted on this server (you may find it elsewhere)\n");
4268 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
4270 Log("it will be deleted instead. It should be recloned.\n");
4275 char filename[VMAXPATHLEN];
4276 VolumeExternalName_r(isp->volumeId, filename, sizeof(filename));
/* NOTE(review): sprintf() into path is unbounded; path's declaration is not
 * among the lines shown, so its size should be confirmed. */
4277 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, filename);
4279 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4281 Log("Error %ld destroying volume disk header for volume %lu\n",
4282 afs_printable_int32_ld(code),
4283 afs_printable_uint32_lu(isp->volumeId));
4286 /* make sure we actually delete the header file; ENOENT
4287 * is fine, since VDestroyVolumeDiskHeader probably already
4289 if (unlink(path) && errno != ENOENT) {
4290 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4292 if (salvinfo->useFSYNC) {
4293 AskDelete(salvinfo, isp->volumeId);
4295 isp->volSummary->deleted = 1;
4298 } else if (!check) {
4299 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
4301 Abort("Salvage of volume %u aborted\n", isp->volumeId);
4305 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4307 * Locks a volume on disk for salvaging.
4309 * @param[in] volumeId volume ID to lock
4311 * @return operation status
4313 * @retval -1 volume lock raced with a fileserver restart; all volumes must
4314 * be checked out and locked again
/* Take the on-disk lock for volumeId, confirm through FSYNC_VerifyCheckout()
 * that the volume is still checked out for salvage, and write
 * inUse = programType into the volume header. Lock failures and checkout
 * results other than SYNC_OK call Abort(), except SYNC_DENIED, which has its
 * own branch ("need to retry checking out volumes"). */
4319 LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
4324 /* should always be WRITE_LOCK, but keep the lock-type logic all
4325 * in one place, in VVolLockType. Params will be ignored, but
4326 * try to provide what we're logically doing. */
4327 locktype = VVolLockType(V_VOLUPD, 1);
4329 code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
/* Both Abort() calls below are fatal; EBUSY only selects the more specific
 * message. */
4331 if (code == EBUSY) {
4332 Abort("Someone else appears to be using volume %lu; Aborted\n",
4333 afs_printable_uint32_lu(volumeId));
4335 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4336 afs_printable_int32_ld(code),
4337 afs_printable_uint32_lu(volumeId));
4340 code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPartition->name, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4341 if (code == SYNC_DENIED) {
4342 /* need to retry checking out volumes */
4345 if (code != SYNC_OK) {
4346 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4347 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4350 /* set inUse = programType in the volume header to ensure that nobody
4351 * tries to use this volume again without salvaging, if we somehow crash
4352 * or otherwise exit before finishing the salvage.
4356 struct VolumeHeader header;
4357 struct VolumeDiskHeader diskHeader;
4358 struct VolumeDiskData volHeader;
4360 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
4365 DiskToVolumeHeader(&header, &diskHeader);
4367 IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
4368 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4369 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4375 volHeader.inUse = programType;
4377 /* If we can't re-write the header, bail out and error. We don't
4378 * assert when reading the header, since it's possible the
4379 * header isn't really there (when there's no data associated
4380 * with the volume; we just delete the vol header file in that
4381 * case). But if it's there enough that we can read it, but
4382 * somehow we cannot write to it to signify we're salvaging it,
4383 * we've got a big problem and we cannot continue. */
4384 osi_Assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
4391 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
/* Ask the fileserver (FSYNC_VOL_FORCE_ERROR) to force volumeId into an error
 * state. A result other than SYNC_OK is only logged. The FSYNC call sits
 * inside the AFS_DEMAND_ATTACH_FS / AFS_DEMAND_ATTACH_UTIL conditional. */
4394 AskError(struct SalvInfo *salvinfo, VolumeId volumeId)
4396 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4398 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4399 FSYNC_VOL_FORCE_ERROR, FSYNC_WHATEVER, NULL);
4400 if (code != SYNC_OK) {
4401 Log("AskError: failed to force volume %lu into error state; "
4402 "SYNC error code %ld (%s)\n", (long unsigned)volumeId,
4403 (long)code, SYNC_res2string(code));
4405 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
/* Ask the fileserver to take volumeId offline (FSYNC_VOL_OFF), making up to
 * three attempts. SYNC_DENIED and SYNC_BAD_COMMAND each have a dedicated
 * branch that logs and calls Abort(); other failures are logged, the FSYNC
 * client is torn down with FSYNC_clientFinis(), and the request is retried.
 * If the code is still not SYNC_OK after the loop, the salvage is aborted. */
4409 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
4414 memset(&res, 0, sizeof(res));
4416 for (i = 0; i < 3; i++) {
4417 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4418 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4420 if (code == SYNC_OK) {
4422 } else if (code == SYNC_DENIED) {
4424 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4426 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4427 Abort("Salvage aborted\n");
4428 } else if (code == SYNC_BAD_COMMAND) {
4429 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
/* The hints below depend on whether this binary was built with DAFS support;
 * the run-time condition that picks between each pair is not among the lines
 * shown. */
4432 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4433 Log("AskOffline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4435 Log("AskOffline: fileserver is DAFS but we are not.\n");
4438 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4439 Log("AskOffline: fileserver is not DAFS but we are.\n");
4441 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4444 Abort("Salvage aborted\n");
4447 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4448 FSYNC_clientFinis();
4452 if (code != SYNC_OK) {
4453 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4454 Abort("Salvage aborted\n");
/* Determine whether the fileserver has the DAFS extensions, caching the answer
 * in the file-scope isDAFS (initialised to -1). The probe is an
 * FSYNC_VOL_LISTVOLUMES request, repeated up to three times while it fails;
 * the SYNC_FLAG_DAFS_EXTENSIONS bit of the response flags decides the result.
 * NOTE(review): the function header is not among the lines shown here; the
 * name AskDAFS is taken from the log messages below. */
4458 /* don't want to pass around state; remember it here */
4459 static int isDAFS = -1;
4464 afs_int32 code = 1, i;
4466 /* we don't care if we race. the answer shouldn't change */
4470 memset(&res, 0, sizeof(res));
/* code starts at 1, so the loop body runs at least once and stops as soon as
 * a request returns 0. */
4472 for (i = 0; code && i < 3; i++) {
4473 code = FSYNC_VolOp(0, NULL, FSYNC_VOL_LISTVOLUMES, FSYNC_SALVAGE, &res);
4475 Log("AskDAFS: FSYNC_VOL_LISTVOLUMES failed with code %ld reason "
4476 "%ld (%s); trying again...\n", (long)code, (long)res.hdr.reason,
4477 FSYNC_reason2string(res.hdr.reason));
4478 FSYNC_clientFinis();
4484 Log("AskDAFS: could not determine DAFS-ness, assuming not DAFS\n");
4488 if ((res.hdr.flags & SYNC_FLAG_DAFS_EXTENSIONS)) {
/* Bring volumeId back online through AskOnline(). The volume's disk header is
 * read first; per the comment below, a volume that probably does not exist is
 * not brought back online (the test on `code` is not among the lines shown). */
4498 MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4500 struct VolumeDiskHeader diskHdr;
4502 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHdr);
4504 /* volume probably does not exist; no need to bring back online */
4507 AskOnline(salvinfo, volumeId);
/* Ask the fileserver to put volumeId back online (FSYNC_VOL_ON), making up to
 * three attempts. Failures are logged; no Abort() call appears on the lines
 * shown. After a failure that is neither SYNC_DENIED nor SYNC_BAD_COMMAND the
 * FSYNC client is torn down with FSYNC_clientFinis() before the next try. */
4511 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4515 for (i = 0; i < 3; i++) {
4516 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4517 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4519 if (code == SYNC_OK) {
4521 } else if (code == SYNC_DENIED) {
4522 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4523 } else if (code == SYNC_BAD_COMMAND) {
4524 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4526 Log("AskOnline: please make sure file server binaries are same version.\n");
4530 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
4531 FSYNC_clientFinis();
/* Tell the fileserver that volumeId is gone (FSYNC_VOL_DONE), making up to
 * three attempts. The response is cleared before every attempt. SYNC_FAILED
 * with reason FSYNC_UNKNOWN_VOLID or FSYNC_WRONG_PART is treated as "already
 * deleted". Other failures are logged and, for the generic case, the FSYNC
 * client is torn down with FSYNC_clientFinis() before retrying.
 * All messages are tagged "AskDelete:" so that log readers can tell these
 * failures apart from the ones reported by AskOnline(). */
4538 AskDelete(struct SalvInfo *salvinfo, VolumeId volumeId)
4543 for (i = 0; i < 3; i++) {
4544 memset(&res, 0, sizeof(res));
4545 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4546 FSYNC_VOL_DONE, FSYNC_SALVAGE, &res);
4548 if (code == SYNC_OK) {
4550 } else if (code == SYNC_DENIED) {
4551 Log("AskDelete: file server denied DONE request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4552 } else if (code == SYNC_BAD_COMMAND) {
4553 Log("AskDelete: fssync protocol mismatch (bad command word '%d')\n",
4556 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4557 Log("AskDelete: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4559 Log("AskDelete: fileserver is DAFS but we are not.\n");
4562 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4563 Log("AskDelete: fileserver is not DAFS but we are.\n");
4565 Log("AskDelete: please make sure fileserver, volserver and salvager binaries are same version.\n");
4569 } else if (code == SYNC_FAILED &&
4570 (res.hdr.reason == FSYNC_UNKNOWN_VOLID ||
4571 res.hdr.reason == FSYNC_WRONG_PART)) {
4572 /* volume is already effectively 'deleted' */
4576 Log("AskDelete: request for fileserver to delete volume failed; trying again...\n");
4577 FSYNC_clientFinis();
/*
 * CopyInode: copy the contents of inode1 into inode2 on `device`.
 * Both inodes are wrapped in handles with IH_INIT and opened with IH_OPEN;
 * each open result is asserted non-NULL before use, so a failed open of the
 * destination is reported by osi_Assert instead of handing a NULL handle to
 * FDH_PWRITE.  Data is read in chunks of at most sizeof(buf) bytes and each
 * chunk is written at the same offset (`size`) it was read from.  The loop
 * ends when FDH_PREAD returns a non-positive value, which is then asserted
 * to be exactly 0.  Both descriptors are closed with FDH_REALLYCLOSE.
 */
4584 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4586 /* Volume parameter is passed in case iopen is upgraded in future to
4587 * require a volume Id to be passed
4590 IHandle_t *srcH, *destH;
4591 FdHandle_t *srcFdP, *destFdP;
4593 afs_foff_t size = 0;
4595 IH_INIT(srcH, device, rwvolume, inode1);
4596 srcFdP = IH_OPEN(srcH);
4597 osi_Assert(srcFdP != NULL);
4598 IH_INIT(destH, device, rwvolume, inode2);
4599 destFdP = IH_OPEN(destH);
osi_Assert(destFdP != NULL);
4600 while ((nBytes = FDH_PREAD(srcFdP, buf, sizeof(buf), size)) > 0) {
4601 osi_Assert(FDH_PWRITE(destFdP, buf, nBytes, size) == nBytes);
4604 osi_Assert(nBytes == 0);
4605 FDH_REALLYCLOSE(srcFdP);
4606 FDH_REALLYCLOSE(destFdP);
/*
 * PrintInodeList: log one line per struct ViceInodeInfo record stored in
 * salvinfo->inodeFd.  The whole file (size taken from OS_SIZE) is read
 * into a malloc'd buffer in a single OS_READ, and the record count is the
 * file size divided by sizeof(struct ViceInodeInfo).
 * NOTE(review): if the file is empty, malloc(0) is allowed to return NULL,
 * which would trip the osi_Assert on buf -- confirm this cannot happen.
 */
4613 PrintInodeList(struct SalvInfo *salvinfo)
4615 struct ViceInodeInfo *ip;
4616 struct ViceInodeInfo *buf;
4619 afs_sfsize_t st_size;
4621 st_size = OS_SIZE(salvinfo->inodeFd);
4622 osi_Assert(st_size >= 0);
4623 buf = (struct ViceInodeInfo *)malloc(st_size);
4624 osi_Assert(buf != NULL);
4625 nInodes = st_size / sizeof(struct ViceInodeInfo);
4626 osi_Assert(OS_READ(salvinfo->inodeFd, buf, st_size) == st_size);
/* Walk the records; each line shows the inode number, link count, byte
 * count (hex) and the four u.param[] values. */
4627 for (ip = buf; nInodes--; ip++) {
4628 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
4629 PrintInode(stmp, ip->inodeNumber), ip->linkCount,
4630 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
4631 ip->u.param[2], ip->u.param[3]);
/*
 * PrintInodeSummary: log one line for each of the
 * salvinfo->nVolumesInInodeFile entries of salvinfo->inodeSummary[].
 * NOTE(review): the format string ends with the literal text "volSummary"
 * and has no conversion or argument for it -- looks like a leftover field;
 * confirm whether a value was meant to be printed.
 */
4637 PrintInodeSummary(struct SalvInfo *salvinfo)
4640 struct InodeSummary *isp;
4642 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4643 isp = &salvinfo->inodeSummary[i];
4644 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/*
 * Body fragment of the fork wrapper (named "Fork" in the comment below;
 * the signature is not part of this listing).  On NT the routine asserts
 * immediately.  Elsewhere, under AFS_DEMAND_ATTACH_FS, the branch for
 * f == 0 in a salvageServer program calls VChildProcReconnectFS_r() when
 * FSSYNC_BUILD_CLIENT is defined; the SALVSYNC_BUILD_CLIENT section's body
 * is not shown here.
 */
4654 osi_Assert(0); /* Fork is never executed in the NT code path */
4658 #ifdef AFS_DEMAND_ATTACH_FS
4659 if ((f == 0) && (programType == salvageServer)) {
4660 /* we are a salvageserver child */
4661 #ifdef FSSYNC_BUILD_CLIENT
4662 VChildProcReconnectFS_r();
4664 #ifdef SALVSYNC_BUILD_CLIENT
4668 #endif /* AFS_DEMAND_ATTACH_FS */
4669 #endif /* !AFS_NT40_ENV */
/*
 * Fragment of an exit path (the function's signature is not part of this
 * listing; presumably the salvager's exit wrapper -- confirm).  Under
 * AFS_DEMAND_ATTACH_FS a salvageServer program has SALVSYNC- and
 * FSSYNC-client specific sections whose bodies are not shown here.
 */
4679 #ifdef AFS_DEMAND_ATTACH_FS
4680 if (programType == salvageServer) {
4681 #ifdef SALVSYNC_BUILD_CLIENT
4684 #ifdef FSSYNC_BUILD_CLIENT
4688 #endif /* AFS_DEMAND_ATTACH_FS */
/* A thread other than main_thread ends only itself via pthread_exit, with
 * `code` cast to a pointer as the thread's exit value.
 * NOTE(review): if `code` is a plain int, the int-to-pointer cast may warn
 * on LP64 targets -- confirm its declared type. */
4691 if (main_thread != pthread_self())
4692 pthread_exit((void *)code);
/*
 * Fragment that reaps one child process (signature not part of this
 * listing).  wait() must succeed; a core dump is logged using the name in
 * `prog`; the final test is true when the child was killed by a signal or
 * exited with a non-zero status (what happens then is not shown here).
 * NOTE(review): WCOREDUMP is evaluated without first checking WIFSIGNALED.
 */
4705 pid = wait(&status);
4706 osi_Assert(pid != -1);
4707 if (WCOREDUMP(status))
4708 Log("\"%s\" core dumped!\n", prog);
4709 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp: format `clock` as local time into a function-static 20-byte
 * buffer, either as "MM/DD/YYYY HH:MM:SS" or as "MM/DD/YYYY HH:MM".  The
 * test on `precision` that chooses between the two formats is not visible
 * in this listing.  Because the buffer is static, every call overwrites
 * the text produced by the previous one.
 */
4715 TimeStamp(time_t clock, int precision)
4718 static char timestamp[20];
4719 lt = localtime(&clock);
4721 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M:%S", lt);
4723 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M", lt);
/*
 * CheckLogFile: rotate and open the salvager log.  The previous log at
 * log_path is renamed to "<log_path>.old", then log_path is opened in
 * append mode into the global logFile; per the comment below, stdout is
 * the fallback when the open fails.
 * NOTE(review): the ".old" name is built with strcpy/strcat into an
 * AFSDIR_PATH_MAX buffer without a length check -- confirm that callers
 * always pass a path short enough to leave room for the suffix.
 */
4728 CheckLogFile(char * log_path)
4730 char oldSlvgLog[AFSDIR_PATH_MAX];
4732 #ifndef AFS_NT40_ENV
4739 strcpy(oldSlvgLog, log_path);
4740 strcat(oldSlvgLog, ".old");
4742 renamefile(log_path, oldSlvgLog);
4743 logFile = afs_fopen(log_path, "a");
4745 if (!logFile) { /* still nothing, use stdout */
4749 #ifndef AFS_NAMEI_ENV
4750 AFS_DEBUG_IOPS_LOG(logFile);
/*
 * TimeStampLogFile (non-NT builds only): give the current log file a second,
 * timestamped name.  The new name is
 * "<log_path>.YYYY-MM-DD.HH:MM:SS", built from localtime(&now) with a
 * bounded snprintf, and is created as a hard link to log_path.
 */
4755 #ifndef AFS_NT40_ENV
4757 TimeStampLogFile(char * log_path)
4759 char stampSlvgLog[AFSDIR_PATH_MAX];
4764 lt = localtime(&now);
4765 snprintf(stampSlvgLog, sizeof stampSlvgLog,
4766 "%s.%04d-%02d-%02d.%02d:%02d:%02d", log_path,
4767 lt->tm_year + 1900, lt->tm_mon + 1, lt->tm_mday, lt->tm_hour,
4768 lt->tm_min, lt->tm_sec);
4770 /* try to link the logfile to a timestamped filename */
4771 /* if it fails, oh well, nothing we can do */
4772 link(log_path, stampSlvgLog);
/*
 * Fragment of the routine that displays the salvager log (signature not
 * part of this listing).  On non-NT builds there is a message for the case
 * where logging goes to syslog and the log therefore cannot be shown.
 * Otherwise AFSDIR_SERVER_SLVGLOG_FILEPATH is opened read-only and read
 * line by line with fgets; a message is printed when it cannot be read.
 */
4781 #ifndef AFS_NT40_ENV
4783 printf("Can't show log since using syslog.\n");
4794 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
4797 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
4800 while (fgets(line, sizeof(line), logFile))
/*
 * Log: printf-style logging routine.  The message is first formatted into
 * the local buffer tmp with a bounded vsnprintf.  On non-NT builds it can
 * be sent to syslog at LOG_INFO; it can also be written to logFile
 * prefixed with TimeStamp() of the current time (seconds precision).  The
 * conditions that choose between the two sinks are not visible in this
 * listing.
 */
4807 Log(const char *format, ...)
4813 va_start(args, format);
4814 vsnprintf(tmp, sizeof tmp, format, args);
4816 #ifndef AFS_NT40_ENV
4818 syslog(LOG_INFO, "%s", tmp);
4822 gettimeofday(&now, NULL);
4823 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort: printf-style routine that reports a fatal message.  The text is
 * formatted into tmp with a bounded vsnprintf and then sent to syslog at
 * LOG_INFO (non-NT builds) or written to logFile, in this case without a
 * timestamp prefix.  What happens after the message is emitted is not
 * visible in this listing.
 */
4829 Abort(const char *format, ...)
4834 va_start(args, format);
4835 vsnprintf(tmp, sizeof tmp, format, args);
4837 #ifndef AFS_NT40_ENV
4839 syslog(LOG_INFO, "%s", tmp);
4843 fprintf(logFile, "%s", tmp);
/*
 * ToString: allocate strlen(s) + 1 bytes with malloc and assert that the
 * allocation succeeded.  Presumably s is then copied into the new buffer
 * and the buffer returned -- those lines are not part of this listing.
 */
4855 ToString(const char *s)
4858 p = (char *)malloc(strlen(s) + 1);
4859 osi_Assert(p != NULL);
/*
 * RemoveTheForce: build "<path>/FORCESALVAGE" in target and, only when
 * Testing is off and ForceSalvage is set, unlink that file if afs_stat
 * reports that it exists.
 * NOTE(review): target is filled with strcpy/strcat and its declaration is
 * not visible here -- confirm it is large enough for path plus the suffix.
 */
4864 /* Remove the FORCESALVAGE file */
4866 RemoveTheForce(char *path)
4869 struct afs_stat_st force; /* so we can use afs_stat to find it */
4870 strcpy(target,path);
4871 strcat(target,"/FORCESALVAGE");
4872 if (!Testing && ForceSalvage) {
4873 if (afs_stat(target,&force) == 0) unlink(target);
/*
 * Variant of UseTheForceLuke compiled when AFS_AIX32_ENV is not defined:
 * returns non-zero exactly when afs_stat succeeds on
 * "<path>/FORCESALVAGE".
 * NOTE(review): target is filled with strcpy/strcat and its declaration is
 * not visible here -- confirm it is large enough for path plus the suffix.
 */
4877 #ifndef AFS_AIX32_ENV
4879 * UseTheForceLuke - see if we can use the force
4882 UseTheForceLuke(char *path)
4884 struct afs_stat_st force;
4886 strcpy(target,path);
4887 strcat(target,"/FORCESALVAGE");
4889 return (afs_stat(target, &force) == 0);
/*
 * Second variant of UseTheForceLuke (the alternative to the
 * !AFS_AIX32_ENV one).  It returns 0; the visible part of the routine does
 * not look for a FORCESALVAGE file, and the original comment lines below
 * say that ListViceInodes() sets ForceSalvage as a side effect instead.
 */
4893 * UseTheForceLuke - see if we can use the force
4896 * The VRMIX fsck will not muck with the filesystem it is supposedly
4897 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4898 * muck directly with the root inode, which is within the normal
4900 * ListViceInodes() has a side effect of setting ForceSalvage if
4901 * it detects a need, based on root inode examination.
4904 UseTheForceLuke(char *path)
4907 return 0; /* sorry OB1 */
4912 /* NT support routines */
/*
 * execpathname caches the path of the running executable; it is filled in
 * on the first call to nt_SalvagePartition (while its first byte is still
 * 0) and reused afterwards.
 */
4914 static char execpathname[MAX_PATH];
/*
 * nt_SalvagePartition: spawn a child copy of this executable to salvage
 * partName as job number jobn.  The child receives a childJob_t-style
 * record (magic SALVAGER_MAGIC, job number, partition name) through
 * spawnprocveb together with save_args.
 *
 * GetModuleFileName is given MAX_PATH - 1 as the buffer size, so a return
 * value of 0 means failure and a return value equal to that size means the
 * path was truncated; both are treated as errors.  The truncation test is
 * expressed in terms of the size actually passed rather than a literal, so
 * it cannot drift out of step with the buffer.
 * NOTE(review): partName is copied into job.cj_part with an unbounded
 * strcpy; the field's size is not visible here -- confirm it is sufficient.
 */
4916 nt_SalvagePartition(char *partName, int jobn)
4921 if (!*execpathname) {
4922 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
4923 if (!n || n >= MAX_PATH - 1)
4926 job.cj_magic = SALVAGER_MAGIC;
4927 job.cj_number = jobn;
4928 (void)strcpy(job.cj_part, partName);
4929 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage: interpret datap as a childJob_t record.  The
 * record is checked for the exact size (len == sizeof(childJob_t)) and for
 * the SALVAGER_MAGIC value; the handling of a failed check is not visible
 * in this listing.  A per-job log name of the form
 * "<AFSDIR_SERVER_SLVGLOG_FILEPATH>.<number>" is then built and opened for
 * writing into the global logFile.
 */
4934 nt_SetupPartitionSalvage(void *datap, int len)
4936 childJob_t *jobp = (childJob_t *) datap;
4937 char logname[AFSDIR_PATH_MAX];
4939 if (len != sizeof(childJob_t))
4941 if (jobp->cj_magic != SALVAGER_MAGIC)
4946 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
4948 logFile = afs_fopen(logname, "w");
4956 #endif /* AFS_NT40_ENV */