2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
89 #include <afs/procmgmt.h>
92 #ifdef HAVE_SYS_FILE_H
93 # include <sys/file.h>
97 #include <WINNT/afsevent.h>
100 #define WCOREDUMP(x) ((x) & 0200)
103 #ifdef AFS_PTHREAD_ENV
104 # include <opr/lock.h>
107 #include <afs/afsint.h>
108 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
109 #if defined(AFS_VFSINCL_ENV)
110 #include <sys/vnode.h>
112 #include <sys/fs/ufs_inode.h>
114 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
115 #include <ufs/ufs/dinode.h>
116 #include <ufs/ffs/fs.h>
118 #include <ufs/inode.h>
121 #else /* AFS_VFSINCL_ENV */
122 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_DARWIN_ENV)
123 #include <sys/inode.h>
125 #endif /* AFS_VFSINCL_ENV */
126 #endif /* AFS_SGI_ENV */
129 #include <sys/lockf.h>
132 #include <checklist.h>
134 #if defined(AFS_SGI_ENV)
137 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
139 #include <sys/mnttab.h>
140 #include <sys/mntent.h>
145 #endif /* AFS_SGI_ENV */
146 #endif /* AFS_HPUX_ENV */
150 #include <afs/osi_inode.h>
154 #include <afs/afsutil.h>
155 #include <afs/fileutil.h>
156 #include <rx/rx_queue.h>
161 #include <afs/afssyscalls.h>
165 #include "partition.h"
166 #include "daemon_com.h"
167 #include "daemon_com_inline.h"
169 #include "fssync_inline.h"
170 #include "volume_inline.h"
171 #include "salvsync.h"
172 #include "viceinode.h"
174 #include "volinodes.h" /* header magic number, etc. stuff */
175 #include "vol-salvage.h"
177 #include "vol_internal.h"
179 #include <afs/prs_fs.h>
181 #ifdef FSSYNC_BUILD_CLIENT
182 #include "vg_cache.h"
189 #define SALV_BUFFER_SIZE 1024 /* size of the buffer used to copy per-job log files line by line */
191 static char *TimeStamp(char *buffer, size_t size, time_t clock, int precision);
194 int debug; /* -d flag */
195 extern int Testing; /* -n flag */
196 int ListInodeOption; /* -i flag */
197 int ShowRootFiles; /* -r flag */
198 int RebuildDirs; /* -sal flag */
199 int Parallel = 4; /* -para X flag; SalvageFileSysParallel waits once this many jobs are running */
200 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
201 int forceR = 0; /* -b flag */
202 int ShowLog = 0; /* -showlog flag */
203 char *ShowLogFilename = NULL; /* log file name for -showlog */
204 int ShowSuid = 0; /* -showsuid flag */
205 int ShowMounts = 0; /* -showmounts flag */
206 int orphans = ORPH_IGNORE; /* -orphans option */
208 int ClientMode = 0; /* running as salvager server client */
216 #define MAXPARALLEL 32 /* number of slots in the jobs[] table of SalvageFileSysParallel */
218 int OKToZap; /* -o flag */
219 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
220 * in the volume header */
222 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
225 * information that is 'global' to a particular salvage job.
228 Device fileSysDevice; /**< The device number of the current partition
230 char fileSysPath[9]; /**< The path of the mounted partition currently
231 * being salvaged, i.e. the directory containing
232 * the volume headers */
233 char *fileSysPathName; /**< NT needs this to make the name pretty in the log. */
234 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
235 int VGLinkH_cnt; /**< # of references to link handle. */
236 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
239 char *fileSysDeviceName; /**< The block device where the file system being
240 * salvaged was mounted */
241 char *filesysfulldev; /**< set from the wpath buffer handed to get_DevName() */
243 int VolumeChanged; /**< Set by any routine which would change the
244 * volume in a way which would require callbacks
245 * to be broken if the volume was put back
246 * on line by an active file server */
248 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
249 * header dealt with */
251 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
252 FD_t inodeFd; /**< File descriptor for inode file */
254 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
255 int nVolumes; /**< Number of volumes (read-write and read-only)
256 * in volume summary */
257 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
260 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
261 * vnodes in the volume that
262 * we are currently looking
264 int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
265 * to contact the fileserver over FSYNC */
272 /* Forward declarations */
273 static void QuietExit(int) AFS_NORETURN;
274 static void SalvageShowLog(void);
275 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
276 static int AskVolumeSummary(struct SalvInfo *salvinfo,
277 VolumeId singleVolumeNumber);
278 static void MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId);
279 static void AskError(struct SalvInfo *salvinfo, VolumeId volumeId);
281 #ifdef AFS_DEMAND_ATTACH_FS
282 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
283 #endif /* AFS_DEMAND_ATTACH_FS */
285 /* Uniquifier stored in the Inode */
290 return (u & 0x3fffff);
292 #if defined(AFS_SGI_EXMAG)
293 return (u & SGI_UNIQMASK);
296 #endif /* AFS_SGI_EXMAG */
303 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
305 return 0; /* otherwise may be transient, e.g. EMFILE */
310 char *save_args[MAX_ARGS];
312 extern pthread_t main_thread;
313 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
317 * Get the salvage lock if not already held. Hold until process exits.
319 * @param[in] locktype READ_LOCK or WRITE_LOCK
/*
 * Take the salvage lock file (AFSDIR_SERVER_SLVGLOCK_FILEPATH) in the mode
 * given by locktype.  The two wrappers in this file pass WRITE_LOCK and
 * READ_LOCK respectively.
 */
322 _ObtainSalvageLock(int locktype)
324 struct VLockFile salvageLock;
/* Point the lock object at the salvage lock file before locking it. */
329 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
/* offset and nonblock are assigned on lines that are not visible in this view. */
331 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
/* Message reporting that another salvager appears to be running. */
334 "salvager: There appears to be another salvager running! "
/* Message for any other locking failure; %d carries an error number. */
339 "salvager: Error %d trying to acquire salvage lock! "
/* Acquire the salvage lock in WRITE_LOCK mode through _ObtainSalvageLock(). */
345 ObtainSalvageLock(void)
347 _ObtainSalvageLock(WRITE_LOCK);
/* Acquire the salvage lock in READ_LOCK mode through _ObtainSalvageLock(). */
350 ObtainSharedSalvageLock(void)
352 _ObtainSalvageLock(READ_LOCK);
356 #ifdef AFS_SGI_XFS_IOPS_ENV
357 /* Check if the given partition is mounted. For XFS, the root inode is not a
358 * constant. So we check the hard way.
/*
 * Look for part among the mnt_dir fields of the entries in the MOUNTED
 * table.
 *
 * part: mount directory to look for.
 * Returns 1 when an entry with that mnt_dir was found, 0 otherwise.
 */
361 IsPartitionMounted(char *part)
364 struct mntent *mntent;
366 opr_Verify(mntfp = setmntent(MOUNTED, "r"));
/* getmntent() yields NULL at the end of the table, so mntent is NULL after
 * the loop when nothing matched.  The statement that leaves the loop on a
 * match is presumably on a line not visible here -- confirm. */
367 while (mntent = getmntent(mntfp)) {
368 if (!strcmp(part, mntent->mnt_dir))
/* Both arms of this conditional used to be 1, which reported every path as
 * mounted regardless of the search result. */
373 return mntent ? 1 : 0;
376 /* Check if the given inode is the root of the filesystem. */
377 #ifndef AFS_SGI_XFS_IOPS_ENV
/* Return nonzero when the inode number in status equals ROOTINODE. */
379 IsRootInode(struct afs_stat_st *status)
382 * The root inode is not a fixed value in XFS partitions. So we need to
383 * see if the partition is in the list of mounted partitions. This only
384 * affects the SalvageFileSys path, so we check there.
386 return (status->st_ino == ROOTINODE);
391 #ifndef AFS_NAMEI_ENV
392 /* We don't want to salvage big files filesystems, since we can't put volumes on
/*
 * Read the superblock of the device behind a partition and log a refusal to
 * salvage when it cannot be read or when IsBigFilesFileSystem() accepts it.
 *
 * mountPoint: partition mount point (used on lines not visible here,
 *             presumably as the argument of the log messages -- confirm).
 * devName:    device name, with or without a leading "/dev/".
 */
396 CheckIfBigFilesFS(char *mountPoint, char *devName)
398 struct superblock fs;
/* Make sure the name handed to ReadSuper() starts with "/dev/".
 * NOTE(review): both copies into name are unbounded and the size of name is
 * not visible here -- confirm devName always fits. */
401 if (strncmp(devName, "/dev/", 5)) {
402 (void)sprintf(name, "/dev/%s", devName);
404 (void)strcpy(name, devName);
407 if (ReadSuper(&fs, name) < 0) {
408 Log("Unable to read superblock. Not salvaging partition %s.\n",
412 if (IsBigFilesFileSystem(&fs)) {
413 Log("Partition %s is a big files filesystem, not salvaging.\n",
423 #define HDSTR "\\Device\\Harddisk"
424 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
/*
 * Decide whether two partitions live on the same disk by comparing the
 * strings QueryDosDevice() reports for their device names.
 *
 * Returns nonzero when the two strings are equal ignoring case (compared
 * over at most RES_LEN - 1 characters).  The values returned on the early
 * exits are on lines not visible here.
 */
426 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
/* dowarn starts at 1; presumably it limits the warnings below to one
 * occurrence -- the lines that test and clear it are not visible. */
432 static int dowarn = 1;
434 if (!QueryDosDevice(p1->devName, res1, RES_LEN - 1))
/* A result that does not begin with HDSTR is unexpected and gets logged. */
436 if (strncmp(res1, HDSTR, HDLEN)) {
439 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
440 res1, HDSTR, p1->devName);
443 if (!QueryDosDevice(p2->devName, res2, RES_LEN - 1))
445 if (strncmp(res2, HDSTR, HDLEN)) {
448 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
449 res2, HDSTR, p2->devName);
453 return (0 == _strnicmp(res1, res2, RES_LEN - 1));
456 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
459 /* This assumes that two partitions with the same device number divided by
460 * PartsPerDisk are on the same disk.
/*
 * Queue one partition for salvage and keep the salvage jobs running.
 *
 * partP: partition to salvage, or NULL to wait for every running job.
 *
 * Jobs for partitions on the same disk (per SameDisk) are chained through
 * nextjob, so jobs[] holds one chain per disk.  When numjobs has reached
 * Parallel, or when partP is NULL, the function waits for a job to exit
 * before going on.  After the final wait, with the log going to a file, the
 * per-job log files are appended to the main log and unlinked.
 */
463 SalvageFileSysParallel(struct DiskPartition64 *partP)
466 struct DiskPartition64 *partP;
467 int pid; /* Pid for this job */
468 int jobnumb; /* Log file job number */
469 struct job *nextjob; /* Next partition on disk to salvage */
471 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
472 struct job *thisjob = 0;
/* numjobs counts the occupied jobs[] slots; jobcount supplies the job number
 * stored in each new job and bounds the log collection loop at the end. */
473 static int numjobs = 0;
474 static int jobcount = 0;
482 /* We have a partition to salvage. Copy it into thisjob */
483 thisjob = calloc(1, sizeof(struct job));
485 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
488 thisjob->partP = partP;
489 thisjob->jobnumb = jobcount;
491 } else if (jobcount == 0) {
492 /* We are asking to wait for all jobs (partp == 0), yet we never
495 Log("No file system partitions named %s* found; not salvaged\n",
496 VICE_PARTITION_PREFIX);
500 if (debug || Parallel == 1) {
502 SalvageFileSys(thisjob->partP, 0);
509 /* Check to see if thisjob is for a disk that we are already
510 * salvaging. If it is, link it in as the next job to do. The
511 * jobs array has 1 entry per disk being salvaged. numjobs is
512 * the total number of disks currently being salvaged. In
513 * order to keep the jobs array compact, when a disk is
514 * completed, the highest element in the jobs array is moved
515 * down to the now open slot.
517 for (j = 0; j < numjobs; j++) {
518 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
519 /* On same disk, add it to this list and return */
520 thisjob->nextjob = jobs[j]->nextjob;
521 jobs[j]->nextjob = thisjob;
528 /* Loop until we start thisjob or until all existing jobs are finished */
529 while (thisjob || (!partP && (numjobs > 0))) {
530 startjob = -1; /* No new job to start */
532 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
533 /* Either the max jobs are running or we have to wait for all
534 * the jobs to finish. In either case, we wait for at least one
535 * job to finish. When it's done, clean up after it.
537 pid = wait(&wstatus);
538 opr_Assert(pid != -1);
539 for (j = 0; j < numjobs; j++) { /* Find which job it is */
540 if (pid == jobs[j]->pid)
/* The pid returned by wait() has to belong to one of the tracked jobs. */
543 opr_Assert(j < numjobs);
544 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
545 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
548 numjobs--; /* job no longer running */
549 oldjob = jobs[j]; /* remember */
550 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
551 free(oldjob); /* free the old job */
553 /* If there is another partition on the disk to salvage, then
554 * say we will start it (startjob). If not, then put thisjob there
555 * and say we will start it.
557 if (jobs[j]) { /* Another partition to salvage */
558 startjob = j; /* Will start it */
559 } else { /* There is not another partition to salvage */
561 jobs[j] = thisjob; /* Add thisjob */
563 startjob = j; /* Will start it */
565 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
566 startjob = -1; /* Don't start it - already running */
570 /* We don't have to wait for a job to complete */
572 jobs[numjobs] = thisjob; /* Add this job */
574 startjob = numjobs; /* Will start it */
578 /* Start up a new salvage job on a partition in job slot "startjob" */
579 if (startjob != -1) {
581 Log("Starting salvage of file system partition %s\n",
582 jobs[startjob]->partP->name);
584 /* For NT, we not only fork, but re-exec the salvager. Pass in the
585 * commands and pass the child job number via the data path.
588 nt_SalvagePartition(jobs[startjob]->partP->name,
589 jobs[startjob]->jobnumb);
590 jobs[startjob]->pid = pid;
595 jobs[startjob]->pid = pid;
600 struct logOptions logopts;
602 memset(&logopts, 0, sizeof(logopts));
603 logopts.lopt_dest = logDest_file;
605 for (fd = 0; fd < 16; fd++)
611 ShowLog = 0; /* Child processes do not display. */
/* The log file of this job is AFSDIR_SERVER_SLVGLOG_FILEPATH followed by a
 * dot and the job number; the collection loop at the end uses the same
 * naming scheme. */
612 if (asprintf(&filename, "%s.%d",
613 AFSDIR_SERVER_SLVGLOG_FILEPATH,
614 jobs[startjob]->jobnumb) >= 0) {
615 logopts.lopt_filename = filename;
620 SalvageFileSys1(jobs[startjob]->partP, 0);
625 } /* while ( thisjob || (!partP && numjobs > 0) ) */
628 * If waited for all jobs to complete, now collect log files and return.
629 * No files can be collected when logging to the system log (syslog).
631 if (GetLogDest() == logDest_file) {
633 char *buf = calloc(1, SALV_BUFFER_SIZE);
637 Log("out of memory");
/* Copy each per-job log file into the main log line by line, then remove it. */
639 for (i = 0; i < jobcount; i++) {
640 if (asprintf(&logFileName, "%s.%d",
641 AFSDIR_SERVER_SLVGLOG_FILEPATH, i) < 0) {
642 Log("out of memory");
645 if ((passLog = afs_fopen(logFileName, "r"))) {
646 while (fgets(buf, SALV_BUFFER_SIZE, passLog)) {
647 WriteLogBuffer(buf, strlen(buf));
651 (void)unlink(logFileName);
/*
 * Salvage a partition (or a single volume on it) through SalvageFileSys1().
 *
 * When canfork is clear or debug is set, SalvageFileSys1() runs in the
 * calling process.  Otherwise Fork() is called and the process in which it
 * returns 0 does the work; the other side reaches Wait("SalvageFileSys").
 */
663 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
665 if (!canfork || debug || Fork() == 0) {
666 SalvageFileSys1(partP, singleVolumeNumber);
/* Only a forked child enters this branch; its body is not visible here. */
667 if (canfork && !debug) {
671 Wait("SalvageFileSys");
/*
 * Reduce the device path in pbuffer to its last path component, in place.
 *
 * pbuffer: device path; overwritten with the text after its last OS_DIRSEPC.
 * wpath:   caller-supplied output buffer; it is written on lines not visible
 *          here, presumably from the private copy pbuf -- confirm.
 *
 * The caller in this file uses the return value as the device name; the
 * return statement itself is not visible here.
 */
675 get_DevName(char *pbuffer, char *wpath)
677 char pbuf[128], *ptr;
/* Work on a private copy of the path.  NOTE(review): the copy is unbounded;
 * the caller in this file passes a 100-byte buffer, which fits in pbuf. */
678 strcpy(pbuf, pbuffer);
679 ptr = (char *)strrchr(pbuf, OS_DIRSEPC);
685 ptr = (char *)strrchr(pbuffer, OS_DIRSEPC);
/* Source and destination lie in the same array and overlap whenever the
 * component after the separator is longer than what precedes it.  strcpy()
 * is undefined for overlapping objects, so move the bytes with memmove(),
 * terminator included. */
687 memmove(pbuffer, ptr + 1, strlen(ptr + 1) + 1);
/*
 * Salvage one partition, or the volume group of one volume on it.
 *
 * partP:              partition to work on.
 * singleVolumeNumber: nonzero to restrict the run to that volume; the volume
 *                     is then taken offline first and volumes are brought
 *                     back online at the end (unless Testing is set).
 *
 * Visible steps: fill in the SalvInfo for the partition, lock the partition
 * or check the volume out, remove stale salvage.inodes.* and salvage.temp.*
 * files, build the inode list in a temporary file (GetInodeSummary), read
 * the volume headers (GetVolumeSummary), salvage each volume group, delete
 * headers that have no inodes behind them, and close the inode file.
 */
694 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
697 char *inodeListPath = NULL;
698 FD_t inodeFile = INVALID_FD;
699 static char tmpDevName[100];
700 static char wpath[100];
701 struct VolumeSummary *vsp, *esp;
705 struct SalvInfo l_salvinfo;
706 struct SalvInfo *salvinfo = &l_salvinfo;
709 memset(salvinfo, 0, sizeof(*salvinfo));
712 if (inodeFile != INVALID_FD) {
714 inodeFile = INVALID_FD;
716 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
717 Abort("Raced too many times with fileserver restarts while trying to "
718 "checkout/lock volumes; Aborted\n");
720 #ifdef AFS_DEMAND_ATTACH_FS
722 /* unlock all previous volume locks, since we're about to lock them
724 VLockFileReinit(&partP->volLockFile);
726 #endif /* AFS_DEMAND_ATTACH_FS */
728 salvinfo->fileSysPartition = partP;
729 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
730 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
733 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
734 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
735 name = partP->devName;
737 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
/* NOTE(review): tmpDevName is a 100-byte static buffer and this copy is
 * unbounded -- confirm partP->devName can never be longer than that. */
738 strcpy(tmpDevName, partP->devName);
739 name = get_DevName(tmpDevName, wpath);
740 salvinfo->fileSysDeviceName = name;
741 salvinfo->filesysfulldev = wpath;
744 if (singleVolumeNumber) {
745 #ifndef AFS_DEMAND_ATTACH_FS
746 /* only non-DAFS locks the partition when salvaging a single volume;
747 * DAFS will lock the individual volumes in the VG */
748 VLockPartition(partP->name);
749 #endif /* !AFS_DEMAND_ATTACH_FS */
753 /* salvageserver already setup fssync conn for us */
754 if ((programType != salvageServer) && !VConnectFS()) {
755 Abort("Couldn't connect to file server\n");
758 salvinfo->useFSYNC = 1;
759 AskOffline(salvinfo, singleVolumeNumber);
760 #ifdef AFS_DEMAND_ATTACH_FS
761 if (LockVolume(salvinfo, singleVolumeNumber)) {
764 #endif /* AFS_DEMAND_ATTACH_FS */
767 salvinfo->useFSYNC = 0;
768 VLockPartition(partP->name);
/* ForceSalvage is the flag documented at file scope as overriding the
 * DONT_SALVAGE flag.  UseTheForceLuke() presumably tests for the
 * FORCESALVAGE file that RemoveTheForce() removes near the end of this
 * function -- confirm. */
772 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
775 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
776 partP->name, name, (Testing ? "(READONLY mode)" : ""));
778 Log("***Forced salvage of all volumes on this partition***\n");
783 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
790 opr_Verify((dirp = opendir(salvinfo->fileSysPath)) != NULL);
791 while ((dp = readdir(dirp))) {
792 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
793 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
795 Log("Removing old salvager temp files %s\n", dp->d_name);
796 strcpy(npath, salvinfo->fileSysPath);
797 strcat(npath, OS_DIRSEP);
798 strcat(npath, dp->d_name);
804 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
806 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
/* NOTE(review): the _tempnam() result goes straight into strdup(); a NULL
 * result is not checked before the copy, and the _tempnam() buffer is not
 * released on the lines visible here. */
807 inodeListPath = strdup(_tempnam(tdir, "salvage.inodes."));
808 if (inodeListPath == NULL) {
809 Abort("Error allocating memory for inodeListPath\n");
812 code = asprintf(&inodeListPath, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
815 Abort("Error allocating memory for inodeListPath\n");
/* Create (or truncate) the inode list file.  It is unlinked right after it
 * has been opened and is used through the open descriptor from then on. */
819 inodeFile = OS_OPEN(inodeListPath, O_RDWR|O_TRUNC|O_CREAT, 0666);
820 if (inodeFile == INVALID_FD) {
821 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
824 /* Using nt_unlink here since we're really using the delete on close
825 * semantics of unlink. In most places in the salvager, we really do
826 * mean to unlink the file at that point. Those places have been
827 * modified to actually do that so that the NT crt can be used there.
829 * jaltman - On NT delete on close cannot be applied to a file while the
830 * process has an open file handle that does not have DELETE file
831 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
832 * delete privileges. As a result the nt_unlink() call will always
835 code = nt_unlink(inodeListPath);
837 code = unlink(inodeListPath);
840 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
843 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
848 salvinfo->inodeFd = inodeFile;
849 if (salvinfo->inodeFd == INVALID_FD)
850 Abort("Temporary file %s is missing...\n", inodeListPath);
853 inodeListPath = NULL;
855 OS_SEEK(salvinfo->inodeFd, 0L, SEEK_SET);
856 if (ListInodeOption) {
857 PrintInodeList(salvinfo);
858 if (singleVolumeNumber) {
859 /* We've checked out the volume from the fileserver, and we need
860 * to give it back. We don't know if the volume exists or not,
861 * so we don't know whether to AskOnline or not. Try to determine
862 * if the volume exists by trying to read the volume header, and
863 * AskOnline if it is readable. */
864 MaybeAskOnline(salvinfo, singleVolumeNumber);
868 /* enumerate volumes in the partition.
869 * figure out sets of read-only + rw volumes.
870 * salvage each set, read-only volumes first, then read-write.
871 * Fix up inodes on last volume in set (whether it is read-write
874 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
878 if (singleVolumeNumber) {
879 /* If we delete a volume during the salvage, we indicate as such by
880 * setting the volsummary->deleted field. We need to know if we
881 * deleted a volume or not in order to know which volumes to bring
882 * back online after the salvage. If we fork, we will lose this
883 * information, since volsummary->deleted will not get set in the
884 * parent. So, don't fork. */
/* Walk the inode summary one volume group at a time: entries i up to (not
 * including) j share the same RWvolumeId, and the next group starts at j.
 * vsp moves forward through the volume summary in step with this walk. */
888 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
889 i < salvinfo->nVolumesInInodeFile; i = j) {
890 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
892 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
894 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
895 struct VolumeSummary *tsp;
896 /* Scan volume list (from partition root directory) looking for the
897 * current rw volume number in the volume list from the inode scan.
898 * If there is one here that is not in the inode volume list,
900 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
902 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
904 /* Now match up the volume summary info from the root directory with the
905 * entry in the volume list obtained from scanning inodes */
906 salvinfo->inodeSummary[j].volSummary = NULL;
907 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
908 if (tsp->header.id == vid) {
909 salvinfo->inodeSummary[j].volSummary = tsp;
915 /* Salvage the group of volumes (several read-only + 1 read/write)
916 * starting with the current read-only volume we're looking at.
919 nt_SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
921 DoSalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
922 #endif /* AFS_NT40_ENV */
926 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
927 for (; vsp < esp; vsp++) {
929 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
932 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
933 RemoveTheForce(salvinfo->fileSysPath);
935 if (!Testing && singleVolumeNumber) {
937 #ifdef AFS_DEMAND_ATTACH_FS
938 /* unlock vol headers so the fs can attach them when we AskOnline */
939 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
940 #endif /* AFS_DEMAND_ATTACH_FS */
942 /* Step through the volumeSummary list and set all volumes on-line.
943 * Most volumes were taken off-line in GetVolumeSummary.
944 * If a volume was deleted, don't tell the fileserver anything, since
945 * we already told the fileserver the volume was deleted back when we
946 * destroyed the volume header.
947 * Also, make sure we bring the singleVolumeNumber back online first.
950 for (j = 0; j < salvinfo->nVolumes; j++) {
951 if (salvinfo->volumeSummaryp[j].header.id == singleVolumeNumber) {
953 if (!salvinfo->volumeSummaryp[j].deleted) {
954 AskOnline(salvinfo, singleVolumeNumber);
960 /* If singleVolumeNumber is not in our volumeSummary, it means that
961 * at least one other volume in the VG is on the partition, but the
962 * RW volume is not. We've already AskOffline'd it by now, though,
963 * so make sure we don't still have the volume checked out. */
964 AskDelete(salvinfo, singleVolumeNumber);
/* Second pass: every other volume in the summary that was not deleted is
 * brought back online as well. */
967 for (j = 0; j < salvinfo->nVolumes; j++) {
968 if (salvinfo->volumeSummaryp[j].header.id != singleVolumeNumber) {
969 if (!salvinfo->volumeSummaryp[j].deleted) {
970 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
976 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
977 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
980 OS_CLOSE(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
/*
 * Get rid of the header of a volume that has no data behind it.
 *
 * salvinfo: state of the current salvage job (partition path, FSYNC use).
 * vsp:      summary entry of the volume whose header is to go.
 *
 * Logs the event, destroys the header with VDestroyVolumeDiskHeader(),
 * unlinks the header path as a second line of defence, and calls AskDelete()
 * when salvinfo->useFSYNC is set.  The log text shows that in Testing mode
 * the deletion is only announced; the guards that skip the work are on
 * lines not visible here.
 */
984 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
986 char path[VMAXPATHLEN + 10];
987 char filename[VMAXPATHLEN];
/* Header file name for this volume id, then its full path under the
 * partition directory. */
993 VolumeExternalName_r(vsp->header.id, filename, sizeof(filename));
994 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, filename);
997 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
1000 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
1002 Log("Error %ld destroying volume disk header for volume %" AFS_VOLID_FMT "\n",
1003 afs_printable_int32_ld(code),
1004 afs_printable_VolumeId_lu(vsp->header.id));
1007 /* make sure we actually delete the header file; ENOENT
1008 * is fine, since VDestroyVolumeDiskHeader probably already
1010 if (unlink(path) && errno != ENOENT) {
1011 Log("Unable to unlink %s (errno = %d)\n", path, errno);
1013 if (salvinfo->useFSYNC) {
1014 AskDelete(salvinfo, vsp->header.id);
/*
 * qsort() comparator over struct ViceInodeInfo; GetInodeSummary() uses it to
 * sort the inode table.
 *
 * When at least one entry is a special inode (vnodeNumber == INODESPECIAL)
 * the entries are first compared by read-write volume id.  Ordinary inodes
 * are compared by volume id, then vnode number, then uniquifier, then data
 * version.  Most return statements are on lines not visible here, so the
 * sign chosen in each branch cannot be confirmed from this view.
 */
1021 CompareInodes(const void *_p1, const void *_p2)
1023 const struct ViceInodeInfo *p1 = _p1;
1024 const struct ViceInodeInfo *p2 = _p2;
1025 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1026 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
/* Read-write volume id of each entry: parentId for a special inode, the
 * volume id itself for an ordinary one. */
1027 VolumeId p1rwid, p2rwid;
1029 (p1->u.vnode.vnodeNumber ==
1030 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
1032 (p2->u.vnode.vnodeNumber ==
1033 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1034 if (p1rwid < p2rwid)
1036 if (p1rwid > p2rwid)
1038 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1039 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
/* NOTE(review): the next comparison never yields 0, so two special inodes
 * of the same volume and type compare as different in both directions. */
1040 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1041 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1042 if (p1->u.vnode.volumeId == p1rwid)
1044 if (p2->u.vnode.volumeId == p2rwid)
1046 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
/* Exactly one of the two entries is special from here to the end of the
 * enclosing branch. */
1048 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1049 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1050 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
/* Neither entry is special: volume id first, then vnode number. */
1052 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1054 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1056 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1058 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1060 /* The following tests are reversed, so that the most desirable
1061 * of several similar inodes comes first */
/* NOTE(review): the 90% / 10% thresholds under AFS_3DISPARES and
 * AFS_SGI_EXMAG look like wrap-around handling for narrower counters on
 * those platforms -- confirm against the return statements, which are not
 * visible here. */
1062 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1063 #ifdef AFS_3DISPARES
1064 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1065 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1068 #ifdef AFS_SGI_EXMAG
1069 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1070 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1075 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1076 #ifdef AFS_3DISPARES
1077 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1078 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1081 #ifdef AFS_SGI_EXMAG
1082 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1083 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
/* Same pattern again, this time on the data version of the inode. */
1088 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1089 #ifdef AFS_3DISPARES
1090 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1091 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1094 #ifdef AFS_SGI_EXMAG
1095 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1096 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1101 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1102 #ifdef AFS_3DISPARES
1103 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1104 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1107 #ifdef AFS_SGI_EXMAG
1108 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1109 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/*
 * Summarize the leading run of entries in ip[] that belong to one volume.
 *
 * ip:        first entry of the run; its volume id defines the run.
 * maxInodes: number of entries available starting at ip.
 * summary:   receives the volume id, the read-write volume id, the inode
 *            counts and the highest uniquifier seen.
 */
1118 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1119 struct InodeSummary *summary)
1121 VolumeId volume = ip->u.vnode.volumeId;
/* Until a special inode says otherwise, the volume is taken to be its own
 * read-write volume. */
1122 VolumeId rwvolume = volume;
/* Stop at the end of the array or at the first entry of another volume. */
1127 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1129 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1131 rwvolume = ip->u.special.parentId;
1132 /* This isn't quite right, as there could (in error) be different
1133 * parent inodes in different special vnodes */
1135 if (maxunique < ip->u.vnode.vnodeUniquifier)
1136 maxunique = ip->u.vnode.vnodeUniquifier;
1140 summary->volumeId = volume;
1141 summary->RWvolumeId = rwvolume;
1142 summary->nInodes = n;
1143 summary->nSpecialInodes = nSpecial;
1144 summary->maxUniquifier = maxunique;
/*
 * Inode filter handed to ListViceInodes() when a single volume is salvaged.
 *
 * Returns nonzero when the inode belongs to singleVolumeNumber: a special
 * inode is matched on its parentId, any other inode on its volumeId.
 * rock is not used on the lines visible here.
 */
1148 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, VolumeId singleVolumeNumber, void *rock)
1150 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1151 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1152 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1157 * Collect list of inodes in file named by path. If a truly fatal error,
1158 * unlink the file and abort. For lesser errors, return -1. The file will
1159 * be unlinked by the caller.
/*
 * GetInodeSummary -- collect every vice inode on the partition and reduce
 * them to one struct InodeSummary per volume, stored in
 * salvinfo->inodeSummary (salvinfo->nVolumesInInodeFile entries).
 *
 * salvinfo            salvage job state; inodeFd, inodeSummary and
 *                     nVolumesInInodeFile are written here
 * inodeFile           open descriptor that ListViceInodes fills with raw
 *                     struct ViceInodeInfo records; kept as salvinfo->inodeFd
 * singleVolumeNumber  volume being salvaged, or 0 for the whole partition
 *
 * Outline of the visible code:
 *  1. ListViceInodes writes the raw inode table; a failure is either
 *     logged or fatal through Abort.
 *  2. The summary is accumulated in a temporary file (salvage.temp.PID,
 *     or a _tempnam name in the alternative branch) that is unlinked right
 *     after it is opened, so only the open descriptor keeps it alive.
 *  3. When not forking, in debug mode, or in the forked child: the inode
 *     table is read, sorted with CompareInodes, written back in sorted
 *     order, and CountVolumeInodes produces one summary record per volume.
 *  4. The parent waits for the child, then reads the summary file into
 *     memory and clears every volSummary pointer.
 *  5. AskError is called when retcode is set for a single-volume salvage
 *     that was not deleted.
 *
 * NOTE(review): the return statements, several declarations (code, tdir,
 * forceSal, retcode, deleted, ...) and the preprocessor conditionals are
 * not visible in this view; confirm return values against the full file.
 */
1162 GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolumeNumber)
1166 struct ViceInodeInfo *ip, *ip_save;
1167 struct InodeSummary summary;
1168 char summaryFileName[50];
1169 FD_t summaryFile = INVALID_FD;
/* dev and wpath are declared twice below; presumably each pair sits in a
 * different preprocessor branch that is not visible here -- TODO confirm */
1171 char *dev = salvinfo->fileSysPath;
1172 char *wpath = salvinfo->fileSysPath;
1174 char *dev = salvinfo->fileSysDeviceName;
1175 char *wpath = salvinfo->filesysfulldev;
1177 char *part = salvinfo->fileSysPath;
1182 afs_sfsize_t st_size;
1184 /* This file used to come from vfsck; cobble it up ourselves now... */
1186 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1187 singleVolumeNumber ? OnlyOneVolume : 0,
1188 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1190 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1194 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1196 if (forceSal && !ForceSalvage) {
1197 Log("***Forced salvage of all volumes on this partition***\n");
/* rewind the freshly written inode table and remember it in salvinfo so
 * later stages can read it through salvinfo->inodeFd */
1200 OS_SEEK(inodeFile, 0L, SEEK_SET);
1201 salvinfo->inodeFd = inodeFile;
1202 if (salvinfo->inodeFd == INVALID_FD ||
1203 (st_size = OS_SIZE(salvinfo->inodeFd)) == -1) {
1204 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
/* the summary file goes into tmpdir when one was given, otherwise into
 * the partition itself */
1206 tdir = (tmpdir ? tmpdir : part);
1208 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1209 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp."));
1211 snprintf(summaryFileName, sizeof summaryFileName,
1212 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1214 summaryFile = OS_OPEN(summaryFileName, O_RDWR|O_APPEND|O_CREAT, 0666);
1215 if (summaryFile == INVALID_FD) {
1216 Abort("Unable to create inode summary file\n");
1220 /* Using nt_unlink here since we're really using the delete on close
1221 * semantics of unlink. In most places in the salvager, we really do
1222 * mean to unlink the file at that point. Those places have been
1223 * modified to actually do that so that the NT crt can be used there.
1225 * jaltman - As commented elsewhere, this cannot work because fopen()
1226 * does not open files with DELETE and FILE_SHARE_DELETE.
1228 code = nt_unlink(summaryFileName);
1230 code = unlink(summaryFileName);
1233 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1236 if (!canfork || debug || Fork() == 0) {
1237 int nInodes = st_size / sizeof(struct ViceInodeInfo);
1239 OS_CLOSE(summaryFile);
1240 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1241 RemoveTheForce(salvinfo->fileSysPath);
1243 struct VolumeSummary *vsp;
1247 GetVolumeSummary(salvinfo, singleVolumeNumber);
1249 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1251 if (vsp->header.id == singleVolumeNumber) {
1254 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1260 MaybeAskOnline(salvinfo, singleVolumeNumber);
1262 /* make sure we get rid of stray .vol headers, even if
1263 * they're not in our volume summary (might happen if
1264 * e.g. something else created them and they're not in the
1265 * fileserver VGC) */
1266 VDestroyVolumeDiskHeader(salvinfo->fileSysPartition,
1267 singleVolumeNumber, 0 /*parent*/);
1268 AskDelete(salvinfo, singleVolumeNumber);
1272 Log("%s vice inodes on %s; not salvaged\n",
1273 singleVolumeNumber ? "No applicable" : "No", dev);
/* load the whole inode table, sort it with CompareInodes and write the
 * sorted table back over the original contents of inodeFd */
1278 ip = malloc(nInodes*sizeof(struct ViceInodeInfo));
1280 OS_CLOSE(summaryFile);
1282 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1285 if (OS_READ(salvinfo->inodeFd, ip, st_size) != st_size) {
1286 OS_CLOSE(summaryFile);
1287 Abort("Unable to read inode table; %s not salvaged\n", dev);
1289 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1290 if (OS_SEEK(salvinfo->inodeFd, 0, SEEK_SET) == -1
1291 || OS_WRITE(salvinfo->inodeFd, ip, st_size) != st_size) {
1292 OS_CLOSE(summaryFile);
1293 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
/* one summary record per volume: CountVolumeInodes fills in summary, then
 * index, ip and nInodes advance by the inodes it consumed (the enclosing
 * loop header is not visible in this view) */
1298 CountVolumeInodes(ip, nInodes, &summary);
1299 if (OS_WRITE(summaryFile, &summary, sizeof(summary)) != sizeof(summary)) {
1300 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1301 OS_CLOSE(summaryFile);
1305 summary.index += (summary.nInodes);
1306 nInodes -= summary.nInodes;
1307 ip += summary.nInodes;
1310 ip = ip_save = NULL;
1311 /* Following fflush is not fclose, because if it was debug mode would not work */
1312 if (OS_SYNC(summaryFile) == -1) {
1313 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1314 OS_CLOSE(summaryFile);
/* parent side when a child did the work: wait for it and give up on the
 * partition if the child failed */
1318 if (canfork && !debug) {
1322 if (Wait("Inode summary") == -1) {
1323 OS_CLOSE(summaryFile);
1324 Exit(1); /* salvage of this partition aborted */
/* read the finished summary file back into salvinfo->inodeSummary */
1328 st_size = OS_SIZE(summaryFile);
1329 opr_Assert(st_size >= 0);
1332 salvinfo->inodeSummary = malloc(st_size);
1333 opr_Assert(salvinfo->inodeSummary != NULL);
1334 /* For GNU we need to do lseek to get the file pointer moved. */
1335 opr_Assert(OS_SEEK(summaryFile, 0, SEEK_SET) == 0);
1336 ret = OS_READ(summaryFile, salvinfo->inodeSummary, st_size);
1337 opr_Assert(ret == st_size);
1339 salvinfo->nVolumesInInodeFile = st_size / sizeof(struct InodeSummary);
1340 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1341 salvinfo->inodeSummary[i].volSummary = NULL;
1343 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)st_size);
1344 OS_CLOSE(summaryFile);
1347 if (retcode && singleVolumeNumber && !deleted) {
1348 AskError(salvinfo, singleVolumeNumber);
1354 /* Comparison routine for volume sort.
1355 This is set up so that a read-write volume comes immediately before
1356 any read-only clones of that volume */
/*
 * CompareVolumes -- comparator over struct VolumeSummary elements, in the
 * (const void *, const void *) shape that qsort expects.
 *
 * Ordering visible here:
 *  - different header.parent: ascending by parent id, so all members of
 *    one volume group end up adjacent;
 *  - same parent: the entry whose id equals its parent (the read-write
 *    volume) is tested for first on each side;
 *  - otherwise: ascending by header.id.
 *
 * NOTE(review): the values returned by the two read-write branches are
 * not visible in this view; presumably -1 when p1 is the RW volume and 1
 * when p2 is, so that the RW volume sorts first -- confirm.
 */
1358 CompareVolumes(const void *_p1, const void *_p2)
1360 const struct VolumeSummary *p1 = _p1;
1361 const struct VolumeSummary *p2 = _p2;
/* group by parent first */
1362 if (p1->header.parent != p2->header.parent)
1363 return p1->header.parent < p2->header.parent ? -1 : 1;
1364 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1366 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1368 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1372 * Gleans volumeSummary information by asking the fileserver
1374 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1375 * salvaging a whole partition
1377 * @return whether we obtained the volume summary information or not
1378 * @retval 0 success; we obtained the volume summary information
1379 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1381 * @retval 1 we did not get the volume summary information; either the
1382 * fileserver responded with an error, or we are not supposed to
1383 * ask the fileserver for the information (e.g. we are salvaging
1384 * the entire partition or we are not the salvageserver)
1386 * @note for non-DAFS, always returns 1
/*
 * AskVolumeSummary -- fill salvinfo->volumeSummaryp and salvinfo->nVolumes
 * from the volume group membership reported by the fileserver.
 *
 * The body is compiled only when both FSSYNC_BUILD_CLIENT and
 * AFS_DEMAND_ATTACH_FS are defined, and it only does work when
 * programType is salvageServer and singleVolumeNumber is non-zero.
 *
 * Outline of the visible code:
 *  1. FSYNC_VGCQuery is issued for singleVolumeNumber. While it fails with
 *     reason FSYNC_PART_SCANNING the query is retried after a sleep that
 *     grows from 1 second up to a cap of 10 seconds.
 *  2. A failure with reason FSYNC_UNKNOWN_VOLID is treated as an empty
 *     volume group whose rw id is singleVolumeNumber.
 *  3. Another failure is logged as a refused VGCQuery request.
 *  4. If the reported rw id differs from singleVolumeNumber, a salvage of
 *     the whole group is scheduled through SALVSYNC_LinkVolume and the
 *     process exits with SALSRV_EXIT_VOLGROUP_LINK.
 *  5. Otherwise VOL_VG_MAX_VOLS summaries are allocated and each non-zero
 *     child id is taken offline and locked (except singleVolumeNumber,
 *     which was handled earlier), its disk header is read and converted
 *     with DiskToVolumeHeader, and nVolumes is incremented.
 *  6. The collected summaries are sorted with qsort.
 *
 * NOTE(review): the return statements and parts of the branch structure
 * are not visible in this view; see the retval list in the comment above
 * this function for the documented results.
 */
1389 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1392 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1393 if (programType == salvageServer) {
1394 if (singleVolumeNumber) {
1395 FSSYNC_VGQry_response_t q_res;
1397 struct VolumeSummary *vsp;
1399 struct VolumeDiskHeader diskHdr;
1401 memset(&res, 0, sizeof(res));
1403 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1406 * We must wait for the partition to finish scanning before
1407 * can continue, since we will not know if we got the entire
1408 * VG membership unless the partition is fully scanned.
1409 * We could, in theory, just scan the partition ourselves if
1410 * the VG cache is not ready, but we would be doing the exact
1411 * same scan the fileserver is doing; it will almost always
1412 * be faster to wait for the fileserver. The only exceptions
1413 * are if the partition does not take very long to scan, and
1414 * in that case it's fast either way, so who cares?
1416 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1417 Log("waiting for fileserver to finish scanning partition %s...\n",
1418 salvinfo->fileSysPartition->name);
1420 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1421 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1422 * just so small partitions don't need to wait over 10
1423 * seconds every time, and large partitions are generally
1424 * polled only once every ten seconds. */
1425 sleep((i > 10) ? (i = 10) : i);
1427 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1431 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1432 /* This can happen if there's no header for the volume
1433 * we're salvaging, or no headers exist for the VG (if
1434 * we're salvaging an RW). Act as if we got a response
1435 * with no VG members. The headers may be created during
1436 * salvaging, if there are inodes in this VG. */
1438 memset(&q_res, 0, sizeof(q_res));
1439 q_res.rw = singleVolumeNumber;
1443 Log("fileserver refused VGCQuery request for volume %" AFS_VOLID_FMT " on "
1444 "partition %s, code %ld reason %ld\n",
1445 afs_printable_VolumeId_lu(singleVolumeNumber),
1446 salvinfo->fileSysPartition->name,
1447 afs_printable_int32_ld(code),
1448 afs_printable_int32_ld(res.hdr.reason));
1452 if (q_res.rw != singleVolumeNumber) {
1453 Log("fileserver requested salvage of clone %" AFS_VOLID_FMT "; scheduling salvage of volume group %" AFS_VOLID_FMT "...\n",
1454 afs_printable_VolumeId_lu(singleVolumeNumber),
1455 afs_printable_VolumeId_lu(q_res.rw));
1456 #ifdef SALVSYNC_BUILD_CLIENT
1457 if (SALVSYNC_LinkVolume(q_res.rw,
1459 salvinfo->fileSysPartition->name,
1461 Log("schedule request failed\n");
1463 #endif /* SALVSYNC_BUILD_CLIENT */
1464 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1467 salvinfo->volumeSummaryp = calloc(VOL_VG_MAX_VOLS, sizeof(struct VolumeSummary));
1468 opr_Assert(salvinfo->volumeSummaryp != NULL);
1470 salvinfo->nVolumes = 0;
1471 vsp = salvinfo->volumeSummaryp;
1473 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1474 char name[VMAXPATHLEN];
1476 if (!q_res.children[i]) {
1480 /* AskOffline for singleVolumeNumber was called much earlier */
1481 if (q_res.children[i] != singleVolumeNumber) {
1482 AskOffline(salvinfo, q_res.children[i]);
1483 if (LockVolume(salvinfo, q_res.children[i])) {
1489 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1491 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1492 afs_printable_uint32_lu(q_res.children[i]));
1497 DiskToVolumeHeader(&vsp->header, &diskHdr);
1498 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1500 salvinfo->nVolumes++;
1504 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1509 Log("Cannot get volume summary from fileserver; falling back to scanning "
1510 "entire partition\n");
1513 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
1518 * count how many volume headers are found by VWalkVolumeHeaders.
1520 * @param[in] dp the disk partition (unused)
1521 * @param[in] name full path to the .vol header (unused)
1522 * @param[in] hdr the header data (unused)
1523 * @param[in] last whether this is the last try or not (unused)
1524 * @param[in] rock actually an afs_int32*; the running count of how many
1525 * volumes we have found
/*
 * CountHeader -- header callback handed to VWalkVolumeHeaders by
 * GetVolumeSummary for the counting pass over a partition.
 *
 * rock is reinterpreted as a pointer to the afs_int32 running count.
 *
 * NOTE(review): the statement that updates the count and the return
 * statement are not visible in this view.
 */
1530 CountHeader(struct DiskPartition64 *dp, const char *name,
1531 struct VolumeDiskHeader *hdr, int last, void *rock)
1533 afs_int32 *nvols = (afs_int32 *)rock;
1539 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
/*
 * Shared state for the header-recording walk. GetVolumeSummary fills one
 * of these in and passes its address as the rock to VWalkVolumeHeaders;
 * RecordHeader and UnlinkHeader cast the rock back to this type.
 */
1542 struct SalvageScanParams {
1543 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1544 * vol id of the VG we're salvaging */
1545 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1546 * we're filling in */
1547 afs_int32 nVolumes; /**< # of vols we've encountered */
1548 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1549 * # of vols we've alloc'd memory for) */
1550 int retry; /**< do we need to retry vol lock/checkout? */
1551 struct SalvInfo *salvinfo; /**< salvage job info */
1555 * records volume summary info found from VWalkVolumeHeaders.
1557 * Found volumes are also taken offline if they are in the specific volume
1558 * group we are looking for.
1560 * @param[in] dp the disk partition
1561 * @param[in] name full path to the .vol header
1562 * @param[in] hdr the header data
1563 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1564 * @param[in] rock actually a struct SalvageScanParams*, containing the
1565 * information needed to record the volume summary data
1567 * @return operation status
1569 * @retval -1 volume locking raced with fileserver restart; checking out
1570 * and locking volumes needs to be retried
1571 * @retval 1 volume header is mis-named and should be deleted
/*
 * RecordHeader -- header callback handed to VWalkVolumeHeaders by
 * GetVolumeSummary; rock is a struct SalvageScanParams pointer.
 *
 * Outline of the visible code:
 *  1. The on-disk header is converted with DiskToVolumeHeader into a
 *     zeroed local summary.
 *  2. If a single volume was requested and this header is that volume but
 *     its parent is a different id (the request named a clone): the
 *     salvageserver schedules a salvage of the parent group through
 *     SALVSYNC_LinkVolume and exits with SALSRV_EXIT_VOLGROUP_LINK; the
 *     other visible path logs that a read-only volume is not salvaged.
 *  3. For a partition salvage, or when the header belongs to the requested
 *     volume group, the base name of the header file is compared with the
 *     name built from VFORMAT and the header id.
 *  4. When the name is good, or this is the last attempt, volumes other
 *     than singleVolumeNumber are taken offline and, under
 *     AFS_DEMAND_ATTACH_FS, locked.
 *  5. On the last attempt a mis-named header is reported unless Showmode
 *     is set.
 *  6. Finding more headers than params->totalVolumes is fatal (Abort);
 *     otherwise the summary is copied into params->vsp.
 *
 * NOTE(review): the return statements, the badname bookkeeping and the
 * updates of params->vsp and params->nVolumes are not visible in this
 * view; see the retval list in the comment above this function.
 */
1574 RecordHeader(struct DiskPartition64 *dp, const char *name,
1575 struct VolumeDiskHeader *hdr, int last, void *rock)
1577 char nameShouldBe[64];
1578 struct SalvageScanParams *params;
1579 struct VolumeSummary summary;
1580 VolumeId singleVolumeNumber;
1581 struct SalvInfo *salvinfo;
1583 params = (struct SalvageScanParams *)rock;
1585 memset(&summary, 0, sizeof(summary));
1587 singleVolumeNumber = params->singleVolumeNumber;
1588 salvinfo = params->salvinfo;
1590 DiskToVolumeHeader(&summary.header, hdr);
1592 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1593 && summary.header.parent != singleVolumeNumber) {
1595 if (programType == salvageServer) {
1596 #ifdef SALVSYNC_BUILD_CLIENT
1597 Log("fileserver requested salvage of clone %" AFS_VOLID_FMT "; scheduling salvage of volume group %" AFS_VOLID_FMT "...\n",
1598 afs_printable_VolumeId_lu(summary.header.id),
1599 afs_printable_VolumeId_lu(summary.header.parent));
1600 if (SALVSYNC_LinkVolume(summary.header.parent,
1604 Log("schedule request failed\n");
1607 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1610 Log("%" AFS_VOLID_FMT " is a read-only volume; not salvaged\n",
1611 afs_printable_VolumeId_lu(singleVolumeNumber));
1616 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1617 || summary.header.parent == singleVolumeNumber) {
1619 /* check if the header file is incorrectly named */
1621 const char *base = strrchr(name, OS_DIRSEPC);
1628 snprintf(nameShouldBe, sizeof nameShouldBe,
1629 VFORMAT, afs_printable_VolumeId_lu(summary.header.id));
1632 if (strcmp(nameShouldBe, base)) {
1633 /* .vol file has wrong name; retry/delete */
1637 if (!badname || last) {
1638 /* only offline the volume if the header is good, or if this is
1639 * the last try looking at it; avoid AskOffline'ing the same vol
1642 if (singleVolumeNumber
1643 && summary.header.id != singleVolumeNumber) {
1644 /* don't offline singleVolumeNumber; we already did that
1647 AskOffline(salvinfo, summary.header.id);
1649 #ifdef AFS_DEMAND_ATTACH_FS
1651 /* don't lock the volume if the header is bad, since we're
1652 * about to delete it anyway. */
1653 if (LockVolume(salvinfo, summary.header.id)) {
1658 #endif /* AFS_DEMAND_ATTACH_FS */
1662 if (last && !Showmode) {
1663 Log("Volume header file %s is incorrectly named (should be %s "
1664 "not %s); %sdeleted (it will be recreated later, if "
1665 "necessary)\n", name, nameShouldBe, base,
1666 (Testing ? "it would have been " : ""));
1674 if (params->nVolumes > params->totalVolumes) {
1675 /* We found more volumes than we found on the first partition walk;
1676 * apparently something created a volume while we were
1677 * partition-salvaging, or we found more than 20 vols when salvaging a
1678 * particular volume. Abort if we detect this, since other programs
1679 * supposed to not touch the partition while it is partition-salvaging,
1680 * and we shouldn't find more than 20 vols in a VG.
1682 Abort("Found %ld vol headers, but should have found at most %ld! "
1683 "Make sure the volserver/fileserver are not running at the "
1684 "same time as a partition salvage\n",
1685 afs_printable_int32_ld(params->nVolumes),
1686 afs_printable_int32_ld(params->totalVolumes));
1689 memcpy(params->vsp, &summary, sizeof(summary));
1697 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1699 * If the header could not be read in at all, the header is always unlinked.
1700 * If instead RecordHeader said the header was bad (that is, the header file
1701 * is mis-named), we only unlink if we are doing a partition salvage, as
1702 * opposed to salvaging a specific volume group.
1704 * @param[in] dp the disk partition
1705 * @param[in] name full path to the .vol header
1706 * @param[in] hdr header data, or NULL if the header could not be read
1707 * @param[in] rock actually a struct SalvageScanParams*, with some information
/*
 * UnlinkHeader -- second callback handed to VWalkVolumeHeaders by
 * GetVolumeSummary; rock is a struct SalvageScanParams pointer.
 *
 * Two cases are visible: a header that could not be read at all, which is
 * reported as not a legitimate volume header file, and a readable header
 * seen during a partition salvage (params->singleVolumeNumber is 0). The
 * file is removed with unlink only when dounlink is set, and an unlink
 * failure is logged with errno.
 *
 * NOTE(review): the declaration of dounlink and the lines that set it
 * (including any dependence on Testing) are not visible in this view.
 */
1711 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1712 struct VolumeDiskHeader *hdr, void *rock)
1714 struct SalvageScanParams *params;
1717 params = (struct SalvageScanParams *)rock;
1720 /* no header; header is too bogus to read in at all */
1722 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1728 } else if (!params->singleVolumeNumber) {
1729 /* We were able to read in a header, but RecordHeader said something
1730 * was wrong with it. We only unlink those if we are doing a partition
1737 if (dounlink && unlink(name)) {
1738 Log("Error %d while trying to unlink %s\n", errno, name);
1743 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1744 * the fileserver for VG information, or by scanning the /vicepX partition.
1746 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1747 * are salvaging, or 0 if this is a partition
1750 * @return operation status
1752 * @retval -1 we raced with a fileserver restart; checking out and locking
1753 * volumes must be retried
/*
 * GetVolumeSummary -- populate salvinfo->volumeSummaryp and
 * salvinfo->nVolumes, sorted with qsort.
 *
 * AskVolumeSummary is tried first. When it does not supply the
 * information the partition is scanned instead:
 *  - for a partition salvage (singleVolumeNumber is 0) a first
 *    VWalkVolumeHeaders pass with CountHeader sizes the array;
 *  - for a single volume group the array is sized VOL_VG_MAX_VOLS;
 *  - a second VWalkVolumeHeaders pass with RecordHeader and UnlinkHeader
 *    records each header and deals with invalid ones.
 * A failed directory read or a failed recording walk is fatal (Abort).
 *
 * NOTE(review): the return statements and the conditions that test code
 * are not visible in this view. The last argument of the second
 * VWalkVolumeHeaders call looks mis-encoded here and is presumably the
 * address of params -- confirm against the full file.
 */
1756 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1758 afs_int32 nvols = 0;
1759 struct SalvageScanParams params;
1762 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1764 /* we successfully got the vol information from the fileserver; no
1765 * need to scan the partition */
1769 /* we need to retry volume checkout */
1773 if (!singleVolumeNumber) {
1774 /* Count how many volumes we have in /vicepX */
1775 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1778 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1783 nvols = VOL_VG_MAX_VOLS;
1786 salvinfo->volumeSummaryp = calloc(nvols, sizeof(struct VolumeSummary));
1787 opr_Assert(salvinfo->volumeSummaryp != NULL);
/* hand the walk callbacks everything they need through params */
1789 params.singleVolumeNumber = singleVolumeNumber;
1790 params.vsp = salvinfo->volumeSummaryp;
1791 params.nVolumes = 0;
1792 params.totalVolumes = nvols;
1794 params.salvinfo = salvinfo;
1796 /* walk the partition directory of volume headers and record the info
1797 * about them; unlinking invalid headers */
1798 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1799 UnlinkHeader, ¶ms);
1801 /* we apparently need to retry checking-out/locking volumes */
1805 Abort("Failed to get volume header summary\n");
1807 salvinfo->nVolumes = params.nVolumes;
1809 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1815 #ifdef AFS_NAMEI_ENV
1816 /* Find the link table. This should be associated with the RW volume, even
1817 * if there is only an RO volume at this site.
/*
 * FindLinkHandle -- search the special inodes of every volume in the group
 * for the link table inode of the read-write volume.
 *
 * isp        first of nVols consecutive inode summaries for the group
 * nVols      number of summaries to examine
 * allInodes  base of the inode array; isp[i].index is an offset into it
 *
 * A match is a special inode of type VI_LINKTABLE whose volumeId and
 * parentId both equal isp->RWvolumeId (taken from the first summary, not
 * from isp[i]). The inode number of the first match is returned.
 *
 * NOTE(review): the value returned when nothing matches is not visible in
 * this view.
 */
1820 FindLinkHandle(struct InodeSummary *isp, int nVols,
1821 struct ViceInodeInfo *allInodes)
1824 struct ViceInodeInfo *ip;
1826 for (i = 0; i < nVols; i++) {
1827 ip = allInodes + isp[i].index;
1828 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1829 if (ip[j].u.special.volumeId == isp->RWvolumeId &&
1830 ip[j].u.special.parentId == isp->RWvolumeId &&
1831 ip[j].u.special.type == VI_LINKTABLE) {
1832 return ip[j].inodeNumber;
/*
 * CheckDupLinktable -- decide what to do with an inode that may be an
 * extra link table. DoSalvageVolumeGroup calls it for inodes that are not
 * the link table of the group being salvaged and skips its own link count
 * fixing when the result is non-zero.
 *
 * Visible behaviour:
 *  - an inode that is not special, or is special but not of type
 *    VI_LINKTABLE, is left for normal processing;
 *  - a link table whose volumeId equals its parentId looks valid for some
 *    other group; it is reported and left alone;
 *  - a link table whose volumeId and parentId differ is reported as
 *    invalid; one path only logs that it would have been deleted, the
 *    other removes the file via namei_HandleToName and unlink, logging
 *    any unlink error.
 *
 * NOTE(review): the return statements, the declarations of stmp, tmpH and
 * ufs_name, and the condition choosing between the two delete paths
 * (presumably Testing) are not visible in this view.
 */
1840 CheckDupLinktable(struct SalvInfo *salvinfo, struct InodeSummary *isp, struct ViceInodeInfo *ip)
1843 if (ip->u.vnode.vnodeNumber != INODESPECIAL) {
1844 /* not a linktable; process as a normal file */
1847 if (ip->u.special.type != VI_LINKTABLE) {
1848 /* not a linktable; process as a normal file */
1852 /* make sure nothing inc/decs it */
1855 if (ip->u.special.volumeId == ip->u.special.parentId) {
1856 /* This is a little weird, but shouldn't break anything, and there is
1857 * no known way that this can happen; just do nothing, in case deleting
1858 * it would screw something up. */
1859 Log("Inode %s appears to be a valid linktable for id (%u), but it's not\n",
1860 PrintInode(stmp, ip->inodeNumber), ip->u.special.parentId);
1861 Log("the linktable for our volume group (%u). This is unusual, since\n",
1863 Log("there should only be one linktable per volume group. I'm leaving\n");
1864 Log("it alone, just to be safe.\n");
1868 Log("Linktable %s appears to be invalid (parentid/volumeid mismatch: %u != %u)\n",
1869 PrintInode(stmp, ip->inodeNumber), ip->u.special.parentId, ip->u.special.volumeId);
1871 Log("Would have deleted linktable inode %s\n", PrintInode(stmp, ip->inodeNumber));
1876 Log("Deleting linktable inode %s\n", PrintInode(stmp, ip->inodeNumber));
1877 IH_INIT(tmpH, salvinfo->fileSysDevice, isp->RWvolumeId, ip->inodeNumber);
1878 namei_HandleToName(&ufs_name, tmpH);
1879 if (unlink(ufs_name.n_path) < 0) {
1880 Log("Error %d unlinking path %s\n", errno, ufs_name.n_path);
/*
 * CreateLinkTable -- make sure the volume group has an initialised link
 * table and leave salvinfo->VGLinkH pointing at it.
 *
 * salvinfo  salvage job state; VGLinkH is (re)initialised here
 * isp       inode summary of the group; RWvolumeId names the owner
 * ino       existing link table inode, or an invalid inode number to
 *           request creation of a new one
 *
 * Visible steps: when ino is not valid a new special inode of type
 * VI_LINKTABLE is created with IH_CREATE; the handle is initialised and
 * opened; the file is truncated to sizeof(struct versionStamp) plus
 * sizeof(short); a version stamp carrying LINKTABLEMAGIC and
 * LINKTABLEVERSION is written at offset 0; the descriptor is closed; and
 * if a volume summary exists its header.linkTable is set to ino. Every
 * failure is fatal through Abort.
 *
 * NOTE(review): the assignment of the IH_CREATE result to ino and the
 * declaration of fdP are not visible in this view. The failure message
 * after FDH_PWRITE repeats the truncate wording although the failing
 * operation is a write.
 */
1889 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1891 struct versionStamp version;
1894 if (!VALID_INO(ino))
1896 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->RWvolumeId,
1897 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1898 if (!VALID_INO(ino))
1900 ("Unable to allocate link table inode for volume %" AFS_VOLID_FMT " (error = %d)\n",
1901 afs_printable_VolumeId_lu(isp->RWvolumeId), errno);
1902 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1903 fdP = IH_OPEN(salvinfo->VGLinkH);
1905 Abort("Can't open link table for volume %" AFS_VOLID_FMT " (error = %d)\n",
1906 afs_printable_VolumeId_lu(isp->RWvolumeId), errno);
1908 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1909 Abort("Can't truncate link table for volume %" AFS_VOLID_FMT " (error = %d)\n",
1910 afs_printable_VolumeId_lu(isp->RWvolumeId), errno);
1912 version.magic = LINKTABLEMAGIC;
1913 version.version = LINKTABLEVERSION;
1915 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1917 Abort("Can't truncate link table for volume %" AFS_VOLID_FMT " (error = %d)\n",
1918 afs_printable_VolumeId_lu(isp->RWvolumeId), errno);
1920 FDH_REALLYCLOSE(fdP);
1922 /* If the volume summary exits (i.e., the V*.vol header file exists),
1923 * then set this inode there as well.
1925 if (isp->volSummary)
1926 isp->volSummary->header.linkTable = ino;
/*
 * NOTE(review): presumably the body of nt_SVG, the start routine that
 * nt_SalvageVolumeGroup passes to pthread_create; its signature is not
 * visible in this view. It unpacks the SVGParms_t argument and runs
 * DoSalvageVolumeGroup with the stored salvinfo, inode summary and count.
 */
1935 SVGParms_t *parms = (SVGParms_t *) arg;
1936 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * nt_SalvageVolumeGroup -- salvage one volume group on a separate thread
 * and wait for that thread to finish. The trailing preprocessor comment
 * after this function indicates it belongs to the AFS_NT40_ENV build.
 *
 * Visible steps: the per-volume fields of salvinfo (VolumeChanged,
 * VGLinkH, VGLinkH_cnt, VolInfo) are reset; the arguments are packed into
 * parms; a thread attribute object is initialised and set to
 * PTHREAD_CREATE_JOINABLE; a thread running nt_SVG is created; and the
 * caller blocks in pthread_join. Each failing pthread call is logged.
 *
 * NOTE(review): the declarations of parms, tid and code, and what happens
 * after each failure is logged, are not visible in this view.
 */
1941 nt_SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1944 pthread_attr_t tattr;
1948 /* Initialize per volume global variables, even if later code does so */
1949 salvinfo->VolumeChanged = 0;
1950 salvinfo->VGLinkH = NULL;
1951 salvinfo->VGLinkH_cnt = 0;
1952 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1954 parms.svgp_inodeSummaryp = isp;
1955 parms.svgp_count = nVols;
1956 parms.svgp_salvinfo = salvinfo;
1957 code = pthread_attr_init(&tattr);
1959 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1963 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1965 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1968 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1970 Log("Failed to create thread to salvage volume group %u\n",
1974 (void)pthread_join(tid, NULL);
1976 #endif /* AFS_NT40_ENV */
/*
 * DoSalvageVolumeGroup -- salvage every volume of one volume group.
 *
 * salvinfo  salvage job state (inodeFd, VGLinkH, VGLinkH_cnt, ...)
 * isp       first of nVols consecutive inode summaries for the group
 * nVols     number of volumes in the group
 *
 * Outline of the visible code:
 *  1. haveRWvolume is set when the first summary is the read-write volume
 *     (volumeId equals RWvolumeId) and it has special inodes.
 *  2. Unless ForceSalvage is set, QuickCheck is consulted before any
 *     forking; ShowMounts without a read-write volume is also tested.
 *  3. When forking is enabled the parent only waits for the child.
 *  4. The inode records of the group are read from salvinfo->inodeFd at
 *     offset isp->index; allInodes is biased so that the absolute indexes
 *     stored in the summaries can be used directly.
 *  5. Under AFS_NAMEI_ENV the link table is located with FindLinkHandle,
 *     opened and its version stamp checked; a missing or bad table is
 *     recreated with CreateLinkTable and every non-special inode gets a
 *     link count of 1.
 *  6. Volumes are processed from the last summary down to salvageTo, so
 *     the read-write volume comes last. A read-write entry that only has
 *     link table special inodes is checked for separately. Each volume
 *     gets two passes (check = 1, then check = 0) of
 *     SalvageVolumeHeaderFile and SalvageVnodes, with MaybeZapVolume on
 *     failure.
 *  7. Remaining link count differences are repaired with IH_DEC or IH_INC
 *     per inode; the link table inode itself is adjusted last.
 *  8. SalvageVolume runs the directory checks on the read-write volume
 *     and the link table handle is released.
 *
 * NOTE(review): the return statements, several declarations and some
 * conditions are not visible in this view. The result of malloc for
 * inodes is used on the following line without a visible NULL check.
 */
1979 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1981 struct ViceInodeInfo *inodes, *allInodes, *ip;
1982 int i, totalInodes, size, salvageTo;
1986 int dec_VGLinkH = 0;
1988 FdHandle_t *fdP = NULL;
1990 salvinfo->VGLinkH_cnt = 0;
1991 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1992 && isp->nSpecialInodes > 0);
1993 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1994 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1997 if (ShowMounts && !haveRWvolume)
1999 if (canfork && !debug && Fork() != 0) {
2000 (void)Wait("Salvage volume group");
/* total up the inodes of every volume in the group and read just that
 * slice of the sorted inode table */
2003 for (i = 0, totalInodes = 0; i < nVols; i++)
2004 totalInodes += isp[i].nInodes;
2005 size = totalInodes * sizeof(struct ViceInodeInfo);
2006 inodes = malloc(size);
2007 allInodes = inodes - isp->index; /* this would be the base of all the inodes
2008 * for the partition, if all the inodes
2009 * had been read into memory */
2011 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
2013 opr_Verify(OS_READ(salvinfo->inodeFd, inodes, size) == size);
2015 /* Don't try to salvage a read write volume if there isn't one on this
2017 salvageTo = haveRWvolume ? 0 : 1;
2019 #ifdef AFS_NAMEI_ENV
2020 ino = FindLinkHandle(isp, nVols, allInodes);
2021 if (VALID_INO(ino)) {
2022 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
2023 fdP = IH_OPEN(salvinfo->VGLinkH);
2025 if (VALID_INO(ino) && fdP != NULL) {
2026 struct versionStamp header;
2027 afs_sfsize_t nBytes;
2029 nBytes = FDH_PREAD(fdP, (char *)&header, sizeof(struct versionStamp), 0);
2030 if (nBytes != sizeof(struct versionStamp)
2031 || header.magic != LINKTABLEMAGIC) {
2032 Log("Bad linktable header for volume %" AFS_VOLID_FMT ".\n", afs_printable_VolumeId_lu(isp->RWvolumeId));
2033 FDH_REALLYCLOSE(fdP);
2037 if (!VALID_INO(ino) || fdP == NULL) {
2038 Log("%s link table for volume %" AFS_VOLID_FMT ".\n",
2039 Testing ? "Would have recreated" : "Recreating", afs_printable_VolumeId_lu(isp->RWvolumeId));
2041 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
2044 struct ViceInodeInfo *ip;
2045 CreateLinkTable(salvinfo, isp, ino);
2046 fdP = IH_OPEN(salvinfo->VGLinkH);
2047 /* Sync fake 1 link counts to the link table, now that it exists */
2049 for (i = 0; i < nVols; i++) {
2050 ip = allInodes + isp[i].index;
2051 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
2052 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 0);
2053 ip[j].linkCount = 1;
2060 FDH_REALLYCLOSE(fdP);
2062 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
2065 /* Salvage in reverse order--read/write volume last; this way any
2066 * Inodes not referenced by the time we salvage the read/write volume
2067 * can be picked up by the read/write volume */
2068 /* ACTUALLY, that's not done right now--the inodes just vanish */
2069 for (i = nVols - 1; i >= salvageTo; i--) {
2071 struct InodeSummary *lisp = &isp[i];
2072 #ifdef AFS_NAMEI_ENV
2073 if (rw && (nVols > 1 || isp[i].nSpecialInodes == isp[i].nInodes)) {
2074 /* If nVols > 1, we have more than one vol in this volgroup, so
2075 * the RW inodes we detected may just be for the linktable, and
2076 * there is no actual RW volume.
2078 * Additionally, if we only have linktable inodes (no other
2079 * special inodes, no data inodes), there is also no actual RW
2080 * volume to salvage; this is just cruft left behind by something
2081 * else. In that case nVols will only be 1, though, so also
2082 * perform this linktables-only check if we don't have any
2083 * non-special inodes. */
2085 int all_linktables = 1;
2086 for (inode_i = 0; inode_i < isp[i].nSpecialInodes; inode_i++) {
2087 if (inodes[inode_i].u.special.type != VI_LINKTABLE) {
2092 if (all_linktables) {
2093 /* All we have are linktable special inodes, so skip salvaging
2094 * the RW; there was never an RW volume here. If we don't do
2095 * this, we risk creating a new "phantom" RW that the VLDB
2096 * doesn't know about, which is confusing and can cause
2104 Log("%s VOLUME %" AFS_VOLID_FMT "%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
2105 afs_printable_VolumeId_lu(lisp->volumeId), (Testing ? "(READONLY mode)" : ""));
2106 /* Check inodes twice. The second time do things seriously. This
2107 * way the whole RO volume can be deleted, below, if anything goes wrong */
2108 for (check = 1; check >= 0; check--) {
2110 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
2112 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
2113 if (rw && deleteMe) {
2114 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
2115 * volume won't be called */
2121 if (rw && check == 1)
2123 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
2124 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
2130 /* Fix actual inode counts */
2133 Log("totalInodes %d\n",totalInodes);
2134 for (ip = inodes; totalInodes; ip++, totalInodes--) {
2135 static int TraceBadLinkCounts = 0;
2136 #ifdef AFS_NAMEI_ENV
2137 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
2138 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
2139 VGLinkH_p1 = ip->u.param[0];
2140 continue; /* Deal with this last. */
2141 } else if (CheckDupLinktable(salvinfo, isp, ip)) {
2142 /* Don't touch this inode; CheckDupLinktable has handled it */
2146 if (ip->linkCount != 0 && TraceBadLinkCounts) {
2147 TraceBadLinkCounts--; /* Limit reports, per volume */
2148 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]); /* VolumeId in param */
2151 /* If ip->linkCount is non-zero at this point, then the linkcount
2152 * for the inode on disk is wrong. Initially linkCount is set to
2153 * the actual link count of the inode on disk, and then we (the
2154 * salvager) decrement it for every reference to that inode that we
2155 * find. So if linkCount is still positive by this point, it means
2156 * that the linkcount on disk is too high, so we should DEC the
2157 * inode. If linkCount is negative, it means the linkcount is too
2158 * low, so we should INC the inode.
2160 * If we get an error while INC'ing or DEC'ing, that's a little
2161 * odd and indicates a bug, but try to continue anyway, so the
2162 * volume may still be made accessible. */
2163 while (ip->linkCount > 0) {
2165 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2166 Log("idec failed. inode %s errno %d\n",
2167 PrintInode(stmp, ip->inodeNumber), errno);
2173 while (ip->linkCount < 0) {
2175 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2176 Log("iinc failed. inode %s errno %d\n",
2177 PrintInode(stmp, ip->inodeNumber), errno);
2184 #ifdef AFS_NAMEI_ENV
2185 while (dec_VGLinkH > 0) {
2186 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2187 Log("idec failed on link table, errno = %d\n", errno);
2191 while (dec_VGLinkH < 0) {
2192 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2193 Log("iinc failed on link table, errno = %d\n", errno);
2200 /* Directory consistency checks on the rw volume */
2202 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2203 IH_RELEASE(salvinfo->VGLinkH);
2205 if (canfork && !debug) {
/*
 * QuickCheck -- cheap test of the volume info headers of a group, used by
 * DoSalvageVolumeGroup before it forks and only when ForceSalvage is off.
 *
 * For each of the nVols summaries the VolumeDiskData is read through
 * IH_IREAD from the inode named by the volume summary header. A header is
 * acceptable when the full structure was read, its magic is
 * VOLUMEINFOMAGIC, dontSalvage equals DONT_SALVAGE, and both
 * needsSalvaged and destroyMe are 0. An acceptable header that is still
 * marked inUse is cleared (inUse = 0, inService = 1) and written back
 * with IH_IWRITE. The first entry is treated specially when it has no
 * special inodes and the group has more than one volume.
 *
 * NOTE(review): the return statements, the handling of a missing volume
 * summary, and the declarations of i and h are not visible in this view;
 * the caller treats a non-zero result as a reason to stop early --
 * confirm against the full file.
 */
2211 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2213 /* Check headers BEFORE forking */
2217 for (i = 0; i < nVols; i++) {
2218 struct VolumeSummary *vs = isp[i].volSummary;
2219 VolumeDiskData volHeader;
2221 /* Don't salvage just because phantom rw volume is there... */
2222 /* (If a read-only volume exists, read/write inodes must also exist) */
2223 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2227 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2228 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2229 == sizeof(volHeader)
2230 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2231 && volHeader.dontSalvage == DONT_SALVAGE
2232 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
/* a clean header that is still flagged in use is reset and persisted */
2233 if (volHeader.inUse != 0) {
2234 volHeader.inUse = 0;
2235 volHeader.inService = 1;
2237 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2238 != sizeof(volHeader)) {
2254 /* SalvageVolumeHeaderFile
2256 * Salvage the top level V*.vol header file. Make sure the special files
2257 * exist and that there are no duplicates.
 *
 * Outline of the steps visible in this function:
 *  1. Note which special inodes the existing header (isp->volSummary)
 *     already references, so that duplicates can be resolved in favour of
 *     the referenced inode.
 *  2. Build a fresh header in tempHeader (magic, version, id, parent) and
 *     record the inode number of every non-obsolete special inode found.
 *  3. Run SalvageHeader on each special file type.
 *  4. If no header file existed, or the stored header differs from
 *     tempHeader, write tempHeader out (a "would have written" message
 *     exists for the dry-run case).
 *
 * salvinfo: per-job salvage state (link table handle, partition data).
 * isp:      summary of the volume; its special inodes are
 *           inodes[isp->index] .. inodes[isp->index + nSpecialInodes - 1].
 * inodes:   inode table that isp->index refers into.
 * RW:       when zero (and check is zero) ClearROInUseBit is applied to the
 *           existing header summary.
 * check:    when nonzero, special inode link counts are left alone and the
 *           damaged-header message does not claim a repair.
 * deleteMe: handed unchanged to SalvageHeader.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * confirm the return convention against the full source.
 *
2259 * Calls SalvageHeader for each possible type of volume special file.
2263 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2264 struct ViceInodeInfo *inodes, int RW,
2265 int check, int *deleteMe)
2268 struct ViceInodeInfo *ip;
2269 int allinodesobsolete = 1;
2270 struct VolumeDiskHeader diskHeader;
2271 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2273 struct VolumeHeader tempHeader;
2274 struct afs_inode_info stuff[MAXINODETYPE];
2276 /* keeps track of special inodes that are probably 'good'; they are
2277 * referenced in the vol header, and are included in the given inodes
2282 } goodspecial[MAXINODETYPE];
2287 memset(goodspecial, 0, sizeof(goodspecial));
2289 skip = calloc(isp->nSpecialInodes, sizeof(*skip));
2291 Log("cannot allocate memory for inode skip array when salvaging "
2292 "volume %lu; not performing duplicate special inode recovery\n",
2293 afs_printable_uint32_lu(isp->volumeId));
2294 /* still try to perform the salvage; the skip array only does anything
2295 * if we detect duplicate special inodes */
/* stuff[] is initialised against tempHeader; stuff[type - 1].inode is
 * dereferenced below both to read and to set the header's inode slot for
 * a given special type */
2298 init_inode_info(&tempHeader, stuff);
2301 * First, look at the special inodes and see if any are referenced by
2302 * the existing volume header. If we find duplicate special inodes, we
2303 * can use this information to use the referenced inode (it's more
2304 * likely to be the 'good' one), and throw away the duplicates.
2306 if (isp->volSummary && skip) {
2307 /* use tempHeader, so we can use the stuff[] array to easily index
2308 * into the isp->volSummary special inodes */
2309 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2311 for (i = 0; i < isp->nSpecialInodes; i++) {
2312 ip = &inodes[isp->index + i];
2313 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2314 /* will get taken care of in a later loop */
2317 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2318 goodspecial[ip->u.special.type-1].valid = 1;
2319 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
/* start again from an empty header; the inode slots are filled in by the
 * loop over the special inodes further down */
2324 memset(&tempHeader, 0, sizeof(tempHeader));
2325 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2326 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2327 tempHeader.id = isp->volumeId;
2328 tempHeader.parent = isp->RWvolumeId;
2330 /* Check for duplicates (inodes are sorted by type field) */
2331 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2332 ip = &inodes[isp->index + i];
2333 if (ip->u.special.type == (ip + 1)->u.special.type) {
2334 afs_ino_str_t stmp1, stmp2;
2336 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2337 /* Will be caught in the loop below */
2341 Log("Duplicate special %d inodes for volume %" AFS_VOLID_FMT " found (%s, %s);\n",
2342 ip->u.special.type, afs_printable_VolumeId_lu(isp->volumeId),
2343 PrintInode(stmp1, ip->inodeNumber),
2344 PrintInode(stmp2, (ip+1)->inodeNumber));
2346 if (skip && goodspecial[ip->u.special.type-1].valid) {
2347 Inode gi = goodspecial[ip->u.special.type-1].inode;
2350 Log("using special inode referenced by vol header (%s)\n",
2351 PrintInode(stmp1, gi));
2354 /* the volume header references some special inode of
2355 * this type in the inodes array; are we it? */
2356 if (ip->inodeNumber != gi) {
2358 } else if ((ip+1)->inodeNumber != gi) {
2359 /* in case this is the last iteration; we need to
2360 * make sure we check ip+1, too */
2365 Log("cannot determine which is correct; salvage of volume %" AFS_VOLID_FMT " aborted\n", afs_printable_VolumeId_lu(isp->volumeId));
2373 for (i = 0; i < isp->nSpecialInodes; i++) {
2375 ip = &inodes[isp->index + i];
2376 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2378 Log("Rubbish header inode %s of type %d\n",
2379 PrintInode(stmp, ip->inodeNumber),
2380 ip->u.special.type);
2386 Log("Rubbish header inode %s of type %d; deleted\n",
2387 PrintInode(stmp, ip->inodeNumber),
2388 ip->u.special.type);
2389 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2390 if (skip && skip[i]) {
2391 if (orphans == ORPH_REMOVE) {
2392 Log("Removing orphan special inode %s of type %d\n",
2393 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2396 Log("Ignoring orphan special inode %s of type %d\n",
2397 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2398 /* fall through to the ip->linkCount--; line below */
2401 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2402 allinodesobsolete = 0;
2404 if (!check && ip->u.special.type != VI_LINKTABLE)
2405 ip->linkCount--; /* Keep the inode around */
2413 if (allinodesobsolete) {
2420 salvinfo->VGLinkH_cnt++; /* one for every header. */
2422 if (!RW && !check && isp->volSummary) {
2423 ClearROInUseBit(isp->volSummary);
2427 for (i = 0; i < MAXINODETYPE; i++) {
2428 if (stuff[i].inodeType == VI_LINKTABLE) {
2429 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2430 * And we may have recreated the link table earlier, so set the
2431 * RW header as well. The header magic was already checked.
2433 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2434 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2438 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2442 if (isp->volSummary == NULL) {
2443 char path[VMAXPATHLEN];
2444 char headerName[64];
2445 snprintf(headerName, sizeof headerName, VFORMAT,
2446 afs_printable_VolumeId_lu(isp->volumeId));
2447 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2448 salvinfo->fileSysPath, headerName);
2450 Log("No header file for volume %" AFS_VOLID_FMT "\n", afs_printable_VolumeId_lu(isp->volumeId));
2454 Log("No header file for volume %" AFS_VOLID_FMT "; %screating %s\n",
2455 afs_printable_VolumeId_lu(isp->volumeId), (Testing ? "it would have been " : ""),
2457 isp->volSummary = calloc(1, sizeof(struct VolumeSummary));
2459 writefunc = VCreateVolumeDiskHeader;
2461 char path[VMAXPATHLEN];
2462 char headerName[64];
2463 /* hack: these two fields are obsolete... */
2464 isp->volSummary->header.volumeAcl = 0;
2465 isp->volSummary->header.volumeMountTable = 0;
2468 (&isp->volSummary->header, &tempHeader,
2469 sizeof(struct VolumeHeader))) {
2470 VolumeExternalName_r(isp->volumeId, headerName, sizeof(headerName));
2471 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2472 salvinfo->fileSysPath, headerName);
2474 Log("Header file %s is damaged or no longer valid%s\n", path,
2475 (check ? "" : "; repairing"));
2479 writefunc = VWriteVolumeDiskHeader;
/* adopt the rebuilt header in memory, then hand its on-disk form to
 * whichever writer (create vs. rewrite) was selected above */
2483 memcpy(&isp->volSummary->header, &tempHeader,
2484 sizeof(struct VolumeHeader));
2487 Log("It would have written a new header file for volume %" AFS_VOLID_FMT "\n",
2488 afs_printable_VolumeId_lu(isp->volumeId));
2491 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2492 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2494 Log("Error %ld writing volume header file for volume %" AFS_VOLID_FMT "\n",
2495 afs_printable_int32_ld(code),
2496 afs_printable_VolumeId_lu(diskHeader.id));
2501 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2502 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader
 *
 * Check, and if necessary recreate, one special file of a volume.
 *
 * sp describes the special file: sp->inode points at its inode-number slot,
 * sp->inodeType and sp->description identify it, and sp->size / sp->stamp
 * give the amount of data read from it and its expected version stamp.
 *
 * Behaviour visible in this function:
 *  - On non-namei builds the link table is skipped (its magic was already
 *    checked).
 *  - A zero inode number is reported as missing and a new special inode is
 *    created with IH_CREATE.
 *  - The file is opened and read; a read of the wrong length or a wrong
 *    magic number is reported as corruption and the file is recreated.
 *  - Recreating VI_VOLINFO writes a cleared VolumeDiskData named
 *    "bogus.<volume id>", out of service and unblessed; for other types
 *    only the version stamp is written.
 *  - For VI_VOLINFO the resulting data is copied to salvinfo->VolInfo and
 *    the volume's update (or creation) time is logged.
 *
 * NOTE(review): the handling of check mode, of *deleteMe when destroyMe is
 * set, and the return values sit on lines not shown in this listing.
 */
2507 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2508 struct InodeSummary *isp, int check, int *deleteMe)
2511 VolumeDiskData volumeInfo;
2512 struct versionStamp fileHeader;
2521 #ifndef AFS_NAMEI_ENV
2522 if (sp->inodeType == VI_LINKTABLE)
2523 return 0; /* header magic was already checked */
2525 if (*(sp->inode) == 0) {
2527 Log("Missing inode in volume header (%s)\n", sp->description);
2531 Log("Missing inode in volume header (%s); %s\n", sp->description,
2532 (Testing ? "it would have recreated it" : "recreating"));
2535 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2536 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2537 if (!VALID_INO(*(sp->inode)))
2539 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2540 sp->description, errno);
2545 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2546 fdP = IH_OPEN(specH);
2547 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2548 /* bail out early and destroy the volume */
2550 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2557 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2558 sp->description, errno);
2561 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2562 || header.fileHeader.magic != sp->stamp.magic)) {
2564 Log("Part of the header (%s) is corrupted\n", sp->description);
2565 FDH_REALLYCLOSE(fdP);
2569 Log("Part of the header (%s) is corrupted; recreating\n",
2572 /* header can be garbage; make sure we don't read garbage data from
2574 memset(&header, 0, sizeof(header));
2576 #ifdef AFS_NAMEI_ENV
2577 if (namei_FixSpecialOGM(fdP, check)) {
2578 Log("Error with namei header OGM data (%s)\n", sp->description);
2579 FDH_REALLYCLOSE(fdP);
2584 if (sp->inodeType == VI_VOLINFO
2585 && header.volumeInfo.destroyMe == DESTROY_ME) {
2588 FDH_REALLYCLOSE(fdP);
2592 if (recreate && !Testing) {
2595 ("Internal error: recreating volume header (%s) in check mode\n",
2597 nBytes = FDH_TRUNC(fdP, 0);
2599 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2600 sp->description, errno);
2602 /* The following code should be moved into vutil.c */
2603 if (sp->inodeType == VI_VOLINFO) {
2605 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2606 header.volumeInfo.stamp = sp->stamp;
2607 header.volumeInfo.id = isp->volumeId;
2608 header.volumeInfo.parentId = isp->RWvolumeId;
2609 sprintf(header.volumeInfo.name, "bogus.%" AFS_VOLID_FMT, afs_printable_VolumeId_lu(isp->volumeId));
2610 Log("Warning: the name of volume %" AFS_VOLID_FMT " is now \"bogus.%" AFS_VOLID_FMT "\"\n",
2611 afs_printable_VolumeId_lu(isp->volumeId), afs_printable_VolumeId_lu(isp->volumeId));
2612 header.volumeInfo.inService = 0;
2613 header.volumeInfo.blessed = 0;
2614 /* The + 1000 is a hack in case there are any files out in venus caches */
2615 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2616 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2617 header.volumeInfo.needsCallback = 0;
2618 gettimeofday(&tp, NULL);
2619 header.volumeInfo.creationDate = tp.tv_sec;
2621 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2622 sizeof(header.volumeInfo), 0);
2623 if (nBytes != sizeof(header.volumeInfo)) {
2626 ("Unable to write volume header file (%s) (errno = %d)\n",
2627 sp->description, errno);
2628 Abort("Unable to write entire volume header file (%s)\n",
2632 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2633 if (nBytes != sizeof(sp->stamp)) {
2636 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2637 sp->description, errno);
2639 ("Unable to write entire version stamp in volume header file (%s)\n",
2644 FDH_REALLYCLOSE(fdP);
/* publish the (possibly recreated) volume info in salvinfo->VolInfo and
 * report when the volume was last updated, or created if never updated */
2646 if (sp->inodeType == VI_VOLINFO) {
2647 salvinfo->VolInfo = header.volumeInfo;
2652 if (salvinfo->VolInfo.updateDate) {
2653 strcpy(update, TimeStamp(buffer, sizeof(buffer), salvinfo->VolInfo.updateDate, 0));
2655 Log("%s (%" AFS_VOLID_FMT ") %supdated %s\n", salvinfo->VolInfo.name,
2656 afs_printable_VolumeId_lu(salvinfo->VolInfo.id),
2657 (Testing ? "it would have been " : ""), update);
2659 strcpy(update, TimeStamp(buffer, sizeof(buffer), salvinfo->VolInfo.creationDate, 0));
2661 Log("%s (%" AFS_VOLID_FMT ") not updated (created %s)\n",
2662 salvinfo->VolInfo.name, afs_printable_VolumeId_lu(salvinfo->VolInfo.id), update);
/*
 * SalvageVnodes
 *
 * Salvage the small and then the large vnode index of one volume.
 *
 * rwIsp supplies the inode range to match against (the inodes that follow
 * its special inodes); thisIsp supplies the index files to check. RW is
 * set when both arguments are the same summary.
 *
 * Returns 0 only when both SalvageIndex results are 0, otherwise -1. In
 * check mode a -1 from the small index is tested for before the large
 * index is examined (the statement taken in that case is not shown here).
 */
2672 SalvageVnodes(struct SalvInfo *salvinfo,
2673 struct InodeSummary *rwIsp,
2674 struct InodeSummary *thisIsp,
2675 struct ViceInodeInfo *inodes, int check)
2677 int ilarge, ismall, ioffset, RW, nInodes;
2678 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2681 RW = (rwIsp == thisIsp);
2682 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2684 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2685 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2686 if (check && ismall == -1)
2689 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2690 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2691 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex
 *
 * Walk one vnode index file (inode `ino`, vnode class `class`) and
 * reconcile each allocated vnode with the inode table ip[0 .. nInodes).
 *
 * Per vnode, the checks visible here are:
 *  - a vnode with inode number 0, or with a magic number that differs from
 *    the class's, is cleared (the guarding conditions are partly elided);
 *  - the inode list is advanced to the entries carrying this vnode number,
 *    preferring the one whose inode number equals the vnode's;
 *  - for a matching inode, the vnode's uniquifier, data version, inode
 *    number and length are brought in line with the inode, and the inode's
 *    linkCount is decremented so that the inode is kept;
 *  - without a matching inode, a vnode that names an inode (or is a
 *    directory) is logged and cleared.
 * A changed vnode is written back with IH_IWRITE unless Testing is set, and
 * salvinfo->VolumeChanged is raised.
 *
 * Opening the index, its size being an exact multiple of the class's disk
 * vnode size, and the initial seek are all enforced with assertions.
 *
 * NOTE(review): which branches run in check mode, and the return value,
 * are on lines missing from this listing.
 */
2695 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2696 struct ViceInodeInfo *ip, int nInodes,
2697 struct VolumeSummary *volSummary, int check)
2699 char buf[SIZEOF_LARGEDISKVNODE];
2700 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2702 StreamHandle_t *file;
2703 struct VnodeClassInfo *vcp;
2705 afs_sfsize_t nVnodes;
2706 afs_fsize_t vnodeLength;
2708 afs_ino_str_t stmp1, stmp2;
2712 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2713 fdP = IH_OPEN(handle);
2714 opr_Assert(fdP != NULL);
2715 file = FDH_FDOPEN(fdP, "r+");
2716 opr_Assert(file != NULL);
2717 vcp = &VnodeClassInfo[class];
2718 size = OS_SIZE(fdP->fd_fd);
2719 opr_Assert(size != -1);
/* the first diskSize-sized slot of the file is not counted as a vnode and
 * is skipped by the seek below */
2720 nVnodes = (size / vcp->diskSize) - 1;
2722 opr_Assert((nVnodes + 1) * vcp->diskSize == size);
2723 opr_Verify(STREAM_ASEEK(file, vcp->diskSize) == 0);
2727 for (vnodeIndex = 0;
2728 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2729 nVnodes--, vnodeIndex++) {
2730 if (vnode->type != vNull) {
2731 int vnodeChanged = 0;
2732 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2733 if (VNDISK_GET_INO(vnode) == 0) {
2735 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2736 memset(vnode, 0, vcp->diskSize);
2740 if (vcp->magic != vnode->vnodeMagic) {
2741 /* bad magic #, probably partially created vnode */
2743 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2744 vnodeNumber, afs_printable_uint32_lu(vnode->vnodeMagic),
2745 afs_printable_uint32_lu(vcp->magic));
2746 memset(vnode, 0, vcp->diskSize);
2750 Log("Partially allocated vnode %d deleted.\n",
2752 memset(vnode, 0, vcp->diskSize);
2756 /* ****** Should do a bit more salvage here: e.g. make sure
2757 * vnode type matches what it should be given the index */
2758 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2759 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2760 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2761 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2768 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2769 /* The following doesn't work, because the version number
2770 * is not maintained correctly by the file server */
2771 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2772 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2774 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2780 /* For RW volume, look for vnode with matching inode number;
2781 * if no such match, take the first determined by our sort
2783 struct ViceInodeInfo *lip = ip;
2784 int lnInodes = nInodes;
2786 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2787 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2796 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2797 /* "Matching" inode */
2801 vu = vnode->uniquifier;
2802 iu = ip->u.vnode.vnodeUniquifier;
2803 vd = vnode->dataVersion;
2804 id = ip->u.vnode.inodeDataVersion;
2806 * Because of the possibility of the uniquifier overflows (> 4M)
2807 * we compare them modulo the low 22-bits; we shouldn't worry
2808 * about mismatching since they shouldn't to many old
2809 * uniquifiers of the same vnode...
2811 if (IUnique(vu) != IUnique(iu)) {
2813 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2816 vnode->uniquifier = iu;
2817 #ifdef AFS_3DISPARES
2818 vnode->dataVersion = (id >= vd ?
2821 1887437 ? vd : id) :
2824 1887437 ? id : vd));
2826 #if defined(AFS_SGI_EXMAG)
2827 vnode->dataVersion = (id >= vd ?
2830 15099494 ? vd : id) :
2833 15099494 ? id : vd));
2835 vnode->dataVersion = (id > vd ? id : vd);
2836 #endif /* AFS_SGI_EXMAG */
2837 #endif /* AFS_3DISPARES */
2840 /* don't bother checking for vd > id any more, since
2841 * partial file transfers always result in this state,
2842 * and you can't do much else anyway (you've already
2843 * found the best data you can) */
2844 #ifdef AFS_3DISPARES
2845 if (!vnodeIsDirectory(vnodeNumber)
2846 && ((vd < id && (id - vd) < 1887437)
2847 || ((vd > id && (vd - id) > 1887437)))) {
2849 #if defined(AFS_SGI_EXMAG)
2850 if (!vnodeIsDirectory(vnodeNumber)
2851 && ((vd < id && (id - vd) < 15099494)
2852 || ((vd > id && (vd - id) > 15099494)))) {
2854 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2855 #endif /* AFS_SGI_EXMAG */
2858 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2859 vnode->dataVersion = id;
2864 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2867 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2869 VNDISK_SET_INO(vnode, ip->inodeNumber);
2874 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2876 VNDISK_SET_INO(vnode, ip->inodeNumber);
2879 VNDISK_GET_LEN(vnodeLength, vnode);
2880 if (ip->byteCount != vnodeLength) {
2883 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2888 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2889 VNDISK_SET_LEN(vnode, ip->byteCount);
2893 ip->linkCount--; /* Keep the inode around */
2896 } else { /* no matching inode */
2898 if (VNDISK_GET_INO(vnode) != 0
2899 || vnode->type == vDirectory) {
2900 /* No matching inode--get rid of the vnode */
2902 if (VNDISK_GET_INO(vnode)) {
2904 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2908 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2913 if (VNDISK_GET_INO(vnode)) {
2915 time_t serverModifyTime = vnode->serverModifyTime;
2916 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2920 time_t serverModifyTime = vnode->serverModifyTime;
2921 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2924 memset(vnode, 0, vcp->diskSize);
2927 /* Should not reach here because we checked for
2928 * (inodeNumber == 0) above. And where we zero the vnode,
2929 * we also goto vnodeDone.
2933 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2937 } /* VNDISK_GET_INO(vnode) != 0 */
2939 opr_Assert(!(vnodeChanged && check));
2940 if (vnodeChanged && !Testing) {
2941 opr_Verify(IH_IWRITE(handle,
2942 vnodeIndexOffset(vcp, vnodeNumber),
2943 (char *)vnode, vcp->diskSize)
2945 salvinfo->VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber
 *
 * Look up the VnodeEssence recorded for vnodeNumber.
 *
 * The vnode's class selects the table in salvinfo->vnodeInfo[] and its bit
 * number is the index into that table. Returns NULL when that index is not
 * below the table's nVnodes, otherwise a pointer to the entry.
 */
2956 struct VnodeEssence *
2957 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2960 struct VnodeInfo *vip;
2963 class = vnodeIdToClass(vnodeNumber);
2964 vip = &salvinfo->vnodeInfo[class];
2965 offset = vnodeIdToBitNumber(vnodeNumber);
2966 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite
 *
 * Copy a directory's data to a new inode before the directory is changed.
 *
 * Tested first: dir->copied or Testing (the action taken is on a line not
 * shown here -- presumably an early return; confirm). Otherwise the
 * directory's vnode is read from the large vnode index, a new inode is
 * created with IH_CREATE using an advanced data version, the old inode's
 * contents are copied into it with CopyInode, the vnode is pointed at the
 * new inode and written back, and dir->dirHandle is re-targeted at the new
 * inode. Failures of the read, create, copy and write steps are caught by
 * assertions.
 */
2970 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2972 /* Copy the directory unconditionally if we are going to change it:
2973 * not just if was cloned.
2975 struct VnodeDiskObject vnode;
2976 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2977 Inode oldinode, newinode;
2980 if (dir->copied || Testing)
2982 DFlush(); /* Well justified paranoia... */
2985 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2986 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2988 opr_Assert(code == sizeof(vnode));
2989 oldinode = VNDISK_GET_INO(&vnode);
2990 /* Increment the version number by a whole lot to avoid problems with
2991 * clients that were promised new version numbers--but the file server
2992 * crashed before the versions were written to disk.
2995 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2996 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2998 opr_Assert(VALID_INO(newinode));
2999 opr_Verify(CopyInode(salvinfo->fileSysDevice, oldinode, newinode,
3002 VNDISK_SET_INO(&vnode, newinode);
3004 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3005 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
3007 opr_Assert(code == sizeof(vnode));
3009 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
3010 salvinfo->fileSysDevice, newinode,
3011 &salvinfo->VolumeChanged);
3012 /* Don't delete the original inode right away, because the directory is
3013 * still being scanned.
/*
 * CopyAndSalvage
 *
 * Rebuild directory `dir` into a freshly created inode and switch the
 * directory over to it only if the rebuilt copy checks out.
 *
 * Steps visible here: read the directory's vnode from the large vnode
 * index; create a new inode with an advanced data version; run DirSalvage
 * from the old directory handle into the new one, using vnode.parent (or
 * the directory itself when there is none) for ".."; verify the result
 * with DirOK. On either failure the new inode's link count is dropped with
 * IH_DEC and the directory is left as it was. On success the vnode gets
 * the new inode number and length and is written back and synced, the old
 * directory file is closed, the old inode is released with IH_DEC, and
 * dir->dirHandle becomes the new handle.
 *
 * NOTE(review): the guard on Testing and the return statements are on
 * lines not present in this listing.
 */
3019 * This function should either successfully create a new dir, or give up
3020 * and leave things the way they were. In particular, if it fails to write
3021 * the new dir properly, it should return w/o changing the reference to the
3025 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
3027 struct VnodeDiskObject vnode;
3028 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
3029 Inode oldinode, newinode;
3034 afs_int32 parentUnique = 1;
3035 struct VnodeEssence *vnodeEssence;
3040 Log("Salvaging directory %u...\n", dir->vnodeNumber);
3042 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
3043 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
3045 opr_Assert(lcode == sizeof(vnode));
3046 oldinode = VNDISK_GET_INO(&vnode);
3047 /* Increment the version number by a whole lot to avoid problems with
3048 * clients that were promised new version numbers--but the file server
3049 * crashed before the versions were written to disk.
3052 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
3053 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
3055 opr_Assert(VALID_INO(newinode));
3056 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
3057 &salvinfo->VolumeChanged);
3059 /* Assign . and .. vnode numbers from dir and vnode.parent.
3060 * The uniquifier for . is in the vnode.
3061 * The uniquifier for .. might be set to a bogus value of 1 and
3062 * the salvager will later clean it up.
3064 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
3065 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
3068 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
3070 (vnode.parent ? vnode.parent : dir->vnodeNumber),
3075 /* didn't really build the new directory properly, let's just give up. */
3076 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
3077 Log("Directory salvage returned code %d, continuing.\n", code);
3079 Log("also failed to decrement link count on new inode\n");
3083 Log("Checking the results of the directory salvage...\n");
3084 if (!DirOK(&newdir)) {
3085 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
3086 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
3087 opr_Assert(code == 0);
3091 VNDISK_SET_INO(&vnode, newinode);
3092 length = afs_dir_Length(&newdir);
3093 VNDISK_SET_LEN(&vnode, length);
3095 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3096 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
3098 opr_Assert(lcode == sizeof(vnode));
3099 IH_CONDSYNC(salvinfo->vnodeInfo[vLarge].handle);
3101 /* make sure old directory file is really closed */
3102 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
3103 FDH_REALLYCLOSE(fdP);
3105 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
3106 opr_Assert(code == 0);
3107 dir->dirHandle = newdir;
/*
 * Bundle handed to JudgeEntry through its opaque first argument; JudgeEntry
 * unpacks it into its `dir` and `salvinfo` locals.
 */
3111 * arguments for JudgeEntry.
3113 struct judgeEntry_params {
3114 struct DirSummary *dir; /**< directory we're examining entries in */
3115 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/*
 * JudgeEntry
 *
 * Examine one directory entry (name -> vnodeNumber, unique) of the
 * directory described by the judgeEntry_params passed in arock, repairing
 * or deleting the entry when it disagrees with the vnode tables.
 *
 * Checks visible here, in order:
 *  - the entry names a vnode number with no VnodeEssence: deleted;
 *  - (non-namei builds) the vnode has inode number 0: deleted;
 *  - an even-numbered vnode with no count, unique or mode bits: reported
 *    as an invalid entry;
 *  - uniquifier mismatch: the entry is re-created with the vnode's
 *    uniquifier, or marked for deletion when that is zero or the directory
 *    is orphaned;
 *  - "." and ".." are rewritten when they do not name this directory and
 *    its parent; names starting with ".__afs" are deleted and their vnode
 *    is marked unclaimed and to be deleted;
 *  - otherwise optional reports (suid/sgid files, mount points, root-owned
 *    files) are logged, a malformed mount point is turned into a symbolic
 *    link by setting mode bits 0111, and the vnode is either claimed by
 *    this directory or the entry is removed because another directory
 *    already owns the vnode.
 * vnodeEssence->count is decremented at the end of the visible code.
 *
 * Each afs_dir_Delete shown is preceded by CopyOnWrite(salvinfo, dir).
 *
 * NOTE(review): several guards (for example on Testing) and the return
 * statements are on lines not present in this listing.
 */
3119 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
3122 struct judgeEntry_params *params = arock;
3123 struct DirSummary *dir = params->dir;
3124 struct SalvInfo *salvinfo = params->salvinfo;
3125 struct VnodeEssence *vnodeEssence;
3126 afs_int32 dirOrphaned, todelete;
3128 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
3130 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3131 if (vnodeEssence == NULL) {
3133 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
3136 CopyOnWrite(salvinfo, dir);
3137 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3142 #ifndef AFS_NAMEI_ENV
3143 /* On AIX machines, don't allow entries to point to inode 0. That is a special
3144 * mount inode for the partition. If this inode were deleted, it would crash
3147 if (vnodeEssence->InodeNumber == 0) {
3148 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
3150 CopyOnWrite(salvinfo, dir);
3151 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3158 if (!(vnodeNumber & 1) && !Showmode
3159 && !(vnodeEssence->count || vnodeEssence->unique
3160 || vnodeEssence->modeBits)) {
3161 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
3162 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3163 vnodeNumber, unique,
3164 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3168 CopyOnWrite(salvinfo, dir);
3169 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3175 /* Check if the Uniquifiers match. If not, change the directory entry
3176 * so its unique matches the vnode unique. Delete if the unique is zero
3177 * or if the directory is orphaned.
3179 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3180 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3183 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3185 /* This is an orphaned directory. Don't delete the . or ..
3186 * entry. Otherwise, it will get created in the next
3187 * salvage and deleted again here. So Just skip it.
3191 /* (vnodeEssence->unique == 0 && ('.' || '..'));
3192 * Entries arriving here should be deleted, but the directory
3193 * is not orphaned. Therefore, the entry must be pointing at
3194 * the wrong vnode. Skip the 'else' clause and fall through;
3195 * the code below will repair the entry so it correctly points
3196 * at the vnode of the current directory (if '.') or the parent
3197 * directory (if '..'). */
3200 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n",
3201 dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique,
3202 vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3206 fid.Vnode = vnodeNumber;
3207 fid.Unique = vnodeEssence->unique;
3208 CopyOnWrite(salvinfo, dir);
3209 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3211 opr_Verify(afs_dir_Create(&dir->dirHandle, name, &fid) == 0);
3214 return 0; /* no need to continue */
3218 if (strcmp(name, ".") == 0) {
3219 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3221 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3224 CopyOnWrite(salvinfo, dir);
3225 opr_Verify(afs_dir_Delete(&dir->dirHandle, ".") == 0);
3226 fid.Vnode = dir->vnodeNumber;
3227 fid.Unique = dir->unique;
3228 opr_Verify(afs_dir_Create(&dir->dirHandle, ".", &fid) == 0);
3229 vnodeNumber = fid.Vnode; /* Get the new Essence */
3230 unique = fid.Unique;
3231 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3235 } else if (strcmp(name, "..") == 0) {
3238 struct VnodeEssence *dotdot;
3239 pa.Vnode = dir->parent;
3240 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3241 opr_Assert(dotdot != NULL); /* XXX Should not be assert */
3242 pa.Unique = dotdot->unique;
3244 pa.Vnode = dir->vnodeNumber;
3245 pa.Unique = dir->unique;
3247 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3249 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3251 CopyOnWrite(salvinfo, dir);
3252 opr_Verify(afs_dir_Delete(&dir->dirHandle, "..") == 0);
3253 opr_Verify(afs_dir_Create(&dir->dirHandle, "..", &pa) == 0);
3256 vnodeNumber = pa.Vnode; /* Get the new Essence */
3258 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3260 dir->haveDotDot = 1;
3261 } else if (strncmp(name, ".__afs", 6) == 0) {
3263 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3266 CopyOnWrite(salvinfo, dir);
3267 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3269 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3270 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3273 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3274 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3275 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3276 && !(vnodeEssence->modeBits & 0111)) {
3277 afs_sfsize_t nBytes;
3283 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3284 vnodeEssence->InodeNumber);
3287 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3291 size = FDH_SIZE(fdP);
3293 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3294 FDH_REALLYCLOSE(fdP);
3301 nBytes = FDH_PREAD(fdP, buf, size, 0);
3302 if (nBytes == size) {
/* contents that do not start with '#' or '%' and end with '.' are treated
 * as an invalid mount point and the vnode is converted to a symlink */
3304 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3305 Log("Volume %" AFS_VOLID_FMT " (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3306 afs_printable_VolumeId_lu(dir->dirHandle.dirh_handle->ih_vid), dir->vname, dir->name ? dir->name : "??", name, buf,
3307 Testing ? "would convert" : "converted");
3308 vnodeEssence->modeBits |= 0111;
3309 vnodeEssence->changed = 1;
3310 } else if (ShowMounts)
3311 Log("In volume %" AFS_VOLID_FMT " (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3312 afs_printable_VolumeId_lu(dir->dirHandle.dirh_handle->ih_vid),
3313 dir->vname, dir->name ? dir->name : "??", name, buf);
3315 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3316 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3318 FDH_REALLYCLOSE(fdP);
3321 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3322 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3323 if (vnodeIdToClass(vnodeNumber) == vLarge
3324 && vnodeEssence->name == NULL) {
3325 vnodeEssence->name = strdup(name);
3328 /* The directory entry points to the vnode. Check to see if the
3329 * vnode points back to the directory. If not, then let the
3330 * directory claim it (else it might end up orphaned). Vnodes
3331 * already claimed by another directory are deleted from this
3332 * directory: hardlinks to the same vnode are not allowed
3333 * from different directories.
3335 if (vnodeEssence->parent != dir->vnodeNumber) {
3336 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3337 /* Vnode does not point back to this directory.
3338 * Orphaned dirs cannot claim a file (it may belong to
3339 * another non-orphaned dir).
3342 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3344 vnodeEssence->parent = dir->vnodeNumber;
3345 vnodeEssence->changed = 1;
3347 /* Vnode was claimed by another directory */
3350 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3351 } else if (vnodeNumber == 1) {
3352 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3354 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3358 CopyOnWrite(salvinfo, dir);
3359 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3364 /* This directory claims the vnode */
3365 vnodeEssence->claimed = 1;
3367 vnodeEssence->count--;
/*
 * DistilVnodeEssence
 *
 * Read the vnode index file `ino` of the given class and condense every
 * allocated vnode into an in-memory VnodeEssence.
 *
 * The index is opened through salvinfo->vnodeInfo[class].handle. Its size
 * gives nVnodes (the first diskSize-sized slot is not counted and is
 * skipped by the seek); a zeroed vnodes[] table of that length is
 * allocated and, for the large class, an inodes[] table as well.
 *
 * For each vnode whose type is not vNull the link count, block count,
 * parent, uniquifier, mode bits, inode number, type, author, owner and
 * group are recorded; volumeBlockCount and nAllocatedVnodes are
 * accumulated, and *maxu is raised to the largest uniquifier seen.
 *
 * A directory vnode found in a non-large index is uncounted, zeroed in
 * the small vnode index on disk, and salvinfo->VolumeChanged is set; the
 * inode numbers of directory vnodes are stored in vip->inodes[].
 *
 * Open, size, seek and allocation failures are caught by assertions.
 */
3372 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3373 VnodeClass class, Inode ino, Unique * maxu)
3375 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3376 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3377 char buf[SIZEOF_LARGEDISKVNODE];
3378 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3380 StreamHandle_t *file;
3385 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3386 fdP = IH_OPEN(vip->handle);
3387 opr_Assert(fdP != NULL);
3388 file = FDH_FDOPEN(fdP, "r+");
3389 opr_Assert(file != NULL);
3390 size = OS_SIZE(fdP->fd_fd);
3391 opr_Assert(size != -1);
3392 vip->nVnodes = (size / vcp->diskSize) - 1;
3393 if (vip->nVnodes > 0) {
3394 opr_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3395 opr_Verify(STREAM_ASEEK(file, vcp->diskSize) == 0);
3396 opr_Verify((vip->vnodes = calloc(vip->nVnodes,
3397 sizeof(struct VnodeEssence)))
3399 if (class == vLarge) {
3400 opr_Verify((vip->inodes = calloc(vip->nVnodes, sizeof(Inode)))
3410 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3411 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3412 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3413 nVnodes--, vnodeIndex++) {
3414 if (vnode->type != vNull) {
3415 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3416 afs_fsize_t vnodeLength;
3417 vip->nAllocatedVnodes++;
3418 vep->count = vnode->linkCount;
3419 VNDISK_GET_LEN(vnodeLength, vnode);
3420 vep->blockCount = nBlocks(vnodeLength);
3421 vip->volumeBlockCount += vep->blockCount;
3422 vep->parent = vnode->parent;
3423 vep->unique = vnode->uniquifier;
3424 if (*maxu < vnode->uniquifier)
3425 *maxu = vnode->uniquifier;
3426 vep->modeBits = vnode->modeBits;
3427 vep->InodeNumber = VNDISK_GET_INO(vnode);
3428 vep->type = vnode->type;
3429 vep->author = vnode->author;
3430 vep->owner = vnode->owner;
3431 vep->group = vnode->group;
3432 if (vnode->type == vDirectory) {
3433 if (class != vLarge) {
3434 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3435 vip->nAllocatedVnodes--;
3436 memset(vnode, 0, sizeof(*vnode));
/* write out the zeroed vnode itself: `vnode` is a pointer into buf, so
 * passing &vnode with sizeof(vnode) would store the pointer variable's own
 * bytes in the index file instead of clearing the entry */
3437 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3438 vnodeIndexOffset(vcp, vnodeNumber),
3439 (char *)vnode, sizeof(*vnode));
3440 salvinfo->VolumeChanged = 1;
3442 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
3451 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3454 struct VnodeEssence *parentvp;
3460 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3461 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3462 strcat(path, OS_DIRSEP);
3463 strcat(path, vp->name);
3469 /* To determine if a vnode is orhpaned or not, the vnode and all its parent
3470 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
3473 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3475 struct VnodeEssence *vep;
3478 return (1); /* Vnode zero does not exist */
3480 return (0); /* The root dir vnode is always claimed */
3481 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3482 if (!vep || !vep->claimed)
3483 return (1); /* Vnode is not claimed - it is orphaned */
3485 return (IsVnodeOrphaned(salvinfo, vep->parent));
/*
 * Salvage one directory: the large vnode held in slot i of dirVnodeInfo.
 *
 * A slot that is already marked salvaged, or that has no directory inode
 * recorded, is left alone.  The parent directory is salvaged first (by a
 * recursive call), then every entry of this directory is handed to
 * JudgeEntry.  Once the root directory (vnode 1, unique 1) has been judged,
 * its DirSummary is copied into *rootdir.
 *
 * NOTE(review): dir, dirHandle and path are function-level statics while
 * the function calls itself for the parent; the recursive call is made
 * before any of them is filled in for this directory -- keep that ordering
 * in mind before moving statements around.
 */
3489 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3490 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3491 struct DirSummary *rootdir, int *rootdirfound)
3493 static struct DirSummary dir;
3494 static struct DirHandle dirHandle;
3495 struct VnodeEssence *parent;
3496 static char path[MAXPATHLEN];
/* Each slot is processed at most once; it is marked before any recursion. */
3499 if (dirVnodeInfo->vnodes[i].salvaged)
3500 return; /* already salvaged */
3503 dirVnodeInfo->vnodes[i].salvaged = 1;
3505 if (dirVnodeInfo->inodes[i] == 0)
3506 return; /* Not allocated to a directory */
/* Vnode 1 is the volume root: a non-zero parent is reset to 0. */
3508 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3509 if (dirVnodeInfo->vnodes[i].parent) {
3510 Log("Bad parent, vnode 1; %s...\n",
3511 (Testing ? "skipping" : "salvaging"));
3512 dirVnodeInfo->vnodes[i].parent = 0;
3513 dirVnodeInfo->vnodes[i].changed = 1;
/* Salvage the parent directory first if it has not been visited yet. */
3516 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3517 if (parent && parent->salvaged == 0)
3518 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3519 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3520 rootdir, rootdirfound);
/* Describe this directory in the shared (static) DirSummary. */
3523 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3524 dir.unique = dirVnodeInfo->vnodes[i].unique;
3527 dir.parent = dirVnodeInfo->vnodes[i].parent;
3528 dir.haveDot = dir.haveDotDot = 0;
3529 dir.ds_linkH = alinkH;
3530 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3531 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
/* With RebuildDirs set (and not Testing) DirOK is not consulted and dirok
 * is forced to 0. */
3533 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3536 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3537 (Testing ? "skipping" : "salvaging"));
3540 CopyAndSalvage(salvinfo, &dir);
3542 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3545 dirHandle = dir.dirHandle;
/* Resolve this directory's path into the static path buffer. */
3548 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3549 &dirVnodeInfo->vnodes[i], path);
3552 /* If enumeration failed for random reasons, we will probably delete
3553 * too much stuff, so we guard against this instead.
3555 struct judgeEntry_params judge_params;
3556 judge_params.salvinfo = salvinfo;
3557 judge_params.dir = &dir;
3559 opr_Verify(afs_dir_EnumerateDir(&dirHandle, JudgeEntry,
3560 &judge_params) == 0);
3563 /* Delete the old directory if it was copied in order to salvage.
3564 * CopyOnWrite has written the new inode # to the disk, but we still
3565 * have the old one in our local structure here. Thus, we idec the
3569 if (dir.copied && !Testing) {
3570 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3571 opr_Assert(code == 0);
3572 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3575 /* Remember rootdir DirSummary _after_ it has been judged */
3576 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3577 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3585 * Get a new FID that can be used to create a new file.
3587 * @param[in] volHeader vol header for the volume
3588 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3589 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3590 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3591 * updated to the new max unique if we create a new
3595 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3596 VnodeClass class, AFSFid *afid, Unique *maxunique)
3599 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3600 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
3604 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3605 /* no free vnodes; make a new one */
3606 salvinfo->vnodeInfo[class].nVnodes++;
3607 salvinfo->vnodeInfo[class].vnodes =
3608 realloc(salvinfo->vnodeInfo[class].vnodes,
3609 sizeof(struct VnodeEssence) * (i+1));
3611 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
3614 afid->Vnode = bitNumberToVnodeNumber(i, class);
3616 if (volHeader->uniquifier < (*maxunique + 1)) {
3617 /* header uniq is bad; it will get bumped by 2000 later */
3618 afid->Unique = *maxunique + 1 + 2000;
3621 /* header uniq seems okay; just use that */
3622 afid->Unique = *maxunique = volHeader->uniquifier++;
3627 * Create a vnode for a README file explaining not to use a recreated-root vol.
3629 * @param[in] volHeader vol header for the volume
3630 * @param[in] alinkH ihandle for i/o for the volume
3631 * @param[in] vid volume id
3632 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3633 * updated to the new max unique if we create a new
3635 * @param[out] afid FID for the new readme vnode
3636 * @param[out] ainode the inode for the new readme file
3638 * @return operation status
/*
 * CreateReadme: create the README file that is placed in a recreated root
 * directory.
 *
 * Steps visible here: obtain a small-vnode FID from GetNewFID, create an
 * inode for it with IH_CREATE, write the readme text into that inode, build
 * a small disk vnode (type vFile, mode 0777, link count 1, data version 1)
 * and write it into the small vnode index, mirror the vnode into its
 * VnodeEssence, and hand the inode back through *ainode.  The trailing
 * IH_DEC belongs to the failure path, going by its log text.
 */
3643 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3644 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3648 struct VnodeDiskObject *rvnode = NULL;
3650 IHandle_t *readmeH = NULL;
3651 struct VnodeEssence *vep;
3653 time_t now = time(NULL);
3655 /* Try to make the note brief, but informative. Only administrators should
3656 * be able to read this file at first, so we can hopefully assume they
3657 * know what AFS is, what a volume is, etc. */
3659 "This volume has been salvaged, but has lost its original root directory.\n"
3660 "The root directory that exists now has been recreated from orphan files\n"
3661 "from the rest of the volume. This recreated root directory may interfere\n"
3662 "with old cached data on clients, and there is no way the salvager can\n"
3663 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3664 "use this volume, but only copy the salvaged data to a new volume.\n"
3665 "Continuing to use this volume as it exists now may cause some clients to\n"
3666 "behave oddly when accessing this volume.\n"
3667 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3668 /* ^ the person reading this probably just lost some data, so they could
3669 * use some cheering up. */
3671 /* -1 for the trailing NUL */
3672 length = sizeof(readme) - 1;
/* Pick the small-vnode slot and uniquifier for the readme.  vep is taken
 * only afterwards because GetNewFID may reallocate the vnodes array. */
3674 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
3676 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3678 /* create the inode and write the contents */
3679 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3680 salvinfo->fileSysPath, 0, vid,
3681 afid->Vnode, afid->Unique, 1);
3682 if (!VALID_INO(readmeinode)) {
3683 Log("CreateReadme: readme IH_CREATE failed\n");
/* The text is written through a temporary handle that is released at once. */
3687 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3688 bytes = IH_IWRITE(readmeH, 0, readme, length);
3689 IH_RELEASE(readmeH);
/* NOTE(review): the check compares against length (sizeof(readme) - 1) but
 * the message below prints sizeof(readme) as the expected count. */
3691 if (bytes != length) {
3692 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3693 (int)sizeof(readme));
3697 /* create the vnode and write it out */
3698 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
/* NOTE(review): the message below is prefixed "CreateRootDir" although it is
 * emitted from CreateReadme. */
3700 Log("CreateRootDir: error alloc'ing memory\n");
3704 rvnode->type = vFile;
3706 rvnode->modeBits = 0777;
3707 rvnode->linkCount = 1;
3708 VNDISK_SET_LEN(rvnode, length);
3709 rvnode->uniquifier = afid->Unique;
3710 rvnode->dataVersion = 1;
3711 VNDISK_SET_INO(rvnode, readmeinode);
3712 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3717 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
3719 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3720 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3721 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3723 if (bytes != SIZEOF_SMALLDISKVNODE) {
3724 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3725 (int)SIZEOF_SMALLDISKVNODE);
3729 /* update VnodeEssence for new readme vnode */
3730 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
3732 vep->blockCount = nBlocks(length);
3733 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3734 vep->parent = rvnode->parent;
3735 vep->unique = rvnode->uniquifier;
3736 vep->modeBits = rvnode->modeBits;
3737 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3738 vep->type = rvnode->type;
3739 vep->author = rvnode->author;
3740 vep->owner = rvnode->owner;
3741 vep->group = rvnode->group;
/* Hand the new inode back to the caller. */
3751 *ainode = readmeinode;
/* Failure path (per the log text): drop the link on the readme inode. */
3756 if (IH_DEC(alinkH, readmeinode, vid)) {
3757 Log("CreateReadme (recovery): IH_DEC failed\n");
3769 * create a root dir for a volume that lacks one.
3771 * @param[in] volHeader vol header for the volume
3772 * @param[in] alinkH ihandle for disk access for this volume group
3773 * @param[in] vid volume id we're dealing with
3774 * @param[out] rootdir populated with info about the new root dir
3775 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3776 * updated to the new max unique if we create a new
3779 * @return operation status
/*
 * CreateRootDir: build a replacement root directory (vnode 1, unique 1) for
 * a volume whose root could not be found.
 *
 * Steps visible here: make sure a large-vnode slot exists for vnode 1 and is
 * unused, create the README via CreateReadme, create a directory inode and
 * populate it with afs_dir_MakeDir plus a "README.ROOTDIR" entry, build a
 * large disk vnode whose ACL holds a single entry (id -204 with read and
 * lookup rights), write it at the index offset of vnode 1, mirror it into
 * the VnodeEssence, and fill in *rootdir.  The trailing IH_DEC calls undo
 * the inode creations on failure, going by their log text.
 */
3784 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3785 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
3789 int decroot = 0, decreadme = 0;
3790 AFSFid did, readmeid;
3793 struct VnodeDiskObject *rootvnode = NULL;
3794 struct acl_accessList *ACL;
3797 struct VnodeEssence *vep;
3798 Inode readmeinode = 0;
3799 time_t now = time(NULL);
/* A volume with neither vnode array gives us nothing to attach. */
3801 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3802 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3806 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3807 /* We don't have any large vnodes in the volume; allocate room
3808 * for one so we can recreate the root dir */
3809 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3810 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3811 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3813 opr_Assert(salvinfo->vnodeInfo[vLarge].vnodes);
3814 opr_Assert(salvinfo->vnodeInfo[vLarge].inodes);
/* Vnode 1 is the root directory slot; a live vnode there is never replaced. */
3817 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3818 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3819 if (vep->type != vNull) {
3820 Log("Not creating new root dir; existing vnode 1 is non-null\n");
/* The README is created first; its FID is entered into the new directory
 * further down. */
3824 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3825 &readmeinode) != 0) {
3830 /* set the DV to a very high number, so it is unlikely that we collide
3831 * with a cached DV */
3834 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3836 if (!VALID_INO(rootinode)) {
3837 Log("CreateRootDir: IH_CREATE failed\n");
3842 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3843 rootinode, &salvinfo->VolumeChanged);
/* The same FID (did) is passed for both FID arguments of afs_dir_MakeDir;
 * presumably these are the directory itself and its parent -- TODO confirm. */
3847 if (afs_dir_MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3848 Log("CreateRootDir: MakeDir failed\n");
3851 if (afs_dir_Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3852 Log("CreateRootDir: Create failed\n");
3856 length = afs_dir_Length(&rootdir->dirHandle);
3857 DZap(&rootdir->dirHandle);
3859 /* create the new root dir vnode */
3860 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
3862 Log("CreateRootDir: malloc failed\n");
3866 /* only give 'rl' permissions to 'system:administrators'. We do this to
3867 * try to catch the attention of an administrator, that they should not
3868 * be writing to this directory or continue to use it. */
3869 ACL = VVnodeDiskACL(rootvnode);
3870 ACL->size = sizeof(struct acl_accessList);
3871 ACL->version = ACL_ACLVERSION;
3875 ACL->entries[0].id = -204; /* system:administrators */
3876 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
3878 rootvnode->type = vDirectory;
3879 rootvnode->cloned = 0;
3880 rootvnode->modeBits = 0777;
3881 rootvnode->linkCount = 2;
3882 VNDISK_SET_LEN(rootvnode, length);
3883 rootvnode->uniquifier = 1;
3884 rootvnode->dataVersion = dv;
3885 VNDISK_SET_INO(rootvnode, rootinode);
3886 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3887 rootvnode->author = 0;
3888 rootvnode->owner = 0;
3889 rootvnode->parent = 0;
3890 rootvnode->group = 0;
3891 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3893 /* write it out to disk */
3894 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3895 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3896 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3898 if (bytes != SIZEOF_LARGEDISKVNODE) {
3899 /* just cast to int and don't worry about printing real 64-bit ints;
3900 * a large disk vnode isn't anywhere near the 32-bit limit */
3901 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3902 (int)SIZEOF_LARGEDISKVNODE);
3906 /* update VnodeEssence for the new root vnode */
3907 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3909 vep->blockCount = nBlocks(length);
3910 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3911 vep->parent = rootvnode->parent;
3912 vep->unique = rootvnode->uniquifier;
3913 vep->modeBits = rootvnode->modeBits;
3914 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3915 vep->type = rootvnode->type;
3916 vep->author = rootvnode->author;
3917 vep->owner = rootvnode->owner;
3918 vep->group = rootvnode->group;
3928 /* update DirSummary for the new root vnode */
3929 rootdir->vnodeNumber = 1;
3930 rootdir->unique = 1;
3931 rootdir->haveDot = 1;
3932 rootdir->haveDotDot = 1;
3933 rootdir->rwVid = vid;
3934 rootdir->copied = 0;
3935 rootdir->parent = 0;
/* NOTE(review): the strdup result is stored without a NULL check. */
3936 rootdir->name = strdup(".");
3937 rootdir->vname = volHeader->name;
3938 rootdir->ds_linkH = alinkH;
/* Recovery: release whichever inodes were flagged for removal. */
3945 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3946 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3948 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3949 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3959 * salvage a volume group.
3961 * @param[in] salvinfo information for the current salvage job
3962 * @param[in] rwIsp inode summary for rw volume
3963 * @param[in] alinkH link table inode handle
3965 * @return operation status
/*
 * SalvageVolume: salvage the read-write volume described by rwIsp.
 *
 * Phases visible here:
 *   1. read and sanity-check the volume header;
 *   2. distil both vnode indexes and salvage every directory;
 *   3. when no root directory was found, try to create one (ORPH_ATTACH,
 *      not Testing);
 *   4. walk all unclaimed vnodes and attach them to the root directory as
 *      __ORPHANDIR__.<vnode>.<unique> / __ORPHANFILE__.<vnode>.<unique>;
 *   5. rewrite every vnode that changed or whose link count is off,
 *      removing orphans when requested;
 *   6. fix the header statistics and uniquifier, notify the fileserver,
 *      write the header back and release the index handles.
 */
3969 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3971 /* This routine, for now, will only be called for read-write volumes */
3973 int BlocksInVolume = 0, FilesInVolume = 0;
3975 struct DirSummary rootdir, oldrootdir;
3976 struct VnodeInfo *dirVnodeInfo;
3977 struct VnodeDiskObject vnode;
3978 VolumeDiskData volHeader;
3980 int orphaned, rootdirfound = 0;
3981 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3982 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3983 struct VnodeEssence *vep;
3986 afs_sfsize_t nBytes;
3988 VnodeId LFVnode, ThisVnode;
3989 Unique LFUnique, ThisUnique;
/* Load the volume header and insist that it is a valid one. */
3993 vid = rwIsp->volSummary->header.id;
3994 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3995 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3996 opr_Assert(nBytes == sizeof(volHeader));
3997 opr_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3998 opr_Assert(volHeader.destroyMe != DESTROY_ME);
3999 /* (should not have gotten this far with DESTROY_ME flag still set!) */
/* Summarize both vnode indexes; maxunique is raised to the largest
 * uniquifier found in either. */
4001 DistilVnodeEssence(salvinfo, vid, vLarge,
4002 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
4003 DistilVnodeEssence(salvinfo, vid, vSmall,
4004 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
/* Salvage every directory (large vnode) slot. */
4006 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
4007 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
4008 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
4009 &rootdir, &rootdirfound);
4012 nt_sync(salvinfo->fileSysDevice);
4014 sync(); /* This used to be done lower level, for every dir */
/* No root directory turned up: try to create one so orphans can be
 * attached to it. */
4021 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
4023 Log("Cannot find root directory for volume %lu; attempting to create "
4024 "a new one\n", afs_printable_uint32_lu(vid));
4026 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
4031 salvinfo->VolumeChanged = 1;
4035 /* Parse each vnode looking for orphaned vnodes and
4036 * connect them to the tree as orphaned (if requested).
4038 oldrootdir = rootdir;
4039 for (class = 0; class < nVNODECLASSES; class++) {
4040 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
4041 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
4042 ThisVnode = bitNumberToVnodeNumber(v, class);
4043 ThisUnique = vep->unique;
4045 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
4046 continue; /* Ignore unused, claimed, and root vnodes */
4048 /* This vnode is orphaned. If it is a directory vnode, then the '..'
4049 * entry in this vnode had incremented the parent link count (In
4050 * JudgeEntry()). We need to go to the parent and decrement that
4051 * link count. But if the parent's unique is zero, then the parent
4052 * link count was not incremented in JudgeEntry().
4054 if (class == vLarge) { /* directory vnode */
4055 struct VnodeEssence *parent_vep;
/* The parent lookup may fail; that case is only logged (see the message
 * text below). */
4057 parent_vep = CheckVnodeNumber(salvinfo, vep->parent);
4060 Log("Vnode %d has invalid or out-of-range parent vnode %d;" \
4061 " ignore parent count adjustment\n",
4062 ThisVnode, vep->parent);
4063 else if (parent_vep->unique != 0) {
4064 if (vep->parent == 1 && newrootdir) {
4065 /* this vnode's parent was the volume root, and
4066 * we just created the volume root. So, the parent
4067 * dir didn't exist during JudgeEntry, so the link
4068 * count was not inc'd there, so don't dec it here.
4074 parent_vep->count++;
4080 continue; /* If no rootdir, can't attach orphaned files */
4082 /* Here we attach orphaned files and directories into the
4083 * root directory, LVVnode, making sure link counts stay correct.
4085 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
4086 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
4087 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
4089 /* Update this orphaned vnode's info. Its parent info and
4090 * link count (do for orphaned directories and files).
4092 vep->parent = LFVnode; /* Parent is the root dir */
4093 vep->unique = LFUnique;
4096 vep->count--; /* Inc link count (root dir will pt to it) */
4098 /* If this orphaned vnode is a directory, change '..'.
4099 * The name of the orphaned dir/file is unknown, so we
4100 * build a unique name. No need to CopyOnWrite the directory
4101 * since it is not connected to tree in BK or RO volume and
4102 * won't be visible there.
4104 if (class == vLarge) {
4108 /* Remove and recreate the ".." entry in this orphaned directory */
4109 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
4110 salvinfo->vnodeInfo[class].inodes[v],
4111 &salvinfo->VolumeChanged);
4113 pa.Unique = LFUnique;
4114 opr_Verify(afs_dir_Delete(&dh, "..") == 0);
4115 opr_Verify(afs_dir_Create(&dh, "..", &pa) == 0);
4117 /* The original parent's link count was decremented above.
4118 * Here we increment the new parent's link count.
4120 pv = vnodeIdToBitNumber(LFVnode);
4121 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
4125 /* Go to the root dir and add this entry. The link count of the
4126 * root dir was incremented when ".." was created. Try 10 times.
4128 for (j = 0; j < 10; j++) {
4129 pa.Vnode = ThisVnode;
4130 pa.Unique = ThisUnique;
4132 snprintf(npath, sizeof npath, "%s.%u.%u",
4133 ((class == vLarge) ? "__ORPHANDIR__"
4134 : "__ORPHANFILE__"),
4135 ThisVnode, ThisUnique);
4137 CopyOnWrite(salvinfo, &rootdir);
4138 code = afs_dir_Create(&rootdir.dirHandle, npath, &pa);
4142 ThisUnique += 50; /* Try creating a different file */
4144 opr_Assert(code == 0);
4145 Log("Attaching orphaned %s to volume's root dir as %s\n",
4146 ((class == vLarge) ? "directory" : "file"), npath);
4148 } /* for each vnode in the class */
4149 } /* for each class of vnode */
4151 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
4153 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
4155 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
4157 opr_Assert(code == 0);
4158 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
4161 DFlush(); /* Flush the changes */
/* Without a root directory the attach policy is downgraded to "ignore".
 * NOTE(review): orphans is not a local of this function, so this change
 * presumably outlives the current volume -- confirm that is intended. */
4162 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
4163 Log("Cannot attach orphaned files and directories: Root directory not found\n");
4164 orphans = ORPH_IGNORE;
4167 /* Write out all changed vnodes. Orphaned files and directories
4168 * will get removed here also (if requested).
4170 for (class = 0; class < nVNODECLASSES; class++) {
4171 afs_sfsize_t nVnodes = salvinfo->vnodeInfo[class].nVnodes;
4172 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
4173 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
4174 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
4175 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
4176 for (i = 0; i < nVnodes; i++) {
4177 struct VnodeEssence *vnp = &vnodes[i];
4178 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
4180 /* If the vnode is good but is unclaimed (not listed in
4181 * any directory entries), then it is orphaned.
4184 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
4185 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
/* Only vnodes that changed, or whose link count is off, are rewritten. */
4189 if (vnp->changed || vnp->count) {
4192 IH_IREAD(salvinfo->vnodeInfo[class].handle,
4193 vnodeIndexOffset(vcp, vnodeNumber),
4194 (char *)&vnode, sizeof(vnode));
4195 opr_Assert(nBytes == sizeof(vnode));
4197 vnode.parent = vnp->parent;
4198 oldCount = vnode.linkCount;
/* vnp->count starts as the on-disk link count and is adjusted as directory
 * entries are judged, so what remains is the discrepancy to remove. */
4199 vnode.linkCount = vnode.linkCount - vnp->count;
4202 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4204 if (!vnp->todelete) {
4205 /* Orphans should have already been attached (if requested) */
4206 opr_Assert(orphans != ORPH_ATTACH);
4207 oblocks += vnp->blockCount;
4210 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4212 BlocksInVolume -= vnp->blockCount;
4214 if (VNDISK_GET_INO(&vnode)) {
4216 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4217 opr_Assert(code == 0);
4219 memset(&vnode, 0, sizeof(vnode));
4221 } else if (vnp->count) {
4223 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4226 vnode.modeBits = vnp->modeBits;
4229 vnode.dataVersion++;
4232 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4233 vnodeIndexOffset(vcp, vnodeNumber),
4234 (char *)&vnode, sizeof(vnode));
4235 opr_Assert(nBytes == sizeof(vnode));
4237 salvinfo->VolumeChanged = 1;
4241 if (!Showmode && ofiles) {
4242 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4244 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
4248 for (class = 0; class < nVNODECLASSES; class++) {
4249 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4250 for (i = 0; i < vip->nVnodes; i++)
4251 if (vip->vnodes[i].name)
4252 free(vip->vnodes[i].name);
4259 /* Set correct resource utilization statistics */
4260 volHeader.filecount = FilesInVolume;
4261 volHeader.diskused = BlocksInVolume;
4263 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
4264 if (volHeader.uniquifier < (maxunique + 1)) {
4266 Log("Volume uniquifier %u is too low (max uniq %u); fixed\n", volHeader.uniquifier, maxunique);
4267 /* Plus 2,000 in case there are workstations out there with
4268 * cached vnodes that have since been deleted
4270 volHeader.uniquifier = (maxunique + 1 + 2000);
4274 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4275 "Only use this salvaged volume to copy data to another volume; "
4276 "do not continue to use this volume (%lu) as-is.\n",
4277 afs_printable_uint32_lu(vid));
4280 if (!Testing && salvinfo->VolumeChanged) {
4281 #ifdef FSSYNC_BUILD_CLIENT
4282 if (salvinfo->useFSYNC) {
4283 afs_int32 fsync_code;
4285 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4287 Log("Error trying to tell the fileserver to break callbacks for "
4288 "changed volume %lu; error code %ld\n",
4289 afs_printable_uint32_lu(vid),
4290 afs_printable_int32_ld(fsync_code));
4292 salvinfo->VolumeChanged = 0;
4295 #endif /* FSSYNC_BUILD_CLIENT */
4297 #ifdef AFS_DEMAND_ATTACH_FS
4298 if (!salvinfo->useFSYNC) {
4299 /* A volume's contents have changed, but the fileserver will not
4300 * break callbacks on the volume until it tries to load the vol
4301 * header. So, to reduce the amount of time a client could have
4302 * stale data, remove fsstate.dat, so the fileserver will init
4303 * callback state with all clients. This is a very coarse hammer,
4304 * and in the future we should just record which volumes have
4306 code = unlink(AFSDIR_SERVER_FSSTATE_FILEPATH);
4307 if (code && errno != ENOENT) {
4308 Log("Error %d when trying to unlink FS state file %s\n", errno,
4309 AFSDIR_SERVER_FSSTATE_FILEPATH);
4315 /* Turn off the inUse bit; the volume's been salvaged! */
4316 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4317 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4318 volHeader.inService = 1; /* allow service again */
4319 if (salvinfo->VolumeChanged) {
4320 volHeader.needsCallback = 1;
4321 volHeader.updateDate = time(NULL);
4323 volHeader.needsCallback = 0;
4325 volHeader.dontSalvage = DONT_SALVAGE;
4326 salvinfo->VolumeChanged = 0;
4328 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4329 opr_Assert(nBytes == sizeof(volHeader));
4332 Log("%sSalvaged %s (%" AFS_VOLID_FMT "): %d files, %d blocks\n",
4333 (Testing ? "It would have " : ""), volHeader.name, afs_printable_VolumeId_lu(volHeader.id),
4334 FilesInVolume, BlocksInVolume);
/* Release the vnode index handles that DistilVnodeEssence initialised. */
4337 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4338 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
4344 ClearROInUseBit(struct VolumeSummary *summary)
4346 IHandle_t *h = summary->volumeInfoHandle;
4347 afs_sfsize_t nBytes;
4349 VolumeDiskData volHeader;
4351 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4352 opr_Assert(nBytes == sizeof(volHeader));
4353 opr_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
4354 volHeader.inUse = 0;
4355 volHeader.needsSalvaged = 0;
4356 volHeader.inService = 1;
4357 volHeader.dontSalvage = DONT_SALVAGE;
4359 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4360 opr_Assert(nBytes == sizeof(volHeader));
4365 * Possible delete the volume.
4367 * deleteMe - Always do so, only a partial volume.
4370 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4371 char *message, int deleteMe, int check)
4373 if (readOnly(isp) || deleteMe) {
4374 if (isp->volSummary && !isp->volSummary->deleted) {
4377 Log("Volume %" AFS_VOLID_FMT " (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", afs_printable_VolumeId_lu(isp->volumeId));
4379 Log("It will be deleted on this server (you may find it elsewhere)\n");
4382 Log("Volume %" AFS_VOLID_FMT " needs to be salvaged. Since it is read-only, however,\n", afs_printable_VolumeId_lu(isp->volumeId));
4384 Log("it will be deleted instead. It should be recloned.\n");
4388 char path[VMAXPATHLEN + 10];
4389 char filename[VMAXPATHLEN];
4390 VolumeExternalName_r(isp->volumeId, filename, sizeof(filename));
4391 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, filename);
4393 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4395 Log("Error %ld destroying volume disk header for volume %" AFS_VOLID_FMT "\n",
4396 afs_printable_int32_ld(code),
4397 afs_printable_VolumeId_lu(isp->volumeId));
4400 /* make sure we actually delete the header file; ENOENT
4401 * is fine, since VDestroyVolumeDiskHeader probably already
4403 if (unlink(path) && errno != ENOENT) {
4404 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4406 if (salvinfo->useFSYNC) {
4407 AskDelete(salvinfo, isp->volumeId);
4409 isp->volSummary->deleted = 1;
4412 } else if (!check) {
4413 Log("%s salvage was unsuccessful: read-write volume %" AFS_VOLID_FMT "\n", message,
4414 afs_printable_VolumeId_lu(isp->volumeId));
4415 Abort("Salvage of volume %" AFS_VOLID_FMT " aborted\n", afs_printable_VolumeId_lu(isp->volumeId));
4419 #ifdef AFS_DEMAND_ATTACH_FS
4421 * Locks a volume on disk for salvaging.
4423 * @param[in] volumeId volume ID to lock
4425 * @return operation status
4427 * @retval -1 volume lock raced with a fileserver restart; all volumes must
4428 * be checked out and locked again
4433 LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
4438 /* should always be WRITE_LOCK, but keep the lock-type logic all
4439 * in one place, in VVolLockType. Params will be ignored, but
4440 * try to provide what we're logically doing. */
/*
 * LockVolume: take the on-disk lock for volumeId without blocking, verify
 * with the fileserver that the volume is still checked out for salvage,
 * and then stamp inUse = programType into the volume header so an
 * interrupted salvage leaves the volume marked as in use.
 * EBUSY and other lock errors abort the salvage; a SYNC_DENIED answer from
 * the checkout verification is the "retry checking out volumes" case.
 */
4441 locktype = VVolLockType(V_VOLUPD, 1);
4443 code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
4445 if (code == EBUSY) {
4446 Abort("Someone else appears to be using volume %lu; Aborted\n",
4447 afs_printable_uint32_lu(volumeId));
4449 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4450 afs_printable_int32_ld(code),
4451 afs_printable_uint32_lu(volumeId));
/* Ask the fileserver whether our checkout of the volume still stands. */
4454 code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPartition->name, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4455 if (code == SYNC_DENIED) {
4456 /* need to retry checking out volumes */
4459 if (code != SYNC_OK) {
4460 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4461 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4464 /* set inUse = programType in the volume header to ensure that nobody
4465 * tries to use this volume again without salvaging, if we somehow crash
4466 * or otherwise exit before finishing the salvage.
4470 struct VolumeHeader header;
4471 struct VolumeDiskHeader diskHeader;
4472 struct VolumeDiskData volHeader;
4474 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
4479 DiskToVolumeHeader(&header, &diskHeader);
4481 IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
4482 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4483 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4489 volHeader.inUse = programType;
4491 /* If we can't re-write the header, bail out and error. We don't
4492 * assert when reading the header, since it's possible the
4493 * header isn't really there (when there's no data associated
4494 * with the volume; we just delete the vol header file in that
4495 * case). But if it's there enough that we can read it, but
4496 * somehow we cannot write to it to signify we're salvaging it,
4497 * we've got a big problem and we cannot continue. */
4498 opr_Verify(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader))
4499 == sizeof(volHeader));
4506 #endif /* AFS_DEMAND_ATTACH_FS */
4509 AskError(struct SalvInfo *salvinfo, VolumeId volumeId)
4511 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4513 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4514 FSYNC_VOL_FORCE_ERROR, FSYNC_WHATEVER, NULL);
4515 if (code != SYNC_OK) {
4516 Log("AskError: failed to force volume %lu into error state; "
4517 "SYNC error code %ld (%s)\n", (long unsigned)volumeId,
4518 (long)code, SYNC_res2string(code));
4520 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
4524 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
4529 memset(&res, 0, sizeof(res));
4531 for (i = 0; i < 3; i++) {
4532 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4533 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4535 if (code == SYNC_OK) {
4537 } else if (code == SYNC_DENIED) {
4539 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4541 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4542 Abort("Salvage aborted\n");
4543 } else if (code == SYNC_BAD_COMMAND) {
4544 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
4547 #ifdef AFS_DEMAND_ATTACH_FS
4548 Log("AskOffline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4550 Log("AskOffline: fileserver is DAFS but we are not.\n");
4553 #ifdef AFS_DEMAND_ATTACH_FS
4554 Log("AskOffline: fileserver is not DAFS but we are.\n");
4556 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4559 Abort("Salvage aborted\n");
4562 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4563 FSYNC_clientFinis();
4567 if (code != SYNC_OK) {
4568 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4569 Abort("Salvage aborted\n");
4573 /* don't want to pass around state; remember it here */
4574 static int isDAFS = -1;
4579 afs_int32 code = 1, i;
4581 /* we don't care if we race. the answer shouldn't change */
4585 memset(&res, 0, sizeof(res));
4587 for (i = 0; code && i < 3; i++) {
4588 code = FSYNC_VolOp(0, NULL, FSYNC_VOL_LISTVOLUMES, FSYNC_SALVAGE, &res);
4590 Log("AskDAFS: FSYNC_VOL_LISTVOLUMES failed with code %ld reason "
4591 "%ld (%s); trying again...\n", (long)code, (long)res.hdr.reason,
4592 FSYNC_reason2string(res.hdr.reason));
4593 FSYNC_clientFinis();
4599 Log("AskDAFS: could not determine DAFS-ness, assuming not DAFS\n");
4603 if ((res.hdr.flags & SYNC_FLAG_DAFS_EXTENSIONS)) {
4613 MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4615 struct VolumeDiskHeader diskHdr;
4617 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHdr);
4619 /* volume probably does not exist; no need to bring back online */
4622 AskOnline(salvinfo, volumeId);
4626 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4630 for (i = 0; i < 3; i++) {
4631 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4632 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4634 if (code == SYNC_OK) {
4636 } else if (code == SYNC_DENIED) {
4637 Log("AskOnline: file server denied online request to volume %" AFS_VOLID_FMT " partition %s; trying again...\n", afs_printable_VolumeId_lu(volumeId), salvinfo->fileSysPartition->name);
4638 } else if (code == SYNC_BAD_COMMAND) {
4639 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4641 Log("AskOnline: please make sure file server binaries are same version.\n");
4645 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
4646 FSYNC_clientFinis();
4653 AskDelete(struct SalvInfo *salvinfo, VolumeId volumeId)
4658 for (i = 0; i < 3; i++) {
4659 memset(&res, 0, sizeof(res));
4660 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4661 FSYNC_VOL_DONE, FSYNC_SALVAGE, &res);
4663 if (code == SYNC_OK) {
4665 } else if (code == SYNC_DENIED) {
4666 Log("AskOnline: file server denied DONE request to volume %" AFS_VOLID_FMT " partition %s; trying again...\n", afs_printable_VolumeId_lu(volumeId), salvinfo->fileSysPartition->name);
4667 } else if (code == SYNC_BAD_COMMAND) {
4668 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4671 #ifdef AFS_DEMAND_ATTACH_FS
4672 Log("AskOnline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4674 Log("AskOnline: fileserver is DAFS but we are not.\n");
4677 #ifdef AFS_DEMAND_ATTACH_FS
4678 Log("AskOnline: fileserver is not DAFS but we are.\n");
4680 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4684 } else if (code == SYNC_FAILED &&
4685 (res.hdr.reason == FSYNC_UNKNOWN_VOLID ||
4686 res.hdr.reason == FSYNC_WRONG_PART)) {
4687 /* volume is already effectively 'deleted' */
4691 Log("AskOnline: request for fileserver to delete volume failed; trying again...\n");
4692 FSYNC_clientFinis();
4699 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4701 /* Volume parameter is passed in case iopen is upgraded in future to
4702 * require a volume Id to be passed
4705 IHandle_t *srcH, *destH;
4706 FdHandle_t *srcFdP, *destFdP;
4708 afs_foff_t size = 0;
4710 IH_INIT(srcH, device, rwvolume, inode1);
4711 srcFdP = IH_OPEN(srcH);
4712 opr_Assert(srcFdP != NULL);
4713 IH_INIT(destH, device, rwvolume, inode2);
4714 destFdP = IH_OPEN(destH);
4715 while ((nBytes = FDH_PREAD(srcFdP, buf, sizeof(buf), size)) > 0) {
4716 opr_Verify(FDH_PWRITE(destFdP, buf, nBytes, size) == nBytes);
4719 opr_Assert(nBytes == 0);
4720 FDH_REALLYCLOSE(srcFdP);
4721 FDH_REALLYCLOSE(destFdP);
4728 PrintInodeList(struct SalvInfo *salvinfo)
4730 struct ViceInodeInfo *ip;
4731 struct ViceInodeInfo *buf;
4734 afs_sfsize_t st_size;
4736 st_size = OS_SIZE(salvinfo->inodeFd);
4737 opr_Assert(st_size >= 0);
4738 buf = malloc(st_size);
4739 opr_Assert(buf != NULL);
4740 nInodes = st_size / sizeof(struct ViceInodeInfo);
4741 opr_Verify(OS_READ(salvinfo->inodeFd, buf, st_size) == st_size);
4742 for (ip = buf; nInodes--; ip++) {
4743 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%" AFS_VOLID_FMT ",%u,%u,%u)\n", /* VolumeId in param */
4744 PrintInode(stmp, ip->inodeNumber), ip->linkCount,
4745 (afs_uintmax_t) ip->byteCount,
4746 afs_printable_VolumeId_lu(ip->u.param[0]), ip->u.param[1],
4747 ip->u.param[2], ip->u.param[3]);
4753 PrintInodeSummary(struct SalvInfo *salvinfo)
4756 struct InodeSummary *isp;
4758 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4759 isp = &salvinfo->inodeSummary[i];
4760 Log("VID:%" AFS_VOLID_FMT ", RW:%" AFS_VOLID_FMT ", index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", afs_printable_VolumeId_lu(isp->volumeId), afs_printable_VolumeId_lu(isp->RWvolumeId), isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/*
 * fork() wrapper that asserts on failure.
 *
 * In a DAFS build, a child of the salvageserver re-establishes its own
 * SYNC client connections. Returns what fork() returned.
 */
int
Fork(void)
{
    int childPid;

#ifdef AFS_NT40_ENV
    childPid = 0;
    opr_Assert(0); /* Fork is never executed in the NT code path */
#else
    childPid = fork();
    opr_Assert(childPid >= 0);
#ifdef AFS_DEMAND_ATTACH_FS
    if ((childPid == 0) && (programType == salvageServer)) {
        /* we are a salvageserver child */
#ifdef FSSYNC_BUILD_CLIENT
        VChildProcReconnectFS_r();
#endif
#ifdef SALVSYNC_BUILD_CLIENT
        VReconnectSALV_r();
#endif
    }
#endif /* AFS_DEMAND_ATTACH_FS */
#endif /* !AFS_NT40_ENV */
    return childPid;
}
/*
 * Process/thread shutdown path. The statements use a value named `code`
 * as the exit status.
 * NOTE(review): the signature line and the tail of this routine are not
 * part of this excerpt; presumably this is the body of Exit(int code) --
 * confirm against the full file.
 */
4792 #ifdef AFS_DEMAND_ATTACH_FS
4793 if (programType == salvageServer) {
4794 /* release all volume locks before closing down our SYNC channels.
4795 * the fileserver may try to online volumes we have checked out when
4796 * we close down FSSYNC, so we should make sure we don't have those
4797 * volumes locked when it does */
4798 struct DiskPartition64 *dp;
/* Visit every partition id from 0 through VOLMAXPARTS inclusive. */
4800 for (i = 0; i <= VOLMAXPARTS; i++) {
4801 dp = VGetPartitionById(i, 0);
/* Reset the state of this partition's volume lock file. */
4803 VLockFileReinit(&dp->volLockFile);
4806 # ifdef SALVSYNC_BUILD_CLIENT
4809 # ifdef FSSYNC_BUILD_CLIENT
4813 #endif /* AFS_DEMAND_ATTACH_FS */
/* A thread other than main_thread ends only itself, handing `code` back
 * as its thread result. */
4816 if (main_thread != pthread_self())
4817 pthread_exit((void *)code);
/*
 * Reap one child process with wait().
 *
 * prog is used only in the "core dumped" log message.
 *
 * Returns the child's pid when it exited with status 0, and -1 when it was
 * killed by a signal or exited with a non-zero status. A failing wait()
 * trips an assertion.
 */
int
Wait(char *prog)
{
    int childStatus;
    int childPid = wait(&childStatus);

    opr_Assert(childPid != -1);
    if (WCOREDUMP(childStatus)) {
        Log("\"%s\" core dumped!\n", prog);
    }
    if (WIFSIGNALED(childStatus) == 0 && WEXITSTATUS(childStatus) == 0) {
        return childPid;
    }
    return -1;
}
/*
 * Format a time value as local time into a caller-supplied buffer.
 *
 * buffer     destination buffer
 * size       size of buffer in bytes
 * clock      time to format
 * precision  non-zero: "MM/DD/YYYY HH:MM:SS"; zero: "MM/DD/YYYY HH:MM"
 *
 * Returns buffer. If the time cannot be converted, or the formatted text
 * does not fit in size bytes, the whole buffer is zero-filled so the caller
 * gets an empty string.
 */
char *
TimeStamp(char *buffer, size_t size, time_t clock, int precision)
{
    const char *layout = precision ? "%m/%d/%Y %H:%M:%S" : "%m/%d/%Y %H:%M";
    struct tm *lt;
    size_t nbytes = 0;

    lt = localtime(&clock);
    /* localtime() returns NULL for an unrepresentable time; handing that
     * to strftime() would be undefined behaviour. */
    if (lt != NULL)
        nbytes = strftime(buffer, size, layout, lt);
    if (nbytes == 0)
        memset(buffer, 0, size);
    return buffer;
}
4864 SalvageShowLog(void)
4870 if (ShowLog == 0 || ClientMode) {
4871 return; /* nothing to do */
4873 filename = strdup(GetLogFilename());
4874 opr_Assert(filename != NULL);
4877 logFile = afs_fopen(filename, "r");
4879 printf("Can't read %s, exiting\n", ShowLogFilename);
4881 while (fgets(line, sizeof(line), logFile))
4889 vLog(const char *format, va_list args)
4892 vFSLog(format, args);
4897 gettimeofday(&now, NULL);
4898 fprintf(stderr, "%s ", TimeStamp(buffer, sizeof(buffer), now.tv_sec, 1));
4899 vfprintf(stderr, format, args);
/*
 * printf-style logging entry point: collects the variadic arguments and
 * forwards them to vLog().
 */
void
Log(const char *format, ...)
{
    va_list ap;

    va_start(ap, format);
    vLog(format, ap);
    va_end(ap);
}
/*
 * Abort: printf-style entry point taking a format string followed by
 * variadic arguments.
 * NOTE(review): only the signature and the va_start call are present in
 * this excerpt. Callers in this file use it to report a fatal condition and
 * do not handle a return, so presumably it logs the message and terminates
 * the salvage -- confirm against the full file.
 */
4915 Abort(const char *format, ...)
/* Begin walking the variadic arguments that follow `format`. */
4919 va_start(args, format);
/*
 * Return a newly allocated copy of the string s.
 * Allocation failure trips an assertion, so the result is never NULL.
 */
char *
ToString(const char *s)
{
    char *copy = strdup(s);

    opr_Assert(copy != NULL);
    return copy;
}
4937 /* Remove the FORCESALVAGE file */
4939 RemoveTheForce(char *path)
4942 struct afs_stat_st force; /* so we can use afs_stat to find it */
4943 strcpy(target,path);
4944 strcat(target,"/FORCESALVAGE");
4945 if (!Testing && ForceSalvage) {
4946 if (afs_stat(target,&force) == 0) unlink(target);
4950 #ifndef AFS_AIX32_ENV
4952 * UseTheForceLuke - see if we can use the force
4955 UseTheForceLuke(char *path)
4957 struct afs_stat_st force;
4959 strcpy(target,path);
4960 strcat(target,"/FORCESALVAGE");
4962 return (afs_stat(target, &force) == 0);
4966 * UseTheForceLuke - see if we can use the force
4969 * The VRMIX fsck will not muck with the filesystem it is supposedly
4970 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4971 * muck directly with the root inode, which is within the normal
4973 * ListViceInodes() has a side effect of setting ForceSalvage if
4974 * it detects a need, based on root inode examination.
4977 UseTheForceLuke(char *path)
4980 return 0; /* sorry OB1 */
4985 /* NT support routines */
4987 static char execpathname[MAX_PATH];
4989 nt_SalvagePartition(char *partName, int jobn)
4994 if (!*execpathname) {
4995 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
4996 if (!n || n == 1023)
4999 job.cj_magic = SALVAGER_MAGIC;
5000 job.cj_number = jobn;
5001 (void)strcpy(job.cj_part, partName);
5002 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage: child-side setup from the job descriptor blob
 * (datap, len) that the parent supplied.
 * NOTE(review): the return statements and the tail of this routine are not
 * part of this excerpt; the comments below cover only the visible lines.
 */
5007 nt_SetupPartitionSalvage(void *datap, int len)
5009 childJob_t *jobp = (childJob_t *) datap;
/* The blob must be exactly one childJob_t in size. */
5012 if (len != sizeof(childJob_t))
/* The blob must carry the salvager's magic number. */
5014 if (jobp->cj_magic != SALVAGER_MAGIC)
/* Build "<salvage log path>.<job number>" in a newly allocated string;
 * a negative asprintf() result means the name could not be built. */
5019 if (asprintf(&logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
5020 myjob.cj_number) < 0)
5029 #endif /* AFS_NT40_ENV */