2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
89 #include <afs/procmgmt.h>
92 #ifdef HAVE_SYS_FILE_H
93 # include <sys/file.h>
97 #include <WINNT/afsevent.h>
100 #define WCOREDUMP(x) ((x) & 0200)
103 #ifdef AFS_PTHREAD_ENV
104 # include <opr/lock.h>
107 #include <afs/afsint.h>
108 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
109 #if defined(AFS_VFSINCL_ENV)
110 #include <sys/vnode.h>
112 #include <sys/fs/ufs_inode.h>
114 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
115 #include <ufs/ufs/dinode.h>
116 #include <ufs/ffs/fs.h>
118 #include <ufs/inode.h>
121 #else /* AFS_VFSINCL_ENV */
123 #include <ufs/inode.h>
124 #else /* AFS_OSF_ENV */
125 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_DARWIN_ENV)
126 #include <sys/inode.h>
129 #endif /* AFS_VFSINCL_ENV */
130 #endif /* AFS_SGI_ENV */
133 #include <sys/lockf.h>
136 #include <checklist.h>
138 #if defined(AFS_SGI_ENV)
141 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
143 #include <sys/mnttab.h>
144 #include <sys/mntent.h>
149 #endif /* AFS_SGI_ENV */
150 #endif /* AFS_HPUX_ENV */
154 #include <afs/osi_inode.h>
158 #include <afs/afsutil.h>
159 #include <afs/fileutil.h>
160 #include <rx/rx_queue.h>
165 #include <afs/afssyscalls.h>
169 #include "partition.h"
170 #include "daemon_com.h"
171 #include "daemon_com_inline.h"
173 #include "fssync_inline.h"
174 #include "volume_inline.h"
175 #include "salvsync.h"
176 #include "viceinode.h"
178 #include "volinodes.h" /* header magic number, etc. stuff */
179 #include "vol-salvage.h"
181 #include "vol_internal.h"
183 #include <afs/prs_fs.h>
185 #ifdef FSSYNC_BUILD_CLIENT
186 #include "vg_cache.h"
194 extern void *calloc();
196 static char *TimeStamp(time_t clock, int precision);
/*
 * Salvager option state. The trailing comment on each variable names the
 * command-line flag or option it records.
 */
199 int debug; /* -d flag */
200 extern int Testing; /* -n flag */
201 int ListInodeOption; /* -i flag */
202 int ShowRootFiles; /* -r flag */
203 int RebuildDirs; /* -sal flag */
204 int Parallel = 4; /* -para X flag */
205 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
206 int forceR = 0; /* -b flag */
207 int ShowLog = 0; /* -showlog flag */
208 int ShowSuid = 0; /* -showsuid flag */
209 int ShowMounts = 0; /* -showmounts flag */
210 int orphans = ORPH_IGNORE; /* -orphans option */
215 int useSyslog = 0; /* -syslog flag */
216 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
/* Size of the static jobs[] table in SalvageFileSysParallel(). */
225 #define MAXPARALLEL 32
227 int OKToZap; /* -o flag */
228 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
229 * in the volume header */
231 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
233 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
236 * information that is 'global' to a particular salvage job.
239 Device fileSysDevice; /**< The device number of the current partition
241 char fileSysPath[9]; /**< The path of the mounted partition currently
242 * being salvaged, i.e. the directory containing
243 * the volume headers */
244 char *fileSysPathName; /**< NT needs this to make name pretty log. */
245 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
246 int VGLinkH_cnt; /**< # of references to lnk handle. */
247 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
250 char *fileSysDeviceName; /**< The block device where the file system being
251 * salvaged was mounted */
252 char *filesysfulldev; /**< Set in SalvageFileSys1() to the wpath buffer handed to get_DevName() */
254 int VolumeChanged; /**< Set by any routine which would change the
255 * volume in a way which would require callbacks
256 * to be broken if the volume was put back on
257 * line by an active file server */
259 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
260 * header dealt with */
262 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
263 FD_t inodeFd; /**< File descriptor for inode file */
265 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
266 int nVolumes; /**< Number of volumes (read-write and read-only)
267 * in volume summary */
268 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
271 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
272 * vnodes in the volume that
273 * we are currently looking
275 int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
276 * to contact the fileserver over FSYNC */
283 /* Forward declarations */
284 static void QuietExit(int) AFS_NORETURN;
285 static void SalvageShowLog(void);
286 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
287 static int AskVolumeSummary(struct SalvInfo *salvinfo,
288 VolumeId singleVolumeNumber);
289 static void MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId);
290 static void AskError(struct SalvInfo *salvinfo, VolumeId volumeId);
292 #ifdef AFS_DEMAND_ATTACH_FS
293 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
294 #endif /* AFS_DEMAND_ATTACH_FS */
296 /* Uniquifier stored in the Inode: the value is reduced with a mask, either
 * 0x3fffff or, under AFS_SGI_EXMAG, SGI_UNIQMASK. */
301 return (u & 0x3fffff);
303 #if defined(AFS_SGI_EXMAG)
304 return (u & SGI_UNIQMASK);
307 #endif /* AFS_SGI_EXMAG */
/* EPERM, ENXIO and ENOENT are tested explicitly; any other errno value
 * reaches the return 0 below. */
314 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
316 return 0; /* otherwise may be transient, e.g. EMFILE */
321 char *save_args[MAX_ARGS];
323 extern pthread_t main_thread;
/* Initialised with SALVAGER_MAGIC, NOT_CHILD and an empty string.
 * NOTE(review): presumably overwritten when this process runs as a child
 * job; confirm against the code that consumes it. */
324 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
328 * Get the salvage lock if not already held. Hold until process exits.
330 * @param[in] locktype READ_LOCK or WRITE_LOCK
/*
 * Common helper behind ObtainSalvageLock() and ObtainSharedSalvageLock():
 * locks AFSDIR_SERVER_SLVGLOCK_FILEPATH in the mode given by locktype.
 */
333 _ObtainSalvageLock(int locktype)
335 struct VLockFile salvageLock;
/* Bind the lock handle to the salvage lock file, then try to lock it. */
340 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
342 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
/* Failure is reported with one of the two messages below: either another
 * salvager appears to be running, or the numeric error is printed. */
345 "salvager: There appears to be another salvager running! "
350 "salvager: Error %d trying to acquire salvage lock! "
/* Take the salvage lock in WRITE_LOCK mode via _ObtainSalvageLock(). */
356 ObtainSalvageLock(void)
358 _ObtainSalvageLock(WRITE_LOCK);
/* Take the salvage lock in READ_LOCK mode via _ObtainSalvageLock(). */
361 ObtainSharedSalvageLock(void)
363 _ObtainSalvageLock(READ_LOCK);
367 #ifdef AFS_SGI_XFS_IOPS_ENV
368 /* Check if the given partition is mounted. For XFS, the root inode is not a
369 * constant. So we check the hard way.
 *
 * Walks the MOUNTED table with setmntent()/getmntent() and compares the
 * mnt_dir of each entry against part.
 *
 * NOTE(review): the final conditional has 1 in both arms, so the result is
 * 1 whether or not a matching entry was found. Confirm this is intended
 * before relying on the return value.
372 IsPartitionMounted(char *part)
375 struct mntent *mntent;
377 opr_Verify(mntfp = setmntent(MOUNTED, "r"));
378 while (mntent = getmntent(mntfp)) {
379 if (!strcmp(part, mntent->mnt_dir))
384 return mntent ? 1 : 1;
387 /* Check if the given inode is the root of the filesystem: true when
 * status->st_ino equals ROOTINODE. */
388 #ifndef AFS_SGI_XFS_IOPS_ENV
390 IsRootInode(struct afs_stat_st *status)
393 * The root inode is not a fixed value in XFS partitions. So we need to
394 * see if the partition is in the list of mounted partitions. This only
395 * affects the SalvageFileSys path, so we check there.
397 return (status->st_ino == ROOTINODE);
402 #ifndef AFS_NAMEI_ENV
403 /* We don't want to salvage big files filesystems, since we can't put volumes on
 *
 * devName is given a "/dev/" prefix unless it already starts with one. The
 * superblock of that device is read with ReadSuper() and tested with
 * IsBigFilesFileSystem(); a read failure and a big-files result are each
 * logged as a reason for not salvaging the partition.
 * NOTE(review): the device name is built with sprintf()/strcpy() into name
 * without a length check; confirm the buffer size against possible inputs.
407 CheckIfBigFilesFS(char *mountPoint, char *devName)
409 struct superblock fs;
412 if (strncmp(devName, "/dev/", 5)) {
413 (void)sprintf(name, "/dev/%s", devName);
415 (void)strcpy(name, devName);
418 if (ReadSuper(&fs, name) < 0) {
419 Log("Unable to read superblock. Not salvaging partition %s.\n",
423 if (IsBigFilesFileSystem(&fs)) {
424 Log("Partition %s is a big files filesystem, not salvaging.\n",
434 #define HDSTR "\\Device\\Harddisk"
435 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
/*
 * Function form of SameDisk (it relies on QueryDosDevice(), so presumably
 * the Windows build): resolves devName of both partitions and compares the
 * two results case-insensitively with _strnicmp(). A result that does not
 * begin with HDSTR is reported with a WARNING log line.
 * NOTE(review): dowarn is a static flag initialised to 1; it presumably
 * limits how often the warning is printed, confirm.
 */
437 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
443 static int dowarn = 1;
445 if (!QueryDosDevice(p1->devName, res1, RES_LEN - 1))
447 if (strncmp(res1, HDSTR, HDLEN)) {
450 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
451 res1, HDSTR, p1->devName);
454 if (!QueryDosDevice(p2->devName, res2, RES_LEN - 1))
456 if (strncmp(res2, HDSTR, HDLEN)) {
459 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
460 res2, HDSTR, p2->devName);
464 return (0 == _strnicmp(res1, res2, RES_LEN - 1));
/* Macro form: two partitions share a disk when their device numbers give
 * the same quotient when divided by PartsPerDisk. */
467 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
470 /* This assumes that two partitions with the same device number divided by
471 * PartsPerDisk are on the same disk.
 *
 * Queue partP for salvaging, running at most Parallel jobs at a time. A
 * null partP means: wait for every outstanding job, then read back the
 * per-job log files. Partitions for which SameDisk() holds are chained
 * through nextjob so that they run one after another. When debug is set or
 * Parallel is 1 the partition is salvaged directly with SalvageFileSys().
474 SalvageFileSysParallel(struct DiskPartition64 *partP)
477 struct DiskPartition64 *partP;
478 int pid; /* Pid for this job */
479 int jobnumb; /* Log file job number */
480 struct job *nextjob; /* Next partition on disk to salvage */
482 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
483 struct job *thisjob = 0;
484 static int numjobs = 0;
485 static int jobcount = 0;
491 char logFileName[256];
495 /* We have a partition to salvage. Copy it into thisjob */
496 thisjob = calloc(1, sizeof(struct job));
498 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
501 thisjob->partP = partP;
502 thisjob->jobnumb = jobcount;
504 } else if (jobcount == 0) {
505 /* We are asking to wait for all jobs (partp == 0), yet we never
508 Log("No file system partitions named %s* found; not salvaged\n",
509 VICE_PARTITION_PREFIX);
513 if (debug || Parallel == 1) {
515 SalvageFileSys(thisjob->partP, 0);
522 /* Check to see if thisjob is for a disk that we are already
523 * salvaging. If it is, link it in as the next job to do. The
524 * jobs array has 1 entry per disk being salvaged. numjobs is
525 * the total number of disks currently being salvaged. In
526 * order to keep the jobs array compact, when a disk is
527 * completed, the highest element in the jobs array is moved
528 * down to the now open slot.
530 for (j = 0; j < numjobs; j++) {
531 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
532 /* On same disk, add it to this list and return */
533 thisjob->nextjob = jobs[j]->nextjob;
534 jobs[j]->nextjob = thisjob;
541 /* Loop until we start thisjob or until all existing jobs are finished */
542 while (thisjob || (!partP && (numjobs > 0))) {
543 startjob = -1; /* No new job to start */
545 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
546 /* Either the max jobs are running or we have to wait for all
547 * the jobs to finish. In either case, we wait for at least one
548 * job to finish. When it's done, clean up after it.
550 pid = wait(&wstatus);
551 opr_Assert(pid != -1);
552 for (j = 0; j < numjobs; j++) { /* Find which job it is */
553 if (pid == jobs[j]->pid)
556 opr_Assert(j < numjobs);
557 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
558 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
561 numjobs--; /* job no longer running */
562 oldjob = jobs[j]; /* remember */
563 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
564 free(oldjob); /* free the old job */
566 /* If there is another partition on the disk to salvage, then
567 * say we will start it (startjob). If not, then put thisjob there
568 * and say we will start it.
570 if (jobs[j]) { /* Another partition to salvage */
571 startjob = j; /* Will start it */
572 } else { /* There is not another partition to salvage */
574 jobs[j] = thisjob; /* Add thisjob */
576 startjob = j; /* Will start it */
578 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
579 startjob = -1; /* Don't start it - already running */
583 /* We don't have to wait for a job to complete */
585 jobs[numjobs] = thisjob; /* Add this job */
587 startjob = numjobs; /* Will start it */
591 /* Start up a new salvage job on a partition in job slot "startjob" */
592 if (startjob != -1) {
594 Log("Starting salvage of file system partition %s\n",
595 jobs[startjob]->partP->name);
597 /* For NT, we not only fork, but re-exec the salvager. Pass in the
598 * commands and pass the child job number via the data path.
601 nt_SalvagePartition(jobs[startjob]->partP->name,
602 jobs[startjob]->jobnumb);
603 jobs[startjob]->pid = pid;
608 jobs[startjob]->pid = pid;
613 ShowLog = 0; /* Child processes do not display. */
614 for (fd = 0; fd < 16; fd++)
621 openlog("salvager", LOG_PID, useSyslogFacility);
625 snprintf(logFileName, sizeof logFileName, "%s.%d",
626 AFSDIR_SERVER_SLVGLOG_FILEPATH,
627 jobs[startjob]->jobnumb);
628 logFile = afs_fopen(logFileName, "w");
633 SalvageFileSys1(jobs[startjob]->partP, 0);
638 } /* while ( thisjob || (!partP && numjobs > 0) ) */
640 /* If waited for all jobs to complete, now collect log files and return */
642 if (!useSyslog) /* if syslogging - no need to collect */
/* Read back each job's log file in job-number order, then remove it. */
645 for (i = 0; i < jobcount; i++) {
646 snprintf(logFileName, sizeof logFileName, "%s.%d",
647 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
648 if ((passLog = afs_fopen(logFileName, "r"))) {
649 while (fgets(buf, sizeof(buf), passLog)) {
654 (void)unlink(logFileName);
/*
 * Salvage partP (or only singleVolumeNumber on it) through
 * SalvageFileSys1(). The work runs in this process when canfork is clear or
 * debug is set; otherwise it runs in the process for which Fork() returns
 * 0, and Wait("SalvageFileSys") is used on the other side.
 */
663 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
665 if (!canfork || debug || Fork() == 0) {
666 SalvageFileSys1(partP, singleVolumeNumber);
667 if (canfork && !debug) {
671 Wait("SalvageFileSys");
/*
 * Reduce the path in pbuffer to its last component: the text after the
 * final OS_DIRSEPC is copied to the front of pbuffer. A private copy of the
 * path (pbuf) is searched for its last separator as well. SalvageFileSys1()
 * uses the return value as the device name and the wpath buffer as
 * filesysfulldev.
 *
 * NOTE(review): strcpy() into pbuf[128] is not length-checked, and
 * strcpy(pbuffer, ptr + 1) copies between overlapping regions of the same
 * buffer; confirm both are safe for the inputs this receives.
 */
675 get_DevName(char *pbuffer, char *wpath)
677 char pbuf[128], *ptr;
678 strcpy(pbuf, pbuffer);
679 ptr = (char *)strrchr(pbuf, OS_DIRSEPC);
685 ptr = (char *)strrchr(pbuffer, OS_DIRSEPC);
687 strcpy(pbuffer, ptr + 1);
/*
 * Salvage one partition, or just the volume group of singleVolumeNumber
 * when that argument is non-zero.
 *
 * Steps visible in this function:
 *  - fill in a local struct SalvInfo for partP;
 *  - single volume: connect to the fileserver (unless running as the
 *    salvageserver), enable FSYNC and AskOffline() the volume; whole
 *    partition: disable FSYNC and lock the partition;
 *  - remove leftover files whose names start with salvage.inodes. or
 *    salvage.temp.;
 *  - create the inode list file, unlink it straight away, and fill it via
 *    GetInodeSummary();
 *  - match volume headers (GetVolumeSummary) against the inode summaries,
 *    salvage each volume group, and delete headers that have no inodes;
 *  - for a single-volume salvage outside Testing mode, bring the volumes
 *    back online, singleVolumeNumber first.
 *
 * NOTE(review): the sprintf() into salvinfo->fileSysPath is unbounded while
 * that field is declared as char[9]; confirm the partition path always
 * fits.
 */
694 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
697 char inodeListPath[256];
698 FD_t inodeFile = INVALID_FD;
699 static char tmpDevName[100];
700 static char wpath[100];
701 struct VolumeSummary *vsp, *esp;
705 struct SalvInfo l_salvinfo;
706 struct SalvInfo *salvinfo = &l_salvinfo;
709 memset(salvinfo, 0, sizeof(*salvinfo));
712 if (inodeFile != INVALID_FD) {
714 inodeFile = INVALID_FD;
716 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
717 Abort("Raced too many times with fileserver restarts while trying to "
718 "checkout/lock volumes; Aborted\n");
720 #ifdef AFS_DEMAND_ATTACH_FS
722 /* unlock all previous volume locks, since we're about to lock them
724 VLockFileReinit(&partP->volLockFile);
726 #endif /* AFS_DEMAND_ATTACH_FS */
728 salvinfo->fileSysPartition = partP;
729 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
730 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
733 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
734 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
735 name = partP->devName;
737 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
738 strcpy(tmpDevName, partP->devName);
739 name = get_DevName(tmpDevName, wpath);
740 salvinfo->fileSysDeviceName = name;
741 salvinfo->filesysfulldev = wpath;
744 if (singleVolumeNumber) {
745 #ifndef AFS_DEMAND_ATTACH_FS
746 /* only non-DAFS locks the partition when salvaging a single volume;
747 * DAFS will lock the individual volumes in the VG */
748 VLockPartition(partP->name);
749 #endif /* !AFS_DEMAND_ATTACH_FS */
753 /* salvageserver already setup fssync conn for us */
754 if ((programType != salvageServer) && !VConnectFS()) {
755 Abort("Couldn't connect to file server\n");
758 salvinfo->useFSYNC = 1;
759 AskOffline(salvinfo, singleVolumeNumber);
760 #ifdef AFS_DEMAND_ATTACH_FS
761 if (LockVolume(salvinfo, singleVolumeNumber)) {
764 #endif /* AFS_DEMAND_ATTACH_FS */
767 salvinfo->useFSYNC = 0;
768 VLockPartition(partP->name);
772 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
775 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
776 partP->name, name, (Testing ? "(READONLY mode)" : ""));
778 Log("***Forced salvage of all volumes on this partition***\n");
783 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
790 opr_Verify((dirp = opendir(salvinfo->fileSysPath)) != NULL);
791 while ((dp = readdir(dirp))) {
792 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
793 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
795 Log("Removing old salvager temp files %s\n", dp->d_name);
796 strcpy(npath, salvinfo->fileSysPath);
797 strcat(npath, OS_DIRSEP);
798 strcat(npath, dp->d_name);
804 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
806 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
807 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
809 snprintf(inodeListPath, 255, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
813 inodeFile = OS_OPEN(inodeListPath, O_RDWR|O_TRUNC|O_CREAT, 0666);
814 if (inodeFile == INVALID_FD) {
815 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
818 /* Using nt_unlink here since we're really using the delete on close
819 * semantics of unlink. In most places in the salvager, we really do
820 * mean to unlink the file at that point. Those places have been
821 * modified to actually do that so that the NT crt can be used there.
823 * jaltman - On NT delete on close cannot be applied to a file while the
824 * process has an open file handle that does not have DELETE file
825 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
826 * delete privileges. As a result the nt_unlink() call will always
829 code = nt_unlink(inodeListPath);
831 code = unlink(inodeListPath);
834 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
837 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
841 salvinfo->inodeFd = inodeFile;
842 if (salvinfo->inodeFd == INVALID_FD)
843 Abort("Temporary file %s is missing...\n", inodeListPath);
844 OS_SEEK(salvinfo->inodeFd, 0L, SEEK_SET);
845 if (ListInodeOption) {
846 PrintInodeList(salvinfo);
847 if (singleVolumeNumber) {
848 /* We've checked out the volume from the fileserver, and we need
849 * to give it back. We don't know if the volume exists or not,
850 * so we don't know whether to AskOnline or not. Try to determine
851 * if the volume exists by trying to read the volume header, and
852 * AskOnline if it is readable. */
853 MaybeAskOnline(salvinfo, singleVolumeNumber);
857 /* enumerate volumes in the partition.
858 * figure out sets of read-only + rw volumes.
859 * salvage each set, read-only volumes first, then read-write.
860 * Fix up inodes on last volume in set (whether it is read-write
863 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
867 if (singleVolumeNumber) {
868 /* If we delete a volume during the salvage, we indicate as such by
869 * setting the volsummary->deleted field. We need to know if we
870 * deleted a volume or not in order to know which volumes to bring
871 * back online after the salvage. If we fork, we will lose this
872 * information, since volsummary->deleted will not get set in the
873 * parent. So, don't fork. */
/* Walk the inode summaries one volume group (same RWvolumeId) at a time. */
877 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
878 i < salvinfo->nVolumesInInodeFile; i = j) {
879 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
881 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
883 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
884 struct VolumeSummary *tsp;
885 /* Scan volume list (from partition root directory) looking for the
886 * current rw volume number in the volume list from the inode scan.
887 * If there is one here that is not in the inode volume list,
889 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
891 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
893 /* Now match up the volume summary info from the root directory with the
894 * entry in the volume list obtained from scanning inodes */
895 salvinfo->inodeSummary[j].volSummary = NULL;
896 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
897 if (tsp->header.id == vid) {
898 salvinfo->inodeSummary[j].volSummary = tsp;
904 /* Salvage the group of volumes (several read-only + 1 read/write)
905 * starting with the current read-only volume we're looking at.
908 nt_SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
910 DoSalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
911 #endif /* AFS_NT40_ENV */
915 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
916 for (; vsp < esp; vsp++) {
918 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
921 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
922 RemoveTheForce(salvinfo->fileSysPath);
924 if (!Testing && singleVolumeNumber) {
926 #ifdef AFS_DEMAND_ATTACH_FS
927 /* unlock vol headers so the fs can attach them when we AskOnline */
928 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
929 #endif /* AFS_DEMAND_ATTACH_FS */
931 /* Step through the volumeSummary list and set all volumes on-line.
932 * Most volumes were taken off-line in GetVolumeSummary.
933 * If a volume was deleted, don't tell the fileserver anything, since
934 * we already told the fileserver the volume was deleted back when we
935 * destroyed the volume header.
936 * Also, make sure we bring the singleVolumeNumber back online first.
939 for (j = 0; j < salvinfo->nVolumes; j++) {
940 if (salvinfo->volumeSummaryp[j].header.id == singleVolumeNumber) {
942 if (!salvinfo->volumeSummaryp[j].deleted) {
943 AskOnline(salvinfo, singleVolumeNumber);
949 /* If singleVolumeNumber is not in our volumeSummary, it means that
950 * at least one other volume in the VG is on the partition, but the
951 * RW volume is not. We've already AskOffline'd it by now, though,
952 * so make sure we don't still have the volume checked out. */
953 AskDelete(salvinfo, singleVolumeNumber);
956 for (j = 0; j < salvinfo->nVolumes; j++) {
957 if (salvinfo->volumeSummaryp[j].header.id != singleVolumeNumber) {
958 if (!salvinfo->volumeSummaryp[j].deleted) {
959 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
965 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
966 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
969 OS_CLOSE(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
/*
 * Dispose of a volume header (vsp) that has no inode data behind it.
 *
 * The header path is built from salvinfo->fileSysPath and the external
 * volume name and logged; the disk header is destroyed with
 * VDestroyVolumeDiskHeader(), the file is unlinked (ENOENT is tolerated),
 * and when FSYNC is in use the fileserver is told through AskDelete().
 * In Testing mode the log line reads "would have been deleted".
 * NOTE(review): presumably nothing is actually removed in Testing mode;
 * confirm against the guard around the destroy/unlink calls.
 */
973 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
976 char filename[VMAXPATHLEN];
982 VolumeExternalName_r(vsp->header.id, filename, sizeof(filename));
983 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, filename);
986 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
989 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
991 Log("Error %ld destroying volume disk header for volume %" AFS_VOLID_FMT "\n",
992 afs_printable_int32_ld(code),
993 afs_printable_VolumeId_lu(vsp->header.id));
996 /* make sure we actually delete the header file; ENOENT
997 * is fine, since VDestroyVolumeDiskHeader probably already
999 if (unlink(path) && errno != ENOENT) {
1000 Log("Unable to unlink %s (errno = %d)\n", path, errno);
1002 if (salvinfo->useFSYNC) {
1003 AskDelete(salvinfo, vsp->header.id);
/*
 * qsort() comparison callback for struct ViceInodeInfo records (see the
 * qsort() call in GetInodeSummary).
 *
 * When either record is a special inode (vnodeNumber == INODESPECIAL) the
 * records are first compared by read-write volume id, taken from
 * u.special.parentId for a special inode and from u.vnode.volumeId
 * otherwise. Ordinary inodes are compared by volumeId, then vnodeNumber.
 * The uniquifier and data-version comparisons that follow are reversed,
 * with extra threshold checks under AFS_3DISPARES and AFS_SGI_EXMAG
 * (presumably to cope with counter wrap-around; confirm).
 */
1010 CompareInodes(const void *_p1, const void *_p2)
1012 const struct ViceInodeInfo *p1 = _p1;
1013 const struct ViceInodeInfo *p2 = _p2;
1014 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1015 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
1016 VolumeId p1rwid, p2rwid;
1018 (p1->u.vnode.vnodeNumber ==
1019 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
1021 (p2->u.vnode.vnodeNumber ==
1022 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1023 if (p1rwid < p2rwid)
1025 if (p1rwid > p2rwid)
1027 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1028 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
1029 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1030 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1031 if (p1->u.vnode.volumeId == p1rwid)
1033 if (p2->u.vnode.volumeId == p2rwid)
1035 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1037 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1038 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1039 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1041 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1043 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1045 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1047 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1049 /* The following tests are reversed, so that the most desirable
1050 * of several similar inodes comes first */
1051 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1052 #ifdef AFS_3DISPARES
1053 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1054 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1057 #ifdef AFS_SGI_EXMAG
1058 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1059 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1064 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1065 #ifdef AFS_3DISPARES
1066 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1067 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1070 #ifdef AFS_SGI_EXMAG
1071 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1072 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1077 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1078 #ifdef AFS_3DISPARES
1079 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1080 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1083 #ifdef AFS_SGI_EXMAG
1084 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1085 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1090 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1091 #ifdef AFS_3DISPARES
1092 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1093 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1096 #ifdef AFS_SGI_EXMAG
1097 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1098 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/*
 * Summarise the run of inode records starting at ip that belong to a
 * single volume.
 *
 * At most maxInodes records are examined, stopping at the first record
 * whose volumeId differs from that of ip[0]. The result is stored in
 * *summary: volumeId, RWvolumeId (the parentId of a special inode when one
 * is seen, otherwise the volume id itself), nInodes, nSpecialInodes and
 * maxUniquifier (the largest vnodeUniquifier compared in the loop).
 */
1107 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1108 struct InodeSummary *summary)
1110 VolumeId volume = ip->u.vnode.volumeId;
1111 VolumeId rwvolume = volume;
1116 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1118 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1120 rwvolume = ip->u.special.parentId;
1121 /* This isn't quite right, as there could (in error) be different
1122 * parent inodes in different special vnodes */
1124 if (maxunique < ip->u.vnode.vnodeUniquifier)
1125 maxunique = ip->u.vnode.vnodeUniquifier;
1129 summary->volumeId = volume;
1130 summary->RWvolumeId = rwvolume;
1131 summary->nInodes = n;
1132 summary->nSpecialInodes = nSpecial;
1133 summary->maxUniquifier = maxunique;
/*
 * Inode filter handed to ListViceInodes() when a single volume is being
 * salvaged. Returns non-zero when the inode belongs to singleVolumeNumber:
 * special inodes are matched on u.special.parentId, all others on
 * u.vnode.volumeId. rock is not used in the statements shown here.
 */
1137 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, VolumeId singleVolumeNumber, void *rock)
1139 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1140 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1141 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1146 * Collect list of inodes in file named by path. If a truly fatal error,
1147 * unlink the file and abort. For lesser errors, return -1. The file will
1148 * be unlinked by the caller.
1151 GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolumeNumber)
1155 struct ViceInodeInfo *ip, *ip_save;
1156 struct InodeSummary summary;
1157 char summaryFileName[50];
1158 FD_t summaryFile = INVALID_FD;
1160 char *dev = salvinfo->fileSysPath;
1161 char *wpath = salvinfo->fileSysPath;
1163 char *dev = salvinfo->fileSysDeviceName;
1164 char *wpath = salvinfo->filesysfulldev;
1166 char *part = salvinfo->fileSysPath;
1171 afs_sfsize_t st_size;
1173 /* This file used to come from vfsck; cobble it up ourselves now... */
1175 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1176 singleVolumeNumber ? OnlyOneVolume : 0,
1177 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1179 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1183 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1185 if (forceSal && !ForceSalvage) {
1186 Log("***Forced salvage of all volumes on this partition***\n");
1189 OS_SEEK(inodeFile, 0L, SEEK_SET);
1190 salvinfo->inodeFd = inodeFile;
1191 if (salvinfo->inodeFd == INVALID_FD ||
1192 (st_size = OS_SIZE(salvinfo->inodeFd)) == -1) {
1193 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1195 tdir = (tmpdir ? tmpdir : part);
1197 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1198 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp."));
1200 snprintf(summaryFileName, sizeof summaryFileName,
1201 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1203 summaryFile = OS_OPEN(summaryFileName, O_RDWR|O_APPEND|O_CREAT, 0666);
1204 if (summaryFile == INVALID_FD) {
1205 Abort("Unable to create inode summary file\n");
1209 /* Using nt_unlink here since we're really using the delete on close
1210 * semantics of unlink. In most places in the salvager, we really do
1211 * mean to unlink the file at that point. Those places have been
1212 * modified to actually do that so that the NT crt can be used there.
1214 * jaltman - As commented elsewhere, this cannot work because fopen()
1215 * does not open files with DELETE and FILE_SHARE_DELETE.
1217 code = nt_unlink(summaryFileName);
1219 code = unlink(summaryFileName);
1222 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1225 if (!canfork || debug || Fork() == 0) {
1226 int nInodes = st_size / sizeof(struct ViceInodeInfo);
1228 OS_CLOSE(summaryFile);
1229 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1230 RemoveTheForce(salvinfo->fileSysPath);
1232 struct VolumeSummary *vsp;
1236 GetVolumeSummary(salvinfo, singleVolumeNumber);
1238 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1240 if (vsp->header.id == singleVolumeNumber) {
1243 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1249 MaybeAskOnline(salvinfo, singleVolumeNumber);
1251 /* make sure we get rid of stray .vol headers, even if
1252 * they're not in our volume summary (might happen if
1253 * e.g. something else created them and they're not in the
1254 * fileserver VGC) */
1255 VDestroyVolumeDiskHeader(salvinfo->fileSysPartition,
1256 singleVolumeNumber, 0 /*parent*/);
1257 AskDelete(salvinfo, singleVolumeNumber);
1261 Log("%s vice inodes on %s; not salvaged\n",
1262 singleVolumeNumber ? "No applicable" : "No", dev);
1267 ip = malloc(nInodes*sizeof(struct ViceInodeInfo));
1269 OS_CLOSE(summaryFile);
1271 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1274 if (OS_READ(salvinfo->inodeFd, ip, st_size) != st_size) {
1275 OS_CLOSE(summaryFile);
1276 Abort("Unable to read inode table; %s not salvaged\n", dev);
1278 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1279 if (OS_SEEK(salvinfo->inodeFd, 0, SEEK_SET) == -1
1280 || OS_WRITE(salvinfo->inodeFd, ip, st_size) != st_size) {
1281 OS_CLOSE(summaryFile);
1282 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1287 CountVolumeInodes(ip, nInodes, &summary);
1288 if (OS_WRITE(summaryFile, &summary, sizeof(summary)) != sizeof(summary)) {
1289 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1290 OS_CLOSE(summaryFile);
1294 summary.index += (summary.nInodes);
1295 nInodes -= summary.nInodes;
1296 ip += summary.nInodes;
1299 ip = ip_save = NULL;
1300 /* Following fflush is not fclose, because if it was debug mode would not work */
1301 if (OS_SYNC(summaryFile) == -1) {
1302 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1303 OS_CLOSE(summaryFile);
1307 if (canfork && !debug) {
1311 if (Wait("Inode summary") == -1) {
1312 OS_CLOSE(summaryFile);
1313 Exit(1); /* salvage of this partition aborted */
1317 st_size = OS_SIZE(summaryFile);
1318 opr_Assert(st_size >= 0);
1321 salvinfo->inodeSummary = malloc(st_size);
1322 opr_Assert(salvinfo->inodeSummary != NULL);
1323 /* For GNU we need to do lseek to get the file pointer moved. */
1324 opr_Assert(OS_SEEK(summaryFile, 0, SEEK_SET) == 0);
1325 ret = OS_READ(summaryFile, salvinfo->inodeSummary, st_size);
1326 opr_Assert(ret == st_size);
1328 salvinfo->nVolumesInInodeFile = st_size / sizeof(struct InodeSummary);
1329 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1330 salvinfo->inodeSummary[i].volSummary = NULL;
1332 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)st_size);
1333 OS_CLOSE(summaryFile);
1336 if (retcode && singleVolumeNumber && !deleted) {
1337 AskError(salvinfo, singleVolumeNumber);
1343 /* Comparison routine for volume sort.
1344 This is setup so that a read-write volume comes immediately before
1345 any read-only clones of that volume */
1347 CompareVolumes(const void *_p1, const void *_p2)
1349 const struct VolumeSummary *p1 = _p1;
1350 const struct VolumeSummary *p2 = _p2;
1351 if (p1->header.parent != p2->header.parent)
1352 return p1->header.parent < p2->header.parent ? -1 : 1;
1353 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1355 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1357 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1361 * Gleans volumeSummary information by asking the fileserver
1363 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1364 * salvaging a whole partition
1366 * @return whether we obtained the volume summary information or not
1367 * @retval 0 success; we obtained the volume summary information
1368 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1370 * @retval 1 we did not get the volume summary information; either the
1371 * fileserver responded with an error, or we are not supposed to
1372 * ask the fileserver for the information (e.g. we are salvaging
1373 * the entire partition or we are not the salvageserver)
1375 * @note for non-DAFS, always returns 1
/*
 * AskVolumeSummary -- try to fill salvinfo->volumeSummaryp from the
 * fileserver volume group cache rather than from a partition scan.
 *
 * Everything between the #if and #endif is built only when both
 * FSSYNC_BUILD_CLIENT and AFS_DEMAND_ATTACH_FS are defined, and runs only
 * when programType is salvageServer and singleVolumeNumber is non-zero.
 * Steps, in order:
 *  1. FSYNC_VGCQuery() is issued for singleVolumeNumber on this partition.
 *  2. While the reply is SYNC_FAILED with reason FSYNC_PART_SCANNING the
 *     query is repeated after a sleep.  The sleep argument assigns 10 back
 *     to the loop counter once it exceeds 10, so the delay grows by one
 *     second per attempt and then stays at 10 seconds.
 *  3. SYNC_FAILED with reason FSYNC_UNKNOWN_VOLID is not fatal: q_res is
 *     zeroed and q_res.rw is set to singleVolumeNumber.
 *  4. A refused query is logged with its code and reason.
 *  5. If q_res.rw differs from singleVolumeNumber, a clone was requested:
 *     this is logged, SALVSYNC_LinkVolume() is called when
 *     SALVSYNC_BUILD_CLIENT is defined, and the process exits with
 *     SALSRV_EXIT_VOLGROUP_LINK.
 *  6. Otherwise VOL_VG_MAX_VOLS summaries are calloc()ed and the
 *     q_res.children[] slots are visited.  Every child other than
 *     singleVolumeNumber goes through AskOffline() and LockVolume(); each
 *     child header is read with VReadVolumeDiskHeader() and converted with
 *     DiskToVolumeHeader(), and salvinfo->nVolumes is incremented.
 *  7. The collected summaries are sorted with qsort().
 * When the summary cannot be obtained a fallback message about scanning
 * the entire partition is logged.
 *
 * NOTE(review): the header-read failure message prints the child id with
 * %lu and afs_printable_uint32_lu() while the other messages here use
 * AFS_VOLID_FMT with afs_printable_VolumeId_lu() -- confirm whether that
 * difference is intended.
 */
1378 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1381 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1382 if (programType == salvageServer) {
1383 if (singleVolumeNumber) {
1384 FSSYNC_VGQry_response_t q_res;
1386 struct VolumeSummary *vsp;
1388 struct VolumeDiskHeader diskHdr;
1390 memset(&res, 0, sizeof(res));
1392 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1395 * We must wait for the partition to finish scanning before
1396 * can continue, since we will not know if we got the entire
1397 * VG membership unless the partition is fully scanned.
1398 * We could, in theory, just scan the partition ourselves if
1399 * the VG cache is not ready, but we would be doing the exact
1400 * same scan the fileserver is doing; it will almost always
1401 * be faster to wait for the fileserver. The only exceptions
1402 * are if the partition does not take very long to scan, and
1403 * in that case it's fast either way, so who cares?
1405 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1406 Log("waiting for fileserver to finish scanning partition %s...\n",
1407 salvinfo->fileSysPartition->name);
1409 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1410 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1411 * just so small partitions don't need to wait over 10
1412 * seconds every time, and large partitions are generally
1413 * polled only once every ten seconds. */
1414 sleep((i > 10) ? (i = 10) : i);
1416 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1420 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1421 /* This can happen if there's no header for the volume
1422 * we're salvaging, or no headers exist for the VG (if
1423 * we're salvaging an RW). Act as if we got a response
1424 * with no VG members. The headers may be created during
1425 * salvaging, if there are inodes in this VG. */
1427 memset(&q_res, 0, sizeof(q_res));
1428 q_res.rw = singleVolumeNumber;
1432 Log("fileserver refused VGCQuery request for volume %" AFS_VOLID_FMT " on "
1433 "partition %s, code %ld reason %ld\n",
1434 afs_printable_VolumeId_lu(singleVolumeNumber),
1435 salvinfo->fileSysPartition->name,
1436 afs_printable_int32_ld(code),
1437 afs_printable_int32_ld(res.hdr.reason));
1441 if (q_res.rw != singleVolumeNumber) {
1442 Log("fileserver requested salvage of clone %" AFS_VOLID_FMT "; scheduling salvage of volume group %" AFS_VOLID_FMT "...\n",
1443 afs_printable_VolumeId_lu(singleVolumeNumber),
1444 afs_printable_VolumeId_lu(q_res.rw));
1445 #ifdef SALVSYNC_BUILD_CLIENT
1446 if (SALVSYNC_LinkVolume(q_res.rw,
1448 salvinfo->fileSysPartition->name,
1450 Log("schedule request failed\n");
1452 #endif /* SALVSYNC_BUILD_CLIENT */
1453 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1456 salvinfo->volumeSummaryp = calloc(VOL_VG_MAX_VOLS, sizeof(struct VolumeSummary));
1457 opr_Assert(salvinfo->volumeSummaryp != NULL);
1459 salvinfo->nVolumes = 0;
1460 vsp = salvinfo->volumeSummaryp;
1462 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1463 char name[VMAXPATHLEN];
1465 if (!q_res.children[i]) {
1469 /* AskOffline for singleVolumeNumber was called much earlier */
1470 if (q_res.children[i] != singleVolumeNumber) {
1471 AskOffline(salvinfo, q_res.children[i]);
1472 if (LockVolume(salvinfo, q_res.children[i])) {
1478 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1480 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1481 afs_printable_uint32_lu(q_res.children[i]));
1486 DiskToVolumeHeader(&vsp->header, &diskHdr);
1487 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1489 salvinfo->nVolumes++;
1493 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1498 Log("Cannot get volume summary from fileserver; falling back to scanning "
1499 "entire partition\n");
1502 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
1507 * count how many volume headers are found by VWalkVolumeHeaders.
1509 * @param[in] dp the disk partition (unused)
1510 * @param[in] name full path to the .vol header (unused)
1511 * @param[in] hdr the header data (unused)
1512 * @param[in] last whether this is the last try or not (unused)
1513 * @param[in] rock actually an afs_int32*; the running count of how many
1514 * volumes we have found
1519 CountHeader(struct DiskPartition64 *dp, const char *name,
1520 struct VolumeDiskHeader *hdr, int last, void *rock)
1522 afs_int32 *nvols = (afs_int32 *)rock;
1528 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
1531 struct SalvageScanParams {
1532 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1533 * vol id of the VG we're salvaging */
1534 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1535 * we're filling in */
1536 afs_int32 nVolumes; /**< # of vols we've encountered */
1537 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1538 * # of vols we've alloc'd memory for) */
1539 int retry; /**< do we need to retry vol lock/checkout? */
1540 struct SalvInfo *salvinfo; /**< salvage job info */
1544 * records volume summary info found from VWalkVolumeHeaders.
1546 * Found volumes are also taken offline if they are in the specific volume
1547 * group we are looking for.
1549 * @param[in] dp the disk partition
1550 * @param[in] name full path to the .vol header
1551 * @param[in] hdr the header data
1552 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1553 * @param[in] rock actually a struct SalvageScanParams*, containing the
1554 * information needed to record the volume summary data
1556 * @return operation status
1558 * @retval -1 volume locking raced with fileserver restart; checking out
1559 * and locking volumes needs to be retried
1560 * @retval 1 volume header is mis-named and should be deleted
/*
 * RecordHeader -- VWalkVolumeHeaders callback that records one volume
 * header into the struct SalvageScanParams passed in as rock.
 *
 * Visible flow:
 *  - The on-disk header is converted into a zeroed local VolumeSummary
 *    with DiskToVolumeHeader().
 *  - If singleVolumeNumber is set and this header has that id while its
 *    parent is not singleVolumeNumber, the requested volume is a clone.
 *    Under programType == salvageServer (and SALVSYNC_BUILD_CLIENT) the
 *    salvage is re-scheduled for the parent via SALVSYNC_LinkVolume() and
 *    the process exits with SALSRV_EXIT_VOLGROUP_LINK; otherwise a message
 *    that the volume is read-only and not salvaged is logged.
 *  - Headers are of interest when no single volume was requested, or when
 *    the header id or parent equals singleVolumeNumber.  For those, the
 *    file name after the last OS_DIRSEPC is compared with the name built
 *    from VFORMAT and the header id.
 *  - Only when the name is good, or on the last try, are volumes taken
 *    offline: AskOffline() is called when singleVolumeNumber is set and
 *    the header id differs from it.  Under AFS_DEMAND_ATTACH_FS the volume
 *    is also locked with LockVolume(); the accompanying comment says that
 *    headers with a bad name are not locked.
 *  - On the last try, unless Showmode is set, the incorrectly-named
 *    message is logged (presumably only for misnamed headers -- confirm);
 *    its wording depends on Testing.
 *  - Abort() is called if params->nVolumes exceeds params->totalVolumes.
 *  - The summary is finally copied into the slot params->vsp points at
 *    with memcpy().
 */
1563 RecordHeader(struct DiskPartition64 *dp, const char *name,
1564 struct VolumeDiskHeader *hdr, int last, void *rock)
1566 char nameShouldBe[64];
1567 struct SalvageScanParams *params;
1568 struct VolumeSummary summary;
1569 VolumeId singleVolumeNumber;
1570 struct SalvInfo *salvinfo;
1572 params = (struct SalvageScanParams *)rock;
1574 memset(&summary, 0, sizeof(summary));
1576 singleVolumeNumber = params->singleVolumeNumber;
1577 salvinfo = params->salvinfo;
1579 DiskToVolumeHeader(&summary.header, hdr);
1581 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1582 && summary.header.parent != singleVolumeNumber) {
1584 if (programType == salvageServer) {
1585 #ifdef SALVSYNC_BUILD_CLIENT
1586 Log("fileserver requested salvage of clone %" AFS_VOLID_FMT "; scheduling salvage of volume group %" AFS_VOLID_FMT "...\n",
1587 afs_printable_VolumeId_lu(summary.header.id),
1588 afs_printable_VolumeId_lu(summary.header.parent));
1589 if (SALVSYNC_LinkVolume(summary.header.parent,
1593 Log("schedule request failed\n");
1596 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1599 Log("%" AFS_VOLID_FMT " is a read-only volume; not salvaged\n",
1600 afs_printable_VolumeId_lu(singleVolumeNumber));
1605 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1606 || summary.header.parent == singleVolumeNumber) {
1608 /* check if the header file is incorrectly named */
1610 const char *base = strrchr(name, OS_DIRSEPC);
1617 snprintf(nameShouldBe, sizeof nameShouldBe,
1618 VFORMAT, afs_printable_VolumeId_lu(summary.header.id));
1621 if (strcmp(nameShouldBe, base)) {
1622 /* .vol file has wrong name; retry/delete */
1626 if (!badname || last) {
1627 /* only offline the volume if the header is good, or if this is
1628 * the last try looking at it; avoid AskOffline'ing the same vol
1631 if (singleVolumeNumber
1632 && summary.header.id != singleVolumeNumber) {
1633 /* don't offline singleVolumeNumber; we already did that
1636 AskOffline(salvinfo, summary.header.id);
1638 #ifdef AFS_DEMAND_ATTACH_FS
1640 /* don't lock the volume if the header is bad, since we're
1641 * about to delete it anyway. */
1642 if (LockVolume(salvinfo, summary.header.id)) {
1647 #endif /* AFS_DEMAND_ATTACH_FS */
1651 if (last && !Showmode) {
1652 Log("Volume header file %s is incorrectly named (should be %s "
1653 "not %s); %sdeleted (it will be recreated later, if "
1654 "necessary)\n", name, nameShouldBe, base,
1655 (Testing ? "it would have been " : ""));
1663 if (params->nVolumes > params->totalVolumes) {
1664 /* We found more volumes than we found on the first partition walk;
1665 * apparently something created a volume while we were
1666 * partition-salvaging, or we found more than 20 vols when salvaging a
1667 * particular volume. Abort if we detect this, since other programs
1668 * supposed to not touch the partition while it is partition-salvaging,
1669 * and we shouldn't find more than 20 vols in a VG.
1671 Abort("Found %ld vol headers, but should have found at most %ld! "
1672 "Make sure the volserver/fileserver are not running at the "
1673 "same time as a partition salvage\n",
1674 afs_printable_int32_ld(params->nVolumes),
1675 afs_printable_int32_ld(params->totalVolumes));
1678 memcpy(params->vsp, &summary, sizeof(summary));
1686 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1688 * If the header could not be read in at all, the header is always unlinked.
1689 * If instead RecordHeader said the header was bad (that is, the header file
1690 * is mis-named), we only unlink if we are doing a partition salvage, as
1691 * opposed to salvaging a specific volume group.
1693 * @param[in] dp the disk partition
1694 * @param[in] name full path to the .vol header
1695 * @param[in] hdr header data, or NULL if the header could not be read
1696 * @param[in] rock actually a struct SalvageScanParams*, with some information
/*
 * UnlinkHeader -- VWalkVolumeHeaders callback for headers that were
 * rejected.
 *
 * rock is a struct SalvageScanParams.  Two cases are distinguished:
 * an unreadable header is reported as not being a legitimate volume
 * header file, and a readable header that was rejected is acted on only
 * when params->singleVolumeNumber is zero (a partition salvage).
 * When dounlink ends up set, unlink(name) is called and a failure is
 * logged together with errno.  The Testing flag changes the log wording
 * to say the file would have been deleted.
 */
1700 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1701 struct VolumeDiskHeader *hdr, void *rock)
1703 struct SalvageScanParams *params;
1706 params = (struct SalvageScanParams *)rock;
1709 /* no header; header is too bogus to read in at all */
1711 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1717 } else if (!params->singleVolumeNumber) {
1718 /* We were able to read in a header, but RecordHeader said something
1719 * was wrong with it. We only unlink those if we are doing a partition
1726 if (dounlink && unlink(name)) {
1727 Log("Error %d while trying to unlink %s\n", errno, name);
1732 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1733 * the fileserver for VG information, or by scanning the /vicepX partition.
1735 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1736 * are salvaging, or 0 if this is a partition
1739 * @return operation status
1741 * @retval -1 we raced with a fileserver restart; checking out and locking
1742 * volumes must be retried
1745 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1747 afs_int32 nvols = 0;
1748 struct SalvageScanParams params;
1751 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1753 /* we successfully got the vol information from the fileserver; no
1754 * need to scan the partition */
1758 /* we need to retry volume checkout */
1762 if (!singleVolumeNumber) {
1763 /* Count how many volumes we have in /vicepX */
1764 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1767 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1772 nvols = VOL_VG_MAX_VOLS;
1775 salvinfo->volumeSummaryp = calloc(nvols, sizeof(struct VolumeSummary));
1776 opr_Assert(salvinfo->volumeSummaryp != NULL);
1778 params.singleVolumeNumber = singleVolumeNumber;
1779 params.vsp = salvinfo->volumeSummaryp;
1780 params.nVolumes = 0;
1781 params.totalVolumes = nvols;
1783 params.salvinfo = salvinfo;
1785 /* walk the partition directory of volume headers and record the info
1786 * about them; unlinking invalid headers */
1787 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1788 UnlinkHeader, ¶ms);
1790 /* we apparently need to retry checking-out/locking volumes */
1794 Abort("Failed to get volume header summary\n");
1796 salvinfo->nVolumes = params.nVolumes;
1798 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1804 #ifdef AFS_NAMEI_ENV
1805 /* Find the link table. This should be associated with the RW volume, even
1806 * if there is only an RO volume at this site.
/*
 * FindLinkHandle -- locate the link table inode of a volume group.
 *
 * isp points at nVols InodeSummary entries; allInodes is the base that the
 * isp[i].index offsets are applied to.  The special inodes of every
 * volume are scanned, and the inodeNumber of the first one whose type is
 * VI_LINKTABLE and whose volumeId and parentId both equal isp->RWvolumeId
 * (taken from the first summary entry) is returned.
 * NOTE(review): the value returned when nothing matches is not shown in
 * these lines; the caller tests the result with VALID_INO() -- confirm.
 */
1809 FindLinkHandle(struct InodeSummary *isp, int nVols,
1810 struct ViceInodeInfo *allInodes)
1813 struct ViceInodeInfo *ip;
1815 for (i = 0; i < nVols; i++) {
1816 ip = allInodes + isp[i].index;
1817 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1818 if (ip[j].u.special.volumeId == isp->RWvolumeId &&
1819 ip[j].u.special.parentId == isp->RWvolumeId &&
1820 ip[j].u.special.type == VI_LINKTABLE) {
1821 return ip[j].inodeNumber;
/*
 * CheckDupLinktable -- deal with a linktable inode other than the one the
 * volume group is using.
 *
 * Inodes whose vnodeNumber is not INODESPECIAL, or whose special type is
 * not VI_LINKTABLE, are left for normal processing.  For a linktable:
 *  - if its volumeId equals its parentId it looks like a valid linktable
 *    for some other id; this is logged and the inode is left alone;
 *  - otherwise the volumeId/parentId mismatch is logged and the inode is
 *    removed by building its path with IH_INIT() and namei_HandleToName()
 *    and calling unlink(); an unlink failure is logged with errno.
 *    The Would-have-deleted message suggests a dry-run branch that skips
 *    the deletion -- presumably keyed on Testing; confirm.
 * NOTE(review): the return statements are not part of these lines; the
 * caller treats a non-zero result as meaning the inode was handled here.
 */
1829 CheckDupLinktable(struct SalvInfo *salvinfo, struct InodeSummary *isp, struct ViceInodeInfo *ip)
1832 if (ip->u.vnode.vnodeNumber != INODESPECIAL) {
1833 /* not a linktable; process as a normal file */
1836 if (ip->u.special.type != VI_LINKTABLE) {
1837 /* not a linktable; process as a normal file */
1841 /* make sure nothing inc/decs it */
1844 if (ip->u.special.volumeId == ip->u.special.parentId) {
1845 /* This is a little weird, but shouldn't break anything, and there is
1846 * no known way that this can happen; just do nothing, in case deleting
1847 * it would screw something up. */
1848 Log("Inode %s appears to be a valid linktable for id (%u), but it's not\n",
1849 PrintInode(stmp, ip->inodeNumber), ip->u.special.parentId);
1850 Log("the linktable for our volume group (%u). This is unusual, since\n",
1852 Log("there should only be one linktable per volume group. I'm leaving\n");
1853 Log("it alone, just to be safe.\n");
1857 Log("Linktable %s appears to be invalid (parentid/volumeid mismatch: %u != %u)\n",
1858 PrintInode(stmp, ip->inodeNumber), ip->u.special.parentId, ip->u.special.volumeId);
1860 Log("Would have deleted linktable inode %s\n", PrintInode(stmp, ip->inodeNumber));
1865 Log("Deleting linktable inode %s\n", PrintInode(stmp, ip->inodeNumber));
1866 IH_INIT(tmpH, salvinfo->fileSysDevice, isp->RWvolumeId, ip->inodeNumber);
1867 namei_HandleToName(&ufs_name, tmpH);
1868 if (unlink(ufs_name.n_path) < 0) {
1869 Log("Error %d unlinking path %s\n", errno, ufs_name.n_path);
/*
 * CreateLinkTable -- create (if needed) and initialise the link table of
 * the volume group isp->RWvolumeId.
 *
 * If ino is not VALID_INO() a new VI_LINKTABLE special inode is created
 * with IH_CREATE(); failure to obtain one is fatal (Abort).  The handle
 * salvinfo->VGLinkH is then initialised for that inode and opened, the
 * file is truncated to sizeof(version) + sizeof(short), and a
 * struct versionStamp carrying LINKTABLEMAGIC and LINKTABLEVERSION is
 * written at offset 0.  Each of those steps aborts on error.  The
 * descriptor is closed with FDH_REALLYCLOSE(), and when isp->volSummary
 * is set the inode number is also stored in its header.linkTable.
 *
 * NOTE(review): the Abort() text used when FDH_PWRITE() fails is the same
 * as the FDH_TRUNC() failure text just before it (it talks about
 * truncating); it looks copied and probably should say the write failed.
 */
1878 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1880 struct versionStamp version;
1883 if (!VALID_INO(ino))
1885 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->RWvolumeId,
1886 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1887 if (!VALID_INO(ino))
1889 ("Unable to allocate link table inode for volume %" AFS_VOLID_FMT " (error = %d)\n",
1890 afs_printable_VolumeId_lu(isp->RWvolumeId), errno);
1891 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1892 fdP = IH_OPEN(salvinfo->VGLinkH);
1894 Abort("Can't open link table for volume %" AFS_VOLID_FMT " (error = %d)\n",
1895 afs_printable_VolumeId_lu(isp->RWvolumeId), errno);
1897 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1898 Abort("Can't truncate link table for volume %" AFS_VOLID_FMT " (error = %d)\n",
1899 afs_printable_VolumeId_lu(isp->RWvolumeId), errno);
1901 version.magic = LINKTABLEMAGIC;
1902 version.version = LINKTABLEVERSION;
1904 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1906 Abort("Can't truncate link table for volume %" AFS_VOLID_FMT " (error = %d)\n",
1907 afs_printable_VolumeId_lu(isp->RWvolumeId), errno);
1909 FDH_REALLYCLOSE(fdP);
1911 /* If the volume summary exits (i.e., the V*.vol header file exists),
1912 * then set this inode there as well.
1914 if (isp->volSummary)
1915 isp->volSummary->header.linkTable = ino;
/*
 * Thread start routine body: arg is the SVGParms_t filled in by
 * nt_SalvageVolumeGroup(); its three fields are forwarded unchanged to
 * DoSalvageVolumeGroup().
 * NOTE(review): the routine name nt_SVG is taken from the pthread_create()
 * call in nt_SalvageVolumeGroup() -- confirm.
 */
1924 SVGParms_t *parms = (SVGParms_t *) arg;
1925 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * nt_SalvageVolumeGroup -- run DoSalvageVolumeGroup() for one volume group
 * in a separate thread and wait for it (AFS_NT40_ENV builds).
 *
 * The per-volume-group fields of salvinfo (VolumeChanged, VGLinkH,
 * VGLinkH_cnt, VolInfo) are reset first.  isp, nVols and salvinfo are
 * packed into parms, a joinable thread running nt_SVG is created with
 * them, and pthread_join() blocks until it finishes.  A failure of
 * pthread_attr_init(), pthread_attr_setdetachstate() or pthread_create()
 * is logged with the RW volume id.
 */
1930 nt_SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1933 pthread_attr_t tattr;
1937 /* Initialize per volume global variables, even if later code does so */
1938 salvinfo->VolumeChanged = 0;
1939 salvinfo->VGLinkH = NULL;
1940 salvinfo->VGLinkH_cnt = 0;
1941 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1943 parms.svgp_inodeSummaryp = isp;
1944 parms.svgp_count = nVols;
1945 parms.svgp_salvinfo = salvinfo;
1946 code = pthread_attr_init(&tattr);
1948 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1952 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1954 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1957 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1959 Log("Failed to create thread to salvage volume group %u\n",
1963 (void)pthread_join(tid, NULL);
1965 #endif /* AFS_NT40_ENV */
/*
 * DoSalvageVolumeGroup -- salvage the nVols volumes of one volume group,
 * described by the InodeSummary array isp (isp[0] carries RWvolumeId).
 *
 * Outline of the visible steps:
 *  - haveRWvolume is true when isp[0] is the RW volume itself and has at
 *    least one special inode.
 *  - Unless ForceSalvage is set, QuickCheck() is consulted first (when
 *    ShowMounts is off, or when there is no RW volume).
 *  - When canfork is set and debug is not, the work is done in a Fork()ed
 *    child while the parent Wait()s for it.
 *  - The inode records of all nVols volumes are read from
 *    salvinfo->inodeFd into one buffer.  allInodes is that buffer biased
 *    by -isp->index so that isp[i].index can be used directly as an
 *    offset.
 *  - Under AFS_NAMEI_ENV the link table is located with FindLinkHandle()
 *    and its versionStamp checked against LINKTABLEMAGIC; a missing or
 *    bad table is re-created with CreateLinkTable(), after which every
 *    non-special inode gets a link count of 1 both in the table and in
 *    memory.  Testing changes the wording of the log message.
 *  - Volumes are processed from the last summary entry down to salvageTo
 *    (0 when there is an RW volume, 1 otherwise).  An RW entry that
 *    consists only of linktable special inodes is skipped.  Each volume
 *    gets two passes (check = 1, then check = 0) of
 *    SalvageVolumeHeaderFile() and SalvageVnodes(), with MaybeZapVolume()
 *    called on failure.
 *  - Afterwards every inode whose linkCount is still non-zero is fixed
 *    with IH_DEC() (positive) or IH_INC() (negative); the link table
 *    inode itself is adjusted last through dec_VGLinkH.
 *  - Finally SalvageVolume() runs the directory checks and the link table
 *    handle is released with IH_RELEASE().
 */
1968 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1970 struct ViceInodeInfo *inodes, *allInodes, *ip;
1971 int i, totalInodes, size, salvageTo;
1975 int dec_VGLinkH = 0;
1977 FdHandle_t *fdP = NULL;
1979 salvinfo->VGLinkH_cnt = 0;
1980 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1981 && isp->nSpecialInodes > 0);
1982 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1983 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1986 if (ShowMounts && !haveRWvolume)
1988 if (canfork && !debug && Fork() != 0) {
1989 (void)Wait("Salvage volume group");
1992 for (i = 0, totalInodes = 0; i < nVols; i++)
1993 totalInodes += isp[i].nInodes;
1994 size = totalInodes * sizeof(struct ViceInodeInfo);
1995 inodes = malloc(size);
1996 allInodes = inodes - isp->index; /* this would the base of all the inodes
1997 * for the partition, if all the inodes
1998 * had been read into memory */
2000 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
2002 opr_Verify(OS_READ(salvinfo->inodeFd, inodes, size) == size);
2004 /* Don't try to salvage a read write volume if there isn't one on this
2006 salvageTo = haveRWvolume ? 0 : 1;
2008 #ifdef AFS_NAMEI_ENV
2009 ino = FindLinkHandle(isp, nVols, allInodes);
2010 if (VALID_INO(ino)) {
2011 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
2012 fdP = IH_OPEN(salvinfo->VGLinkH);
2014 if (VALID_INO(ino) && fdP != NULL) {
2015 struct versionStamp header;
2016 afs_sfsize_t nBytes;
2018 nBytes = FDH_PREAD(fdP, (char *)&header, sizeof(struct versionStamp), 0);
2019 if (nBytes != sizeof(struct versionStamp)
2020 || header.magic != LINKTABLEMAGIC) {
2021 Log("Bad linktable header for volume %" AFS_VOLID_FMT ".\n", afs_printable_VolumeId_lu(isp->RWvolumeId));
2022 FDH_REALLYCLOSE(fdP);
2026 if (!VALID_INO(ino) || fdP == NULL) {
2027 Log("%s link table for volume %" AFS_VOLID_FMT ".\n",
2028 Testing ? "Would have recreated" : "Recreating", afs_printable_VolumeId_lu(isp->RWvolumeId));
2030 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
2033 struct ViceInodeInfo *ip;
2034 CreateLinkTable(salvinfo, isp, ino);
2035 fdP = IH_OPEN(salvinfo->VGLinkH);
2036 /* Sync fake 1 link counts to the link table, now that it exists */
2038 for (i = 0; i < nVols; i++) {
2039 ip = allInodes + isp[i].index;
2040 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
2041 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 0);
2042 ip[j].linkCount = 1;
2049 FDH_REALLYCLOSE(fdP);
2051 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
2054 /* Salvage in reverse order--read/write volume last; this way any
2055 * Inodes not referenced by the time we salvage the read/write volume
2056 * can be picked up by the read/write volume */
2057 /* ACTUALLY, that's not done right now--the inodes just vanish */
2058 for (i = nVols - 1; i >= salvageTo; i--) {
2060 struct InodeSummary *lisp = &isp[i];
2061 #ifdef AFS_NAMEI_ENV
2062 if (rw && (nVols > 1 || isp[i].nSpecialInodes == isp[i].nInodes)) {
2063 /* If nVols > 1, we have more than one vol in this volgroup, so
2064 * the RW inodes we detected may just be for the linktable, and
2065 * there is no actual RW volume.
2067 * Additionally, if we only have linktable inodes (no other
2068 * special inodes, no data inodes), there is also no actual RW
2069 * volume to salvage; this is just cruft left behind by something
2070 * else. In that case nVols will only be 1, though, so also
2071 * perform this linktables-only check if we don't have any
2072 * non-special inodes. */
2074 int all_linktables = 1;
2075 for (inode_i = 0; inode_i < isp[i].nSpecialInodes; inode_i++) {
2076 if (inodes[inode_i].u.special.type != VI_LINKTABLE) {
2081 if (all_linktables) {
2082 /* All we have are linktable special inodes, so skip salvaging
2083 * the RW; there was never an RW volume here. If we don't do
2084 * this, we risk creating a new "phantom" RW that the VLDB
2085 * doesn't know about, which is confusing and can cause
2093 Log("%s VOLUME %" AFS_VOLID_FMT "%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
2094 afs_printable_VolumeId_lu(lisp->volumeId), (Testing ? "(READONLY mode)" : ""));
2095 /* Check inodes twice. The second time do things seriously. This
2096 * way the whole RO volume can be deleted, below, if anything goes wrong */
2097 for (check = 1; check >= 0; check--) {
2099 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
2101 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
2102 if (rw && deleteMe) {
2103 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
2104 * volume won't be called */
2110 if (rw && check == 1)
2112 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
2113 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
2119 /* Fix actual inode counts */
2122 Log("totalInodes %d\n",totalInodes);
2123 for (ip = inodes; totalInodes; ip++, totalInodes--) {
2124 static int TraceBadLinkCounts = 0;
2125 #ifdef AFS_NAMEI_ENV
2126 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
2127 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
2128 VGLinkH_p1 = ip->u.param[0];
2129 continue; /* Deal with this last. */
2130 } else if (CheckDupLinktable(salvinfo, isp, ip)) {
2131 /* Don't touch this inode; CheckDupLinktable has handled it */
2135 if (ip->linkCount != 0 && TraceBadLinkCounts) {
2136 TraceBadLinkCounts--; /* Limit reports, per volume */
2137 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]); /* VolumeId in param */
2140 /* If ip->linkCount is non-zero at this point, then the linkcount
2141 * for the inode on disk is wrong. Initially linkCount is set to
2142 * the actual link count of the inode on disk, and then we (the
2143 * salvager) decrement it for every reference to that inode that we
2144 * find. So if linkCount is still positive by this point, it means
2145 * that the linkcount on disk is too high, so we should DEC the
2146 * inode. If linkCount is negative, it means the linkcount is too
2147 * low, so we should INC the inode.
2149 * If we get an error while INC'ing or DEC'ing, that's a little
2150 * odd and indicates a bug, but try to continue anyway, so the
2151 * volume may still be made accessible. */
2152 while (ip->linkCount > 0) {
2154 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2155 Log("idec failed. inode %s errno %d\n",
2156 PrintInode(stmp, ip->inodeNumber), errno);
2162 while (ip->linkCount < 0) {
2164 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2165 Log("iinc failed. inode %s errno %d\n",
2166 PrintInode(stmp, ip->inodeNumber), errno);
2173 #ifdef AFS_NAMEI_ENV
2174 while (dec_VGLinkH > 0) {
2175 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2176 Log("idec failed on link table, errno = %d\n", errno);
2180 while (dec_VGLinkH < 0) {
2181 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2182 Log("iinc failed on link table, errno = %d\n", errno);
2189 /* Directory consistency checks on the rw volume */
2191 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2192 IH_RELEASE(salvinfo->VGLinkH);
2194 if (canfork && !debug) {
/*
 * QuickCheck -- decide from the volume headers alone whether the volume
 * group can be left unsalvaged.
 *
 * For each of the nVols entries the volume info inode named by the
 * volSummary header is read with IH_IREAD().  The read data is accepted
 * when it is complete, carries VOLUMEINFOMAGIC, has dontSalvage equal to
 * DONT_SALVAGE, and has needsSalvaged and destroyMe both zero.  If such a
 * header still has inUse set, inUse is cleared, inService is set to 1 and
 * the header is written back with IH_IWRITE().
 * The first entry is treated specially when it has no special inodes and
 * nVols > 1 (a phantom RW volume next to clones).
 * NOTE(review): the return statements are not part of these lines; the
 * caller skips the salvage when the result is non-zero -- confirm.
 */
2200 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2202 /* Check headers BEFORE forking */
2206 for (i = 0; i < nVols; i++) {
2207 struct VolumeSummary *vs = isp[i].volSummary;
2208 VolumeDiskData volHeader;
2210 /* Don't salvage just because phantom rw volume is there... */
2211 /* (If a read-only volume exists, read/write inodes must also exist) */
2212 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2216 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2217 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2218 == sizeof(volHeader)
2219 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2220 && volHeader.dontSalvage == DONT_SALVAGE
2221 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2222 if (volHeader.inUse != 0) {
2223 volHeader.inUse = 0;
2224 volHeader.inService = 1;
2226 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2227 != sizeof(volHeader)) {
2243 /* SalvageVolumeHeaderFile
2245 * Salvage the top level V*.vol header file. Make sure the special files
2246 * exist and that there are no duplicates.
2248 * Calls SalvageHeader for each possible type of volume special file.
/*
 * SalvageVolumeHeaderFile: rebuild the volume header of one volume from its
 * special inodes and reconcile it with the header already on record.
 *
 *  salvinfo  salvage job state; fileSysPath, fileSysPartition,
 *            fileSysDevice, VGLinkH and VGLinkH_cnt are used here.
 *  isp       summary of the volume; its special inodes are
 *            inodes[isp->index] .. inodes[isp->index + nSpecialInodes - 1].
 *  inodes    the inode array that isp->index refers into.
 *  RW        when zero (and not checking) ClearROInUseBit() is applied to an
 *            existing isp->volSummary.
 *  check     check-only mode: link counts are not adjusted and a damaged
 *            header is reported without the "; repairing" suffix.
 *  deleteMe  handed through unchanged to SalvageHeader().
 *
 * Outline of the visible steps:
 *   1. Allocate the per-special-inode skip array; failure is logged and the
 *      salvage continues without duplicate recovery.
 *   2. If a header is on record, remember which special inodes it references
 *      (goodspecial[]).
 *   3. Reset tempHeader to magic/version/id/parent only.
 *   4. Look for adjacent special inodes of the same type (duplicates),
 *      preferring the one the existing header references.
 *   5. Record every usable special inode through stuff[].inode.
 *   6. Run SalvageHeader() for every special inode type.
 *   7. Create a header if none is on record, otherwise compare the record
 *      with tempHeader, then write it with the selected routine.
 *
 * NOTE(review): the calloc() result stored in isp->volSummary is later
 * dereferenced by memcpy(); no NULL check is apparent here -- confirm.
 */
2252 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2253 struct ViceInodeInfo *inodes, int RW,
2254 int check, int *deleteMe)
2257 struct ViceInodeInfo *ip;
2258 int allinodesobsolete = 1;
2259 struct VolumeDiskHeader diskHeader;
2260 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2262 struct VolumeHeader tempHeader;
2263 struct afs_inode_info stuff[MAXINODETYPE];
2265 /* keeps track of special inodes that are probably 'good'; they are
2266 * referenced in the vol header, and are included in the given inodes
2271 } goodspecial[MAXINODETYPE];
2276 memset(goodspecial, 0, sizeof(goodspecial));
2278 skip = calloc(isp->nSpecialInodes, sizeof(*skip));
2280 Log("cannot allocate memory for inode skip array when salvaging "
2281 "volume %lu; not performing duplicate special inode recovery\n",
2282 afs_printable_uint32_lu(isp->volumeId));
2283 /* still try to perform the salvage; the skip array only does anything
2284 * if we detect duplicate special inodes */
2287 init_inode_info(&tempHeader, stuff);
2290 * First, look at the special inodes and see if any are referenced by
2291 * the existing volume header. If we find duplicate special inodes, we
2292 * can use this information to use the referenced inode (it's more
2293 * likely to be the 'good' one), and throw away the duplicates.
2295 if (isp->volSummary && skip) {
2296 /* use tempHeader, so we can use the stuff[] array to easily index
2297 * into the isp->volSummary special inodes */
2298 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2300 for (i = 0; i < isp->nSpecialInodes; i++) {
2301 ip = &inodes[isp->index + i];
2302 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2303 /* will get taken care of in a later loop */
2306 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2307 goodspecial[ip->u.special.type-1].valid = 1;
2308 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
/* Start over from an empty header that carries only the identifying fields;
 * the special inode numbers are filled in by the loops that follow. */
2313 memset(&tempHeader, 0, sizeof(tempHeader));
2314 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2315 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2316 tempHeader.id = isp->volumeId;
2317 tempHeader.parent = isp->RWvolumeId;
2319 /* Check for duplicates (inodes are sorted by type field) */
2320 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2321 ip = &inodes[isp->index + i];
2322 if (ip->u.special.type == (ip + 1)->u.special.type) {
2323 afs_ino_str_t stmp1, stmp2;
2325 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2326 /* Will be caught in the loop below */
2330 Log("Duplicate special %d inodes for volume %" AFS_VOLID_FMT " found (%s, %s);\n",
2331 ip->u.special.type, afs_printable_VolumeId_lu(isp->volumeId),
2332 PrintInode(stmp1, ip->inodeNumber),
2333 PrintInode(stmp2, (ip+1)->inodeNumber));
2335 if (skip && goodspecial[ip->u.special.type-1].valid) {
2336 Inode gi = goodspecial[ip->u.special.type-1].inode;
2339 Log("using special inode referenced by vol header (%s)\n",
2340 PrintInode(stmp1, gi));
2343 /* the volume header references some special inode of
2344 * this type in the inodes array; are we it? */
2345 if (ip->inodeNumber != gi) {
2347 } else if ((ip+1)->inodeNumber != gi) {
2348 /* in case this is the last iteration; we need to
2349 * make sure we check ip+1, too */
2354 Log("cannot determine which is correct; salvage of volume %" AFS_VOLID_FMT " aborted\n", afs_printable_VolumeId_lu(isp->volumeId));
/* Main pass: report out-of-range types as rubbish, handle inodes marked in
 * skip[] as orphans, and record every other non-obsolete special inode in
 * the header through stuff[].inode. */
2362 for (i = 0; i < isp->nSpecialInodes; i++) {
2364 ip = &inodes[isp->index + i];
2365 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2367 Log("Rubbish header inode %s of type %d\n",
2368 PrintInode(stmp, ip->inodeNumber),
2369 ip->u.special.type);
2375 Log("Rubbish header inode %s of type %d; deleted\n",
2376 PrintInode(stmp, ip->inodeNumber),
2377 ip->u.special.type);
2378 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2379 if (skip && skip[i]) {
2380 if (orphans == ORPH_REMOVE) {
2381 Log("Removing orphan special inode %s of type %d\n",
2382 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2385 Log("Ignoring orphan special inode %s of type %d\n",
2386 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2387 /* fall through to the ip->linkCount--; line below */
2390 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2391 allinodesobsolete = 0;
2393 if (!check && ip->u.special.type != VI_LINKTABLE)
2394 ip->linkCount--; /* Keep the inode around */
2402 if (allinodesobsolete) {
2409 salvinfo->VGLinkH_cnt++; /* one for every header. */
2411 if (!RW && !check && isp->volSummary) {
2412 ClearROInUseBit(isp->volSummary);
2416 for (i = 0; i < MAXINODETYPE; i++) {
2417 if (stuff[i].inodeType == VI_LINKTABLE) {
2418 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2419 * And we may have recreated the link table earlier, so set the
2420 * RW header as well. The header magic was already checked.
2422 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2423 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2427 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2431 if (isp->volSummary == NULL) {
2433 char headerName[64];
2434 snprintf(headerName, sizeof headerName, VFORMAT,
2435 afs_printable_VolumeId_lu(isp->volumeId));
2436 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2437 salvinfo->fileSysPath, headerName);
2439 Log("No header file for volume %" AFS_VOLID_FMT "\n", afs_printable_VolumeId_lu(isp->volumeId));
2443 Log("No header file for volume %" AFS_VOLID_FMT "; %screating %s\n",
2444 afs_printable_VolumeId_lu(isp->volumeId), (Testing ? "it would have been " : ""),
2446 isp->volSummary = calloc(1, sizeof(struct VolumeSummary));
2448 writefunc = VCreateVolumeDiskHeader;
2451 char headerName[64];
2452 /* hack: these two fields are obsolete... */
2453 isp->volSummary->header.volumeAcl = 0;
2454 isp->volSummary->header.volumeMountTable = 0;
2457 (&isp->volSummary->header, &tempHeader,
2458 sizeof(struct VolumeHeader))) {
2459 VolumeExternalName_r(isp->volumeId, headerName, sizeof(headerName));
2460 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2461 salvinfo->fileSysPath, headerName);
2463 Log("Header file %s is damaged or no longer valid%s\n", path,
2464 (check ? "" : "; repairing"));
2468 writefunc = VWriteVolumeDiskHeader;
/* Adopt the rebuilt header as the in-memory record for this volume. */
2472 memcpy(&isp->volSummary->header, &tempHeader,
2473 sizeof(struct VolumeHeader));
2476 Log("It would have written a new header file for volume %" AFS_VOLID_FMT "\n",
2477 afs_printable_VolumeId_lu(isp->volumeId));
/* Convert the rebuilt header to its on-disk form and write it with the
 * create or rewrite routine selected above. */
2480 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2481 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2483 Log("Error %ld writing volume header file for volume %" AFS_VOLID_FMT "\n",
2484 afs_printable_int32_ld(code),
2485 afs_printable_VolumeId_lu(diskHeader.id));
/* Set up the handle for the volume info special inode named by the header. */
2490 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2491 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader: validate, and if necessary recreate, one special inode of
 * a volume header.
 *
 *  salvinfo  salvage job state; VolInfo is filled in for VI_VOLINFO.
 *  sp        description of the special inode: type, expected stamp, size,
 *            printable description, and a pointer to the header field that
 *            holds its inode number.
 *  isp       summary of the volume the inode belongs to.
 *  check     check-only mode; recreating a header in this mode is reported
 *            as an internal error.
 *  deleteMe  NOTE(review): presumably set when the volume is flagged
 *            DESTROY_ME -- the assignment is not apparent here, confirm.
 *
 * The inode is opened, sp->size bytes are read and the magic number is
 * compared with sp->stamp.magic. When the contents have to be recreated
 * (and Testing is off) the file is truncated and rewritten: a VI_VOLINFO
 * inode gets a fresh VolumeDiskData named "bogus.<volume id>" that is out of
 * service and unblessed; the version stamp alone is written by the second
 * FDH_PWRITE call (presumably the branch for the other inode types).
 *
 * The caller in this file compares the result with -1.
 */
2496 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2497 struct InodeSummary *isp, int check, int *deleteMe)
2500 VolumeDiskData volumeInfo;
2501 struct versionStamp fileHeader;
2510 #ifndef AFS_NAMEI_ENV
2511 if (sp->inodeType == VI_LINKTABLE)
2512 return 0; /* header magic was already checked */
/* A zero inode number means the header has no inode of this type yet. */
2514 if (*(sp->inode) == 0) {
2516 Log("Missing inode in volume header (%s)\n", sp->description);
2520 Log("Missing inode in volume header (%s); %s\n", sp->description,
2521 (Testing ? "it would have recreated it" : "recreating"));
2524 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2525 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2526 if (!VALID_INO(*(sp->inode)))
2528 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2529 sp->description, errno);
/* Open the special inode so that its contents can be validated. */
2534 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2535 fdP = IH_OPEN(specH);
2536 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2537 /* bail out early and destroy the volume */
2539 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2546 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2547 sp->description, errno);
2550 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2551 || header.fileHeader.magic != sp->stamp.magic)) {
2553 Log("Part of the header (%s) is corrupted\n", sp->description);
2554 FDH_REALLYCLOSE(fdP);
2558 Log("Part of the header (%s) is corrupted; recreating\n",
2561 /* header can be garbage; make sure we don't read garbage data from
2563 memset(&header, 0, sizeof(header));
2565 #ifdef AFS_NAMEI_ENV
2566 if (namei_FixSpecialOGM(fdP, check)) {
2567 Log("Error with namei header OGM data (%s)\n", sp->description);
2568 FDH_REALLYCLOSE(fdP);
2573 if (sp->inodeType == VI_VOLINFO
2574 && header.volumeInfo.destroyMe == DESTROY_ME) {
2577 FDH_REALLYCLOSE(fdP);
2581 if (recreate && !Testing) {
2584 ("Internal error: recreating volume header (%s) in check mode\n",
2586 nBytes = FDH_TRUNC(fdP, 0);
2588 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2589 sp->description, errno);
2591 /* The following code should be moved into vutil.c */
2592 if (sp->inodeType == VI_VOLINFO) {
/* Build a minimal replacement VolumeDiskData: placeholder name, not in
 * service, not blessed, uniquifier pushed past the highest one seen. */
2594 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2595 header.volumeInfo.stamp = sp->stamp;
2596 header.volumeInfo.id = isp->volumeId;
2597 header.volumeInfo.parentId = isp->RWvolumeId;
2598 sprintf(header.volumeInfo.name, "bogus.%" AFS_VOLID_FMT, afs_printable_VolumeId_lu(isp->volumeId));
2599 Log("Warning: the name of volume %" AFS_VOLID_FMT " is now \"bogus.%" AFS_VOLID_FMT "\"\n",
2600 afs_printable_VolumeId_lu(isp->volumeId), afs_printable_VolumeId_lu(isp->volumeId));
2601 header.volumeInfo.inService = 0;
2602 header.volumeInfo.blessed = 0;
2603 /* The + 1000 is a hack in case there are any files out in venus caches */
2604 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2605 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2606 header.volumeInfo.needsCallback = 0;
2607 gettimeofday(&tp, NULL);
2608 header.volumeInfo.creationDate = tp.tv_sec;
2610 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2611 sizeof(header.volumeInfo), 0);
2612 if (nBytes != sizeof(header.volumeInfo)) {
2615 ("Unable to write volume header file (%s) (errno = %d)\n",
2616 sp->description, errno);
2617 Abort("Unable to write entire volume header file (%s)\n",
/* Only the version stamp is written here (presumably for the special inode
 * types other than VI_VOLINFO -- confirm). */
2621 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2622 if (nBytes != sizeof(sp->stamp)) {
2625 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2626 sp->description, errno);
2628 ("Unable to write entire version stamp in volume header file (%s)\n",
2633 FDH_REALLYCLOSE(fdP);
/* Keep a copy of the volume info in the job state and log when the volume
 * was last updated, or created if it has never been updated. */
2635 if (sp->inodeType == VI_VOLINFO) {
2636 salvinfo->VolInfo = header.volumeInfo;
2640 if (salvinfo->VolInfo.updateDate) {
2641 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2643 Log("%s (%" AFS_VOLID_FMT ") %supdated %s\n", salvinfo->VolInfo.name,
2644 afs_printable_VolumeId_lu(salvinfo->VolInfo.id),
2645 (Testing ? "it would have been " : ""), update);
2647 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2649 Log("%s (%" AFS_VOLID_FMT ") not updated (created %s)\n",
2650 salvinfo->VolInfo.name, afs_printable_VolumeId_lu(salvinfo->VolInfo.id), update);
/*
 * SalvageVnodes: salvage both vnode indexes of one volume.
 *
 *  rwIsp    summary of the read/write volume; supplies the inode list
 *           (everything after its special inodes).
 *  thisIsp  summary of the volume being salvaged; supplies the small and
 *           large vnode index inodes from its volume header.
 *  inodes   the inode array that rwIsp->index refers into.
 *  check    check-only mode, passed on to SalvageIndex().
 *
 * Returns 0 only when both SalvageIndex() passes returned 0, otherwise -1.
 * In check mode a -1 from the small-vnode pass is tested before the large
 * index is examined (presumably to return early -- confirm).
 */
2660 SalvageVnodes(struct SalvInfo *salvinfo,
2661 struct InodeSummary *rwIsp,
2662 struct InodeSummary *thisIsp,
2663 struct ViceInodeInfo *inodes, int check)
2665 int ilarge, ismall, ioffset, RW, nInodes;
2666 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
/* The volume is treated as read/write when it is its own RW parent. */
2669 RW = (rwIsp == thisIsp);
2670 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2672 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2673 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2674 if (check && ismall == -1)
2677 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2678 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2679 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex: walk one vnode index file and reconcile every allocated
 * vnode with the list of inodes found on disk.
 *
 *  salvinfo    salvage job state; VolumeChanged is set when a vnode is
 *              rewritten.
 *  ino         inode number of the vnode index file.
 *  class       vnode class, used to select the VnodeClassInfo entry.
 *  RW          NOTE(review): presumably selects the read/write matching
 *              rules described in the comments below -- confirm.
 *  ip/nInodes  the volume's inodes; the loops compare them in increasing
 *              vnodeNumber order.
 *  volSummary  supplies header.parent for the index handle.
 *  check       check-only mode; a changed vnode in this mode trips an
 *              assertion.
 *
 * For each vnode whose type is not vNull the visible code clears vnodes
 * with a zero inode number or a bad magic number, advances through the
 * inode list to the entries for this vnode number, and then repairs the
 * uniquifier, data version, inode number and length from the matching
 * inode. Vnodes without a matching inode are logged and cleared. Changed
 * vnodes are written back with IH_IWRITE unless Testing is set.
 *
 * SalvageVnodes() treats the result as 0 for success and -1 for failure.
 */
2683 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2684 struct ViceInodeInfo *ip, int nInodes,
2685 struct VolumeSummary *volSummary, int check)
2687 char buf[SIZEOF_LARGEDISKVNODE];
2688 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2690 StreamHandle_t *file;
2691 struct VnodeClassInfo *vcp;
2693 afs_sfsize_t nVnodes;
2694 afs_fsize_t vnodeLength;
2696 afs_ino_str_t stmp1, stmp2;
/* Open the index file; its size must be a whole number of vnode records,
 * the first of which is skipped by the seek below. */
2700 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2701 fdP = IH_OPEN(handle);
2702 opr_Assert(fdP != NULL);
2703 file = FDH_FDOPEN(fdP, "r+");
2704 opr_Assert(file != NULL);
2705 vcp = &VnodeClassInfo[class];
2706 size = OS_SIZE(fdP->fd_fd);
2707 opr_Assert(size != -1);
2708 nVnodes = (size / vcp->diskSize) - 1;
2710 opr_Assert((nVnodes + 1) * vcp->diskSize == size);
2711 opr_Verify(STREAM_ASEEK(file, vcp->diskSize) == 0);
/* One iteration per vnode record, in index order. */
2715 for (vnodeIndex = 0;
2716 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2717 nVnodes--, vnodeIndex++) {
2718 if (vnode->type != vNull) {
2719 int vnodeChanged = 0;
2720 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2721 if (VNDISK_GET_INO(vnode) == 0) {
2723 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2724 memset(vnode, 0, vcp->diskSize);
2728 if (vcp->magic != vnode->vnodeMagic) {
2729 /* bad magic #, probably partially created vnode */
2731 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2732 vnodeNumber, afs_printable_uint32_lu(vnode->vnodeMagic),
2733 afs_printable_uint32_lu(vcp->magic));
2734 memset(vnode, 0, vcp->diskSize);
2738 Log("Partially allocated vnode %d deleted.\n",
2740 memset(vnode, 0, vcp->diskSize);
2744 /* ****** Should do a bit more salvage here: e.g. make sure
2745 * vnode type matches what it should be given the index */
2746 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2747 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2748 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2749 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2756 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2757 /* The following doesn't work, because the version number
2758 * is not maintained correctly by the file server */
2759 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2760 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2762 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2768 /* For RW volume, look for vnode with matching inode number;
2769 * if no such match, take the first determined by our sort
2771 struct ViceInodeInfo *lip = ip;
2772 int lnInodes = nInodes;
2774 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2775 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2784 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2785 /* "Matching" inode */
/* vu/vd come from the vnode, iu/id from the inode: uniquifier and data
 * version respectively. */
2789 vu = vnode->uniquifier;
2790 iu = ip->u.vnode.vnodeUniquifier;
2791 vd = vnode->dataVersion;
2792 id = ip->u.vnode.inodeDataVersion;
2794 * Because of the possibility of the uniquifier overflows (> 4M)
2795 * we compare them modulo the low 22-bits; we shouldn't worry
2796 * about mismatching since they shouldn't to many old
2797 * uniquifiers of the same vnode...
2799 if (IUnique(vu) != IUnique(iu)) {
2801 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2804 vnode->uniquifier = iu;
2805 #ifdef AFS_3DISPARES
2806 vnode->dataVersion = (id >= vd ?
2809 1887437 ? vd : id) :
2812 1887437 ? id : vd));
2814 #if defined(AFS_SGI_EXMAG)
2815 vnode->dataVersion = (id >= vd ?
2818 15099494 ? vd : id) :
2821 15099494 ? id : vd));
2823 vnode->dataVersion = (id > vd ? id : vd);
2824 #endif /* AFS_SGI_EXMAG */
2825 #endif /* AFS_3DISPARES */
2828 /* don't bother checking for vd > id any more, since
2829 * partial file transfers always result in this state,
2830 * and you can't do much else anyway (you've already
2831 * found the best data you can) */
2832 #ifdef AFS_3DISPARES
2833 if (!vnodeIsDirectory(vnodeNumber)
2834 && ((vd < id && (id - vd) < 1887437)
2835 || ((vd > id && (vd - id) > 1887437)))) {
2837 #if defined(AFS_SGI_EXMAG)
2838 if (!vnodeIsDirectory(vnodeNumber)
2839 && ((vd < id && (id - vd) < 15099494)
2840 || ((vd > id && (vd - id) > 15099494)))) {
2842 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2843 #endif /* AFS_SGI_EXMAG */
2846 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2847 vnode->dataVersion = id;
2852 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2855 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2857 VNDISK_SET_INO(vnode, ip->inodeNumber);
2862 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2864 VNDISK_SET_INO(vnode, ip->inodeNumber);
/* The length recorded in the vnode must agree with the inode's byte count. */
2867 VNDISK_GET_LEN(vnodeLength, vnode);
2868 if (ip->byteCount != vnodeLength) {
2871 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2876 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2877 VNDISK_SET_LEN(vnode, ip->byteCount);
2881 ip->linkCount--; /* Keep the inode around */
2884 } else { /* no matching inode */
2886 if (VNDISK_GET_INO(vnode) != 0
2887 || vnode->type == vDirectory) {
2888 /* No matching inode--get rid of the vnode */
2890 if (VNDISK_GET_INO(vnode)) {
2892 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2896 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2901 if (VNDISK_GET_INO(vnode)) {
2903 time_t serverModifyTime = vnode->serverModifyTime;
2904 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2908 time_t serverModifyTime = vnode->serverModifyTime;
2909 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2912 memset(vnode, 0, vcp->diskSize);
2915 /* Should not reach here because we checked for
2916 * (inodeNumber == 0) above. And where we zero the vnode,
2917 * we also goto vnodeDone.
2921 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2925 } /* VNDISK_GET_INO(vnode) != 0 */
/* Check mode must never have modified a vnode; otherwise write the repaired
 * record back to its slot in the index unless this is a dry run. */
2927 opr_Assert(!(vnodeChanged && check));
2928 if (vnodeChanged && !Testing) {
2929 opr_Verify(IH_IWRITE(handle,
2930 vnodeIndexOffset(vcp, vnodeNumber),
2931 (char *)vnode, vcp->diskSize)
2933 salvinfo->VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber: map a vnode number to the salvager's VnodeEssence
 * record for it.
 *
 * The vnode number selects a class (and so a salvinfo->vnodeInfo[] entry)
 * and a bit number within that class. Returns NULL when the bit number is
 * at or beyond the number of vnodes recorded for the class, otherwise a
 * pointer into that class's vnodes[] array.
 */
2944 struct VnodeEssence *
2945 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2948 struct VnodeInfo *vip;
2951 class = vnodeIdToClass(vnodeNumber);
2952 vip = &salvinfo->vnodeInfo[class];
2953 offset = vnodeIdToBitNumber(vnodeNumber);
2954 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite: give a directory a private copy of its data before the
 * salvager modifies it.
 *
 *  salvinfo  salvage job state; supplies the large vnode index handle, the
 *            device and path, and the VolumeChanged flag.
 *  dir       directory being salvaged.
 *
 * The dir->copied / Testing test at the top guards the rest of the function
 * (presumably an early return -- confirm). Otherwise the directory's vnode
 * is read from the large vnode index, a new inode is created with
 * IH_CREATE, the old inode's contents are copied into it with CopyInode,
 * the vnode is pointed at the new inode and written back, and
 * dir->dirHandle is re-targeted at the new inode.
 */
2958 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2960 /* Copy the directory unconditionally if we are going to change it:
2961 * not just if was cloned.
2963 struct VnodeDiskObject vnode;
2964 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2965 Inode oldinode, newinode;
2968 if (dir->copied || Testing)
2970 DFlush(); /* Well justified paranoia... */
/* Fetch the directory's on-disk vnode to learn its current inode. */
2973 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2974 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2976 opr_Assert(code == sizeof(vnode));
2977 oldinode = VNDISK_GET_INO(&vnode);
2978 /* Increment the version number by a whole lot to avoid problems with
2979 * clients that were promised new version numbers--but the file server
2980 * crashed before the versions were written to disk.
2983 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2984 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2986 opr_Assert(VALID_INO(newinode));
2987 opr_Verify(CopyInode(salvinfo->fileSysDevice, oldinode, newinode,
2990 VNDISK_SET_INO(&vnode, newinode);
2992 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2993 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2995 opr_Assert(code == sizeof(vnode));
2997 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2998 salvinfo->fileSysDevice, newinode,
2999 &salvinfo->VolumeChanged);
3000 /* Don't delete the original inode right away, because the directory is
3001 * still being scanned.
3007 * This function should either successfully create a new dir, or give up
3008 * and leave things the way they were. In particular, if it fails to write
3009 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage: rebuild a damaged directory into a freshly created inode
 * and switch the directory's vnode over to it.
 *
 *  salvinfo  salvage job state; supplies the large vnode index handle, the
 *            device and path, and the VolumeChanged flag.
 *  dir       directory being salvaged; on success dir->dirHandle refers to
 *            the new directory.
 *
 * The directory's vnode is read, a new inode is created, and DirSalvage()
 * copies the usable entries from the old directory into it. If that fails,
 * or the result does not pass DirOK(), the new inode's link count is
 * dropped again and the old directory is kept. Otherwise the vnode is
 * updated with the new inode number and length and written back, the old
 * directory file is closed, and the old inode's link count is dropped.
 */
3013 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
3015 struct VnodeDiskObject vnode;
3016 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
3017 Inode oldinode, newinode;
3022 afs_int32 parentUnique = 1;
3023 struct VnodeEssence *vnodeEssence;
3028 Log("Salvaging directory %u...\n", dir->vnodeNumber);
3030 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
3031 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
3033 opr_Assert(lcode == sizeof(vnode));
3034 oldinode = VNDISK_GET_INO(&vnode);
3035 /* Increment the version number by a whole lot to avoid problems with
3036 * clients that were promised new version numbers--but the file server
3037 * crashed before the versions were written to disk.
3040 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
3041 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
3043 opr_Assert(VALID_INO(newinode));
3044 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
3045 &salvinfo->VolumeChanged);
3047 /* Assign . and .. vnode numbers from dir and vnode.parent.
3048 * The uniquifier for . is in the vnode.
3049 * The uniquifier for .. might be set to a bogus value of 1 and
3050 * the salvager will later clean it up.
3052 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
3053 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
3056 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
3058 (vnode.parent ? vnode.parent : dir->vnodeNumber),
3063 /* didn't really build the new directory properly, let's just give up. */
/* Report the salvage failure code first: `code` is reused for the result of
 * IH_DEC immediately afterwards. */
3064 Log("Directory salvage returned code %d, continuing.\n", code);
3065 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
3067 Log("also failed to decrement link count on new inode\n");
3071 Log("Checking the results of the directory salvage...\n");
3072 if (!DirOK(&newdir)) {
3073 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
3074 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
3075 opr_Assert(code == 0);
/* The rebuilt directory is good: point the vnode at it and record its
 * length before writing the vnode back. */
3079 VNDISK_SET_INO(&vnode, newinode);
3080 length = afs_dir_Length(&newdir);
3081 VNDISK_SET_LEN(&vnode, length);
3083 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3084 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
3086 opr_Assert(lcode == sizeof(vnode));
3087 IH_CONDSYNC(salvinfo->vnodeInfo[vLarge].handle);
3089 /* make sure old directory file is really closed */
3090 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
3091 FDH_REALLYCLOSE(fdP);
3093 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
3094 opr_Assert(code == 0);
3095 dir->dirHandle = newdir;
3099 * arguments for JudgeEntry.
/* State handed to JudgeEntry() through its opaque first argument. */
3101 struct judgeEntry_params {
3102 struct DirSummary *dir; /**< directory we're examining entries in */
3103 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/*
 * JudgeEntry: examine one directory entry and repair or delete it.
 *
 *  arock        a struct judgeEntry_params: the directory being examined
 *               and the salvage job state.
 *  name         name of the entry.
 *  vnodeNumber  vnode number the entry refers to.
 *  unique       uniquifier stored in the entry (compared below with the
 *               vnode's own uniquifier).
 *
 * Visible checks, in order:
 *   - the vnode number must map to a VnodeEssence record, else the entry is
 *     deleted;
 *   - on non-namei builds an entry whose vnode has inode number 0 is
 *     deleted;
 *   - a small-vnode entry whose record has no count, unique or mode bits is
 *     reported as invalid;
 *   - a uniquifier mismatch is fixed by recreating the entry with the
 *     vnode's uniquifier, or the entry is deleted;
 *   - "." and ".." are rewritten if they point at the wrong fid, and old
 *     ".__afs" entries are removed and their vnode marked for deletion;
 *   - symlinks without execute bits are read as mount points and converted
 *     to plain symbolic links when their contents are not a valid mount
 *     point string;
 *   - a vnode whose parent is another directory is either claimed by this
 *     directory or the entry is deleted.
 * Finally the vnode is marked claimed and its expected link count is
 * decremented.
 *
 * Every modification is preceded by CopyOnWrite() on the directory.
 *
 * NOTE(review): the one visible return is 0 ("no need to continue");
 * confirm the return convention against the directory enumeration caller.
 */
3107 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
3110 struct judgeEntry_params *params = arock;
3111 struct DirSummary *dir = params->dir;
3112 struct SalvInfo *salvinfo = params->salvinfo;
3113 struct VnodeEssence *vnodeEssence;
3114 afs_int32 dirOrphaned, todelete;
3116 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
/* Look up the salvager's record for the vnode this entry names. */
3118 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3119 if (vnodeEssence == NULL) {
3121 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
3124 CopyOnWrite(salvinfo, dir);
3125 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3130 #ifndef AFS_NAMEI_ENV
3131 /* On AIX machines, don't allow entries to point to inode 0. That is a special
3132 * mount inode for the partition. If this inode were deleted, it would crash
3135 if (vnodeEssence->InodeNumber == 0) {
3136 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
3138 CopyOnWrite(salvinfo, dir);
3139 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3146 if (!(vnodeNumber & 1) && !Showmode
3147 && !(vnodeEssence->count || vnodeEssence->unique
3148 || vnodeEssence->modeBits)) {
3149 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
3150 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3151 vnodeNumber, unique,
3152 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3156 CopyOnWrite(salvinfo, dir);
3157 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3163 /* Check if the Uniquifiers match. If not, change the directory entry
3164 * so its unique matches the vnode unique. Delete if the unique is zero
3165 * or if the directory is orphaned.
3167 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3168 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3171 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3173 /* This is an orphaned directory. Don't delete the . or ..
3174 * entry. Otherwise, it will get created in the next
3175 * salvage and deleted again here. So Just skip it.
3179 /* (vnodeEssence->unique == 0 && ('.' || '..'));
3180 * Entries arriving here should be deleted, but the directory
3181 * is not orphaned. Therefore, the entry must be pointing at
3182 * the wrong vnode. Skip the 'else' clause and fall through;
3183 * the code below will repair the entry so it correctly points
3184 * at the vnode of the current directory (if '.') or the parent
3185 * directory (if '..'). */
3188 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n",
3189 dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique,
3190 vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3194 fid.Vnode = vnodeNumber;
3195 fid.Unique = vnodeEssence->unique;
3196 CopyOnWrite(salvinfo, dir);
3197 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3199 opr_Verify(afs_dir_Create(&dir->dirHandle, name, &fid) == 0);
3202 return 0; /* no need to continue */
/* Special names: "." must name this directory, ".." its parent, and old
 * ".__afs" entries are removed. */
3206 if (strcmp(name, ".") == 0) {
3207 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3209 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3212 CopyOnWrite(salvinfo, dir);
3213 opr_Verify(afs_dir_Delete(&dir->dirHandle, ".") == 0);
3214 fid.Vnode = dir->vnodeNumber;
3215 fid.Unique = dir->unique;
3216 opr_Verify(afs_dir_Create(&dir->dirHandle, ".", &fid) == 0);
3217 vnodeNumber = fid.Vnode; /* Get the new Essence */
3218 unique = fid.Unique;
3219 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3223 } else if (strcmp(name, "..") == 0) {
3226 struct VnodeEssence *dotdot;
3227 pa.Vnode = dir->parent;
3228 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3229 opr_Assert(dotdot != NULL); /* XXX Should not be assert */
3230 pa.Unique = dotdot->unique;
3232 pa.Vnode = dir->vnodeNumber;
3233 pa.Unique = dir->unique;
3235 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3237 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3239 CopyOnWrite(salvinfo, dir);
3240 opr_Verify(afs_dir_Delete(&dir->dirHandle, "..") == 0);
3241 opr_Verify(afs_dir_Create(&dir->dirHandle, "..", &pa) == 0);
3244 vnodeNumber = pa.Vnode; /* Get the new Essence */
3246 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3248 dir->haveDotDot = 1;
3249 } else if (strncmp(name, ".__afs", 6) == 0) {
3251 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3254 CopyOnWrite(salvinfo, dir);
3255 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3257 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3258 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
/* Ordinary entries: optional reporting of suid/sgid and root-owned files,
 * and validation of mount points. */
3261 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3262 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3263 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3264 && !(vnodeEssence->modeBits & 0111)) {
3265 afs_sfsize_t nBytes;
3271 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3272 vnodeEssence->InodeNumber);
3275 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3279 size = FDH_SIZE(fdP);
3281 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3282 FDH_REALLYCLOSE(fdP);
3289 nBytes = FDH_PREAD(fdP, buf, size, 0);
3290 if (nBytes == size) {
/* A mount point string starts with '#' or '%' and ends with '.'; anything
 * else gets execute bits set so it is treated as a symbolic link. */
3292 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3293 Log("Volume %" AFS_VOLID_FMT " (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3294 afs_printable_VolumeId_lu(dir->dirHandle.dirh_handle->ih_vid), dir->vname, dir->name ? dir->name : "??", name, buf,
3295 Testing ? "would convert" : "converted");
3296 vnodeEssence->modeBits |= 0111;
3297 vnodeEssence->changed = 1;
3298 } else if (ShowMounts)
3299 Log("In volume %" AFS_VOLID_FMT " (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3300 afs_printable_VolumeId_lu(dir->dirHandle.dirh_handle->ih_vid),
3301 dir->vname, dir->name ? dir->name : "??", name, buf);
3303 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3304 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3306 FDH_REALLYCLOSE(fdP);
3309 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3310 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/* Remember the first name seen for a large (directory-class) vnode. */
3311 if (vnodeIdToClass(vnodeNumber) == vLarge
3312 && vnodeEssence->name == NULL) {
3313 vnodeEssence->name = strdup(name);
3316 /* The directory entry points to the vnode. Check to see if the
3317 * vnode points back to the directory. If not, then let the
3318 * directory claim it (else it might end up orphaned). Vnodes
3319 * already claimed by another directory are deleted from this
3320 * directory: hardlinks to the same vnode are not allowed
3321 * from different directories.
3323 if (vnodeEssence->parent != dir->vnodeNumber) {
3324 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3325 /* Vnode does not point back to this directory.
3326 * Orphaned dirs cannot claim a file (it may belong to
3327 * another non-orphaned dir).
3330 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3332 vnodeEssence->parent = dir->vnodeNumber;
3333 vnodeEssence->changed = 1;
3335 /* Vnode was claimed by another directory */
3338 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3339 } else if (vnodeNumber == 1) {
3340 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3342 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3346 CopyOnWrite(salvinfo, dir);
3347 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3352 /* This directory claims the vnode */
3353 vnodeEssence->claimed = 1;
/* One expected link has now been accounted for by a directory entry. */
3355 vnodeEssence->count--;
/*
 * DistilVnodeEssence: read one vnode index file and condense every
 * allocated vnode into a VnodeEssence record.
 *
 *  salvinfo  salvage job state; vnodeInfo[class] is filled in here.
 *  rwVId     volume id used when initialising the index handle.
 *  class     vnode class to process.
 *  ino       inode number of the vnode index file for that class.
 *  maxu      in/out: raised to the largest uniquifier seen.
 *
 * The index is opened and its size checked to be a whole number of vnode
 * records. vip->vnodes (and, for the large class, vip->inodes) are
 * allocated with one slot per vnode, and every non-null vnode's link count,
 * block count, parent, uniquifier, mode bits, inode number, type, author,
 * owner and group are recorded. A directory vnode found outside the large
 * class is zeroed and written back to the small vnode index.
 *
 * NOTE(review): that write-back is not guarded by Testing here, unlike the
 * vnode rewrite in SalvageIndex() -- confirm whether that is intended.
 */
3360 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3361 VnodeClass class, Inode ino, Unique * maxu)
3363 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3364 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3365 char buf[SIZEOF_LARGEDISKVNODE];
3366 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3368 StreamHandle_t *file;
3373 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3374 fdP = IH_OPEN(vip->handle);
3375 opr_Assert(fdP != NULL);
3376 file = FDH_FDOPEN(fdP, "r+");
3377 opr_Assert(file != NULL);
3378 size = OS_SIZE(fdP->fd_fd);
3379 opr_Assert(size != -1);
/* The first record of the index is not a vnode, hence the "- 1". */
3380 vip->nVnodes = (size / vcp->diskSize) - 1;
3381 if (vip->nVnodes > 0) {
3382 opr_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3383 opr_Verify(STREAM_ASEEK(file, vcp->diskSize) == 0);
3384 opr_Verify((vip->vnodes = calloc(vip->nVnodes,
3385 sizeof(struct VnodeEssence)))
3387 if (class == vLarge) {
3388 opr_Verify((vip->inodes = calloc(vip->nVnodes, sizeof(Inode)))
3398 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3399 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3400 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3401 nVnodes--, vnodeIndex++) {
3402 if (vnode->type != vNull) {
3403 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3404 afs_fsize_t vnodeLength;
3405 vip->nAllocatedVnodes++;
3406 vep->count = vnode->linkCount;
3407 VNDISK_GET_LEN(vnodeLength, vnode);
3408 vep->blockCount = nBlocks(vnodeLength);
3409 vip->volumeBlockCount += vep->blockCount;
3410 vep->parent = vnode->parent;
3411 vep->unique = vnode->uniquifier;
3412 if (*maxu < vnode->uniquifier)
3413 *maxu = vnode->uniquifier;
3414 vep->modeBits = vnode->modeBits;
3415 vep->InodeNumber = VNDISK_GET_INO(vnode);
3416 vep->type = vnode->type;
3417 vep->author = vnode->author;
3418 vep->owner = vnode->owner;
3419 vep->group = vnode->group;
3420 if (vnode->type == vDirectory) {
3421 if (class != vLarge) {
/* A directory vnode does not belong in this index: clear it on disk. Write
 * the zeroed record itself (vnode already points at the buffer), and
 * exactly as many bytes as the memset cleared. */
3422 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3423 vip->nAllocatedVnodes--;
3424 memset(vnode, 0, sizeof(*vnode));
3425 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3426 vnodeIndexOffset(vcp, vnodeNumber),
3427 (char *)vnode, sizeof(*vnode));
3428 salvinfo->VolumeChanged = 1;
3430 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * Build into 'path' the name of the directory described by vp by first
 * recursing on its parent and then appending OS_DIRSEP and vp->name.
 * The recursion is only taken while vp->parent and vp->name are both set
 * and CheckVnodeNumber() finds the parent vnode.
 *
 * NOTE(review): strcat() appends without a length check and no buffer size
 * is passed in.  The caller visible in this chunk (SalvageDir) supplies a
 * static MAXPATHLEN buffer; confirm a deep tree cannot overflow it.
 * NOTE(review): the base case and the return statements are not visible in
 * this chunk.
 */
3439 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3442 struct VnodeEssence *parentvp;
3448 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3449 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3450 strcat(path, OS_DIRSEP);
3451 strcat(path, vp->name);
3457 /* To determine if a vnode is orhpaned or not, the vnode and all its parent
3458 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
3461 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3463 struct VnodeEssence *vep;
3466 return (1); /* Vnode zero does not exist */
3468 return (0); /* The root dir vnode is always claimed */
3469 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3470 if (!vep || !vep->claimed)
3471 return (1); /* Vnode is not claimed - it is orphaned */
3473 return (IsVnodeOrphaned(salvinfo, vep->parent));
/*
 * Salvage one directory vnode (index i of dirVnodeInfo) of volume rwVid.
 *
 * Visible flow: return early when the entry is already marked salvaged or
 * has no inode; for vnode 1 a non-zero parent is reset to 0 and the entry
 * is marked changed; the parent directory is salvaged first through a
 * recursive call when CheckVnodeNumber() finds it and it is not yet
 * salvaged.  The DirSummary is then filled in and a salvage dir handle set
 * up.  dirok is forced to 0 without calling DirOK() when RebuildDirs is
 * set and Testing is not.  CopyAndSalvage() rebuilds the directory,
 * JudgeEntry() is run over every entry through afs_dir_EnumerateDir(), the
 * old inode is released with IH_DEC() when the directory was copied and
 * Testing is off, and the summary is copied to *rootdir for vnode 1 with
 * unique 1.
 *
 * dir, dirHandle and path are static.  The recursive call for the parent
 * is issued before they are filled in for this directory.
 * NOTE(review): the conditions guarding the "Directory bad" log and
 * CopyAndSalvage(), and the place where *rootdirfound is set, are not
 * visible in this chunk.
 */
3477 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3478 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3479 struct DirSummary *rootdir, int *rootdirfound)
3481 static struct DirSummary dir;
3482 static struct DirHandle dirHandle;
3483 struct VnodeEssence *parent;
3484 static char path[MAXPATHLEN];
3487 if (dirVnodeInfo->vnodes[i].salvaged)
3488 return; /* already salvaged */
3491 dirVnodeInfo->vnodes[i].salvaged = 1;
3493 if (dirVnodeInfo->inodes[i] == 0)
3494 return; /* Not allocated to a directory */
/* Vnode 1 is handled specially: a non-zero parent on it is reset to 0. */
3496 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3497 if (dirVnodeInfo->vnodes[i].parent) {
3498 Log("Bad parent, vnode 1; %s...\n",
3499 (Testing ? "skipping" : "salvaging"));
3500 dirVnodeInfo->vnodes[i].parent = 0;
3501 dirVnodeInfo->vnodes[i].changed = 1;
/* Salvage the parent directory before this one if it has not been done. */
3504 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3505 if (parent && parent->salvaged == 0)
3506 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3507 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3508 rootdir, rootdirfound);
/* Describe this directory in the static DirSummary. */
3511 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3512 dir.unique = dirVnodeInfo->vnodes[i].unique;
3515 dir.parent = dirVnodeInfo->vnodes[i].parent;
3516 dir.haveDot = dir.haveDotDot = 0;
3517 dir.ds_linkH = alinkH;
3518 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3519 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
3521 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3524 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3525 (Testing ? "skipping" : "salvaging"));
3528 CopyAndSalvage(salvinfo, &dir);
3530 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
/* Keep a copy of the handle; it is the one enumerated and IH_DEC'd below. */
3533 dirHandle = dir.dirHandle;
3536 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3537 &dirVnodeInfo->vnodes[i], path);
3540 /* If enumeration failed for random reasons, we will probably delete
3541 * too much stuff, so we guard against this instead.
3543 struct judgeEntry_params judge_params;
3544 judge_params.salvinfo = salvinfo;
3545 judge_params.dir = &dir;
3547 opr_Verify(afs_dir_EnumerateDir(&dirHandle, JudgeEntry,
3548 &judge_params) == 0);
3551 /* Delete the old directory if it was copied in order to salvage.
3552 * CopyOnWrite has written the new inode # to the disk, but we still
3553 * have the old one in our local structure here. Thus, we idec the
3557 if (dir.copied && !Testing) {
3558 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3559 opr_Assert(code == 0);
3560 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3563 /* Remember rootdir DirSummary _after_ it has been judged */
3564 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3565 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3573 * Get a new FID that can be used to create a new file.
3575 * @param[in] volHeader vol header for the volume
3576 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3577 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3578 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3579 * updated to the new max unique if we create a new
3583 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3584 VnodeClass class, AFSFid *afid, Unique *maxunique)
3587 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3588 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
3592 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3593 /* no free vnodes; make a new one */
3594 salvinfo->vnodeInfo[class].nVnodes++;
3595 salvinfo->vnodeInfo[class].vnodes =
3596 realloc(salvinfo->vnodeInfo[class].vnodes,
3597 sizeof(struct VnodeEssence) * (i+1));
3599 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
3602 afid->Vnode = bitNumberToVnodeNumber(i, class);
3604 if (volHeader->uniquifier < (*maxunique + 1)) {
3605 /* header uniq is bad; it will get bumped by 2000 later */
3606 afid->Unique = *maxunique + 1 + 2000;
3609 /* header uniq seems okay; just use that */
3610 afid->Unique = *maxunique = volHeader->uniquifier++;
3615 * Create a vnode for a README file explaining not to use a recreated-root vol.
3617 * @param[in] volHeader vol header for the volume
3618 * @param[in] alinkH ihandle for i/o for the volume
3619 * @param[in] vid volume id
3620 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3621 * updated to the new max unique if we create a new
3623 * @param[out] afid FID for the new readme vnode
3624 * @param[out] ainode the inode for the new readme file
3626 * @return operation status
/*
 * Reviewer notes on the visible body:
 *  - Order of work: obtain a small-vnode FID from GetNewFID(), create the
 *    data inode with IH_CREATE() and write the README text into it, build a
 *    calloc'ed small disk vnode describing that file and write it into the
 *    small vnode index, mirror the same fields into the in-memory
 *    VnodeEssence, and hand the inode back through *ainode.
 *  - The code whose log message is tagged "(recovery)" drops the link on
 *    the new inode with IH_DEC().
 *  - NOTE(review): the short-write message for the README contents prints
 *    sizeof(readme), while the value compared against is length, which is
 *    sizeof(readme) - 1.
 *  - NOTE(review): the allocation-failure message is prefixed
 *    "CreateRootDir" although it is emitted from this function.
 *  - NOTE(review): the conditions that lead to the recovery code and the
 *    return statements are not visible in this chunk.
 */
3631 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3632 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3636 struct VnodeDiskObject *rvnode = NULL;
3638 IHandle_t *readmeH = NULL;
3639 struct VnodeEssence *vep;
3641 time_t now = time(NULL);
3643 /* Try to make the note brief, but informative. Only administrators should
3644 * be able to read this file at first, so we can hopefully assume they
3645 * know what AFS is, what a volume is, etc. */
3647 "This volume has been salvaged, but has lost its original root directory.\n"
3648 "The root directory that exists now has been recreated from orphan files\n"
3649 "from the rest of the volume. This recreated root directory may interfere\n"
3650 "with old cached data on clients, and there is no way the salvager can\n"
3651 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3652 "use this volume, but only copy the salvaged data to a new volume.\n"
3653 "Continuing to use this volume as it exists now may cause some clients to\n"
3654 "behave oddly when accessing this volume.\n"
3655 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3656 /* ^ the person reading this probably just lost some data, so they could
3657 * use some cheering up. */
3659 /* -1 for the trailing NUL */
3660 length = sizeof(readme) - 1;
3662 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
/* Look the essence up only after GetNewFID(), which may grow the array. */
3664 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3666 /* create the inode and write the contents */
3667 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3668 salvinfo->fileSysPath, 0, vid,
3669 afid->Vnode, afid->Unique, 1);
3670 if (!VALID_INO(readmeinode)) {
3671 Log("CreateReadme: readme IH_CREATE failed\n");
3675 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3676 bytes = IH_IWRITE(readmeH, 0, readme, length);
3677 IH_RELEASE(readmeH);
3679 if (bytes != length) {
3680 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3681 (int)sizeof(readme));
3685 /* create the vnode and write it out */
3686 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
3688 Log("CreateRootDir: error alloc'ing memory\n");
3692 rvnode->type = vFile;
3694 rvnode->modeBits = 0777;
3695 rvnode->linkCount = 1;
3696 VNDISK_SET_LEN(rvnode, length);
3697 rvnode->uniquifier = afid->Unique;
3698 rvnode->dataVersion = 1;
3699 VNDISK_SET_INO(rvnode, readmeinode);
3700 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3705 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
3707 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3708 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3709 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3711 if (bytes != SIZEOF_SMALLDISKVNODE) {
3712 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3713 (int)SIZEOF_SMALLDISKVNODE);
3717 /* update VnodeEssence for new readme vnode */
3718 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
3720 vep->blockCount = nBlocks(length);
3721 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3722 vep->parent = rvnode->parent;
3723 vep->unique = rvnode->uniquifier;
3724 vep->modeBits = rvnode->modeBits;
3725 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3726 vep->type = rvnode->type;
3727 vep->author = rvnode->author;
3728 vep->owner = rvnode->owner;
3729 vep->group = rvnode->group;
3739 *ainode = readmeinode;
3744 if (IH_DEC(alinkH, readmeinode, vid)) {
3745 Log("CreateReadme (recovery): IH_DEC failed\n");
3757 * create a root dir for a volume that lacks one.
3759 * @param[in] volHeader vol header for the volume
3760 * @param[in] alinkH ihandle for disk access for this volume group
3761 * @param[in] vid volume id we're dealing with
3762 * @param[out] rootdir populated with info about the new root dir
3763 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3764 * updated to the new max unique if we create a new
3767 * @return operation status
/*
 * Reviewer notes on the visible body:
 *  - Nothing is created when the volume has neither large nor small vnodes,
 *    or when the in-memory entry for vnode 1 is already non-null.
 *  - When there are no large vnodes at all, one-entry vnodes[] and inodes[]
 *    arrays are allocated for the large class so vnode 1 has a slot.
 *  - Order of work: CreateReadme(), IH_CREATE() for the root inode, build
 *    the directory with afs_dir_MakeDir() and add "README.ROOTDIR", take
 *    the directory length, build and write the large disk vnode for vnode 1
 *    (its ACL grants READ and LOOKUP to id -204), then update the in-memory
 *    VnodeEssence and the caller's DirSummary.
 *  - The recovery code releases the root and readme inodes with IH_DEC()
 *    according to the decroot and decreadme flags.
 *  - NOTE(review): the result of strdup(".") stored in rootdir->name is not
 *    checked in the visible lines.
 *  - NOTE(review): where decroot/decreadme are set, how 'did' and 'dv' are
 *    initialised, and the return statements are not visible in this chunk.
 */
3772 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3773 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
3777 int decroot = 0, decreadme = 0;
3778 AFSFid did, readmeid;
3781 struct VnodeDiskObject *rootvnode = NULL;
3782 struct acl_accessList *ACL;
3785 struct VnodeEssence *vep;
3786 Inode readmeinode = 0;
3787 time_t now = time(NULL);
3789 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3790 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3794 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3795 /* We don't have any large vnodes in the volume; allocate room
3796 * for one so we can recreate the root dir */
3797 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3798 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3799 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3801 opr_Assert(salvinfo->vnodeInfo[vLarge].vnodes);
3802 opr_Assert(salvinfo->vnodeInfo[vLarge].inodes);
/* Slots for vnode 1 in the large-class essence and inode arrays. */
3805 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3806 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3807 if (vep->type != vNull) {
3808 Log("Not creating new root dir; existing vnode 1 is non-null\n");
3812 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3813 &readmeinode) != 0) {
3818 /* set the DV to a very high number, so it is unlikely that we collide
3819 * with a cached DV */
3822 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3824 if (!VALID_INO(rootinode)) {
3825 Log("CreateRootDir: IH_CREATE failed\n");
3830 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3831 rootinode, &salvinfo->VolumeChanged);
/* The same FID is passed for both arguments of afs_dir_MakeDir(). */
3835 if (afs_dir_MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3836 Log("CreateRootDir: MakeDir failed\n");
3839 if (afs_dir_Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3840 Log("CreateRootDir: Create failed\n");
3844 length = afs_dir_Length(&rootdir->dirHandle);
3845 DZap(&rootdir->dirHandle);
3847 /* create the new root dir vnode */
3848 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
3850 Log("CreateRootDir: malloc failed\n");
3854 /* only give 'rl' permissions to 'system:administrators'. We do this to
3855 * try to catch the attention of an administrator, that they should not
3856 * be writing to this directory or continue to use it. */
3857 ACL = VVnodeDiskACL(rootvnode);
3858 ACL->size = sizeof(struct acl_accessList);
3859 ACL->version = ACL_ACLVERSION;
3863 ACL->entries[0].id = -204; /* system:administrators */
3864 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
3866 rootvnode->type = vDirectory;
3867 rootvnode->cloned = 0;
3868 rootvnode->modeBits = 0777;
3869 rootvnode->linkCount = 2;
3870 VNDISK_SET_LEN(rootvnode, length);
3871 rootvnode->uniquifier = 1;
3872 rootvnode->dataVersion = dv;
3873 VNDISK_SET_INO(rootvnode, rootinode);
3874 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3875 rootvnode->author = 0;
3876 rootvnode->owner = 0;
3877 rootvnode->parent = 0;
3878 rootvnode->group = 0;
3879 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3881 /* write it out to disk */
3882 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3883 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3884 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3886 if (bytes != SIZEOF_LARGEDISKVNODE) {
3887 /* just cast to int and don't worry about printing real 64-bit ints;
3888 * a large disk vnode isn't anywhere near the 32-bit limit */
3889 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3890 (int)SIZEOF_LARGEDISKVNODE);
3894 /* update VnodeEssence for the new root vnode */
3895 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3897 vep->blockCount = nBlocks(length);
3898 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3899 vep->parent = rootvnode->parent;
3900 vep->unique = rootvnode->uniquifier;
3901 vep->modeBits = rootvnode->modeBits;
3902 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3903 vep->type = rootvnode->type;
3904 vep->author = rootvnode->author;
3905 vep->owner = rootvnode->owner;
3906 vep->group = rootvnode->group;
3916 /* update DirSummary for the new root vnode */
3917 rootdir->vnodeNumber = 1;
3918 rootdir->unique = 1;
3919 rootdir->haveDot = 1;
3920 rootdir->haveDotDot = 1;
3921 rootdir->rwVid = vid;
3922 rootdir->copied = 0;
3923 rootdir->parent = 0;
3924 rootdir->name = strdup(".");
3925 rootdir->vname = volHeader->name;
3926 rootdir->ds_linkH = alinkH;
3933 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3934 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3936 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3937 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3947 * salvage a volume group.
3949 * @param[in] salvinfo information for the current salvage job
3950 * @param[in] rwIsp inode summary for rw volume
3951 * @param[in] alinkH link table inode handle
3953 * @return operation status
/*
 * Reviewer overview of the visible phases:
 *  1. Read the volume header through a handle on header.volumeInfo and
 *     assert its size, magic and that destroyMe is not DESTROY_ME.
 *  2. DistilVnodeEssence() for the large and the small vnode index.
 *  3. SalvageDir() for every large vnode, then sync.
 *  4. When no root directory was found, orphans is ORPH_ATTACH and Testing
 *     is off, try CreateRootDir().
 *  5. Walk every vnode of every class; unclaimed, in-use, non-root vnodes
 *     have their link bookkeeping adjusted and, with ORPH_ATTACH, are
 *     entered into the root directory as __ORPHANDIR__ or __ORPHANFILE__
 *     followed by vnode and unique (up to 10 name attempts).
 *  6. Release the pre-copy root directory inode when the root directory
 *     was copied during step 5, then DFlush().
 *  7. Rewrite every vnode that is flagged changed or has a non-zero count,
 *     removing orphans when requested, and accumulate file/block totals.
 *  8. Free the saved names, fix filecount, diskused and the uniquifier in
 *     the header, notify the fileserver or unlink the FS state file, clear
 *     the in-use and needs-salvage flags and write the header back.
 * NOTE(review): many short lines (conditions, braces, returns) are not
 * visible in this chunk, so the exact nesting of the steps above should be
 * confirmed against the full file.
 */
3957 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3959 /* This routine, for now, will only be called for read-write volumes */
3961 int BlocksInVolume = 0, FilesInVolume = 0;
3963 struct DirSummary rootdir, oldrootdir;
3964 struct VnodeInfo *dirVnodeInfo;
3965 struct VnodeDiskObject vnode;
3966 VolumeDiskData volHeader;
3968 int orphaned, rootdirfound = 0;
3969 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3970 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3971 struct VnodeEssence *vep;
3974 afs_sfsize_t nBytes;
3976 VnodeId LFVnode, ThisVnode;
3977 Unique LFUnique, ThisUnique;
3981 vid = rwIsp->volSummary->header.id;
3982 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3983 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3984 opr_Assert(nBytes == sizeof(volHeader));
3985 opr_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3986 opr_Assert(volHeader.destroyMe != DESTROY_ME);
3987 /* (should not have gotten this far with DESTROY_ME flag still set!) */
/* Summarise both vnode indexes; maxunique ends up as the largest
 * uniquifier found in either of them. */
3989 DistilVnodeEssence(salvinfo, vid, vLarge,
3990 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
3991 DistilVnodeEssence(salvinfo, vid, vSmall,
3992 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
/* Salvage every directory; SalvageDir() fills in rootdir for vnode 1. */
3994 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
3995 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3996 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
3997 &rootdir, &rootdirfound);
4000 nt_sync(salvinfo->fileSysDevice);
4002 sync(); /* This used to be done lower level, for every dir */
4009 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
4011 Log("Cannot find root directory for volume %lu; attempting to create "
4012 "a new one\n", afs_printable_uint32_lu(vid));
4014 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
4019 salvinfo->VolumeChanged = 1;
4023 /* Parse each vnode looking for orphaned vnodes and
4024 * connect them to the tree as orphaned (if requested).
4026 oldrootdir = rootdir;
4027 for (class = 0; class < nVNODECLASSES; class++) {
4028 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
4029 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
4030 ThisVnode = bitNumberToVnodeNumber(v, class);
4031 ThisUnique = vep->unique;
4033 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
4034 continue; /* Ignore unused, claimed, and root vnodes */
4036 /* This vnode is orphaned. If it is a directory vnode, then the '..'
4037 * entry in this vnode had incremented the parent link count (In
4038 * JudgeEntry()). We need to go to the parent and decrement that
4039 * link count. But if the parent's unique is zero, then the parent
4040 * link count was not incremented in JudgeEntry().
4042 if (class == vLarge) { /* directory vnode */
4043 pv = vnodeIdToBitNumber(vep->parent);
4044 if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
4045 if (vep->parent == 1 && newrootdir) {
4046 /* this vnode's parent was the volume root, and
4047 * we just created the volume root. So, the parent
4048 * dir didn't exist during JudgeEntry, so the link
4049 * count was not inc'd there, so don't dec it here.
4055 salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
4061 continue; /* If no rootdir, can't attach orphaned files */
4063 /* Here we attach orphaned files and directories into the
4064 * root directory, LVVnode, making sure link counts stay correct.
4066 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
4067 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
4068 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
4070 /* Update this orphaned vnode's info. Its parent info and
4071 * link count (do for orphaned directories and files).
4073 vep->parent = LFVnode; /* Parent is the root dir */
4074 vep->unique = LFUnique;
/* count is later subtracted from the on-disk link count when the vnode
 * is rewritten, so decrementing it here raises the final link count. */
4077 vep->count--; /* Inc link count (root dir will pt to it) */
4079 /* If this orphaned vnode is a directory, change '..'.
4080 * The name of the orphaned dir/file is unknown, so we
4081 * build a unique name. No need to CopyOnWrite the directory
4082 * since it is not connected to tree in BK or RO volume and
4083 * won't be visible there.
4085 if (class == vLarge) {
4089 /* Remove and recreate the ".." entry in this orphaned directory */
4090 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
4091 salvinfo->vnodeInfo[class].inodes[v],
4092 &salvinfo->VolumeChanged);
4094 pa.Unique = LFUnique;
4095 opr_Verify(afs_dir_Delete(&dh, "..") == 0);
4096 opr_Verify(afs_dir_Create(&dh, "..", &pa) == 0);
4098 /* The original parent's link count was decremented above.
4099 * Here we increment the new parent's link count.
4101 pv = vnodeIdToBitNumber(LFVnode);
4102 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
4106 /* Go to the root dir and add this entry. The link count of the
4107 * root dir was incremented when ".." was created. Try 10 times.
4109 for (j = 0; j < 10; j++) {
4110 pa.Vnode = ThisVnode;
4111 pa.Unique = ThisUnique;
4113 snprintf(npath, sizeof npath, "%s.%u.%u",
4114 ((class == vLarge) ? "__ORPHANDIR__"
4115 : "__ORPHANFILE__"),
4116 ThisVnode, ThisUnique);
4118 CopyOnWrite(salvinfo, &rootdir);
4119 code = afs_dir_Create(&rootdir.dirHandle, npath, &pa);
4123 ThisUnique += 50; /* Try creating a different file */
4125 opr_Assert(code == 0);
4126 Log("Attaching orphaned %s to volume's root dir as %s\n",
4127 ((class == vLarge) ? "directory" : "file"), npath);
4129 } /* for each vnode in the class */
4130 } /* for each class of vnode */
4132 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
4134 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
4136 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
4138 opr_Assert(code == 0);
4139 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
4142 DFlush(); /* Flush the changes */
4143 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
4144 Log("Cannot attach orphaned files and directories: Root directory not found\n");
4145 orphans = ORPH_IGNORE;
4148 /* Write out all changed vnodes. Orphaned files and directories
4149 * will get removed here also (if requested).
4151 for (class = 0; class < nVNODECLASSES; class++) {
4152 afs_sfsize_t nVnodes = salvinfo->vnodeInfo[class].nVnodes;
4153 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
4154 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
4155 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
4156 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
4157 for (i = 0; i < nVnodes; i++) {
4158 struct VnodeEssence *vnp = &vnodes[i];
4159 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
4161 /* If the vnode is good but is unclaimed (not listed in
4162 * any directory entries), then it is orphaned.
4165 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
4166 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
/* Only vnodes flagged as changed, or whose link-count delta is
 * non-zero, are read back from the index and rewritten. */
4170 if (vnp->changed || vnp->count) {
4173 IH_IREAD(salvinfo->vnodeInfo[class].handle,
4174 vnodeIndexOffset(vcp, vnodeNumber),
4175 (char *)&vnode, sizeof(vnode));
4176 opr_Assert(nBytes == sizeof(vnode));
4178 vnode.parent = vnp->parent;
4179 oldCount = vnode.linkCount;
/* vnp->count started as the on-disk link count and was adjusted while
 * directories were judged; the remainder is the correction applied. */
4180 vnode.linkCount = vnode.linkCount - vnp->count;
4183 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4185 if (!vnp->todelete) {
4186 /* Orphans should have already been attached (if requested) */
4187 opr_Assert(orphans != ORPH_ATTACH);
4188 oblocks += vnp->blockCount;
4191 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4193 BlocksInVolume -= vnp->blockCount;
4195 if (VNDISK_GET_INO(&vnode)) {
4197 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4198 opr_Assert(code == 0);
4200 memset(&vnode, 0, sizeof(vnode));
4202 } else if (vnp->count) {
4204 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4207 vnode.modeBits = vnp->modeBits;
4210 vnode.dataVersion++;
4213 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4214 vnodeIndexOffset(vcp, vnodeNumber),
4215 (char *)&vnode, sizeof(vnode));
4216 opr_Assert(nBytes == sizeof(vnode));
4218 salvinfo->VolumeChanged = 1;
4222 if (!Showmode && ofiles) {
4223 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4225 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
/* Release the entry names saved on the vnode summaries. */
4229 for (class = 0; class < nVNODECLASSES; class++) {
4230 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4231 for (i = 0; i < vip->nVnodes; i++)
4232 if (vip->vnodes[i].name)
4233 free(vip->vnodes[i].name);
4240 /* Set correct resource utilization statistics */
4241 volHeader.filecount = FilesInVolume;
4242 volHeader.diskused = BlocksInVolume;
4244 /* Make sure the uniquifer is big enough: maxunique is the real maxUniquifier */
4245 if (volHeader.uniquifier < (maxunique + 1)) {
4247 Log("Volume uniquifier %u is too low (max uniq %u); fixed\n", volHeader.uniquifier, maxunique);
4248 /* Plus 2,000 in case there are workstations out there with
4249 * cached vnodes that have since been deleted
4251 volHeader.uniquifier = (maxunique + 1 + 2000);
4255 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4256 "Only use this salvaged volume to copy data to another volume; "
4257 "do not continue to use this volume (%lu) as-is.\n",
4258 afs_printable_uint32_lu(vid));
4261 if (!Testing && salvinfo->VolumeChanged) {
4262 #ifdef FSSYNC_BUILD_CLIENT
4263 if (salvinfo->useFSYNC) {
4264 afs_int32 fsync_code;
4266 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4268 Log("Error trying to tell the fileserver to break callbacks for "
4269 "changed volume %lu; error code %ld\n",
4270 afs_printable_uint32_lu(vid),
4271 afs_printable_int32_ld(fsync_code));
4273 salvinfo->VolumeChanged = 0;
4276 #endif /* FSSYNC_BUILD_CLIENT */
4278 #ifdef AFS_DEMAND_ATTACH_FS
4279 if (!salvinfo->useFSYNC) {
4280 /* A volume's contents have changed, but the fileserver will not
4281 * break callbacks on the volume until it tries to load the vol
4282 * header. So, to reduce the amount of time a client could have
4283 * stale data, remove fsstate.dat, so the fileserver will init
4284 * callback state with all clients. This is a very coarse hammer,
4285 * and in the future we should just record which volumes have
4287 code = unlink(AFSDIR_SERVER_FSSTATE_FILEPATH);
4288 if (code && errno != ENOENT) {
4289 Log("Error %d when trying to unlink FS state file %s\n", errno,
4290 AFSDIR_SERVER_FSSTATE_FILEPATH);
4296 /* Turn off the inUse bit; the volume's been salvaged! */
4297 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4298 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4299 volHeader.inService = 1; /* allow service again */
/* VolumeChanged may already have been reset to 0 in the FSYNC branch
 * above; the header flag is set only if it is still non-zero here. */
4300 volHeader.needsCallback = (salvinfo->VolumeChanged != 0);
4301 volHeader.dontSalvage = DONT_SALVAGE;
4302 salvinfo->VolumeChanged = 0;
4304 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4305 opr_Assert(nBytes == sizeof(volHeader));
4308 Log("%sSalvaged %s (%" AFS_VOLID_FMT "): %d files, %d blocks\n",
4309 (Testing ? "It would have " : ""), volHeader.name, afs_printable_VolumeId_lu(volHeader.id),
4310 FilesInVolume, BlocksInVolume);
/* Drop the index handles that DistilVnodeEssence() initialised. */
4313 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4314 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
/*
 * Reset the state flags in the on-disk volume header reached through
 * summary->volumeInfoHandle: the header is read and its size and magic are
 * asserted, inUse and needsSalvaged are cleared, inService is set,
 * dontSalvage is set to DONT_SALVAGE, and the header is written back with
 * the write size asserted.
 * NOTE(review): a line between the flag updates and the write-back is not
 * visible in this chunk, so the write may be conditional; confirm against
 * the full file.
 */
4320 ClearROInUseBit(struct VolumeSummary *summary)
4322 IHandle_t *h = summary->volumeInfoHandle;
4323 afs_sfsize_t nBytes;
4325 VolumeDiskData volHeader;
4327 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4328 opr_Assert(nBytes == sizeof(volHeader));
4329 opr_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
4330 volHeader.inUse = 0;
4331 volHeader.needsSalvaged = 0;
4332 volHeader.inService = 1;
4333 volHeader.dontSalvage = DONT_SALVAGE;
4335 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4336 opr_Assert(nBytes == sizeof(volHeader));
4341 * Possibly delete the volume.
4343 * deleteMe - Always do so, only a partial volume.
/*
 * Reviewer notes on the visible body:
 *  - The delete branch is taken for read-only volumes or when deleteMe is
 *    set, and only if the volume summary exists and is not already marked
 *    deleted.  It logs why the volume is going away, destroys the volume
 *    disk header with VDestroyVolumeDiskHeader(), unlinks the header file
 *    (ENOENT is tolerated), calls AskDelete() when useFSYNC is set, and
 *    marks the summary as deleted.
 *  - Otherwise, when check is 0, the failure is logged and the salvage is
 *    aborted through Abort().
 *  - NOTE(review): the header path is built with sprintf(); the declaration
 *    of 'path' is not visible here, so confirm it can hold fileSysPath,
 *    the separator and the VMAXPATHLEN file name.
 *  - NOTE(review): which of the two log variants is chosen, and whether
 *    the deletion is skipped under Testing, is not visible in this chunk.
 */
4346 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4347 char *message, int deleteMe, int check)
4349 if (readOnly(isp) || deleteMe) {
4350 if (isp->volSummary && !isp->volSummary->deleted) {
4353 Log("Volume %" AFS_VOLID_FMT " (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", afs_printable_VolumeId_lu(isp->volumeId));
4355 Log("It will be deleted on this server (you may find it elsewhere)\n");
4358 Log("Volume %" AFS_VOLID_FMT " needs to be salvaged. Since it is read-only, however,\n", afs_printable_VolumeId_lu(isp->volumeId));
4360 Log("it will be deleted instead. It should be recloned.\n");
4365 char filename[VMAXPATHLEN];
4366 VolumeExternalName_r(isp->volumeId, filename, sizeof(filename));
4367 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, filename);
4369 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4371 Log("Error %ld destroying volume disk header for volume %" AFS_VOLID_FMT "\n",
4372 afs_printable_int32_ld(code),
4373 afs_printable_VolumeId_lu(isp->volumeId));
4376 /* make sure we actually delete the header file; ENOENT
4377 * is fine, since VDestroyVolumeDiskHeader probably already
4379 if (unlink(path) && errno != ENOENT) {
4380 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4382 if (salvinfo->useFSYNC) {
4383 AskDelete(salvinfo, isp->volumeId);
4385 isp->volSummary->deleted = 1;
4388 } else if (!check) {
4389 Log("%s salvage was unsuccessful: read-write volume %" AFS_VOLID_FMT "\n", message,
4390 afs_printable_VolumeId_lu(isp->volumeId));
4391 Abort("Salvage of volume %" AFS_VOLID_FMT " aborted\n", afs_printable_VolumeId_lu(isp->volumeId));
4395 #ifdef AFS_DEMAND_ATTACH_FS
4397 * Locks a volume on disk for salvaging.
4399 * @param[in] volumeId volume ID to lock
4401 * @return operation status
4403 * @retval -1 volume lock raced with a fileserver restart; all volumes must
4404 * be checked out and locked again
/*
 * Reviewer notes on the visible body:
 *  - The lock type comes from VVolLockType(V_VOLUPD, 1) and the lock is
 *    taken without blocking through VLockVolumeByIdNB().  EBUSY aborts
 *    with "Someone else appears to be using volume"; the other Abort()
 *    reports the error code.
 *  - FSYNC_VerifyCheckout() is then called; SYNC_DENIED is treated as
 *    "need to retry checking out volumes", any other result than SYNC_OK
 *    aborts.
 *  - Finally the volume disk header is read, the volume info is loaded
 *    through an inode handle, inUse is set to programType and the header
 *    is written back under opr_Verify().
 *  - NOTE(review): the return statements and the handling of a failed
 *    header read are not visible in this chunk.
 */
4409 LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
4414 /* should always be WRITE_LOCK, but keep the lock-type logic all
4415 * in one place, in VVolLockType. Params will be ignored, but
4416 * try to provide what we're logically doing. */
4417 locktype = VVolLockType(V_VOLUPD, 1);
4419 code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
4421 if (code == EBUSY) {
4422 Abort("Someone else appears to be using volume %lu; Aborted\n",
4423 afs_printable_uint32_lu(volumeId));
4425 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4426 afs_printable_int32_ld(code),
4427 afs_printable_uint32_lu(volumeId));
/* Confirm with the fileserver that the volume is still checked out. */
4430 code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPartition->name, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4431 if (code == SYNC_DENIED) {
4432 /* need to retry checking out volumes */
4435 if (code != SYNC_OK) {
4436 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4437 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4440 /* set inUse = programType in the volume header to ensure that nobody
4441 * tries to use this volume again without salvaging, if we somehow crash
4442 * or otherwise exit before finishing the salvage.
4446 struct VolumeHeader header;
4447 struct VolumeDiskHeader diskHeader;
4448 struct VolumeDiskData volHeader;
4450 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
4455 DiskToVolumeHeader(&header, &diskHeader);
4457 IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
4458 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4459 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4465 volHeader.inUse = programType;
4467 /* If we can't re-write the header, bail out and error. We don't
4468 * assert when reading the header, since it's possible the
4469 * header isn't really there (when there's no data associated
4470 * with the volume; we just delete the vol header file in that
4471 * case). But if it's there enough that we can read it, but
4472 * somehow we cannot write to it to signify we're salvaging it,
4473 * we've got a big problem and we cannot continue. */
4474 opr_Verify(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader))
4475 == sizeof(volHeader));
4482 #endif /* AFS_DEMAND_ATTACH_FS */
/*
 * AskError - ask the fileserver, over FSSYNC, to force a volume into the
 * error state.
 *
 * salvinfo  supplies the partition name (salvinfo->fileSysPartition->name)
 * volumeId  volume to be forced into the error state
 *
 * The request (FSYNC_VOL_FORCE_ERROR) is only compiled in when
 * AFS_DEMAND_ATTACH_FS or AFS_DEMAND_ATTACH_UTIL is defined.  If the
 * request does not return SYNC_OK the failure is logged together with the
 * SYNC code and its SYNC_res2string() text; no retry is visible here.
 */
4485 AskError(struct SalvInfo *salvinfo, VolumeId volumeId)
4487 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4489 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4490 FSYNC_VOL_FORCE_ERROR, FSYNC_WHATEVER, NULL);
4491 if (code != SYNC_OK) {
4492 Log("AskError: failed to force volume %lu into error state; "
4493 "SYNC error code %ld (%s)\n", (long unsigned)volumeId,
4494 (long)code, SYNC_res2string(code));
4496 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
/*
 * AskOffline - ask the fileserver (FSYNC_VOL_OFF, reason FSYNC_SALVAGE) to
 * take a volume offline so that it can be salvaged.
 *
 * salvinfo  supplies the partition name
 * volumeId  volume to take offline
 *
 * The request is attempted up to three times.  SYNC_DENIED and
 * SYNC_BAD_COMMAND are treated as fatal: a diagnostic is logged and the
 * salvage is stopped through Abort().  On the remaining failure path the
 * "trying again" message is logged and FSYNC_clientFinis() is called.
 * If the code is still not SYNC_OK after the loop, the salvage is aborted.
 *
 * NOTE(review): the conditions that pick between the paired
 * "may be required" / "is required" and DAFS / non-DAFS messages are not
 * visible in this excerpt -- presumably the AskDAFS() result; confirm
 * against the full file.
 */
4500 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
4505 memset(&res, 0, sizeof(res));
4507 for (i = 0; i < 3; i++) {
4508 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4509 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4511 if (code == SYNC_OK) {
4513 } else if (code == SYNC_DENIED) {
4515 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4517 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4518 Abort("Salvage aborted\n");
4519 } else if (code == SYNC_BAD_COMMAND) {
4520 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
4523 #ifdef AFS_DEMAND_ATTACH_FS
4524 Log("AskOffline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4526 Log("AskOffline: fileserver is DAFS but we are not.\n");
4529 #ifdef AFS_DEMAND_ATTACH_FS
4530 Log("AskOffline: fileserver is not DAFS but we are.\n");
4532 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4535 Abort("Salvage aborted\n");
4538 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4539 FSYNC_clientFinis();
4543 if (code != SYNC_OK) {
4544 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4545 Abort("Salvage aborted\n");
/*
 * Cached fileserver flavour (isDAFS) and the probe that determines it.
 *
 * isDAFS starts out as -1.  The probe sends an FSYNC_VOL_LISTVOLUMES
 * request, repeating it (at most three attempts) while the returned code
 * is non-zero and calling FSYNC_clientFinis() after a logged failure.  It
 * then tests SYNC_FLAG_DAFS_EXTENSIONS in the response header flags.  When
 * the fileserver type cannot be determined, it logs that a non-DAFS
 * fileserver is assumed.
 *
 * NOTE(review): the function header, the use of the cached value and the
 * assignments to isDAFS are not visible in this excerpt; the name AskDAFS
 * is taken from the log messages -- confirm against the full file.
 */
4549 /* don't want to pass around state; remember it here */
4550 static int isDAFS = -1;
4555 afs_int32 code = 1, i;
4557 /* we don't care if we race. the answer shouldn't change */
4561 memset(&res, 0, sizeof(res));
4563 for (i = 0; code && i < 3; i++) {
4564 code = FSYNC_VolOp(0, NULL, FSYNC_VOL_LISTVOLUMES, FSYNC_SALVAGE, &res);
4566 Log("AskDAFS: FSYNC_VOL_LISTVOLUMES failed with code %ld reason "
4567 "%ld (%s); trying again...\n", (long)code, (long)res.hdr.reason,
4568 FSYNC_reason2string(res.hdr.reason));
4569 FSYNC_clientFinis();
4575 Log("AskDAFS: could not determine DAFS-ness, assuming not DAFS\n");
4579 if ((res.hdr.flags & SYNC_FLAG_DAFS_EXTENSIONS)) {
/*
 * MaybeAskOnline - bring a volume back online only if it still appears to
 * exist.
 *
 * salvinfo  supplies the partition to look in
 * volumeId  volume to bring back online
 *
 * Reads the volume disk header with VReadVolumeDiskHeader().  Per the
 * comment below, a failed read is taken to mean the volume probably does
 * not exist, so no online request is needed; otherwise the work is handed
 * to AskOnline().
 */
4589 MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4591 struct VolumeDiskHeader diskHdr;
4593 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHdr);
4595 /* volume probably does not exist; no need to bring back online */
4598 AskOnline(salvinfo, volumeId);
/*
 * AskOnline - ask the fileserver (FSYNC_VOL_ON) to put a volume back
 * online.
 *
 * salvinfo  supplies the partition name
 * volumeId  volume to bring online
 *
 * Up to three attempts are made.  A denial or a protocol mismatch
 * (SYNC_BAD_COMMAND) is logged; on the remaining failure path the
 * "trying again" message is logged and FSYNC_clientFinis() is called.
 * No Abort() call appears in the visible lines of this function.
 */
4602 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4606 for (i = 0; i < 3; i++) {
4607 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4608 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4610 if (code == SYNC_OK) {
4612 } else if (code == SYNC_DENIED) {
4613 Log("AskOnline: file server denied online request to volume %" AFS_VOLID_FMT " partition %s; trying again...\n", afs_printable_VolumeId_lu(volumeId), salvinfo->fileSysPartition->name);
4614 } else if (code == SYNC_BAD_COMMAND) {
4615 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4617 Log("AskOnline: please make sure file server binaries are same version.\n");
4621 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
4622 FSYNC_clientFinis();
/*
 * AskDelete - ask the fileserver (FSYNC_VOL_DONE, reason FSYNC_SALVAGE) to
 * delete a volume.
 *
 * salvinfo  supplies the partition name
 * volumeId  volume the fileserver should delete
 *
 * Up to three attempts are made, with the response structure cleared
 * before each one.  A SYNC_FAILED result whose reason is
 * FSYNC_UNKNOWN_VOLID or FSYNC_WRONG_PART is accepted, because the volume
 * is then already effectively deleted.  On the remaining failure path the
 * "trying again" message is logged and FSYNC_clientFinis() is called.
 *
 * NOTE(review): every log message in this function is prefixed
 * "AskOnline:" although the function is AskDelete.  This looks like a
 * copy/paste leftover that makes the log misleading; consider correcting
 * the prefix.
 */
4629 AskDelete(struct SalvInfo *salvinfo, VolumeId volumeId)
4634 for (i = 0; i < 3; i++) {
4635 memset(&res, 0, sizeof(res));
4636 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4637 FSYNC_VOL_DONE, FSYNC_SALVAGE, &res);
4639 if (code == SYNC_OK) {
4641 } else if (code == SYNC_DENIED) {
4642 Log("AskOnline: file server denied DONE request to volume %" AFS_VOLID_FMT " partition %s; trying again...\n", afs_printable_VolumeId_lu(volumeId), salvinfo->fileSysPartition->name);
4643 } else if (code == SYNC_BAD_COMMAND) {
4644 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4647 #ifdef AFS_DEMAND_ATTACH_FS
4648 Log("AskOnline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4650 Log("AskOnline: fileserver is DAFS but we are not.\n");
4653 #ifdef AFS_DEMAND_ATTACH_FS
4654 Log("AskOnline: fileserver is not DAFS but we are.\n");
4656 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4660 } else if (code == SYNC_FAILED &&
4661 (res.hdr.reason == FSYNC_UNKNOWN_VOLID ||
4662 res.hdr.reason == FSYNC_WRONG_PART)) {
4663 /* volume is already effectively 'deleted' */
4667 Log("AskOnline: request for fileserver to delete volume failed; trying again...\n");
4668 FSYNC_clientFinis();
/*
 * CopyInode - copy the contents of inode1 into inode2 on the given device.
 *
 * device    device holding both inodes
 * inode1    source inode
 * inode2    destination inode
 * rwvolume  volume id used when initialising both inode handles
 *
 * The data is moved in buffer-sized pieces: FDH_PREAD() from the source
 * and FDH_PWRITE() to the destination, both at offset `size`.  A short
 * write, or a read that ends with anything other than 0, trips an
 * assertion.  Both descriptors are closed with FDH_REALLYCLOSE().
 *
 * NOTE(review): the source descriptor is asserted to be non-NULL, but no
 * matching check on destFdP is visible in this excerpt; confirm that a
 * failed open of the destination cannot reach FDH_PWRITE().
 */
4675 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4677 /* Volume parameter is passed in case iopen is upgraded in future to
4678 * require a volume Id to be passed
4681 IHandle_t *srcH, *destH;
4682 FdHandle_t *srcFdP, *destFdP;
4684 afs_foff_t size = 0;
4686 IH_INIT(srcH, device, rwvolume, inode1);
4687 srcFdP = IH_OPEN(srcH);
4688 opr_Assert(srcFdP != NULL);
4689 IH_INIT(destH, device, rwvolume, inode2);
4690 destFdP = IH_OPEN(destH);
4691 while ((nBytes = FDH_PREAD(srcFdP, buf, sizeof(buf), size)) > 0) {
4692 opr_Verify(FDH_PWRITE(destFdP, buf, nBytes, size) == nBytes);
4695 opr_Assert(nBytes == 0);
4696 FDH_REALLYCLOSE(srcFdP);
4697 FDH_REALLYCLOSE(destFdP);
/*
 * PrintInodeList - log every entry of the salvager's inode file.
 *
 * salvinfo  supplies the inode file descriptor (salvinfo->inodeFd)
 *
 * The whole file (OS_SIZE() bytes) is read into a malloc'd buffer and
 * walked as an array of struct ViceInodeInfo.  For each entry the inode
 * number, link count, byte count and the four u.param values are logged.
 * A negative size, a failed allocation or a short read trips an assertion.
 *
 * NOTE(review): no free(buf) is visible in this excerpt; confirm the
 * buffer is released after the loop.
 */
4704 PrintInodeList(struct SalvInfo *salvinfo)
4706 struct ViceInodeInfo *ip;
4707 struct ViceInodeInfo *buf;
4710 afs_sfsize_t st_size;
4712 st_size = OS_SIZE(salvinfo->inodeFd);
4713 opr_Assert(st_size >= 0);
4714 buf = malloc(st_size);
4715 opr_Assert(buf != NULL);
4716 nInodes = st_size / sizeof(struct ViceInodeInfo);
4717 opr_Verify(OS_READ(salvinfo->inodeFd, buf, st_size) == st_size);
4718 for (ip = buf; nInodes--; ip++) {
4719 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%" AFS_VOLID_FMT ",%u,%u,%u)\n", /* VolumeId in param */
4720 PrintInode(stmp, ip->inodeNumber), ip->linkCount,
4721 (afs_uintmax_t) ip->byteCount,
4722 afs_printable_VolumeId_lu(ip->u.param[0]), ip->u.param[1],
4723 ip->u.param[2], ip->u.param[3]);
/*
 * PrintInodeSummary - log one line per entry of salvinfo->inodeSummary.
 *
 * salvinfo  supplies the summary array and its length
 *           (salvinfo->nVolumesInInodeFile)
 *
 * Each line carries the volume id, RW volume id, index, inode count,
 * special-inode count and maximum uniquifier of the entry.
 */
4729 PrintInodeSummary(struct SalvInfo *salvinfo)
4732 struct InodeSummary *isp;
4734 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4735 isp = &salvinfo->inodeSummary[i];
4736 Log("VID:%" AFS_VOLID_FMT ", RW:%" AFS_VOLID_FMT ", index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", afs_printable_VolumeId_lu(isp->volumeId), afs_printable_VolumeId_lu(isp->RWvolumeId), isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/*
 * Process-forking helper (body fragment).
 *
 * NOTE(review): the function header and the fork call itself are not
 * visible in this excerpt.  What is visible: the NT code path asserts
 * unconditionally because forking is never used there, and under
 * AFS_DEMAND_ATTACH_FS a salvageserver child (f == 0) calls
 * VChildProcReconnectFS_r() when FSSYNC_BUILD_CLIENT is defined.  The
 * SALVSYNC_BUILD_CLIENT action is not shown.
 */
4746 opr_Assert(0); /* Fork is never executed in the NT code path */
4750 #ifdef AFS_DEMAND_ATTACH_FS
4751 if ((f == 0) && (programType == salvageServer)) {
4752 /* we are a salvageserver child */
4753 #ifdef FSSYNC_BUILD_CLIENT
4754 VChildProcReconnectFS_r();
4756 #ifdef SALVSYNC_BUILD_CLIENT
4760 #endif /* AFS_DEMAND_ATTACH_FS */
4761 #endif /* !AFS_NT40_ENV */
/*
 * Exit helper (body fragment).
 *
 * NOTE(review): the function header is not visible in this excerpt.  What
 * is visible: under AFS_DEMAND_ATTACH_FS a salvageserver walks partition
 * ids 0..VOLMAXPARTS and re-initialises each partition's volume lock file
 * before the SYNC channels are shut down (see the rationale in the
 * comment below), and a thread other than main_thread leaves through
 * pthread_exit() with `code` as its exit value.
 */
4768 #ifdef AFS_DEMAND_ATTACH_FS
4769 if (programType == salvageServer) {
4770 /* release all volume locks before closing down our SYNC channels.
4771 * the fileserver may try to online volumes we have checked out when
4772 * we close down FSSYNC, so we should make sure we don't have those
4773 * volumes locked when it does */
4774 struct DiskPartition64 *dp;
4776 for (i = 0; i <= VOLMAXPARTS; i++) {
4777 dp = VGetPartitionById(i, 0);
4779 VLockFileReinit(&dp->volLockFile);
4782 # ifdef SALVSYNC_BUILD_CLIENT
4785 # ifdef FSSYNC_BUILD_CLIENT
4789 #endif /* AFS_DEMAND_ATTACH_FS */
4792 if (main_thread != pthread_self())
4793 pthread_exit((void *)code);
/*
 * Child-reaping helper (body fragment).
 *
 * NOTE(review): the function header is not visible in this excerpt.  What
 * is visible: wait() collects a child's status and is asserted not to
 * fail, a core dump of `prog` is logged, and a signalled or non-zero exit
 * is tested.  The action taken for that last case is not shown here.
 */
4814 pid = wait(&status);
4815 opr_Assert(pid != -1);
4816 if (WCOREDUMP(status))
4817 Log("\"%s\" core dumped!\n", prog);
4818 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp - format `clock` as local time into a static 20-byte buffer.
 *
 * clock      time to format
 * precision  presumably selects between the two layouts below (the test
 *            itself is not visible in this excerpt)
 *
 * Two layouts are produced: "%m/%d/%Y %H:%M:%S" and "%m/%d/%Y %H:%M".
 * Because the buffer is static, its contents are overwritten by the next
 * call.
 *
 * NOTE(review): the localtime() result is handed to strftime() with no
 * visible NULL check.
 */
4824 TimeStamp(time_t clock, int precision)
4827 static char timestamp[20];
4828 lt = localtime(&clock);
4830 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M:%S", lt);
4832 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M", lt);
/*
 * CheckLogFile - rotate and open the salvager log at log_path.
 *
 * log_path  path of the log file
 *
 * The visible lines rename an existing log to "<log_path>.old" (skipped
 * when asprintf() fails) and then open log_path in append mode into
 * logFile.  Per the comment below, stdout is used when the open fails.
 * AFS_DEBUG_IOPS_LOG() is invoked on the log when AFS_NAMEI_ENV is not
 * defined.
 *
 * NOTE(review): the code between the AFS_NT40_ENV guard and the rename is
 * not visible in this excerpt, nor is any release of oldSlvgLog.
 */
4837 CheckLogFile(char * log_path)
4841 #ifndef AFS_NT40_ENV
4848 if (asprintf(&oldSlvgLog, "%s.old", log_path) >= 0) {
4849 rk_rename(log_path, oldSlvgLog);
4852 logFile = afs_fopen(log_path, "a");
4854 if (!logFile) { /* still nothing, use stdout */
4857 #ifndef AFS_NAMEI_ENV
4858 AFS_DEBUG_IOPS_LOG(logFile);
/*
 * TimeStampLogFile - give the log at log_path an additional, timestamped
 * name (compiled only when AFS_NT40_ENV is not defined).
 *
 * log_path  path of the existing log file
 *
 * Builds "<log_path>.YYYY-MM-DD.HH:MM:SS" from the local time and
 * hard-links the log file to that name.  As the comments below say, a
 * link() failure is tolerated because nothing can be done about it.
 *
 * NOTE(review): the localtime() result is dereferenced with no visible
 * NULL check.
 */
4863 #ifndef AFS_NT40_ENV
4865 TimeStampLogFile(char * log_path)
4872 lt = localtime(&now);
4873 if (asprintf(&stampSlvgLog,
4874 "%s.%04d-%02d-%02d.%02d:%02d:%02d", log_path,
4875 lt->tm_year + 1900, lt->tm_mon + 1, lt->tm_mday, lt->tm_hour,
4876 lt->tm_min, lt->tm_sec) >= 0) {
4877 /* try to link the logfile to a timestamped filename */
4878 /* if it fails, oh well, nothing we can do */
4879 if (link(log_path, stampSlvgLog))
/*
 * SalvageShowLog - show the contents of the salvage log.
 *
 * A separate path (body not visible in this excerpt) is taken when ShowLog
 * is 0 or when logging already goes to stdout or stderr.  When syslog is
 * in use, a message says the log cannot be shown.  Otherwise
 * AFSDIR_SERVER_SLVGLOG_FILEPATH is reopened for reading and consumed line
 * by line with fgets(); an unreadable file is reported with
 * "Can't read ..., exiting".
 */
4887 SalvageShowLog(void)
4891 if (ShowLog == 0 || logFile == stdout || logFile == stderr) {
4895 #ifndef AFS_NT40_ENV
4897 printf("Can't show log since using syslog.\n");
4908 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
4911 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
4914 while (fgets(line, sizeof(line), logFile))
/*
 * Log - printf-style logging for the salvager.
 *
 * format  printf-style format string, followed by its arguments
 *
 * The message is first formatted into the local buffer `tmp` with
 * vsnprintf(), so anything longer than the buffer is truncated.  It is
 * then either passed to syslog at LOG_INFO (non-NT builds) or written to
 * logFile prefixed with TimeStamp(now.tv_sec, 1) for the current time.
 *
 * NOTE(review): the condition that chooses between syslog and logFile is
 * not visible in this excerpt.
 */
4921 Log(const char *format, ...)
4927 va_start(args, format);
4928 vsnprintf(tmp, sizeof tmp, format, args);
4930 #ifndef AFS_NT40_ENV
4932 syslog(LOG_INFO, "%s", tmp);
4936 gettimeofday(&now, NULL);
4937 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort - printf-style reporting of a fatal condition.
 *
 * format  printf-style format string, followed by its arguments
 *
 * The message is formatted into the local buffer `tmp` with vsnprintf()
 * and then either passed to syslog at LOG_INFO (non-NT builds) or written
 * to logFile as-is, without a timestamp prefix.
 *
 * NOTE(review): what happens after the message is emitted is not visible
 * in this excerpt; callers in this file treat the call as terminating the
 * salvage.
 */
4943 Abort(const char *format, ...)
4948 va_start(args, format);
4949 vsnprintf(tmp, sizeof tmp, format, args);
4951 #ifndef AFS_NT40_ENV
4953 syslog(LOG_INFO, "%s", tmp);
4957 fprintf(logFile, "%s", tmp);
/*
 * ToString - NOTE(review): only the signature and an assertion that the
 * local pointer p is non-NULL are visible in this excerpt.  Presumably p
 * is a freshly allocated copy of s that is handed back to the caller;
 * confirm against the full file, including who owns the result.
 */
4968 ToString(const char *s)
4972 opr_Assert(p != NULL);
/*
 * RemoveTheForce - delete "<path>/FORCESALVAGE" if it exists.
 *
 * path  directory in which to look for the FORCESALVAGE file
 *
 * The file is only removed when Testing is clear and ForceSalvage is set,
 * and only after afs_stat() confirms that it exists.
 *
 * NOTE(review): target is built with strcpy()/strcat() and no length
 * check is visible, so a long `path` could overflow it.  The unlink()
 * result is also ignored.
 */
4976 /* Remove the FORCESALVAGE file */
4978 RemoveTheForce(char *path)
4981 struct afs_stat_st force; /* so we can use afs_stat to find it */
4982 strcpy(target,path);
4983 strcat(target,"/FORCESALVAGE");
4984 if (!Testing && ForceSalvage) {
4985 if (afs_stat(target,&force) == 0) unlink(target);
/*
 * Variant compiled when AFS_AIX32_ENV is not defined: reports whether
 * "<path>/FORCESALVAGE" exists.
 *
 * path  directory in which to look for the FORCESALVAGE file
 *
 * Returns non-zero when afs_stat() on the file succeeds, 0 otherwise.
 *
 * NOTE(review): target is built with strcpy()/strcat() and no length
 * check is visible, so a long `path` could overflow it.
 */
4989 #ifndef AFS_AIX32_ENV
4991 * UseTheForceLuke - see if we can use the force
4994 UseTheForceLuke(char *path)
4996 struct afs_stat_st force;
4998 strcpy(target,path);
4999 strcat(target,"/FORCESALVAGE");
5001 return (afs_stat(target, &force) == 0);
/*
 * Alternative variant of UseTheForceLuke() -- presumably the
 * AFS_AIX32_ENV branch, though the preprocessor line that selects it is
 * not visible in this excerpt.
 *
 * Always returns 0.  Per the original notes below, no FORCESALVAGE file
 * is created on this platform; ListViceInodes() sets ForceSalvage as a
 * side effect when its examination of the root inode detects the need.
 */
5005 * UseTheForceLuke - see if we can use the force
5008 * The VRMIX fsck will not muck with the filesystem it is supposedly
5009 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
5010 * muck directly with the root inode, which is within the normal
5012 * ListViceInodes() has a side effect of setting ForceSalvage if
5013 * it detects a need, based on root inode examination.
5016 UseTheForceLuke(char *path)
5019 return 0; /* sorry OB1 */
/*
 * nt_SalvagePartition - start a child salvager process for one partition
 * (NT support code).
 *
 * partName  name of the partition the child should salvage
 * jobn      job number recorded in the job passed to the child
 *
 * The path of the running executable is fetched once with
 * GetModuleFileName() and cached in execpathname.  A job record is filled
 * in with SALVAGER_MAGIC, the job number and the partition name, and the
 * child is started with spawnprocveb(), which receives save_args and the
 * job record.
 *
 * NOTE(review): GetModuleFileName() is given MAX_PATH - 1 as the buffer
 * size, but its result is compared against the literal 1023, so the two
 * limits do not obviously agree.  cj_part is also filled with an
 * unbounded strcpy().  Both deserve a second look.
 */
5024 /* NT support routines */
5026 static char execpathname[MAX_PATH];
5028 nt_SalvagePartition(char *partName, int jobn)
5033 if (!*execpathname) {
5034 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
5035 if (!n || n == 1023)
5038 job.cj_magic = SALVAGER_MAGIC;
5039 job.cj_number = jobn;
5040 (void)strcpy(job.cj_part, partName);
5041 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage - child-side setup from the job record handed
 * over by the parent (NT support code).
 *
 * datap  pointer to the data block received from the parent
 * len    size of that block in bytes
 *
 * The block must be exactly sizeof(childJob_t) and carry SALVAGER_MAGIC.
 * A per-job log named "<AFSDIR_SERVER_SLVGLOG_FILEPATH>.<job number>" is
 * then opened for writing into logFile.
 *
 * NOTE(review): the handling of each failed check, the copy into myjob
 * and any release of logname are not visible in this excerpt.
 */
5046 nt_SetupPartitionSalvage(void *datap, int len)
5048 childJob_t *jobp = (childJob_t *) datap;
5051 if (len != sizeof(childJob_t))
5053 if (jobp->cj_magic != SALVAGER_MAGIC)
5058 if (asprintf(&logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
5059 myjob.cj_number) < 0)
5061 logFile = afs_fopen(logname, "w");
5070 #endif /* AFS_NT40_ENV */