2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
89 #include <afs/procmgmt.h>
92 #ifdef HAVE_SYS_FILE_H
93 # include <sys/file.h>
97 #include <WINNT/afsevent.h>
100 #define WCOREDUMP(x) ((x) & 0200)
103 #include <afs/afsint.h>
104 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
105 #if defined(AFS_VFSINCL_ENV)
106 #include <sys/vnode.h>
108 #include <sys/fs/ufs_inode.h>
110 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
111 #include <ufs/ufs/dinode.h>
112 #include <ufs/ffs/fs.h>
114 #include <ufs/inode.h>
117 #else /* AFS_VFSINCL_ENV */
119 #include <ufs/inode.h>
120 #else /* AFS_OSF_ENV */
121 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_DARWIN_ENV)
122 #include <sys/inode.h>
125 #endif /* AFS_VFSINCL_ENV */
126 #endif /* AFS_SGI_ENV */
129 #include <sys/lockf.h>
132 #include <checklist.h>
134 #if defined(AFS_SGI_ENV)
137 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
139 #include <sys/mnttab.h>
140 #include <sys/mntent.h>
145 #endif /* AFS_SGI_ENV */
146 #endif /* AFS_HPUX_ENV */
150 #include <afs/osi_inode.h>
154 #include <afs/afsutil.h>
155 #include <afs/fileutil.h>
160 #include <afs/afssyscalls.h>
164 #include "partition.h"
165 #include "daemon_com.h"
166 #include "daemon_com_inline.h"
168 #include "fssync_inline.h"
169 #include "volume_inline.h"
170 #include "salvsync.h"
171 #include "viceinode.h"
173 #include "volinodes.h" /* header magic number, etc. stuff */
174 #include "vol-salvage.h"
176 #include "vol_internal.h"
178 #include <afs/prs_fs.h>
180 #ifdef FSSYNC_BUILD_CLIENT
181 #include "vg_cache.h"
189 extern void *calloc();
191 static char *TimeStamp(time_t clock, int precision);
194 int debug; /* -d flag */
195 extern int Testing; /* -n flag */
196 int ListInodeOption; /* -i flag */
197 int ShowRootFiles; /* -r flag */
198 int RebuildDirs; /* -sal flag */
199 int Parallel = 4; /* -para X flag */
200 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
201 int forceR = 0; /* -b flag */
202 int ShowLog = 0; /* -showlog flag */
203 int ShowSuid = 0; /* -showsuid flag */
204 int ShowMounts = 0; /* -showmounts flag */
205 int orphans = ORPH_IGNORE; /* -orphans option */
210 int useSyslog = 0; /* -syslog flag */
211 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
220 #define MAXPARALLEL 32
222 int OKToZap; /* -o flag */
223 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
224 * in the volume header */
226 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
228 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
231 * information that is 'global' to a particular salvage job.
234 Device fileSysDevice; /**< The device number of the current partition
236 char fileSysPath[9]; /**< The path of the mounted partition currently
237 * being salvaged, i.e. the directory containing
238 * the volume headers */
239 char *fileSysPathName; /**< NT needs this to make name pretty log. */
240 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
241 int VGLinkH_cnt; /**< # of references to lnk handle. */
242 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
245 char *fileSysDeviceName; /**< The block device where the file system being
246 * salvaged was mounted */
247 char *filesysfulldev;
249 int VolumeChanged; /**< Set by any routine which would change the
250 * volume in a way which would require callbacks
251 * to be broken if the volume was put back
252 * on line by an active file server */
254 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
255 * header dealt with */
257 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
258 FD_t inodeFd; /**< File descriptor for inode file */
260 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
261 int nVolumes; /**< Number of volumes (read-write and read-only)
262 * in volume summary */
263 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
266 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
267 * vnodes in the volume that
268 * we are currently looking
270 int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
271 * to contact the fileserver over FSYNC */
278 /* Forward declarations */
279 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
280 static int AskVolumeSummary(struct SalvInfo *salvinfo,
281 VolumeId singleVolumeNumber);
282 static void MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId);
283 static void AskError(struct SalvInfo *salvinfo, VolumeId volumeId);
285 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
286 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
287 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
289 /* Uniquifier stored in the Inode */
294 return (u & 0x3fffff);
296 #if defined(AFS_SGI_EXMAG)
297 return (u & SGI_UNIQMASK);
300 #endif /* AFS_SGI_EXMAG */
307 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
309 return 0; /* otherwise may be transient, e.g. EMFILE */
314 char *save_args[MAX_ARGS];
316 extern pthread_t main_thread;
317 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
321 * Get the salvage lock if not already held. Hold until process exits.
323 * @param[in] locktype READ_LOCK or WRITE_LOCK
/*
 * _ObtainSalvageLock: take the salvager lock file in the requested mode.
 *
 * locktype is passed unchanged to VLockFileLock(); the two wrappers in this
 * file call this routine with WRITE_LOCK and READ_LOCK respectively.
 *
 * NOTE(review): only part of this routine is visible here. The declarations
 * of code/offset/nonblock and the conditions that select between the two
 * messages below are not shown.
 */
326 _ObtainSalvageLock(int locktype)
328 struct VLockFile salvageLock;
/* Associate the lock object with AFSDIR_SERVER_SLVGLOCK_FILEPATH. */
333 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
335 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
/* Message used when another salvager appears to hold the lock. */
338 "salvager: There appears to be another salvager running! "
/* Message used for a lock failure that is reported with a numeric code. */
343 "salvager: Error %d trying to acquire salvage lock! "
/* ObtainSalvageLock: request the salvage lock with WRITE_LOCK. */
349 ObtainSalvageLock(void)
351 _ObtainSalvageLock(WRITE_LOCK);
/* ObtainSharedSalvageLock: request the salvage lock with READ_LOCK. */
354 ObtainSharedSalvageLock(void)
356 _ObtainSalvageLock(READ_LOCK);
360 #ifdef AFS_SGI_XFS_IOPS_ENV
361 /* Check if the given partition is mounted. For XFS, the root inode is not a
362 * constant. So we check the hard way.
/*
 * IsPartitionMounted: walk the mount table opened with
 * setmntent(MOUNTED, "r") looking for an entry whose mnt_dir equals part.
 *
 * part - mount directory to look for.
 */
365 IsPartitionMounted(char *part)
368 struct mntent *mntent;
/* opr_Verify wraps the open, so a failed setmntent() is not handled here. */
370 opr_Verify(mntfp = setmntent(MOUNTED, "r"));
371 while (mntent = getmntent(mntfp)) {
372 if (!strcmp(part, mntent->mnt_dir))
/*
 * NOTE(review): both arms of the conditional below are 1, so the result is 1
 * whether or not a matching entry was found. Confirm whether the "not found"
 * arm was meant to be 0 before relying on this return value.
 */
377 return mntent ? 1 : 1;
380 /* Check if the given inode is the root of the filesystem. */
381 #ifndef AFS_SGI_XFS_IOPS_ENV
/*
 * IsRootInode: report whether status describes the root inode, by comparing
 * status->st_ino with the fixed ROOTINODE constant.
 *
 * status - stat information for the inode being tested.
 * Returns nonzero when st_ino equals ROOTINODE, zero otherwise.
 */
383 IsRootInode(struct afs_stat_st *status)
386 * The root inode is not a fixed value in XFS partitions. So we need to
387 * see if the partition is in the list of mounted partitions. This only
388 * affects the SalvageFileSys path, so we check there.
390 return (status->st_ino == ROOTINODE);
395 #ifndef AFS_NAMEI_ENV
396 /* We don't want to salvage big files filesystems, since we can't put volumes on
/*
 * CheckIfBigFilesFS: read the superblock of the device backing a partition
 * and log a "not salvaging" message when it cannot be read or when
 * IsBigFilesFileSystem() reports a big files filesystem.
 *
 * mountPoint - partition mount point (its use is not visible in this excerpt).
 * devName    - device name, with or without a leading "/dev/".
 */
400 CheckIfBigFilesFS(char *mountPoint, char *devName)
402 struct superblock fs;
/*
 * Build the device path in name: prefix "/dev/" unless devName already
 * starts with it.
 * NOTE(review): both copies are unbounded and the size of name is not
 * visible here; confirm devName cannot overflow it.
 */
405 if (strncmp(devName, "/dev/", 5)) {
406 (void)sprintf(name, "/dev/%s", devName);
408 (void)strcpy(name, devName);
/* A negative return from ReadSuper is treated as an unreadable superblock. */
411 if (ReadSuper(&fs, name) < 0) {
412 Log("Unable to read superblock. Not salvaging partition %s.\n",
416 if (IsBigFilesFileSystem(&fs)) {
417 Log("Partition %s is a big files filesystem, not salvaging.\n",
427 #define HDSTR "\\Device\\Harddisk"
428 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
/*
 * SameDisk: decide whether two partitions resolve to the same device by
 * comparing the strings QueryDosDevice() returns for their devName fields.
 *
 * p1, p2 - partitions to compare.
 * Returns nonzero when the two device strings compare equal, ignoring case,
 * over at most RES_LEN - 1 characters.
 */
430 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
/* dowarn is static, so its value persists between calls. */
436 static int dowarn = 1;
438 if (!QueryDosDevice(p1->devName, res1, RES_LEN - 1))
/* A result that does not begin with HDSTR is logged as a warning. */
440 if (strncmp(res1, HDSTR, HDLEN)) {
443 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
444 res1, HDSTR, p1->devName);
447 if (!QueryDosDevice(p2->devName, res2, RES_LEN - 1))
449 if (strncmp(res2, HDSTR, HDLEN)) {
452 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
453 res2, HDSTR, p2->devName);
457 return (0 == _strnicmp(res1, res2, RES_LEN - 1));
/*
 * SameDisk(P1, P2): macro form. Two partitions count as being on the same
 * disk when their device fields give the same quotient when divided by
 * PartsPerDisk. Each argument is evaluated exactly once.
 */
460 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
463 /* This assumes that two partitions with the same device number divided by
464 * PartsPerDisk are on the same disk.
/*
 * SalvageFileSysParallel: queue one partition for salvage, or, when partP is
 * 0, wait for the outstanding salvage jobs and then gather their logs.
 *
 * partP - partition to salvage; 0 means "wait for all jobs".
 *
 * What the visible code does:
 *  - A job record is calloc'd for partP and stamped with the running
 *    jobcount.
 *  - With debug set or Parallel == 1 the partition is salvaged directly via
 *    SalvageFileSys().
 *  - Otherwise a job whose partition shares a disk (SameDisk) with an entry
 *    in jobs[] is chained onto that entry through nextjob.
 *  - When numjobs has reached Parallel, or when waiting for everything, the
 *    routine blocks in wait() and retires the job with the matching pid.
 *  - Each started job writes to AFSDIR_SERVER_SLVGLOG_FILEPATH.<jobnumb>.
 *    After the final wait those files are read back and unlinked, unless
 *    useSyslog is set.
 */
467 SalvageFileSysParallel(struct DiskPartition64 *partP)
470 struct DiskPartition64 *partP;
471 int pid; /* Pid for this job */
472 int jobnumb; /* Log file job number */
473 struct job *nextjob; /* Next partition on disk to salvage */
/* jobs[], numjobs and jobcount are static: they persist across calls. */
475 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
476 struct job *thisjob = 0;
477 static int numjobs = 0;
478 static int jobcount = 0;
484 char logFileName[256];
488 /* We have a partition to salvage. Copy it into thisjob */
489 thisjob = calloc(1, sizeof(struct job));
491 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
494 thisjob->partP = partP;
495 thisjob->jobnumb = jobcount;
497 } else if (jobcount == 0) {
498 /* We are asking to wait for all jobs (partp == 0), yet we never
501 Log("No file system partitions named %s* found; not salvaged\n",
502 VICE_PARTITION_PREFIX);
506 if (debug || Parallel == 1) {
508 SalvageFileSys(thisjob->partP, 0);
515 /* Check to see if thisjob is for a disk that we are already
516 * salvaging. If it is, link it in as the next job to do. The
517 * jobs array has 1 entry per disk being salvaged. numjobs is
518 * the total number of disks currently being salvaged. In
519 * order to keep the jobs array compact, when a disk is
520 * completed, the highest element in the jobs array is moved
521 * down to the now open slot.
523 for (j = 0; j < numjobs; j++) {
524 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
525 /* On same disk, add it to this list and return */
526 thisjob->nextjob = jobs[j]->nextjob;
527 jobs[j]->nextjob = thisjob;
534 /* Loop until we start thisjob or until all existing jobs are finished */
535 while (thisjob || (!partP && (numjobs > 0))) {
536 startjob = -1; /* No new job to start */
538 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
539 /* Either the max jobs are running or we have to wait for all
540 * the jobs to finish. In either case, we wait for at least one
541 * job to finish. When it's done, clean up after it.
543 pid = wait(&wstatus);
544 opr_Assert(pid != -1);
545 for (j = 0; j < numjobs; j++) { /* Find which job it is */
546 if (pid == jobs[j]->pid)
/* The pid returned by wait() must belong to one of the tracked jobs. */
549 opr_Assert(j < numjobs);
550 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
551 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
554 numjobs--; /* job no longer running */
555 oldjob = jobs[j]; /* remember */
556 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
557 free(oldjob); /* free the old job */
559 /* If there is another partition on the disk to salvage, then
560 * say we will start it (startjob). If not, then put thisjob there
561 * and say we will start it.
563 if (jobs[j]) { /* Another partition to salvage */
564 startjob = j; /* Will start it */
565 } else { /* There is not another partition to salvage */
567 jobs[j] = thisjob; /* Add thisjob */
569 startjob = j; /* Will start it */
571 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
572 startjob = -1; /* Don't start it - already running */
576 /* We don't have to wait for a job to complete */
578 jobs[numjobs] = thisjob; /* Add this job */
580 startjob = numjobs; /* Will start it */
584 /* Start up a new salvage job on a partition in job slot "startjob" */
585 if (startjob != -1) {
587 Log("Starting salvage of file system partition %s\n",
588 jobs[startjob]->partP->name);
590 /* For NT, we not only fork, but re-exec the salvager. Pass in the
591 * commands and pass the child job number via the data path.
594 nt_SalvagePartition(jobs[startjob]->partP->name,
595 jobs[startjob]->jobnumb);
596 jobs[startjob]->pid = pid;
601 jobs[startjob]->pid = pid;
607 for (fd = 0; fd < 16; fd++)
614 openlog("salvager", LOG_PID, useSyslogFacility);
618 snprintf(logFileName, sizeof logFileName, "%s.%d",
619 AFSDIR_SERVER_SLVGLOG_FILEPATH,
620 jobs[startjob]->jobnumb);
621 logFile = afs_fopen(logFileName, "w");
626 SalvageFileSys1(jobs[startjob]->partP, 0);
631 } /* while ( thisjob || (!partP && numjobs > 0) ) */
633 /* If waited for all jobs to complete, now collect log files and return */
635 if (!useSyslog) /* if syslogging - no need to collect */
/* Job numbers run from 0 to jobcount - 1; each per-job log is read and then removed. */
638 for (i = 0; i < jobcount; i++) {
639 snprintf(logFileName, sizeof logFileName, "%s.%d",
640 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
641 if ((passLog = afs_fopen(logFileName, "r"))) {
642 while (fgets(buf, sizeof(buf), passLog)) {
647 (void)unlink(logFileName);
/*
 * SalvageFileSys: run SalvageFileSys1() for a partition, in a forked child
 * when forking is allowed.
 *
 * partP              - partition to salvage.
 * singleVolumeNumber - passed unchanged to SalvageFileSys1().
 *
 * The salvage runs in this process when canfork is false or debug is set;
 * otherwise it runs in the process for which Fork() returns 0.
 */
656 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
658 if (!canfork || debug || Fork() == 0) {
659 SalvageFileSys1(partP, singleVolumeNumber);
/* True only in the forked child: forking is allowed and debug is off. */
660 if (canfork && !debug) {
665 Wait("SalvageFileSys");
/*
 * get_DevName: split a device path at its last OS_DIRSEPC.
 *
 * pbuffer - device path; rewritten in place to hold what follows the last
 *           separator.
 * wpath   - output buffer (the code that fills it is not visible here).
 */
669 get_DevName(char *pbuffer, char *wpath)
671 char pbuf[128], *ptr;
/*
 * NOTE(review): unbounded copy into a 128-byte local; confirm that callers
 * never pass a longer string.
 */
672 strcpy(pbuf, pbuffer);
673 ptr = (char *)strrchr(pbuf, OS_DIRSEPC);
679 ptr = (char *)strrchr(pbuffer, OS_DIRSEPC);
/*
 * NOTE(review): ptr points into pbuffer, so source and destination of this
 * strcpy lie in the same buffer and may overlap; strcpy leaves overlapping
 * copies undefined. Whether ptr is checked for NULL first is not visible.
 */
681 strcpy(pbuffer, ptr + 1);
/*
 * SalvageFileSys1: salvage one partition, or the volume group of one volume
 * on it.
 *
 * partP              - partition to work on.
 * singleVolumeNumber - nonzero to restrict the salvage to that volume's
 *                      group; zero for the whole partition.
 *
 * Steps visible in this excerpt:
 *  1. Zero a local SalvInfo and record the partition, device and path.
 *  2. Single volume: connect to the fileserver (unless running as the
 *     salvageServer), set useFSYNC, AskOffline the volume and, on DAFS
 *     builds, LockVolume it. Otherwise clear useFSYNC and lock the partition.
 *  3. Remove leftover salvage.inodes.* and salvage.temp.* files.
 *  4. Create the inode list file, unlink it straight away, and fill it via
 *     GetInodeSummary().
 *  5. Walk the inode summaries one RW volume group at a time, deleting
 *     volume headers that have no inodes and salvaging each group.
 *  6. Remove the FORCESALVAGE file for whole-partition runs; for
 *     single-volume runs outside Testing, bring the volumes back online.
 *
 * NOTE(review): the checks at the top on inodeFile and tries suggest this
 * body can be re-entered after a failed checkout; the label and jump are not
 * visible here.
 */
688 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
691 char inodeListPath[256];
692 FD_t inodeFile = INVALID_FD;
693 static char tmpDevName[100];
694 static char wpath[100];
695 struct VolumeSummary *vsp, *esp;
699 struct SalvInfo l_salvinfo;
700 struct SalvInfo *salvinfo = &l_salvinfo;
703 memset(salvinfo, 0, sizeof(*salvinfo));
706 if (inodeFile != INVALID_FD) {
708 inodeFile = INVALID_FD;
710 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
711 Abort("Raced too many times with fileserver restarts while trying to "
712 "checkout/lock volumes; Aborted\n");
714 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
716 /* unlock all previous volume locks, since we're about to lock them
718 VLockFileReinit(&partP->volLockFile);
720 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
722 salvinfo->fileSysPartition = partP;
723 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
724 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
727 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
728 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
729 name = partP->devName;
731 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
/*
 * NOTE(review): unbounded copy into the 100-byte static tmpDevName; confirm
 * devName always fits.
 */
732 strcpy(tmpDevName, partP->devName);
733 name = get_DevName(tmpDevName, wpath);
734 salvinfo->fileSysDeviceName = name;
735 salvinfo->filesysfulldev = wpath;
738 if (singleVolumeNumber) {
739 #if !(defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL))
740 /* only non-DAFS locks the partition when salvaging a single volume;
741 * DAFS will lock the individual volumes in the VG */
742 VLockPartition(partP->name);
743 #endif /* !(AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL) */
747 /* salvageserver already setup fssync conn for us */
748 if ((programType != salvageServer) && !VConnectFS()) {
749 Abort("Couldn't connect to file server\n");
752 salvinfo->useFSYNC = 1;
753 AskOffline(salvinfo, singleVolumeNumber);
754 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
755 if (LockVolume(salvinfo, singleVolumeNumber)) {
758 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
761 salvinfo->useFSYNC = 0;
762 VLockPartition(partP->name);
766 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
769 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
770 partP->name, name, (Testing ? "(READONLY mode)" : ""));
772 Log("***Forced salvage of all volumes on this partition***\n");
777 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
784 opr_Verify((dirp = opendir(salvinfo->fileSysPath)) != NULL);
785 while ((dp = readdir(dirp))) {
786 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
787 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
789 Log("Removing old salvager temp files %s\n", dp->d_name);
/*
 * NOTE(review): npath is assembled with unbounded strcpy/strcat and its size
 * is not visible here.
 */
790 strcpy(npath, salvinfo->fileSysPath);
791 strcat(npath, OS_DIRSEP);
792 strcat(npath, dp->d_name);
/* An explicit tmpdir takes precedence over the partition directory. */
798 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
800 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
/*
 * NOTE(review): strncpy writes at most 255 bytes into the uninitialized
 * 256-byte inodeListPath and never writes the last byte, so a name of 255 or
 * more characters is left unterminated. The _tempnam() result is also used
 * without a check.
 */
801 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
803 snprintf(inodeListPath, 255, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
807 inodeFile = OS_OPEN(inodeListPath, O_RDWR|O_TRUNC|O_CREAT, 0666);
808 if (inodeFile == INVALID_FD) {
809 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
812 /* Using nt_unlink here since we're really using the delete on close
813 * semantics of unlink. In most places in the salvager, we really do
814 * mean to unlink the file at that point. Those places have been
815 * modified to actually do that so that the NT crt can be used there.
817 * jaltman - On NT delete on close cannot be applied to a file while the
818 * process has an open file handle that does not have DELETE file
819 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
820 * delete privileges. As a result the nt_unlink() call will always
823 code = nt_unlink(inodeListPath);
825 code = unlink(inodeListPath);
828 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
831 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
835 salvinfo->inodeFd = inodeFile;
836 if (salvinfo->inodeFd == INVALID_FD)
837 Abort("Temporary file %s is missing...\n", inodeListPath);
838 OS_SEEK(salvinfo->inodeFd, 0L, SEEK_SET);
839 if (ListInodeOption) {
840 PrintInodeList(salvinfo);
841 if (singleVolumeNumber) {
842 /* We've checked out the volume from the fileserver, and we need
843 * to give it back. We don't know if the volume exists or not,
844 * so we don't know whether to AskOnline or not. Try to determine
845 * if the volume exists by trying to read the volume header, and
846 * AskOnline if it is readable. */
847 MaybeAskOnline(salvinfo, singleVolumeNumber);
851 /* enumerate volumes in the partition.
852 * figure out sets of read-only + rw volumes.
853 * salvage each set, read-only volumes first, then read-write.
854 * Fix up inodes on last volume in set (whether it is read-write
857 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
861 if (singleVolumeNumber) {
862 /* If we delete a volume during the salvage, we indicate as such by
863 * setting the volsummary->deleted field. We need to know if we
864 * deleted a volume or not in order to know which volumes to bring
865 * back online after the salvage. If we fork, we will lose this
866 * information, since volsummary->deleted will not get set in the
867 * parent. So, don't fork. */
/*
 * Outer loop: i marks the first inode summary of a volume group and j is
 * advanced past the last entry sharing its RWvolumeId. vsp walks the volume
 * header summaries alongside, up to esp.
 */
871 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
872 i < salvinfo->nVolumesInInodeFile; i = j) {
873 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
875 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
877 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
878 struct VolumeSummary *tsp;
879 /* Scan volume list (from partition root directory) looking for the
880 * current rw volume number in the volume list from the inode scan.
881 * If there is one here that is not in the inode volume list,
883 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
885 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
887 /* Now match up the volume summary info from the root directory with the
888 * entry in the volume list obtained from scanning inodes */
889 salvinfo->inodeSummary[j].volSummary = NULL;
890 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
891 if (tsp->header.id == vid) {
892 salvinfo->inodeSummary[j].volSummary = tsp;
898 /* Salvage the group of volumes (several read-only + 1 read/write)
899 * starting with the current read-only volume we're looking at.
902 nt_SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
904 DoSalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
905 #endif /* AFS_NT40_ENV */
909 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
910 for (; vsp < esp; vsp++) {
912 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
915 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
916 RemoveTheForce(salvinfo->fileSysPath);
918 if (!Testing && singleVolumeNumber) {
920 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
921 /* unlock vol headers so the fs can attach them when we AskOnline */
922 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
923 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
925 /* Step through the volumeSummary list and set all volumes on-line.
926 * Most volumes were taken off-line in GetVolumeSummary.
927 * If a volume was deleted, don't tell the fileserver anything, since
928 * we already told the fileserver the volume was deleted back when we
929 * destroyed the volume header.
930 * Also, make sure we bring the singleVolumeNumber back online first.
933 for (j = 0; j < salvinfo->nVolumes; j++) {
934 if (salvinfo->volumeSummaryp[j].header.id == singleVolumeNumber) {
936 if (!salvinfo->volumeSummaryp[j].deleted) {
937 AskOnline(salvinfo, singleVolumeNumber);
943 /* If singleVolumeNumber is not in our volumeSummary, it means that
944 * at least one other volume in the VG is on the partition, but the
945 * RW volume is not. We've already AskOffline'd it by now, though,
946 * so make sure we don't still have the volume checked out. */
947 AskDelete(salvinfo, singleVolumeNumber);
/* Second pass: bring every other non-deleted volume in the summary online. */
950 for (j = 0; j < salvinfo->nVolumes; j++) {
951 if (salvinfo->volumeSummaryp[j].header.id != singleVolumeNumber) {
952 if (!salvinfo->volumeSummaryp[j].deleted) {
953 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
959 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
960 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
963 OS_CLOSE(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
/*
 * DeleteExtraVolumeHeaderFile: remove a volume header that has no matching
 * inode data.
 *
 * salvinfo - salvage job state; supplies the partition, its path and the
 *            useFSYNC flag.
 * vsp      - summary of the volume whose header is to be removed.
 *
 * The header is destroyed with VDestroyVolumeDiskHeader(), the file is then
 * unlinked directly (ENOENT is tolerated), and when useFSYNC is set the
 * deletion is reported through AskDelete(). The log message says "would have
 * been deleted" when Testing is set.
 */
967 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
970 char filename[VMAXPATHLEN];
976 VolumeExternalName_r(vsp->header.id, filename, sizeof(filename));
/*
 * NOTE(review): sprintf into path is unbounded and the declaration of path
 * is not visible here.
 */
977 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, filename);
980 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
983 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
985 Log("Error %ld destroying volume disk header for volume %lu\n",
986 afs_printable_int32_ld(code),
987 afs_printable_uint32_lu(vsp->header.id));
990 /* make sure we actually delete the header file; ENOENT
991 * is fine, since VDestroyVolumeDiskHeader probably already
993 if (unlink(path) && errno != ENOENT) {
994 Log("Unable to unlink %s (errno = %d)\n", path, errno);
996 if (salvinfo->useFSYNC) {
997 AskDelete(salvinfo, vsp->header.id);
/*
 * CompareInodes: ordering function over struct ViceInodeInfo records, with
 * the (const void *, const void *) signature that qsort() expects.
 *
 * Ordering visible in this excerpt:
 *  - If either record is a special inode (vnodeNumber == INODESPECIAL), the
 *    records are compared first by RW volume id: parentId for a special
 *    inode, volumeId otherwise.
 *  - Two special inodes of the same volume are ordered by special.type.
 *  - Ordinary inodes are compared by volumeId and then vnodeNumber.
 *  - Ties are then examined by vnodeUniquifier and inodeDataVersion. Under
 *    AFS_3DISPARES / AFS_SGI_EXMAG there are extra checks for one value
 *    above ~90% and the other below ~10% of the field's range.
 *
 * NOTE(review): most return statements are not visible here, so the sign
 * returned by each branch cannot be confirmed from this excerpt.
 */
1004 CompareInodes(const void *_p1, const void *_p2)
1006 const struct ViceInodeInfo *p1 = _p1;
1007 const struct ViceInodeInfo *p2 = _p2;
1008 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1009 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
1010 VolumeId p1rwid, p2rwid;
/* RW id of each record: parentId for a special inode, else its volumeId. */
1012 (p1->u.vnode.vnodeNumber ==
1013 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
1015 (p2->u.vnode.vnodeNumber ==
1016 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1017 if (p1rwid < p2rwid)
1019 if (p1rwid > p2rwid)
1021 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1022 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
/* Same volume: order by special type; this expression never yields 0. */
1023 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1024 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1025 if (p1->u.vnode.volumeId == p1rwid)
1027 if (p2->u.vnode.volumeId == p2rwid)
1029 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
/* Exactly one of the two records is a special inode from here on. */
1031 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1032 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1033 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1035 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1037 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1039 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1041 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1043 /* The following tests are reversed, so that the most desirable
1044 * of several similar inodes comes first */
1045 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1046 #ifdef AFS_3DISPARES
1047 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1048 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1051 #ifdef AFS_SGI_EXMAG
1052 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1053 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1058 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1059 #ifdef AFS_3DISPARES
1060 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1061 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1064 #ifdef AFS_SGI_EXMAG
1065 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1066 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1071 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1072 #ifdef AFS_3DISPARES
1073 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1074 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1077 #ifdef AFS_SGI_EXMAG
1078 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1079 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1084 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1085 #ifdef AFS_3DISPARES
1086 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1087 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1090 #ifdef AFS_SGI_EXMAG
1091 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1092 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/*
 * CountVolumeInodes: summarize the run of inode records at the start of ip
 * that belong to one volume.
 *
 * ip        - first record of the run; its volumeId identifies the volume.
 * maxInodes - upper bound on the number of records examined.
 * summary   - output; receives volumeId, RWvolumeId, nInodes, nSpecialInodes
 *             and maxUniquifier.
 *
 * NOTE(review): the lines that advance ip and update n, nSpecial and
 * maxunique are only partly visible in this excerpt.
 */
1101 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1102 struct InodeSummary *summary)
1104 VolumeId volume = ip->u.vnode.volumeId;
/* Until a special inode says otherwise, the volume is its own RW parent. */
1105 VolumeId rwvolume = volume;
/* Stop at the record limit or at the first record of a different volume. */
1110 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1112 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1114 rwvolume = ip->u.special.parentId;
1115 /* This isn't quite right, as there could (in error) be different
1116 * parent inodes in different special vnodes */
/* Keep the largest uniquifier seen among the records examined. */
1118 if (maxunique < ip->u.vnode.vnodeUniquifier)
1119 maxunique = ip->u.vnode.vnodeUniquifier;
1123 summary->volumeId = volume;
1124 summary->RWvolumeId = rwvolume;
1125 summary->nInodes = n;
1126 summary->nSpecialInodes = nSpecial;
1127 summary->maxUniquifier = maxunique;
/*
 * OnlyOneVolume: predicate selecting the inodes that belong to one volume.
 *
 * inodeinfo          - inode record to test.
 * singleVolumeNumber - volume id to match.
 * rock               - not used by the visible code.
 *
 * A special inode (vnodeNumber == INODESPECIAL) matches on its parentId; any
 * other inode matches on its volumeId. Returns nonzero on a match.
 */
1131 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1133 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1134 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1135 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1140 * Collect list of inodes in file named by path. If a truly fatal error,
1141 * unlink the file and abort. For lesser errors, return -1. The file will
1142 * be unlinked by the caller.
1145 GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolumeNumber)
1149 struct ViceInodeInfo *ip, *ip_save;
1150 struct InodeSummary summary;
1151 char summaryFileName[50];
1152 FD_t summaryFile = INVALID_FD;
1154 char *dev = salvinfo->fileSysPath;
1155 char *wpath = salvinfo->fileSysPath;
1157 char *dev = salvinfo->fileSysDeviceName;
1158 char *wpath = salvinfo->filesysfulldev;
1160 char *part = salvinfo->fileSysPath;
1165 afs_sfsize_t st_size;
1167 /* This file used to come from vfsck; cobble it up ourselves now... */
1169 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1170 singleVolumeNumber ? OnlyOneVolume : 0,
1171 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1173 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1177 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1179 if (forceSal && !ForceSalvage) {
1180 Log("***Forced salvage of all volumes on this partition***\n");
1183 OS_SEEK(inodeFile, 0L, SEEK_SET);
1184 salvinfo->inodeFd = inodeFile;
1185 if (salvinfo->inodeFd == INVALID_FD ||
1186 (st_size = OS_SIZE(salvinfo->inodeFd)) == -1) {
1187 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1189 tdir = (tmpdir ? tmpdir : part);
1191 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1192 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp."));
1194 snprintf(summaryFileName, sizeof summaryFileName,
1195 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1197 summaryFile = OS_OPEN(summaryFileName, O_RDWR|O_APPEND|O_CREAT, 0666);
1198 if (summaryFile == INVALID_FD) {
1199 Abort("Unable to create inode summary file\n");
1203 /* Using nt_unlink here since we're really using the delete on close
1204 * semantics of unlink. In most places in the salvager, we really do
1205 * mean to unlink the file at that point. Those places have been
1206 * modified to actually do that so that the NT crt can be used there.
1208 * jaltman - As commented elsewhere, this cannot work because fopen()
1209 * does not open files with DELETE and FILE_SHARE_DELETE.
1211 code = nt_unlink(summaryFileName);
1213 code = unlink(summaryFileName);
1216 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1219 if (!canfork || debug || Fork() == 0) {
1220 int nInodes = st_size / sizeof(struct ViceInodeInfo);
1222 OS_CLOSE(summaryFile);
1223 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1224 RemoveTheForce(salvinfo->fileSysPath);
1226 struct VolumeSummary *vsp;
1230 GetVolumeSummary(salvinfo, singleVolumeNumber);
1232 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1234 if (vsp->header.id == singleVolumeNumber) {
1237 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1243 MaybeAskOnline(salvinfo, singleVolumeNumber);
1245 /* make sure we get rid of stray .vol headers, even if
1246 * they're not in our volume summary (might happen if
1247 * e.g. something else created them and they're not in the
1248 * fileserver VGC) */
1249 VDestroyVolumeDiskHeader(salvinfo->fileSysPartition,
1250 singleVolumeNumber, 0 /*parent*/);
1251 AskDelete(salvinfo, singleVolumeNumber);
1255 Log("%s vice inodes on %s; not salvaged\n",
1256 singleVolumeNumber ? "No applicable" : "No", dev);
1261 ip = malloc(nInodes*sizeof(struct ViceInodeInfo));
1263 OS_CLOSE(summaryFile);
1265 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1268 if (OS_READ(salvinfo->inodeFd, ip, st_size) != st_size) {
1269 OS_CLOSE(summaryFile);
1270 Abort("Unable to read inode table; %s not salvaged\n", dev);
1272 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1273 if (OS_SEEK(salvinfo->inodeFd, 0, SEEK_SET) == -1
1274 || OS_WRITE(salvinfo->inodeFd, ip, st_size) != st_size) {
1275 OS_CLOSE(summaryFile);
1276 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1281 CountVolumeInodes(ip, nInodes, &summary);
1282 if (OS_WRITE(summaryFile, &summary, sizeof(summary)) != sizeof(summary)) {
1283 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1284 OS_CLOSE(summaryFile);
1288 summary.index += (summary.nInodes);
1289 nInodes -= summary.nInodes;
1290 ip += summary.nInodes;
1293 ip = ip_save = NULL;
1294 /* Following fflush is not fclose, because if it was debug mode would not work */
1295 if (OS_SYNC(summaryFile) == -1) {
1296 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1297 OS_CLOSE(summaryFile);
1301 if (canfork && !debug) {
1306 if (Wait("Inode summary") == -1) {
1307 OS_CLOSE(summaryFile);
1308 Exit(1); /* salvage of this partition aborted */
1312 st_size = OS_SIZE(summaryFile);
1313 opr_Assert(st_size >= 0);
1316 salvinfo->inodeSummary = malloc(st_size);
1317 opr_Assert(salvinfo->inodeSummary != NULL);
1318 /* For GNU we need to do lseek to get the file pointer moved. */
1319 opr_Assert(OS_SEEK(summaryFile, 0, SEEK_SET) == 0);
1320 ret = OS_READ(summaryFile, salvinfo->inodeSummary, st_size);
1321 opr_Assert(ret == st_size);
1323 salvinfo->nVolumesInInodeFile = st_size / sizeof(struct InodeSummary);
1324 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1325 salvinfo->inodeSummary[i].volSummary = NULL;
1327 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)st_size);
1328 OS_CLOSE(summaryFile);
1331 if (retcode && singleVolumeNumber && !deleted) {
1332 AskError(salvinfo, singleVolumeNumber);
1338 /* Comparison routine for volume sort.
1339 This is set up so that a read-write volume comes immediately before
1340 any read-only clones of that volume */
/* Comparator over struct VolumeSummary elements, taking the two elements
 * as const void pointers.
 *
 * Ordering, as far as the visible lines show:
 *   1. entries with different header.parent are ordered by header.parent,
 *      ascending, so entries sharing a parent id end up adjacent;
 *   2. for equal parents, an entry whose header.id equals its header.parent
 *      is tested for first (p1, then p2);
 *   3. otherwise the two entries are ordered by header.id, ascending.
 *
 * _p1, _p2: the two elements being compared.
 * Returns -1 or 1 on every visible path; no path returning 0 is visible.
 *
 * NOTE(review): this listing is missing lines here (the return type, the
 * braces, and the statements controlled by the two read-write tests).  The
 * values returned for the read-write cases must be confirmed against the
 * complete file. */
1342 CompareVolumes(const void *_p1, const void *_p2)
1344 const struct VolumeSummary *p1 = _p1;
1345 const struct VolumeSummary *p2 = _p2;
1346 if (p1->header.parent != p2->header.parent)
1347 return p1->header.parent < p2->header.parent ? -1 : 1;
1348 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1350 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1352 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1356 * Gleans volumeSummary information by asking the fileserver
1358 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1359 * salvaging a whole partition
1361 * @return whether we obtained the volume summary information or not
1362 * @retval 0 success; we obtained the volume summary information
1363 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1365 * @retval 1 we did not get the volume summary information; either the
1366 * fileserver responded with an error, or we are not supposed to
1367 * ask the fileserver for the information (e.g. we are salvaging
1368 * the entire partition or we are not the salvageserver)
1370 * @note for non-DAFS, always returns 1
/* Try to fill salvinfo->volumeSummaryp from the volume-group data held by
 * the fileserver rather than by scanning the partition.
 *
 * The body below is compiled only when FSSYNC_BUILD_CLIENT and
 * AFS_DEMAND_ATTACH_FS are both defined, and it acts only when programType
 * is salvageServer and singleVolumeNumber is non-zero.  Visible steps:
 *   - FSYNC_VGCQuery() is issued for singleVolumeNumber on the partition.
 *     While the result is SYNC_FAILED with reason FSYNC_PART_SCANNING the
 *     query is repeated after a sleep; the sleep starts at 1 second, grows
 *     by one second per attempt and is capped at 10 seconds.
 *   - SYNC_FAILED with reason FSYNC_UNKNOWN_VOLID is handled by zeroing
 *     q_res and setting q_res.rw to singleVolumeNumber.
 *   - A refusal is logged together with the code and the reason.
 *   - If q_res.rw differs from singleVolumeNumber the case is logged,
 *     SALVSYNC_LinkVolume() is called when SALVSYNC_BUILD_CLIENT is defined,
 *     and the process exits with SALSRV_EXIT_VOLGROUP_LINK.
 *   - Otherwise VOL_VG_MAX_VOLS summaries are allocated with calloc() (the
 *     result is asserted non-NULL) and q_res.children is walked.  Children
 *     other than singleVolumeNumber are handed to AskOffline(), a
 *     LockVolume() check follows, the disk header is read with
 *     VReadVolumeDiskHeader() and converted into vsp->header, and
 *     salvinfo->nVolumes is incremented.  The summaries are then passed to
 *     qsort().
 *   - A fall-back message about scanning the entire partition is logged on
 *     a path whose condition is not visible.
 *
 * NOTE(review): this listing is missing lines (declarations of code, res
 * and i, every return statement, closing braces, comment delimiters and
 * the action taken for a zero child id), so the exact nesting and the
 * return values cannot be confirmed from here. */
1373 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1376 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1377 if (programType == salvageServer) {
1378 if (singleVolumeNumber) {
1379 FSSYNC_VGQry_response_t q_res;
1381 struct VolumeSummary *vsp;
1383 struct VolumeDiskHeader diskHdr;
1385 memset(&res, 0, sizeof(res));
1387 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1390 * We must wait for the partition to finish scanning before
1391 * can continue, since we will not know if we got the entire
1392 * VG membership unless the partition is fully scanned.
1393 * We could, in theory, just scan the partition ourselves if
1394 * the VG cache is not ready, but we would be doing the exact
1395 * same scan the fileserver is doing; it will almost always
1396 * be faster to wait for the fileserver. The only exceptions
1397 * are if the partition does not take very long to scan, and
1398 * in that case it's fast either way, so who cares?
1400 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1401 Log("waiting for fileserver to finish scanning partition %s...\n",
1402 salvinfo->fileSysPartition->name);
1404 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1405 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1406 * just so small partitions don't need to wait over 10
1407 * seconds every time, and large partitions are generally
1408 * polled only once every ten seconds. */
1409 sleep((i > 10) ? (i = 10) : i);
1411 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1415 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1416 /* This can happen if there's no header for the volume
1417 * we're salvaging, or no headers exist for the VG (if
1418 * we're salvaging an RW). Act as if we got a response
1419 * with no VG members. The headers may be created during
1420 * salvaging, if there are inodes in this VG. */
1422 memset(&q_res, 0, sizeof(q_res));
1423 q_res.rw = singleVolumeNumber;
1427 Log("fileserver refused VGCQuery request for volume %lu on "
1428 "partition %s, code %ld reason %ld\n",
1429 afs_printable_uint32_lu(singleVolumeNumber),
1430 salvinfo->fileSysPartition->name,
1431 afs_printable_int32_ld(code),
1432 afs_printable_int32_ld(res.hdr.reason));
1436 if (q_res.rw != singleVolumeNumber) {
1437 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1438 afs_printable_uint32_lu(singleVolumeNumber),
1439 afs_printable_uint32_lu(q_res.rw));
1440 #ifdef SALVSYNC_BUILD_CLIENT
1441 if (SALVSYNC_LinkVolume(q_res.rw,
1443 salvinfo->fileSysPartition->name,
1445 Log("schedule request failed\n");
1447 #endif /* SALVSYNC_BUILD_CLIENT */
1448 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1451 salvinfo->volumeSummaryp = calloc(VOL_VG_MAX_VOLS, sizeof(struct VolumeSummary));
1452 opr_Assert(salvinfo->volumeSummaryp != NULL);
1454 salvinfo->nVolumes = 0;
1455 vsp = salvinfo->volumeSummaryp;
1457 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1458 char name[VMAXPATHLEN];
1460 if (!q_res.children[i]) {
1464 /* AskOffline for singleVolumeNumber was called much earlier */
1465 if (q_res.children[i] != singleVolumeNumber) {
1466 AskOffline(salvinfo, q_res.children[i]);
1467 if (LockVolume(salvinfo, q_res.children[i])) {
1473 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1475 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1476 afs_printable_uint32_lu(q_res.children[i]));
1481 DiskToVolumeHeader(&vsp->header, &diskHdr);
1482 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1484 salvinfo->nVolumes++;
1488 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1493 Log("Cannot get volume summary from fileserver; falling back to scanning "
1494 "entire partition\n");
1497 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
1502 * count how many volume headers are found by VWalkVolumeHeaders.
1504 * @param[in] dp the disk partition (unused)
1505 * @param[in] name full path to the .vol header (unused)
1506 * @param[in] hdr the header data (unused)
1507 * @param[in] last whether this is the last try or not (unused)
1508 * @param[in] rock actually an afs_int32*; the running count of how many
1509 * volumes we have found
/* Header-walk callback that keeps a running count of volume headers.
 * The only visible statement reinterprets rock as a pointer to afs_int32,
 * which is the counter supplied by the caller.
 *
 * NOTE(review): the statement that updates the counter and the return
 * statement are not present in this listing; confirm both against the
 * complete file. */
1514 CountHeader(struct DiskPartition64 *dp, const char *name,
1515 struct VolumeDiskHeader *hdr, int last, void *rock)
1517 afs_int32 *nvols = (afs_int32 *)rock;
1523 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
/* State shared between GetVolumeSummary and the header-walk callbacks.
 * GetVolumeSummary fills in the fields and hands the structure to
 * VWalkVolumeHeaders as the opaque rock; RecordHeader and UnlinkHeader cast
 * the rock back to this type.
 *
 * NOTE(review): the closing line of the structure is not present in this
 * listing. */
1526 struct SalvageScanParams {
1527 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1528 * vol id of the VG we're salvaging */
1529 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1530 * we're filling in */
1531 afs_int32 nVolumes; /**< # of vols we've encountered */
1532 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1533 * # of vols we've alloc'd memory for) */
1534 int retry; /**< do we need to retry vol lock/checkout? */
1535 struct SalvInfo *salvinfo; /**< salvage job info */
1539 * records volume summary info found from VWalkVolumeHeaders.
1541 * Found volumes are also taken offline if they are in the specific volume
1542 * group we are looking for.
1544 * @param[in] dp the disk partition
1545 * @param[in] name full path to the .vol header
1546 * @param[in] hdr the header data
1547 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1548 * @param[in] rock actually a struct SalvageScanParams*, containing the
1549 * information needed to record the volume summary data
1551 * @return operation status
1553 * @retval -1 volume locking raced with fileserver restart; checking out
1554 * and locking volumes needs to be retried
1555 * @retval 1 volume header is mis-named and should be deleted
/* Header-walk callback that records one volume header into the summary
 * array described by the struct SalvageScanParams passed as rock.
 *
 * Visible steps:
 *   - A local summary is zeroed and hdr is converted into summary.header
 *     with DiskToVolumeHeader().
 *   - If a single volume is being salvaged and this header is that volume
 *     but its parent id differs from it: under salvageServer (with
 *     SALVSYNC_BUILD_CLIENT) the case is logged, SALVSYNC_LinkVolume() is
 *     called for the parent and the process exits with
 *     SALSRV_EXIT_VOLGROUP_LINK; on the other visible path the volume is
 *     logged as read-only and not salvaged.
 *   - For a partition salvage, or when the header id or parent equals
 *     singleVolumeNumber, the file name component after the last
 *     OS_DIRSEPC is compared with the name generated from VFORMAT and the
 *     header id; a mismatch marks the header as badly named.
 *   - When the name is good, or this is the last try, volumes other than
 *     singleVolumeNumber are handed to AskOffline(), and LockVolume() is
 *     checked when AFS_DEMAND_ATTACH_FS or AFS_DEMAND_ATTACH_UTIL is
 *     defined.
 *   - On the last try, unless Showmode is set, the misnaming is logged.
 *   - Abort() is called if params->nVolumes exceeds params->totalVolumes.
 *   - The summary is copied into params->vsp with memcpy().
 *
 * NOTE(review): this listing is missing lines (declaration and assignment
 * of badname, every return statement, the updates of params->vsp and
 * params->nVolumes, closing braces and several comment delimiters).  The
 * return values and exact nesting must be confirmed against the complete
 * file. */
1558 RecordHeader(struct DiskPartition64 *dp, const char *name,
1559 struct VolumeDiskHeader *hdr, int last, void *rock)
1561 char nameShouldBe[64];
1562 struct SalvageScanParams *params;
1563 struct VolumeSummary summary;
1564 VolumeId singleVolumeNumber;
1565 struct SalvInfo *salvinfo;
1567 params = (struct SalvageScanParams *)rock;
1569 memset(&summary, 0, sizeof(summary));
1571 singleVolumeNumber = params->singleVolumeNumber;
1572 salvinfo = params->salvinfo;
1574 DiskToVolumeHeader(&summary.header, hdr);
1576 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1577 && summary.header.parent != singleVolumeNumber) {
1579 if (programType == salvageServer) {
1580 #ifdef SALVSYNC_BUILD_CLIENT
1581 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1582 summary.header.id, summary.header.parent);
1583 if (SALVSYNC_LinkVolume(summary.header.parent,
1587 Log("schedule request failed\n");
1590 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1593 Log("%u is a read-only volume; not salvaged\n",
1594 singleVolumeNumber);
1599 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1600 || summary.header.parent == singleVolumeNumber) {
1602 /* check if the header file is incorrectly named */
1604 const char *base = strrchr(name, OS_DIRSEPC);
1611 snprintf(nameShouldBe, sizeof nameShouldBe,
1612 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1615 if (strcmp(nameShouldBe, base)) {
1616 /* .vol file has wrong name; retry/delete */
1620 if (!badname || last) {
1621 /* only offline the volume if the header is good, or if this is
1622 * the last try looking at it; avoid AskOffline'ing the same vol
1625 if (singleVolumeNumber
1626 && summary.header.id != singleVolumeNumber) {
1627 /* don't offline singleVolumeNumber; we already did that
1630 AskOffline(salvinfo, summary.header.id);
1632 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
1634 /* don't lock the volume if the header is bad, since we're
1635 * about to delete it anyway. */
1636 if (LockVolume(salvinfo, summary.header.id)) {
1641 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
1645 if (last && !Showmode) {
1646 Log("Volume header file %s is incorrectly named (should be %s "
1647 "not %s); %sdeleted (it will be recreated later, if "
1648 "necessary)\n", name, nameShouldBe, base,
1649 (Testing ? "it would have been " : ""));
1657 if (params->nVolumes > params->totalVolumes) {
1658 /* We found more volumes than we found on the first partition walk;
1659 * apparently something created a volume while we were
1660 * partition-salvaging, or we found more than 20 vols when salvaging a
1661 * particular volume. Abort if we detect this, since other programs
1662 * supposed to not touch the partition while it is partition-salvaging,
1663 * and we shouldn't find more than 20 vols in a VG.
1665 Abort("Found %ld vol headers, but should have found at most %ld! "
1666 "Make sure the volserver/fileserver are not running at the "
1667 "same time as a partition salvage\n",
1668 afs_printable_int32_ld(params->nVolumes),
1669 afs_printable_int32_ld(params->totalVolumes));
1672 memcpy(params->vsp, &summary, sizeof(summary));
1680 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1682 * If the header could not be read in at all, the header is always unlinked.
1683 * If instead RecordHeader said the header was bad (that is, the header file
1684 * is mis-named), we only unlink if we are doing a partition salvage, as
1685 * opposed to salvaging a specific volume group.
1687 * @param[in] dp the disk partition
1688 * @param[in] name full path to the .vol header
1689 * @param[in] hdr header data, or NULL if the header could not be read
1690 * @param[in] rock actually a struct SalvageScanParams*, with some information
/* Header-walk callback that decides whether a bad volume header file is
 * removed, and removes it with unlink().
 *
 * rock is cast to struct SalvageScanParams.  Two cases are visible: a
 * header that could not be read at all is logged as not being a legitimate
 * volume header file; a header that was read but rejected is considered
 * only when params->singleVolumeNumber is zero (a partition salvage).
 * When dounlink is set, unlink(name) is attempted and a failure is logged
 * together with errno.
 *
 * NOTE(review): this listing is missing lines (the declaration and every
 * assignment of dounlink, the test that selects the first case, and how
 * Testing affects the decision); confirm against the complete file. */
1694 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1695 struct VolumeDiskHeader *hdr, void *rock)
1697 struct SalvageScanParams *params;
1700 params = (struct SalvageScanParams *)rock;
1703 /* no header; header is too bogus to read in at all */
1705 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1711 } else if (!params->singleVolumeNumber) {
1712 /* We were able to read in a header, but RecordHeader said something
1713 * was wrong with it. We only unlink those if we are doing a partition
1720 if (dounlink && unlink(name)) {
1721 Log("Error %d while trying to unlink %s\n", errno, name);
1726 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1727 * the fileserver for VG information, or by scanning the /vicepX partition.
1729 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1730 * are salvaging, or 0 if this is a partition
1733 * @return operation status
1735 * @retval -1 we raced with a fileserver restart; checking out and locking
1736 * volumes must be retried
/* Populate salvinfo->volumeSummaryp and salvinfo->nVolumes.
 *
 * AskVolumeSummary() is tried first.  If that does not settle the matter,
 * the summary array is sized: for a partition salvage (singleVolumeNumber
 * is zero) the headers are counted with a VWalkVolumeHeaders() pass using
 * CountHeader, otherwise VOL_VG_MAX_VOLS entries are used.  The array is
 * allocated with calloc() and asserted non-NULL, a struct SalvageScanParams
 * is filled in, and a second VWalkVolumeHeaders() pass runs RecordHeader
 * and UnlinkHeader with the address of that structure as the rock.  The
 * recorded entries are then passed to qsort().
 *
 * Fix: the rock argument of the second walk had been garbled into a
 * pilcrow followed by ms; it is restored to the address of params, the
 * structure both callbacks cast their rock argument to.
 *
 * NOTE(review): this listing is missing lines (the declaration of code,
 * the tests on the AskVolumeSummary result, every return statement, the
 * trailing arguments of the first walk, the assignment of params.retry,
 * the comparator argument of qsort and closing braces); confirm against
 * the complete file. */
1739 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1741 afs_int32 nvols = 0;
1742 struct SalvageScanParams params;
1745 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1747 /* we successfully got the vol information from the fileserver; no
1748 * need to scan the partition */
1752 /* we need to retry volume checkout */
1756 if (!singleVolumeNumber) {
1757 /* Count how many volumes we have in /vicepX */
1758 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1761 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1766 nvols = VOL_VG_MAX_VOLS;
1769 salvinfo->volumeSummaryp = calloc(nvols, sizeof(struct VolumeSummary));
1770 opr_Assert(salvinfo->volumeSummaryp != NULL);
1772 params.singleVolumeNumber = singleVolumeNumber;
1773 params.vsp = salvinfo->volumeSummaryp;
1774 params.nVolumes = 0;
1775 params.totalVolumes = nvols;
1777 params.salvinfo = salvinfo;
1779 /* walk the partition directory of volume headers and record the info
1780 * about them; unlinking invalid headers */
1781 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1782 UnlinkHeader, &params);
1784 /* we apparently need to retry checking-out/locking volumes */
1788 Abort("Failed to get volume header summary\n");
1790 salvinfo->nVolumes = params.nVolumes;
1792 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1798 /* Find the link table. This should be associated with the RW volume or, if
1799 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
/* Search the special inodes of a volume group for its link table inode.
 *
 * isp:       array of nVols inode summaries, one per volume in the group.
 * nVols:     number of entries in isp.
 * allInodes: base of the inode array that the index field of each summary
 *            is relative to.
 *
 * Returns the inodeNumber of the first special inode whose type is
 * VI_LINKTABLE.
 *
 * NOTE(review): the return type, the declarations of i and j and the value
 * returned when no link table is found are not present in this listing;
 * confirm against the complete file. */
1802 FindLinkHandle(struct InodeSummary *isp, int nVols,
1803 struct ViceInodeInfo *allInodes)
1806 struct ViceInodeInfo *ip;
1808 for (i = 0; i < nVols; i++) {
/* The special inodes of volume i are the first nSpecialInodes entries
 * starting at its index. */
1809 ip = allInodes + isp[i].index;
1810 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1811 if (ip[j].u.special.type == VI_LINKTABLE)
1812 return ip[j].inodeNumber;
/* Create (if needed) and initialise the link table for a volume group.
 *
 * salvinfo: salvage job state; salvinfo->VGLinkH is set to a handle for
 *           the link table inode.
 * isp:      inode summary of the group; isp->RWvolumeId names the volume
 *           the link table belongs to.
 * ino:      existing link table inode; when it is not a valid inode a new
 *           one is created with IH_CREATE().
 *
 * The table is opened, truncated to a version stamp plus one short, and
 * the stamp (LINKTABLEMAGIC, LINKTABLEVERSION) is written at offset 0.
 * Each failing step calls Abort().  If the volume summary exists, its
 * header.linkTable is updated as well.
 *
 * Fix: a failed write of the version stamp used to be reported with the
 * message of the truncate step; it now says that the write failed.
 *
 * NOTE(review): this listing is missing lines (the return type, the
 * declaration of fdP, the left-hand side of the IH_CREATE assignment, the
 * NULL test on fdP, the second half of the FDH_PWRITE comparison and
 * closing braces); confirm against the complete file. */
1819 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1821 struct versionStamp version;
1824 if (!VALID_INO(ino))
1826 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->RWvolumeId,
1827 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1828 if (!VALID_INO(ino))
1830 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1831 isp->RWvolumeId, errno);
1832 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1833 fdP = IH_OPEN(salvinfo->VGLinkH);
1835 Abort("Can't open link table for volume %u (error = %d)\n",
1836 isp->RWvolumeId, errno);
1838 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1839 Abort("Can't truncate link table for volume %u (error = %d)\n",
1840 isp->RWvolumeId, errno);
1842 version.magic = LINKTABLEMAGIC;
1843 version.version = LINKTABLEVERSION;
1845 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1847 Abort("Can't write link table header for volume %u (error = %d)\n",
1848 isp->RWvolumeId, errno);
1850 FDH_REALLYCLOSE(fdP);
1852 /* If the volume summary exists (i.e., the V*.vol header file exists),
1853 * then set this inode there as well.
1855 if (isp->volSummary)
1856 isp->volSummary->header.linkTable = ino;
/* Body of nt_SVG, the routine that nt_SalvageVolumeGroup hands to
 * pthread_create().  It casts its argument to SVGParms_t and forwards the
 * three fields to DoSalvageVolumeGroup().
 *
 * NOTE(review): the signature, braces and return statement of this routine
 * are not present in this listing. */
1865 SVGParms_t *parms = (SVGParms_t *) arg;
1866 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
/* Salvage one volume group on a separate thread.
 *
 * The per-volume fields of salvinfo (VolumeChanged, VGLinkH, VGLinkH_cnt,
 * VolInfo) are reset, the arguments are packed into parms, and a thread
 * running nt_SVG is created with an attribute object set to
 * PTHREAD_CREATE_JOINABLE.  The thread is then joined.  A failure of
 * pthread_attr_init(), pthread_attr_setdetachstate() or pthread_create()
 * is logged with the RW volume id of the group.
 *
 * The trailing endif closes an AFS_NT40_ENV conditional that opens before
 * this block.
 *
 * NOTE(review): this listing is missing lines (the declarations of parms,
 * tid and code, the tests on code and whatever follows each failure log);
 * confirm against the complete file. */
1871 nt_SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1874 pthread_attr_t tattr;
1878 /* Initialize per volume global variables, even if later code does so */
1879 salvinfo->VolumeChanged = 0;
1880 salvinfo->VGLinkH = NULL;
1881 salvinfo->VGLinkH_cnt = 0;
1882 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1884 parms.svgp_inodeSummaryp = isp;
1885 parms.svgp_count = nVols;
1886 parms.svgp_salvinfo = salvinfo;
1887 code = pthread_attr_init(&tattr);
1889 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1893 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1895 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
/* parms is passed by address; the join below waits for nt_SVG to finish. */
1898 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1900 Log("Failed to create thread to salvage volume group %u\n",
1904 (void)pthread_join(tid, NULL);
1906 #endif /* AFS_NT40_ENV */
/* Salvage every volume of one volume group.
 *
 * salvinfo: salvage job state (inode file descriptor, link table handle).
 * isp:      inode summaries of the group, nVols entries.
 * nVols:    number of volumes in the group.
 *
 * Visible steps, in order:
 *   1. QuickCheck() is consulted unless ForceSalvage is set (subject to the
 *      ShowMounts test).
 *   2. When canfork is set and debug is not, Fork() is called and the
 *      parent waits with Wait().
 *   3. The inode counts of all volumes are summed, a buffer of that many
 *      struct ViceInodeInfo is obtained with malloc(), and the inodes of
 *      the group are read from salvinfo->inodeFd.  allInodes is the buffer
 *      pointer moved back by isp->index so that summary indexes can be
 *      applied to it directly.
 *   4. Under AFS_NAMEI_ENV the link table is located with FindLinkHandle()
 *      and its header magic is checked; if it is missing or bad it is
 *      recreated with CreateLinkTable() and a count of 1 is recorded with
 *      namei_SetLinkCount() for every non-special inode.
 *   5. Volumes are processed from index nVols - 1 down to salvageTo.  For
 *      each, check runs from 1 to 0, calling SalvageVolumeHeaderFile() and
 *      SalvageVnodes(); MaybeZapVolume() is called when one of them fails.
 *   6. Remaining link counts are corrected with IH_DEC() while positive and
 *      IH_INC() while negative; under AFS_NAMEI_ENV the link table inode is
 *      adjusted last.
 *   7. SalvageVolume() runs on the group and the link table handle is
 *      released with IH_RELEASE().
 *
 * NOTE(review): this listing is missing lines (several declarations, the
 * assignment of rw, every return and exit, the decrements and increments
 * inside the link count loops, closing braces and comment delimiters), and
 * no check of the malloc() result is visible.  Exact nesting and exit
 * behavior must be confirmed against the complete file. */
1909 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1911 struct ViceInodeInfo *inodes, *allInodes, *ip;
1912 int i, totalInodes, size, salvageTo;
1916 int dec_VGLinkH = 0;
1918 FdHandle_t *fdP = NULL;
1920 salvinfo->VGLinkH_cnt = 0;
/* A read-write volume counts as present only when the first summary is
 * its own RW id and it has at least one special inode. */
1921 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1922 && isp->nSpecialInodes > 0);
1923 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1924 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1927 if (ShowMounts && !haveRWvolume)
1929 if (canfork && !debug && Fork() != 0) {
1930 (void)Wait("Salvage volume group");
/* Total the inodes of every volume in the group to size the buffer. */
1933 for (i = 0, totalInodes = 0; i < nVols; i++)
1934 totalInodes += isp[i].nInodes;
1935 size = totalInodes * sizeof(struct ViceInodeInfo);
1936 inodes = malloc(size);
1937 allInodes = inodes - isp->index; /* this would the base of all the inodes
1938 * for the partition, if all the inodes
1939 * had been read into memory */
1941 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1943 opr_Verify(OS_READ(salvinfo->inodeFd, inodes, size) == size);
1945 /* Don't try to salvage a read write volume if there isn't one on this
1947 salvageTo = haveRWvolume ? 0 : 1;
1949 #ifdef AFS_NAMEI_ENV
1950 ino = FindLinkHandle(isp, nVols, allInodes);
1951 if (VALID_INO(ino)) {
1952 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1953 fdP = IH_OPEN(salvinfo->VGLinkH);
1955 if (VALID_INO(ino) && fdP != NULL) {
1956 struct versionStamp header;
1957 afs_sfsize_t nBytes;
1959 nBytes = FDH_PREAD(fdP, (char *)&header, sizeof(struct versionStamp), 0);
1960 if (nBytes != sizeof(struct versionStamp)
1961 || header.magic != LINKTABLEMAGIC) {
1962 Log("Bad linktable header for volume %u.\n", isp->RWvolumeId);
1963 FDH_REALLYCLOSE(fdP);
1967 if (!VALID_INO(ino) || fdP == NULL) {
1968 Log("%s link table for volume %u.\n",
1969 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1971 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1974 struct ViceInodeInfo *ip;
1975 CreateLinkTable(salvinfo, isp, ino);
1976 fdP = IH_OPEN(salvinfo->VGLinkH);
1977 /* Sync fake 1 link counts to the link table, now that it exists */
1979 for (i = 0; i < nVols; i++) {
1980 ip = allInodes + isp[i].index;
1981 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1982 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1989 FDH_REALLYCLOSE(fdP);
1991 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1994 /* Salvage in reverse order--read/write volume last; this way any
1995 * Inodes not referenced by the time we salvage the read/write volume
1996 * can be picked up by the read/write volume */
1997 /* ACTUALLY, that's not done right now--the inodes just vanish */
1998 for (i = nVols - 1; i >= salvageTo; i--) {
2000 struct InodeSummary *lisp = &isp[i];
2001 #ifdef AFS_NAMEI_ENV
2002 /* If only the RO is present on this partition, the link table
2003 * shows up as a RW volume special file. Need to make sure the
2004 * salvager doesn't try to salvage the non-existent RW.
2006 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
2007 /* If this only special inode is the link table, continue */
2008 if (inodes->u.special.type == VI_LINKTABLE) {
2015 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
2016 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
2017 /* Check inodes twice. The second time do things seriously. This
2018 * way the whole RO volume can be deleted, below, if anything goes wrong */
2019 for (check = 1; check >= 0; check--) {
2021 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
2023 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
2024 if (rw && deleteMe) {
2025 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
2026 * volume won't be called */
2032 if (rw && check == 1)
2034 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
2035 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
2041 /* Fix actual inode counts */
2044 Log("totalInodes %d\n",totalInodes);
2045 for (ip = inodes; totalInodes; ip++, totalInodes--) {
2046 static int TraceBadLinkCounts = 0;
2047 #ifdef AFS_NAMEI_ENV
2048 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
2049 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
2050 VGLinkH_p1 = ip->u.param[0];
2051 continue; /* Deal with this last. */
2054 if (ip->linkCount != 0 && TraceBadLinkCounts) {
2055 TraceBadLinkCounts--; /* Limit reports, per volume */
2056 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
2058 while (ip->linkCount > 0) {
2059 /* below used to assert, not break */
2061 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2062 Log("idec failed. inode %s errno %d\n",
2063 PrintInode(stmp, ip->inodeNumber), errno);
2069 while (ip->linkCount < 0) {
2070 /* these used to be asserts */
2072 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2073 Log("iinc failed. inode %s errno %d\n",
2074 PrintInode(stmp, ip->inodeNumber), errno);
2081 #ifdef AFS_NAMEI_ENV
2082 while (dec_VGLinkH > 0) {
2083 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2084 Log("idec failed on link table, errno = %d\n", errno);
2088 while (dec_VGLinkH < 0) {
2089 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2090 Log("iinc failed on link table, errno = %d\n", errno);
2097 /* Directory consistency checks on the rw volume */
2099 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2100 IH_RELEASE(salvinfo->VGLinkH);
2102 if (canfork && !debug) {
/* Inspect the volume info header of each volume in a group, before any
 * fork, to see whether the headers look clean.
 *
 * salvinfo: salvage job state; fileSysDevice is used to build the inode
 *           handle.
 * isp:      inode summaries of the group, nVols entries.
 * nVols:    number of volumes in the group.
 *
 * For each volume the volume info inode named by the volume summary header
 * is read into a VolumeDiskData.  The header passes when the full
 * structure is read, the magic is VOLUMEINFOMAGIC, dontSalvage equals
 * DONT_SALVAGE, and needsSalvaged and destroyMe are both zero.  A passing
 * header that is still marked in use gets inUse cleared and inService set,
 * and is written back.
 *
 * NOTE(review): this listing is missing lines (the declarations of i and
 * h, the statement controlled by the phantom read-write test, the handling
 * of a missing volume summary, every return statement and closing braces).
 * The return value convention must be confirmed against the complete
 * file. */
2109 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2111 /* Check headers BEFORE forking */
2115 for (i = 0; i < nVols; i++) {
2116 struct VolumeSummary *vs = isp[i].volSummary;
2117 VolumeDiskData volHeader;
2119 /* Don't salvage just because phantom rw volume is there... */
2120 /* (If a read-only volume exists, read/write inodes must also exist) */
2121 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
/* Build a handle for the volume info inode recorded in the header. */
2125 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2126 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2127 == sizeof(volHeader)
2128 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2129 && volHeader.dontSalvage == DONT_SALVAGE
2130 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
/* Header is clean; clear a stale in-use mark and write it back. */
2131 if (volHeader.inUse != 0) {
2132 volHeader.inUse = 0;
2133 volHeader.inService = 1;
2135 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2136 != sizeof(volHeader)) {
2152 /* SalvageVolumeHeaderFile
2154 * Salvage the top level V*.vol header file. Make sure the special files
2155 * exist and that there are no duplicates.
2157 * Calls SalvageHeader for each possible type of volume special file.
/* Rebuild and verify the volume header for one volume from its special
 * inodes.
 *
 * salvinfo: salvage job state.
 * isp:      inode summary of the volume; isp->volSummary may be NULL.
 * inodes:   inode array that isp->index is relative to.
 * RW:       non-zero when the volume is the read-write volume.
 * check:    non-zero for the checking pass; zero for the repairing pass.
 * deleteMe: passed through to SalvageHeader().
 *
 * Visible steps, in order:
 *   1. A skip array with one entry per special inode is allocated with
 *      calloc(); on failure a message is logged and the salvage goes on
 *      without duplicate recovery.
 *   2. init_inode_info() ties the stuff table to tempHeader.  When a
 *      volume summary and the skip array both exist, the existing header is
 *      copied into tempHeader and each special inode whose number matches
 *      the header entry for its type is remembered in goodspecial.
 *   3. tempHeader is cleared and given the magic, version, id and parent.
 *   4. Adjacent special inodes of the same type are reported as
 *      duplicates; goodspecial is used to pick the one the header
 *      references, otherwise the salvage of the volume is aborted.
 *   5. Each special inode is examined: out-of-range types are logged as
 *      rubbish; for types not marked obsolete, skipped inodes are treated
 *      as orphans (removed when orphans is ORPH_REMOVE, else ignored) and
 *      other inodes are stored into the matching tempHeader slot.  On the
 *      repairing pass the link count of inodes other than the link table
 *      is decremented.
 *   6. salvinfo->VGLinkH_cnt is incremented, and ClearROInUseBit() is
 *      called for a read-only volume on the repairing pass.
 *   7. SalvageHeader() runs for each of the MAXINODETYPE entries; the link
 *      table entry is first set from salvinfo->VGLinkH when that inode is
 *      valid.
 *   8. If there is no volume summary one is allocated with calloc() and
 *      VCreateVolumeDiskHeader is selected; otherwise the existing header
 *      is compared with tempHeader and VWriteVolumeDiskHeader is selected
 *      when they differ.  tempHeader is copied into the summary, converted
 *      with VolumeHeaderToDisk() and written through the selected function;
 *      a write error is logged.
 *   9. The volume info handle of the summary is initialised with IH_INIT().
 *
 * NOTE(review): this listing is missing lines (several declarations, every
 * return statement, the assignments to skip entries, the handling of an
 * all-obsolete volume, the tests on Testing and check around the header
 * write, closing braces and comment delimiters), and no check of the
 * second calloc() result is visible.  Return values and exact nesting must
 * be confirmed against the complete file. */
2161 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2162 struct ViceInodeInfo *inodes, int RW,
2163 int check, int *deleteMe)
2166 struct ViceInodeInfo *ip;
2167 int allinodesobsolete = 1;
2168 struct VolumeDiskHeader diskHeader;
2169 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2171 struct VolumeHeader tempHeader;
2172 struct afs_inode_info stuff[MAXINODETYPE];
2174 /* keeps track of special inodes that are probably 'good'; they are
2175 * referenced in the vol header, and are included in the given inodes
2180 } goodspecial[MAXINODETYPE];
2185 memset(goodspecial, 0, sizeof(goodspecial));
2187 skip = calloc(isp->nSpecialInodes, sizeof(*skip));
2189 Log("cannot allocate memory for inode skip array when salvaging "
2190 "volume %lu; not performing duplicate special inode recovery\n",
2191 afs_printable_uint32_lu(isp->volumeId));
2192 /* still try to perform the salvage; the skip array only does anything
2193 * if we detect duplicate special inodes */
2196 init_inode_info(&tempHeader, stuff);
2199 * First, look at the special inodes and see if any are referenced by
2200 * the existing volume header. If we find duplicate special inodes, we
2201 * can use this information to use the referenced inode (it's more
2202 * likely to be the 'good' one), and throw away the duplicates.
2204 if (isp->volSummary && skip) {
2205 /* use tempHeader, so we can use the stuff[] array to easily index
2206 * into the isp->volSummary special inodes */
2207 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2209 for (i = 0; i < isp->nSpecialInodes; i++) {
2210 ip = &inodes[isp->index + i];
2211 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2212 /* will get taken care of in a later loop */
2215 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2216 goodspecial[ip->u.special.type-1].valid = 1;
2217 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2222 memset(&tempHeader, 0, sizeof(tempHeader));
2223 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2224 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2225 tempHeader.id = isp->volumeId;
2226 tempHeader.parent = isp->RWvolumeId;
2228 /* Check for duplicates (inodes are sorted by type field) */
2229 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2230 ip = &inodes[isp->index + i];
2231 if (ip->u.special.type == (ip + 1)->u.special.type) {
2232 afs_ino_str_t stmp1, stmp2;
2234 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2235 /* Will be caught in the loop below */
2239 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2240 ip->u.special.type, isp->volumeId,
2241 PrintInode(stmp1, ip->inodeNumber),
2242 PrintInode(stmp2, (ip+1)->inodeNumber));
2244 if (skip && goodspecial[ip->u.special.type-1].valid) {
2245 Inode gi = goodspecial[ip->u.special.type-1].inode;
2248 Log("using special inode referenced by vol header (%s)\n",
2249 PrintInode(stmp1, gi));
2252 /* the volume header references some special inode of
2253 * this type in the inodes array; are we it? */
2254 if (ip->inodeNumber != gi) {
2256 } else if ((ip+1)->inodeNumber != gi) {
2257 /* in case this is the last iteration; we need to
2258 * make sure we check ip+1, too */
2263 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2271 for (i = 0; i < isp->nSpecialInodes; i++) {
2273 ip = &inodes[isp->index + i];
2274 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2276 Log("Rubbish header inode %s of type %d\n",
2277 PrintInode(stmp, ip->inodeNumber),
2278 ip->u.special.type);
2284 Log("Rubbish header inode %s of type %d; deleted\n",
2285 PrintInode(stmp, ip->inodeNumber),
2286 ip->u.special.type);
2287 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2288 if (skip && skip[i]) {
2289 if (orphans == ORPH_REMOVE) {
2290 Log("Removing orphan special inode %s of type %d\n",
2291 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2294 Log("Ignoring orphan special inode %s of type %d\n",
2295 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2296 /* fall through to the ip->linkCount--; line below */
2299 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2300 allinodesobsolete = 0;
2302 if (!check && ip->u.special.type != VI_LINKTABLE)
2303 ip->linkCount--; /* Keep the inode around */
2311 if (allinodesobsolete) {
2318 salvinfo->VGLinkH_cnt++; /* one for every header. */
2320 if (!RW && !check && isp->volSummary) {
2321 ClearROInUseBit(isp->volSummary);
2325 for (i = 0; i < MAXINODETYPE; i++) {
2326 if (stuff[i].inodeType == VI_LINKTABLE) {
2327 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2328 * And we may have recreated the link table earlier, so set the
2329 * RW header as well. The header magic was already checked.
2331 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2332 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2336 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2340 if (isp->volSummary == NULL) {
2342 char headerName[64];
2343 snprintf(headerName, sizeof headerName, VFORMAT,
2344 afs_printable_uint32_lu(isp->volumeId));
2345 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2346 salvinfo->fileSysPath, headerName);
2348 Log("No header file for volume %u\n", isp->volumeId);
2352 Log("No header file for volume %u; %screating %s\n",
2353 isp->volumeId, (Testing ? "it would have been " : ""),
2355 isp->volSummary = calloc(1, sizeof(struct VolumeSummary));
2357 writefunc = VCreateVolumeDiskHeader;
2360 char headerName[64];
2361 /* hack: these two fields are obsolete... */
2362 isp->volSummary->header.volumeAcl = 0;
2363 isp->volSummary->header.volumeMountTable = 0;
2366 (&isp->volSummary->header, &tempHeader,
2367 sizeof(struct VolumeHeader))) {
2368 VolumeExternalName_r(isp->volumeId, headerName, sizeof(headerName));
2369 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2370 salvinfo->fileSysPath, headerName);
2372 Log("Header file %s is damaged or no longer valid%s\n", path,
2373 (check ? "" : "; repairing"));
2377 writefunc = VWriteVolumeDiskHeader;
2381 memcpy(&isp->volSummary->header, &tempHeader,
2382 sizeof(struct VolumeHeader));
2385 Log("It would have written a new header file for volume %u\n",
2389 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2390 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2392 Log("Error %ld writing volume header file for volume %lu\n",
2393 afs_printable_int32_ld(code),
2394 afs_printable_uint32_lu(diskHeader.id));
2399 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2400 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader -- verify one special header inode of a volume and, unless
 * running in check mode, recreate it when it is missing or corrupted.
 *
 * salvinfo  salvage job state; fileSysDevice/fileSysPath locate the
 *           partition, and VolInfo receives a copy of the volume info
 *           header when sp describes the VI_VOLINFO inode.
 * sp        describes the special inode: its inodeType, the expected stamp
 *           and size, a description used in log messages, and a pointer
 *           (sp->inode) to the inode number recorded in the volume header.
 * isp       inode summary of the volume (volumeId, RWvolumeId and
 *           maxUniquifier are read here).
 * check     non-zero to report problems only; reaching the recreate path in
 *           check mode is reported as an internal error.
 * deleteMe  NOTE(review): not referenced on any line visible in this
 *           excerpt; presumably set when the volume info header is marked
 *           DESTROY_ME -- confirm against the complete source.
 *
 * The caller compares the result with -1 to detect failure.  Apart from the
 * early "return 0" for the link table on non-namei builds, the return
 * statements are not visible in this excerpt.
 */
2405 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2406 struct InodeSummary *isp, int check, int *deleteMe)
2409 VolumeDiskData volumeInfo;
2410 struct versionStamp fileHeader;
2419 #ifndef AFS_NAMEI_ENV
2420 if (sp->inodeType == VI_LINKTABLE)
2421 return 0; /* header magic was already checked */
/* No inode number is recorded in the volume header for this special file. */
2423 if (*(sp->inode) == 0) {
2425 Log("Missing inode in volume header (%s)\n", sp->description);
2429 Log("Missing inode in volume header (%s); %s\n", sp->description,
2430 (Testing ? "it would have recreated it" : "recreating"));
2433 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2434 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2435 if (!VALID_INO(*(sp->inode)))
2437 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2438 sp->description, errno);
/* Open the special inode so its contents can be compared with the expected stamp. */
2443 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2444 fdP = IH_OPEN(specH);
2445 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2446 /* bail out early and destroy the volume */
2448 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2455 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2456 sp->description, errno);
2459 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2460 || header.fileHeader.magic != sp->stamp.magic)) {
2462 Log("Part of the header (%s) is corrupted\n", sp->description);
2463 FDH_REALLYCLOSE(fdP);
2467 Log("Part of the header (%s) is corrupted; recreating\n",
2470 /* header can be garbage; make sure we don't read garbage data from
2472 memset(&header, 0, sizeof(header));
2474 #ifdef AFS_NAMEI_ENV
2475 if (namei_FixSpecialOGM(fdP, check)) {
2476 Log("Error with namei header OGM data (%s)\n", sp->description);
2477 FDH_REALLYCLOSE(fdP);
2482 if (sp->inodeType == VI_VOLINFO
2483 && header.volumeInfo.destroyMe == DESTROY_ME) {
2486 FDH_REALLYCLOSE(fdP);
2490 if (recreate && !Testing) {
2493 ("Internal error: recreating volume header (%s) in check mode\n",
2495 nBytes = FDH_TRUNC(fdP, 0);
2497 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2498 sp->description, errno);
2500 /* The following code should be moved into vutil.c */
/* Volume info header: rebuild it from zero with placeholder values
 * (out of service, not blessed, name "bogus.<volume id>"). */
2501 if (sp->inodeType == VI_VOLINFO) {
2503 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2504 header.volumeInfo.stamp = sp->stamp;
2505 header.volumeInfo.id = isp->volumeId;
2506 header.volumeInfo.parentId = isp->RWvolumeId;
2507 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2508 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2509 isp->volumeId, isp->volumeId);
2510 header.volumeInfo.inService = 0;
2511 header.volumeInfo.blessed = 0;
2512 /* The + 1000 is a hack in case there are any files out in venus caches */
2513 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2514 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2515 header.volumeInfo.needsCallback = 0;
2516 gettimeofday(&tp, NULL);
2517 header.volumeInfo.creationDate = tp.tv_sec;
2519 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2520 sizeof(header.volumeInfo), 0);
2521 if (nBytes != sizeof(header.volumeInfo)) {
2524 ("Unable to write volume header file (%s) (errno = %d)\n",
2525 sp->description, errno);
2526 Abort("Unable to write entire volume header file (%s)\n",
/* Only the version stamp is written here -- presumably the branch for the
 * other special inode types; the `else` line is not visible in this excerpt. */
2530 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2531 if (nBytes != sizeof(sp->stamp)) {
2534 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2535 sp->description, errno);
2537 ("Unable to write entire version stamp in volume header file (%s)\n",
2542 FDH_REALLYCLOSE(fdP);
/* Publish the volume info to the salvage state and log when the volume was
 * last updated or, when updateDate is zero, when it was created. */
2544 if (sp->inodeType == VI_VOLINFO) {
2545 salvinfo->VolInfo = header.volumeInfo;
2549 if (salvinfo->VolInfo.updateDate) {
2550 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2552 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2553 salvinfo->VolInfo.id,
2554 (Testing ? "it would have been " : ""), update);
2556 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2558 Log("%s (%u) not updated (created %s)\n",
2559 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
2569 SalvageVnodes(struct SalvInfo *salvinfo,
2570 struct InodeSummary *rwIsp,
2571 struct InodeSummary *thisIsp,
2572 struct ViceInodeInfo *inodes, int check)
2574 int ilarge, ismall, ioffset, RW, nInodes;
2575 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2578 RW = (rwIsp == thisIsp);
2579 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2581 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2582 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2583 if (check && ismall == -1)
2586 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2587 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2588 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex -- walk one vnode index file and reconcile every allocated
 * vnode with the inodes that claim to belong to it.
 *
 * salvinfo    salvage job state; fileSysDevice locates the partition and
 *             VolumeChanged is set when a vnode is rewritten.
 * ino         inode number of the vnode index file to salvage.
 * class       vnode class of that index; selects the VnodeClassInfo entry
 *             (record size and magic number).
 * RW          non-zero when the volume being salvaged is the read/write one.
 * ip, nInodes the volume's regular inodes; the loops below assume they are
 *             ordered by vnode number.
 * volSummary  volume summary; header.parent is used to open the index.
 * check       non-zero for a check-only pass; an assertion guarantees that
 *             no vnode was modified in that mode.
 *
 * For each non-null vnode the visible code: zeroes vnodes with no inode or
 * with a bad magic number, takes the uniquifier and data version from the
 * matching inode when they disagree, corrects the inode number and length,
 * and zeroes vnodes whose inode is missing.  A kept inode has its linkCount
 * decremented.  Changed vnodes are written back unless Testing is set.
 *
 * The return statements are not visible in this excerpt; the caller treats
 * 0 as success and -1 as failure.
 */
2592 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2593 struct ViceInodeInfo *ip, int nInodes,
2594 struct VolumeSummary *volSummary, int check)
2596 char buf[SIZEOF_LARGEDISKVNODE];
2597 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2599 StreamHandle_t *file;
2600 struct VnodeClassInfo *vcp;
2602 afs_sfsize_t nVnodes;
2603 afs_fsize_t vnodeLength;
2605 afs_ino_str_t stmp1, stmp2;
2609 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2610 fdP = IH_OPEN(handle);
2611 opr_Assert(fdP != NULL);
2612 file = FDH_FDOPEN(fdP, "r+");
2613 opr_Assert(file != NULL);
2614 vcp = &VnodeClassInfo[class];
2615 size = OS_SIZE(fdP->fd_fd);
2616 opr_Assert(size != -1);
/* The first diskSize bytes of the index are skipped by the seek below, so
 * they are not counted as a vnode. */
2617 nVnodes = (size / vcp->diskSize) - 1;
2619 opr_Assert((nVnodes + 1) * vcp->diskSize == size);
2620 opr_Verify(STREAM_ASEEK(file, vcp->diskSize) == 0);
2624 for (vnodeIndex = 0;
2625 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2626 nVnodes--, vnodeIndex++) {
2627 if (vnode->type != vNull) {
2628 int vnodeChanged = 0;
2629 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2630 if (VNDISK_GET_INO(vnode) == 0) {
2632 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2633 memset(vnode, 0, vcp->diskSize);
2637 if (vcp->magic != vnode->vnodeMagic) {
2638 /* bad magic #, probably partially created vnode */
2640 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2641 vnodeNumber, afs_printable_uint32_lu(vnode->vnodeMagic),
2642 afs_printable_uint32_lu(vcp->magic));
2643 memset(vnode, 0, vcp->diskSize);
2647 Log("Partially allocated vnode %d deleted.\n",
2649 memset(vnode, 0, vcp->diskSize);
2653 /* ****** Should do a bit more salvage here: e.g. make sure
2654 * vnode type matches what it should be given the index */
2655 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2656 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2657 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2658 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2665 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2666 /* The following doesn't work, because the version number
2667 * is not maintained correctly by the file server */
2668 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2669 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2671 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2677 /* For RW volume, look for vnode with matching inode number;
2678 * if no such match, take the first determined by our sort
2680 struct ViceInodeInfo *lip = ip;
2681 int lnInodes = nInodes;
2683 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2684 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2693 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2694 /* "Matching" inode */
2698 vu = vnode->uniquifier;
2699 iu = ip->u.vnode.vnodeUniquifier;
2700 vd = vnode->dataVersion;
2701 id = ip->u.vnode.inodeDataVersion;
2703 * Because of the possibility of the uniquifier overflows (> 4M)
2704 * we compare them modulo the low 22-bits; we shouldn't worry
2705 * about mismatching since they shouldn't to many old
2706 * uniquifiers of the same vnode...
/* On a mismatch of the IUnique()-reduced values the vnode takes over the
 * inode's uniquifier. */
2708 if (IUnique(vu) != IUnique(iu)) {
2710 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2713 vnode->uniquifier = iu;
2714 #ifdef AFS_3DISPARES
2715 vnode->dataVersion = (id >= vd ?
2718 1887437 ? vd : id) :
2721 1887437 ? id : vd));
2723 #if defined(AFS_SGI_EXMAG)
2724 vnode->dataVersion = (id >= vd ?
2727 15099494 ? vd : id) :
2730 15099494 ? id : vd));
2732 vnode->dataVersion = (id > vd ? id : vd);
2733 #endif /* AFS_SGI_EXMAG */
2734 #endif /* AFS_3DISPARES */
2737 /* don't bother checking for vd > id any more, since
2738 * partial file transfers always result in this state,
2739 * and you can't do much else anyway (you've already
2740 * found the best data you can) */
2741 #ifdef AFS_3DISPARES
2742 if (!vnodeIsDirectory(vnodeNumber)
2743 && ((vd < id && (id - vd) < 1887437)
2744 || ((vd > id && (vd - id) > 1887437)))) {
2746 #if defined(AFS_SGI_EXMAG)
2747 if (!vnodeIsDirectory(vnodeNumber)
2748 && ((vd < id && (id - vd) < 15099494)
2749 || ((vd > id && (vd - id) > 15099494)))) {
2751 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2752 #endif /* AFS_SGI_EXMAG */
2755 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2756 vnode->dataVersion = id;
/* The vnode must reference the inode that was matched to it. */
2761 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2764 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2766 VNDISK_SET_INO(vnode, ip->inodeNumber);
2771 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2773 VNDISK_SET_INO(vnode, ip->inodeNumber);
/* The inode's byte count wins over the length recorded in the vnode. */
2776 VNDISK_GET_LEN(vnodeLength, vnode);
2777 if (ip->byteCount != vnodeLength) {
2780 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2785 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2786 VNDISK_SET_LEN(vnode, ip->byteCount);
2790 ip->linkCount--; /* Keep the inode around */
2793 } else { /* no matching inode */
2795 if (VNDISK_GET_INO(vnode) != 0
2796 || vnode->type == vDirectory) {
2797 /* No matching inode--get rid of the vnode */
2799 if (VNDISK_GET_INO(vnode)) {
2801 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2805 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2810 if (VNDISK_GET_INO(vnode)) {
2812 time_t serverModifyTime = vnode->serverModifyTime;
2813 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2817 time_t serverModifyTime = vnode->serverModifyTime;
2818 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2821 memset(vnode, 0, vcp->diskSize);
2824 /* Should not reach here because we checked for
2825 * (inodeNumber == 0) above. And where we zero the vnode,
2826 * we also goto vnodeDone.
2830 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2834 } /* VNDISK_GET_INO(vnode) != 0 */
/* A check-only pass must never have modified a vnode. */
2836 opr_Assert(!(vnodeChanged && check));
2837 if (vnodeChanged && !Testing) {
2838 opr_Verify(IH_IWRITE(handle,
2839 vnodeIndexOffset(vcp, vnodeNumber),
2840 (char *)vnode, vcp->diskSize)
2842 salvinfo->VolumeChanged = 1; /* For break call back */
2853 struct VnodeEssence *
2854 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2857 struct VnodeInfo *vip;
2860 class = vnodeIdToClass(vnodeNumber);
2861 vip = &salvinfo->vnodeInfo[class];
2862 offset = vnodeIdToBitNumber(vnodeNumber);
2863 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
2867 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2869 /* Copy the directory unconditionally if we are going to change it:
2870 * not just if was cloned.
2872 struct VnodeDiskObject vnode;
2873 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2874 Inode oldinode, newinode;
2877 if (dir->copied || Testing)
2879 DFlush(); /* Well justified paranoia... */
2882 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2883 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2885 opr_Assert(code == sizeof(vnode));
2886 oldinode = VNDISK_GET_INO(&vnode);
2887 /* Increment the version number by a whole lot to avoid problems with
2888 * clients that were promised new version numbers--but the file server
2889 * crashed before the versions were written to disk.
2892 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2893 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2895 opr_Assert(VALID_INO(newinode));
2896 opr_Verify(CopyInode(salvinfo->fileSysDevice, oldinode, newinode,
2899 VNDISK_SET_INO(&vnode, newinode);
2901 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2902 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2904 opr_Assert(code == sizeof(vnode));
2906 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2907 salvinfo->fileSysDevice, newinode,
2908 &salvinfo->VolumeChanged);
2909 /* Don't delete the original inode right away, because the directory is
2910 * still being scanned.
2916 * This function should either successfully create a new dir, or give up
2917 * and leave things the way they were. In particular, if it fails to write
2918 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage -- rebuild a damaged directory into a freshly created
 * inode and switch the directory over to it.
 *
 * salvinfo  salvage job state (partition device/path, the large vnode
 *           index handle, VolumeChanged flag).
 * dir       summary of the directory to rebuild; on success its dirHandle
 *           is replaced by the handle of the rebuilt directory.
 *
 * Visible steps: read the directory's vnode, create a new inode with a
 * bumped data version, let DirSalvage() copy the entries across (using the
 * parent's uniquifier, or 1 when the parent is unknown), verify the result
 * with DirOK(), store the new inode number and length in the vnode, flush
 * to disk, and release the old inode.  When DirSalvage() or DirOK() fails,
 * the new inode's link count is dropped again.
 */
2922 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2924 struct VnodeDiskObject vnode;
2925 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2926 Inode oldinode, newinode;
2931 afs_int32 parentUnique = 1;
2932 struct VnodeEssence *vnodeEssence;
2937 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2939 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2940 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2942 opr_Assert(lcode == sizeof(vnode));
2943 oldinode = VNDISK_GET_INO(&vnode);
2944 /* Increment the version number by a whole lot to avoid problems with
2945 * clients that were promised new version numbers--but the file server
2946 * crashed before the versions were written to disk.
2949 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2950 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2952 opr_Assert(VALID_INO(newinode));
2953 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2954 &salvinfo->VolumeChanged);
2956 /* Assign . and .. vnode numbers from dir and vnode.parent.
2957 * The uniquifier for . is in the vnode.
2958 * The uniquifier for .. might be set to a bogus value of 1 and
2959 * the salvager will later clean it up.
2961 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2962 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2965 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2967 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2972 /* didn't really build the new directory properly, let's just give up. */
2973 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2974 Log("Directory salvage returned code %d, continuing.\n", code);
2976 Log("also failed to decrement link count on new inode");
2980 Log("Checking the results of the directory salvage...\n");
2981 if (!DirOK(&newdir)) {
2982 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2983 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2984 opr_Assert(code == 0);
/* The rebuilt directory passed DirOK(): point the vnode at the new inode,
 * record its length and write the vnode back to the index. */
2988 VNDISK_SET_INO(&vnode, newinode);
2989 length = afs_dir_Length(&newdir);
2990 VNDISK_SET_LEN(&vnode, length);
2992 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2993 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2995 opr_Assert(lcode == sizeof(vnode));
2998 nt_sync(salvinfo->fileSysDevice);
3000 sync(); /* this is slow, but hopefully rarely called. We don't have
3001 * an open FD on the file itself to fsync.
3005 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
3007 /* make sure old directory file is really closed */
3008 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
3009 FDH_REALLYCLOSE(fdP);
/* Release the old directory inode and continue with the rebuilt one. */
3011 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
3012 opr_Assert(code == 0);
3013 dir->dirHandle = newdir;
3017 * arguments for JudgeEntry.
/*
 * Bundles the two pointers JudgeEntry needs.  SalvageDir fills one of these
 * in and passes its address to afs_dir_EnumerateDir as the opaque callback
 * argument; JudgeEntry receives it back as `arock`.
 */
3019 struct judgeEntry_params {
3020 struct DirSummary *dir; /**< directory we're examining entries in */
3021 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/*
 * JudgeEntry -- directory enumeration callback that validates one entry.
 *
 * arock        pointer to a struct judgeEntry_params naming the directory
 *              being scanned and the salvage job.
 * name         name of the directory entry.
 * vnodeNumber  vnode the entry refers to.
 * unique       uniquifier stored in the entry (used throughout below; its
 *              parameter declaration is on a line not visible in this
 *              excerpt).
 *
 * Entries are deleted (after CopyOnWrite of the directory) when they name
 * a vnode outside the known range, a vnode with no inode on non-namei
 * builds, an unallocated small vnode, a vnode already claimed by another
 * directory, or an old ".__afs" unlink-while-referenced file.  Entries
 * whose uniquifier disagrees with the vnode are rewritten, and "." and
 * ".." are corrected.  A surviving entry claims its vnode and consumes one
 * of the vnode's link counts.
 *
 * The only return visible here is "return 0"; the remaining return
 * statements are on lines not shown.
 */
3025 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
3028 struct judgeEntry_params *params = arock;
3029 struct DirSummary *dir = params->dir;
3030 struct SalvInfo *salvinfo = params->salvinfo;
3031 struct VnodeEssence *vnodeEssence;
3032 afs_int32 dirOrphaned, todelete;
/* Whether the directory being scanned is itself orphaned; this decides
 * further down whether it may claim vnodes. */
3034 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
3036 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3037 if (vnodeEssence == NULL) {
3039 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
3042 CopyOnWrite(salvinfo, dir);
3043 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3048 #ifndef AFS_NAMEI_ENV
3049 /* On AIX machines, don't allow entries to point to inode 0. That is a special
3050 * mount inode for the partition. If this inode were deleted, it would crash
3053 if (vnodeEssence->InodeNumber == 0) {
3054 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
3056 CopyOnWrite(salvinfo, dir);
3057 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3064 if (!(vnodeNumber & 1) && !Showmode
3065 && !(vnodeEssence->count || vnodeEssence->unique
3066 || vnodeEssence->modeBits)) {
3067 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
3068 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3069 vnodeNumber, unique,
3070 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3074 CopyOnWrite(salvinfo, dir);
3075 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3081 /* Check if the Uniquifiers match. If not, change the directory entry
3082 * so its unique matches the vnode unique. Delete if the unique is zero
3083 * or if the directory is orphaned.
3085 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3086 if (!vnodeEssence->unique
3087 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3088 /* This is an orphaned directory. Don't delete the . or ..
3089 * entry. Otherwise, it will get created in the next
3090 * salvage and deleted again here. So Just skip it.
3095 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3098 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3102 fid.Vnode = vnodeNumber;
3103 fid.Unique = vnodeEssence->unique;
3104 CopyOnWrite(salvinfo, dir);
3105 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3107 opr_Verify(afs_dir_Create(&dir->dirHandle, name, &fid) == 0);
3110 return 0; /* no need to continue */
/* "." must name this directory itself (vnode number and uniquifier). */
3113 if (strcmp(name, ".") == 0) {
3114 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3116 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3119 CopyOnWrite(salvinfo, dir);
3120 opr_Verify(afs_dir_Delete(&dir->dirHandle, ".") == 0);
3121 fid.Vnode = dir->vnodeNumber;
3122 fid.Unique = dir->unique;
3123 opr_Verify(afs_dir_Create(&dir->dirHandle, ".", &fid) == 0);
3124 vnodeNumber = fid.Vnode; /* Get the new Essence */
3125 unique = fid.Unique;
3126 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
/* ".." is compared with dir->parent, or with the directory itself on the
 * alternative path whose condition is not visible in this excerpt. */
3130 } else if (strcmp(name, "..") == 0) {
3133 struct VnodeEssence *dotdot;
3134 pa.Vnode = dir->parent;
3135 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3136 opr_Assert(dotdot != NULL); /* XXX Should not be assert */
3137 pa.Unique = dotdot->unique;
3139 pa.Vnode = dir->vnodeNumber;
3140 pa.Unique = dir->unique;
3142 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3144 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3146 CopyOnWrite(salvinfo, dir);
3147 opr_Verify(afs_dir_Delete(&dir->dirHandle, "..") == 0);
3148 opr_Verify(afs_dir_Create(&dir->dirHandle, "..", &pa) == 0);
3151 vnodeNumber = pa.Vnode; /* Get the new Essence */
3153 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3155 dir->haveDotDot = 1;
/* Names starting with ".__afs" are removed; the vnode is left unclaimed and
 * flagged for deletion. */
3156 } else if (strncmp(name, ".__afs", 6) == 0) {
3158 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3161 CopyOnWrite(salvinfo, dir);
3162 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3164 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3165 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3168 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3169 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/* A symlink with no execute bits is handled as a mount point: its contents
 * must start with '#' or '%' and end with '.', otherwise the execute bits
 * are set so that it becomes an ordinary symbolic link. */
3170 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3171 && !(vnodeEssence->modeBits & 0111)) {
3172 afs_sfsize_t nBytes;
3178 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3179 vnodeEssence->InodeNumber);
3182 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3186 size = FDH_SIZE(fdP);
3188 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3189 FDH_REALLYCLOSE(fdP);
3196 nBytes = FDH_PREAD(fdP, buf, size, 0);
3197 if (nBytes == size) {
3199 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3200 Log("Volume %u (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3201 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3202 Testing ? "would convert" : "converted");
3203 vnodeEssence->modeBits |= 0111;
3204 vnodeEssence->changed = 1;
3205 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3206 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3207 dir->name ? dir->name : "??", name, buf);
3209 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3210 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3212 FDH_REALLYCLOSE(fdP);
3215 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3216 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/* Remember the first name seen for a large (directory-class) vnode;
 * GetDirName() later builds path names from these. */
3217 if (vnodeIdToClass(vnodeNumber) == vLarge
3218 && vnodeEssence->name == NULL) {
3219 vnodeEssence->name = strdup(name);
3222 /* The directory entry points to the vnode. Check to see if the
3223 * vnode points back to the directory. If not, then let the
3224 * directory claim it (else it might end up orphaned). Vnodes
3225 * already claimed by another directory are deleted from this
3226 * directory: hardlinks to the same vnode are not allowed
3227 * from different directories.
3229 if (vnodeEssence->parent != dir->vnodeNumber) {
3230 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3231 /* Vnode does not point back to this directory.
3232 * Orphaned dirs cannot claim a file (it may belong to
3233 * another non-orphaned dir).
3236 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3238 vnodeEssence->parent = dir->vnodeNumber;
3239 vnodeEssence->changed = 1;
3241 /* Vnode was claimed by another directory */
3244 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3245 } else if (vnodeNumber == 1) {
3246 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3248 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3252 CopyOnWrite(salvinfo, dir);
3253 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3258 /* This directory claims the vnode */
3259 vnodeEssence->claimed = 1;
/* count starts out as the vnode's link count (see DistilVnodeEssence); each
 * directory entry that is kept consumes one. */
3261 vnodeEssence->count--;
3266 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3267 VnodeClass class, Inode ino, Unique * maxu)
3269 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3270 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3271 char buf[SIZEOF_LARGEDISKVNODE];
3272 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3274 StreamHandle_t *file;
3279 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3280 fdP = IH_OPEN(vip->handle);
3281 opr_Assert(fdP != NULL);
3282 file = FDH_FDOPEN(fdP, "r+");
3283 opr_Assert(file != NULL);
3284 size = OS_SIZE(fdP->fd_fd);
3285 opr_Assert(size != -1);
3286 vip->nVnodes = (size / vcp->diskSize) - 1;
3287 if (vip->nVnodes > 0) {
3288 opr_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3289 opr_Verify(STREAM_ASEEK(file, vcp->diskSize) == 0);
3290 opr_Verify((vip->vnodes = calloc(vip->nVnodes,
3291 sizeof(struct VnodeEssence)))
3293 if (class == vLarge) {
3294 opr_Verify((vip->inodes = calloc(vip->nVnodes, sizeof(Inode)))
3304 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3305 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3306 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3307 nVnodes--, vnodeIndex++) {
3308 if (vnode->type != vNull) {
3309 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3310 afs_fsize_t vnodeLength;
3311 vip->nAllocatedVnodes++;
3312 vep->count = vnode->linkCount;
3313 VNDISK_GET_LEN(vnodeLength, vnode);
3314 vep->blockCount = nBlocks(vnodeLength);
3315 vip->volumeBlockCount += vep->blockCount;
3316 vep->parent = vnode->parent;
3317 vep->unique = vnode->uniquifier;
3318 if (*maxu < vnode->uniquifier)
3319 *maxu = vnode->uniquifier;
3320 vep->modeBits = vnode->modeBits;
3321 vep->InodeNumber = VNDISK_GET_INO(vnode);
3322 vep->type = vnode->type;
3323 vep->author = vnode->author;
3324 vep->owner = vnode->owner;
3325 vep->group = vnode->group;
3326 if (vnode->type == vDirectory) {
3327 if (class != vLarge) {
3328 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3329 vip->nAllocatedVnodes--;
3330 memset(vnode, 0, sizeof(*vnode));
3331 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3332 vnodeIndexOffset(vcp, vnodeNumber),
3333 (char *)&vnode, sizeof(vnode));
3334 salvinfo->VolumeChanged = 1;
3336 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
3345 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3348 struct VnodeEssence *parentvp;
3354 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3355 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3356 strcat(path, OS_DIRSEP);
3357 strcat(path, vp->name);
3363 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3364 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
3367 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3369 struct VnodeEssence *vep;
3372 return (1); /* Vnode zero does not exist */
3374 return (0); /* The root dir vnode is always claimed */
3375 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3376 if (!vep || !vep->claimed)
3377 return (1); /* Vnode is not claimed - it is orphaned */
3379 return (IsVnodeOrphaned(salvinfo, vep->parent));
/*
 * SalvageDir -- salvage the directory stored in slot i of the large vnode
 * table, after first salvaging its parent directory.
 *
 * salvinfo      salvage job state.
 * name          passed on unchanged to the recursive call for the parent;
 *               no other use is visible in this excerpt.
 * rwVid         id of the read/write volume; used when releasing the old
 *               directory inode.
 * dirVnodeInfo  large-vnode table (vnodes[] essences and inodes[]).
 * alinkH        link table handle stored in the directory summary.
 * i             index of the directory in dirVnodeInfo.
 * rootdir       receives a copy of the DirSummary when the directory is
 *               vnode 1 with uniquifier 1.
 * rootdirfound  NOTE(review): not referenced on any line visible here;
 *               presumably set together with rootdir -- confirm.
 *
 * Each directory is handled at most once (the `salvaged` flag).  The
 * directory is checked with DirOK() -- or treated as bad outright when
 * RebuildDirs is set and Testing is not -- rebuilt with CopyAndSalvage()
 * when bad, and then every entry is passed to JudgeEntry().  The directory
 * summary, handle and path buffer are static, so the function is not
 * reentrant beyond the parent-first recursion at the top.
 */
3383 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3384 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3385 struct DirSummary *rootdir, int *rootdirfound)
3387 static struct DirSummary dir;
3388 static struct DirHandle dirHandle;
3389 struct VnodeEssence *parent;
3390 static char path[MAXPATHLEN];
3393 if (dirVnodeInfo->vnodes[i].salvaged)
3394 return; /* already salvaged */
3397 dirVnodeInfo->vnodes[i].salvaged = 1;
3399 if (dirVnodeInfo->inodes[i] == 0)
3400 return; /* Not allocated to a directory */
3402 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3403 if (dirVnodeInfo->vnodes[i].parent) {
3404 Log("Bad parent, vnode 1; %s...\n",
3405 (Testing ? "skipping" : "salvaging"));
3406 dirVnodeInfo->vnodes[i].parent = 0;
3407 dirVnodeInfo->vnodes[i].changed = 1;
/* Salvage the parent directory first when that has not happened yet. */
3410 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3411 if (parent && parent->salvaged == 0)
3412 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3413 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3414 rootdir, rootdirfound);
3417 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3418 dir.unique = dirVnodeInfo->vnodes[i].unique;
3421 dir.parent = dirVnodeInfo->vnodes[i].parent;
3422 dir.haveDot = dir.haveDotDot = 0;
3423 dir.ds_linkH = alinkH;
3424 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3425 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
/* With RebuildDirs set (and Testing clear) the directory is treated as bad
 * without consulting DirOK(). */
3427 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3430 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3431 (Testing ? "skipping" : "salvaging"));
3434 CopyAndSalvage(salvinfo, &dir);
3436 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3439 dirHandle = dir.dirHandle;
3442 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3443 &dirVnodeInfo->vnodes[i], path);
3446 /* If enumeration failed for random reasons, we will probably delete
3447 * too much stuff, so we guard against this instead.
3449 struct judgeEntry_params judge_params;
3450 judge_params.salvinfo = salvinfo;
3451 judge_params.dir = &dir;
3453 opr_Verify(afs_dir_EnumerateDir(&dirHandle, JudgeEntry,
3454 &judge_params) == 0);
3457 /* Delete the old directory if it was copied in order to salvage.
3458 * CopyOnWrite has written the new inode # to the disk, but we still
3459 * have the old one in our local structure here. Thus, we idec the
3463 if (dir.copied && !Testing) {
3464 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3465 opr_Assert(code == 0);
3466 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3469 /* Remember rootdir DirSummary _after_ it has been judged */
3470 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3471 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3479 * Get a new FID that can be used to create a new file.
3481 * @param[in] volHeader vol header for the volume
3482 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3483 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3484 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3485 * updated to the new max unique if we create a new
/*
 * GetNewFID: pick the vnode number and uniquifier for a vnode that is about
 * to be created in the given vnode class.
 *
 * The vnodes array of vnodeInfo[class] is scanned for an entry of type
 * vNull. When the scan index equals nVnodes, nVnodes is incremented, the
 * array is grown to i+1 entries with realloc(), and the new entry is typed
 * vNull. afid->Vnode is derived from the index with bitNumberToVnodeNumber().
 *
 * Uniquifier choice:
 *  - header uniquifier less than *maxunique + 1: the header value is treated
 *    as bad and afid->Unique becomes *maxunique + 1 + 2000;
 *  - otherwise the header uniquifier is used for afid->Unique, stored into
 *    *maxunique, and post-incremented in volHeader.
 *
 * NOTE(review): the realloc() result is written straight back into the
 * vnodes pointer and no failure check is visible in these lines; only the
 * type field of the appended entry is initialised here -- confirm against
 * the full body.
 */
3489 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3490 VnodeClass class, AFSFid *afid, Unique *maxunique)
3493 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3494 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
3498 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3499 /* no free vnodes; make a new one */
3500 salvinfo->vnodeInfo[class].nVnodes++;
3501 salvinfo->vnodeInfo[class].vnodes =
3502 realloc(salvinfo->vnodeInfo[class].vnodes,
3503 sizeof(struct VnodeEssence) * (i+1));
3505 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
3508 afid->Vnode = bitNumberToVnodeNumber(i, class);
3510 if (volHeader->uniquifier < (*maxunique + 1)) {
3511 /* header uniq is bad; it will get bumped by 2000 later */
3512 afid->Unique = *maxunique + 1 + 2000;
3515 /* header uniq seems okay; just use that */
3516 afid->Unique = *maxunique = volHeader->uniquifier++;
3521 * Create a vnode for a README file explaining not to use a recreated-root vol.
3523 * @param[in] volHeader vol header for the volume
3524 * @param[in] alinkH ihandle for i/o for the volume
3525 * @param[in] vid volume id
3526 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3527 * updated to the new max unique if we create a new
3529 * @param[out] afid FID for the new readme vnode
3530 * @param[out] ainode the inode for the new readme file
3532 * @return operation status
/*
 * CreateReadme: create a small-vnode file holding a fixed explanatory note
 * for a volume whose root directory had to be recreated.
 *
 * Steps visible here:
 *  1. length is sizeof(readme) - 1, i.e. the note without its trailing NUL.
 *  2. GetNewFID() supplies the vnode number and uniquifier in *afid, and vep
 *     points at the matching small-vnode VnodeEssence slot.
 *  3. IH_CREATE makes the data inode; the note is written with IH_IWRITE at
 *     offset 0 and a short write is logged.
 *  4. A zeroed SIZEOF_SMALLDISKVNODE buffer is filled in as a vFile vnode
 *     (mode 0777, link count 1, data version 1, modify times set to now) and
 *     written into the small vnode index at the offset for afid->Vnode.
 *  5. The in-memory bookkeeping is updated: nAllocatedVnodes,
 *     volumeBlockCount and the VnodeEssence fields copied from rvnode.
 *  6. *ainode receives the new inode number.
 * The trailing IH_DEC belongs to the recovery path and releases the readme
 * inode; a failure there is only logged.
 *
 * NOTE(review): the short-write message prints sizeof(readme) as the
 * expected size while the comparison uses length, which is one less.
 * NOTE(review): the allocation-failure message is prefixed CreateRootDir
 * although it is emitted from CreateReadme.
 */
3537 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3538 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3542 struct VnodeDiskObject *rvnode = NULL;
3544 IHandle_t *readmeH = NULL;
3545 struct VnodeEssence *vep;
3547 time_t now = time(NULL);
3549 /* Try to make the note brief, but informative. Only administrators should
3550 * be able to read this file at first, so we can hopefully assume they
3551 * know what AFS is, what a volume is, etc. */
3553 "This volume has been salvaged, but has lost its original root directory.\n"
3554 "The root directory that exists now has been recreated from orphan files\n"
3555 "from the rest of the volume. This recreated root directory may interfere\n"
3556 "with old cached data on clients, and there is no way the salvager can\n"
3557 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3558 "use this volume, but only copy the salvaged data to a new volume.\n"
3559 "Continuing to use this volume as it exists now may cause some clients to\n"
3560 "behave oddly when accessing this volume.\n"
3561 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3562 /* ^ the person reading this probably just lost some data, so they could
3563 * use some cheering up. */
3565 /* -1 for the trailing NUL */
3566 length = sizeof(readme) - 1;
3568 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
3570 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3572 /* create the inode and write the contents */
3573 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3574 salvinfo->fileSysPath, 0, vid,
3575 afid->Vnode, afid->Unique, 1);
3576 if (!VALID_INO(readmeinode)) {
3577 Log("CreateReadme: readme IH_CREATE failed\n");
3581 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3582 bytes = IH_IWRITE(readmeH, 0, readme, length);
3583 IH_RELEASE(readmeH);
3585 if (bytes != length) {
3586 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3587 (int)sizeof(readme));
3591 /* create the vnode and write it out */
3592 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
3594 Log("CreateRootDir: error alloc'ing memory\n");
3598 rvnode->type = vFile;
3600 rvnode->modeBits = 0777;
3601 rvnode->linkCount = 1;
3602 VNDISK_SET_LEN(rvnode, length);
3603 rvnode->uniquifier = afid->Unique;
3604 rvnode->dataVersion = 1;
3605 VNDISK_SET_INO(rvnode, readmeinode);
3606 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3611 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
3613 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3614 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3615 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3617 if (bytes != SIZEOF_SMALLDISKVNODE) {
3618 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3619 (int)SIZEOF_SMALLDISKVNODE);
3623 /* update VnodeEssence for new readme vnode */
3624 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
3626 vep->blockCount = nBlocks(length);
3627 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3628 vep->parent = rvnode->parent;
3629 vep->unique = rvnode->uniquifier;
3630 vep->modeBits = rvnode->modeBits;
3631 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3632 vep->type = rvnode->type;
3633 vep->author = rvnode->author;
3634 vep->owner = rvnode->owner;
3635 vep->group = rvnode->group;
3645 *ainode = readmeinode;
3650 if (IH_DEC(alinkH, readmeinode, vid)) {
3651 Log("CreateReadme (recovery): IH_DEC failed\n");
3663 * create a root dir for a volume that lacks one.
3665 * @param[in] volHeader vol header for the volume
3666 * @param[in] alinkH ihandle for disk access for this volume group
3667 * @param[in] vid volume id we're dealing with
3668 * @param[out] rootdir populated with info about the new root dir
3669 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3670 * updated to the new max unique if we create a new
3673 * @return operation status
/*
 * CreateRootDir: build a replacement root directory (vnode 1, unique 1) for
 * a volume that has none, and describe it in *rootdir.
 *
 * Preconditions checked here:
 *  - if both the large and the small vnode arrays are absent, nothing is
 *    created and a message is logged;
 *  - if only the large vnode array is absent, one-entry vnodes and inodes
 *    arrays are allocated with calloc() and asserted non-NULL;
 *  - if the VnodeEssence for vnode 1 is not vNull, nothing is created.
 *
 * Construction:
 *  1. CreateReadme() makes the README vnode; its FID lands in readmeid.
 *  2. IH_CREATE makes the directory inode and a salvage dir handle is set
 *     up on it.
 *  3. afs_dir_MakeDir() is given did as both of its FID arguments, then
 *     afs_dir_Create() adds the README.ROOTDIR entry pointing at readmeid.
 *  4. The directory length is read with afs_dir_Length() and DZap() is
 *     called on the handle.
 *  5. A zeroed SIZEOF_LARGEDISKVNODE buffer becomes the root vnode: type
 *     vDirectory, mode 0777, link count 2, uniquifier 1, data version dv,
 *     parent 0. Its ACL has an entry for id -204 (system:administrators)
 *     granting PRSFS_READ | PRSFS_LOOKUP only.
 *  6. The vnode is written at the index offset for vnode 1, then the
 *     VnodeEssence, the per-class counters and *rootdir are updated.
 * The two trailing IH_DEC calls form the recovery path; they run only when
 * decroot / decreadme are set and a failure there is only logged.
 *
 * NOTE(review): rootdir->name is assigned from strdup() and no NULL check
 * is visible in these lines -- confirm.
 */
3678 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3679 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
3683 int decroot = 0, decreadme = 0;
3684 AFSFid did, readmeid;
3687 struct VnodeDiskObject *rootvnode = NULL;
3688 struct acl_accessList *ACL;
3691 struct VnodeEssence *vep;
3692 Inode readmeinode = 0;
3693 time_t now = time(NULL);
3695 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3696 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3700 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3701 /* We don't have any large vnodes in the volume; allocate room
3702 * for one so we can recreate the root dir */
3703 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3704 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3705 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3707 opr_Assert(salvinfo->vnodeInfo[vLarge].vnodes);
3708 opr_Assert(salvinfo->vnodeInfo[vLarge].inodes);
3711 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3712 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3713 if (vep->type != vNull) {
3714 Log("Not creating new root dir; existing vnode 1 is non-null\n");
3718 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3719 &readmeinode) != 0) {
3724 /* set the DV to a very high number, so it is unlikely that we collide
3725 * with a cached DV */
3728 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3730 if (!VALID_INO(rootinode)) {
3731 Log("CreateRootDir: IH_CREATE failed\n");
3736 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3737 rootinode, &salvinfo->VolumeChanged);
3741 if (afs_dir_MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3742 Log("CreateRootDir: MakeDir failed\n");
3745 if (afs_dir_Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3746 Log("CreateRootDir: Create failed\n");
3750 length = afs_dir_Length(&rootdir->dirHandle);
3751 DZap(&rootdir->dirHandle);
3753 /* create the new root dir vnode */
3754 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
3756 Log("CreateRootDir: malloc failed\n");
3760 /* only give 'rl' permissions to 'system:administrators'. We do this to
3761 * try to catch the attention of an administrator, that they should not
3762 * be writing to this directory or continue to use it. */
3763 ACL = VVnodeDiskACL(rootvnode);
3764 ACL->size = sizeof(struct acl_accessList);
3765 ACL->version = ACL_ACLVERSION;
3769 ACL->entries[0].id = -204; /* system:administrators */
3770 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
3772 rootvnode->type = vDirectory;
3773 rootvnode->cloned = 0;
3774 rootvnode->modeBits = 0777;
3775 rootvnode->linkCount = 2;
3776 VNDISK_SET_LEN(rootvnode, length);
3777 rootvnode->uniquifier = 1;
3778 rootvnode->dataVersion = dv;
3779 VNDISK_SET_INO(rootvnode, rootinode);
3780 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3781 rootvnode->author = 0;
3782 rootvnode->owner = 0;
3783 rootvnode->parent = 0;
3784 rootvnode->group = 0;
3785 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3787 /* write it out to disk */
3788 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3789 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3790 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3792 if (bytes != SIZEOF_LARGEDISKVNODE) {
3793 /* just cast to int and don't worry about printing real 64-bit ints;
3794 * a large disk vnode isn't anywhere near the 32-bit limit */
3795 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3796 (int)SIZEOF_LARGEDISKVNODE);
3800 /* update VnodeEssence for the new root vnode */
3801 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3803 vep->blockCount = nBlocks(length);
3804 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3805 vep->parent = rootvnode->parent;
3806 vep->unique = rootvnode->uniquifier;
3807 vep->modeBits = rootvnode->modeBits;
3808 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3809 vep->type = rootvnode->type;
3810 vep->author = rootvnode->author;
3811 vep->owner = rootvnode->owner;
3812 vep->group = rootvnode->group;
3822 /* update DirSummary for the new root vnode */
3823 rootdir->vnodeNumber = 1;
3824 rootdir->unique = 1;
3825 rootdir->haveDot = 1;
3826 rootdir->haveDotDot = 1;
3827 rootdir->rwVid = vid;
3828 rootdir->copied = 0;
3829 rootdir->parent = 0;
3830 rootdir->name = strdup(".");
3831 rootdir->vname = volHeader->name;
3832 rootdir->ds_linkH = alinkH;
3839 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3840 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3842 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3843 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3853 * salvage a volume group.
3855 * @param[in] salvinfo information for the current salvage job
3856 * @param[in] rwIsp inode summary for rw volume
3857 * @param[in] alinkH link table inode handle
3859 * @return operation status
/*
 * SalvageVolume: salvage the read-write volume described by rwIsp.
 *
 * Phases, in the order they appear below:
 *  1. Read the volume header through an inode handle and assert its size,
 *     its magic, and that destroyMe is not DESTROY_ME.
 *  2. DistilVnodeEssence() is run for the large and then the small vnode
 *     index; both calls share maxunique.
 *  3. SalvageDir() is called for every large-vnode index, followed by a
 *     filesystem sync (nt_sync or sync).
 *  4. If no root directory was found, orphans is ORPH_ATTACH and Testing is
 *     off, CreateRootDir() is attempted.
 *  5. Orphan pass over every class and vnode, skipping unused, claimed and
 *     root vnodes. For an orphaned directory the parent bookkeeping is
 *     corrected. With ORPH_ATTACH the vnode is re-parented to the root
 *     directory; for directories the ".." entry is deleted and recreated.
 *     The vnode is then entered in the root directory under a generated
 *     name (__ORPHANDIR__ or __ORPHANFILE__ plus vnode and unique), with up
 *     to 10 attempts and ThisUnique raised by 50 between attempts. The
 *     final code is asserted to be 0.
 *  6. If the root directory became copied during the pass, the old root
 *     inode is released with IH_DEC.
 *  7. DFlush(); if there is still no root directory while orphans is
 *     ORPH_ATTACH, that is logged and orphans is set to ORPH_IGNORE.
 *  8. Write-back pass: each vnode that is changed or has a non-zero count
 *     is read, its parent and link count are updated, orphans are removed
 *     when requested or flagged todelete, and the vnode is written back.
 *  9. Cached vnode names are freed, filecount and diskused are set, and a
 *     too-low header uniquifier is raised to maxunique + 1 + 2000.
 * 10. If the volume changed and Testing is off, callbacks are broken via
 *     FSYNC_VolOp when useFSYNC is set; in demand-attach builds without
 *     useFSYNC the fileserver state file is unlinked instead.
 * 11. The header flags are reset (inUse, needsSalvaged, inService,
 *     needsCallback, dontSalvage) and the header is written back, a summary
 *     is logged, and both vnode index handles are released.
 *
 * Link-count convention: VnodeEssence.count is subtracted from the on-disk
 * linkCount in the write-back pass, so decrementing count raises the final
 * link count and incrementing it lowers it. This is why the code under the
 * "Inc link count" comments uses count--.
 *
 * NOTE(review): orphans is not among the locals declared in the visible
 * lines, so the ORPH_IGNORE assignment presumably changes state that
 * outlives this call -- confirm.
 */
3863 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3865 /* This routine, for now, will only be called for read-write volumes */
3867 int BlocksInVolume = 0, FilesInVolume = 0;
3869 struct DirSummary rootdir, oldrootdir;
3870 struct VnodeInfo *dirVnodeInfo;
3871 struct VnodeDiskObject vnode;
3872 VolumeDiskData volHeader;
3874 int orphaned, rootdirfound = 0;
3875 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3876 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3877 struct VnodeEssence *vep;
3880 afs_sfsize_t nBytes;
3882 VnodeId LFVnode, ThisVnode;
3883 Unique LFUnique, ThisUnique;
3887 vid = rwIsp->volSummary->header.id;
3888 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3889 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3890 opr_Assert(nBytes == sizeof(volHeader));
3891 opr_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3892 opr_Assert(volHeader.destroyMe != DESTROY_ME);
3893 /* (should not have gotten this far with DESTROY_ME flag still set!) */
3895 DistilVnodeEssence(salvinfo, vid, vLarge,
3896 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
3897 DistilVnodeEssence(salvinfo, vid, vSmall,
3898 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
3900 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
3901 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3902 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
3903 &rootdir, &rootdirfound);
3906 nt_sync(salvinfo->fileSysDevice);
3908 sync(); /* This used to be done lower level, for every dir */
3915 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
3917 Log("Cannot find root directory for volume %lu; attempting to create "
3918 "a new one\n", afs_printable_uint32_lu(vid));
3920 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
3925 salvinfo->VolumeChanged = 1;
3929 /* Parse each vnode looking for orphaned vnodes and
3930 * connect them to the tree as orphaned (if requested).
3932 oldrootdir = rootdir;
3933 for (class = 0; class < nVNODECLASSES; class++) {
3934 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
3935 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
3936 ThisVnode = bitNumberToVnodeNumber(v, class);
3937 ThisUnique = vep->unique;
3939 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3940 continue; /* Ignore unused, claimed, and root vnodes */
3942 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3943 * entry in this vnode had incremented the parent link count (In
3944 * JudgeEntry()). We need to go to the parent and decrement that
3945 * link count. But if the parent's unique is zero, then the parent
3946 * link count was not incremented in JudgeEntry().
3948 if (class == vLarge) { /* directory vnode */
3949 pv = vnodeIdToBitNumber(vep->parent);
3950 if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
3951 if (vep->parent == 1 && newrootdir) {
3952 /* this vnode's parent was the volume root, and
3953 * we just created the volume root. So, the parent
3954 * dir didn't exist during JudgeEntry, so the link
3955 * count was not inc'd there, so don't dec it here.
3961 salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
3967 continue; /* If no rootdir, can't attach orphaned files */
3969 /* Here we attach orphaned files and directories into the
3970 * root directory, LVVnode, making sure link counts stay correct.
3972 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3973 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3974 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3976 /* Update this orphaned vnode's info. Its parent info and
3977 * link count (do for orphaned directories and files).
3979 vep->parent = LFVnode; /* Parent is the root dir */
3980 vep->unique = LFUnique;
3983 vep->count--; /* Inc link count (root dir will pt to it) */
3985 /* If this orphaned vnode is a directory, change '..'.
3986 * The name of the orphaned dir/file is unknown, so we
3987 * build a unique name. No need to CopyOnWrite the directory
3988 * since it is not connected to tree in BK or RO volume and
3989 * won't be visible there.
3991 if (class == vLarge) {
3995 /* Remove and recreate the ".." entry in this orphaned directory */
3996 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
3997 salvinfo->vnodeInfo[class].inodes[v],
3998 &salvinfo->VolumeChanged);
4000 pa.Unique = LFUnique;
4001 opr_Verify(afs_dir_Delete(&dh, "..") == 0);
4002 opr_Verify(afs_dir_Create(&dh, "..", &pa) == 0);
4004 /* The original parent's link count was decremented above.
4005 * Here we increment the new parent's link count.
4007 pv = vnodeIdToBitNumber(LFVnode);
4008 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
4012 /* Go to the root dir and add this entry. The link count of the
4013 * root dir was incremented when ".." was created. Try 10 times.
4015 for (j = 0; j < 10; j++) {
4016 pa.Vnode = ThisVnode;
4017 pa.Unique = ThisUnique;
4019 snprintf(npath, sizeof npath, "%s.%u.%u",
4020 ((class == vLarge) ? "__ORPHANDIR__"
4021 : "__ORPHANFILE__"),
4022 ThisVnode, ThisUnique);
4024 CopyOnWrite(salvinfo, &rootdir);
4025 code = afs_dir_Create(&rootdir.dirHandle, npath, &pa);
4029 ThisUnique += 50; /* Try creating a different file */
4031 opr_Assert(code == 0);
4032 Log("Attaching orphaned %s to volume's root dir as %s\n",
4033 ((class == vLarge) ? "directory" : "file"), npath);
4035 } /* for each vnode in the class */
4036 } /* for each class of vnode */
4038 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
4040 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
4042 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
4044 opr_Assert(code == 0);
4045 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
4048 DFlush(); /* Flush the changes */
4049 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
4050 Log("Cannot attach orphaned files and directories: Root directory not found\n");
4051 orphans = ORPH_IGNORE;
4054 /* Write out all changed vnodes. Orphaned files and directories
4055 * will get removed here also (if requested).
4057 for (class = 0; class < nVNODECLASSES; class++) {
4058 afs_sfsize_t nVnodes = salvinfo->vnodeInfo[class].nVnodes;
4059 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
4060 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
4061 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
4062 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
4063 for (i = 0; i < nVnodes; i++) {
4064 struct VnodeEssence *vnp = &vnodes[i];
4065 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
4067 /* If the vnode is good but is unclaimed (not listed in
4068 * any directory entries), then it is orphaned.
4071 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
4072 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
4076 if (vnp->changed || vnp->count) {
4079 IH_IREAD(salvinfo->vnodeInfo[class].handle,
4080 vnodeIndexOffset(vcp, vnodeNumber),
4081 (char *)&vnode, sizeof(vnode));
4082 opr_Assert(nBytes == sizeof(vnode));
4084 vnode.parent = vnp->parent;
4085 oldCount = vnode.linkCount;
4086 vnode.linkCount = vnode.linkCount - vnp->count;
4089 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4091 if (!vnp->todelete) {
4092 /* Orphans should have already been attached (if requested) */
4093 opr_Assert(orphans != ORPH_ATTACH);
4094 oblocks += vnp->blockCount;
4097 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4099 BlocksInVolume -= vnp->blockCount;
4101 if (VNDISK_GET_INO(&vnode)) {
4103 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4104 opr_Assert(code == 0);
4106 memset(&vnode, 0, sizeof(vnode));
4108 } else if (vnp->count) {
4110 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4113 vnode.modeBits = vnp->modeBits;
4116 vnode.dataVersion++;
4119 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4120 vnodeIndexOffset(vcp, vnodeNumber),
4121 (char *)&vnode, sizeof(vnode));
4122 opr_Assert(nBytes == sizeof(vnode));
4124 salvinfo->VolumeChanged = 1;
4128 if (!Showmode && ofiles) {
4129 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4131 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
4135 for (class = 0; class < nVNODECLASSES; class++) {
4136 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4137 for (i = 0; i < vip->nVnodes; i++)
4138 if (vip->vnodes[i].name)
4139 free(vip->vnodes[i].name);
4146 /* Set correct resource utilization statistics */
4147 volHeader.filecount = FilesInVolume;
4148 volHeader.diskused = BlocksInVolume;
4150 /* Make sure the uniquifer is big enough: maxunique is the real maxUniquifier */
4151 if (volHeader.uniquifier < (maxunique + 1)) {
4153 Log("Volume uniquifier is too low; fixed\n");
4154 /* Plus 2,000 in case there are workstations out there with
4155 * cached vnodes that have since been deleted
4157 volHeader.uniquifier = (maxunique + 1 + 2000);
4161 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4162 "Only use this salvaged volume to copy data to another volume; "
4163 "do not continue to use this volume (%lu) as-is.\n",
4164 afs_printable_uint32_lu(vid));
4167 if (!Testing && salvinfo->VolumeChanged) {
4168 #ifdef FSSYNC_BUILD_CLIENT
4169 if (salvinfo->useFSYNC) {
4170 afs_int32 fsync_code;
4172 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4174 Log("Error trying to tell the fileserver to break callbacks for "
4175 "changed volume %lu; error code %ld\n",
4176 afs_printable_uint32_lu(vid),
4177 afs_printable_int32_ld(fsync_code));
4179 salvinfo->VolumeChanged = 0;
4182 #endif /* FSSYNC_BUILD_CLIENT */
4184 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4185 if (!salvinfo->useFSYNC) {
4186 /* A volume's contents have changed, but the fileserver will not
4187 * break callbacks on the volume until it tries to load the vol
4188 * header. So, to reduce the amount of time a client could have
4189 * stale data, remove fsstate.dat, so the fileserver will init
4190 * callback state with all clients. This is a very coarse hammer,
4191 * and in the future we should just record which volumes have
4193 code = unlink(AFSDIR_SERVER_FSSTATE_FILEPATH);
4194 if (code && errno != ENOENT) {
4195 Log("Error %d when trying to unlink FS state file %s\n", errno,
4196 AFSDIR_SERVER_FSSTATE_FILEPATH);
4202 /* Turn off the inUse bit; the volume's been salvaged! */
4203 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4204 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4205 volHeader.inService = 1; /* allow service again */
4206 volHeader.needsCallback = (salvinfo->VolumeChanged != 0);
4207 volHeader.dontSalvage = DONT_SALVAGE;
4208 salvinfo->VolumeChanged = 0;
4210 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4211 opr_Assert(nBytes == sizeof(volHeader));
4214 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
4215 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
4216 FilesInVolume, BlocksInVolume);
4219 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4220 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
/*
 * ClearROInUseBit: reset the salvage-related flags in a volume header.
 *
 * Reads the VolumeDiskData at offset 0 through summary->volumeInfoHandle
 * and asserts that the full structure was read and that the magic is
 * VOLUMEINFOMAGIC. It then clears inUse and needsSalvaged, sets inService
 * to 1 and dontSalvage to DONT_SALVAGE, and writes the structure back,
 * asserting that the full structure was written.
 */
4226 ClearROInUseBit(struct VolumeSummary *summary)
4228 IHandle_t *h = summary->volumeInfoHandle;
4229 afs_sfsize_t nBytes;
4231 VolumeDiskData volHeader;
4233 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4234 opr_Assert(nBytes == sizeof(volHeader));
4235 opr_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
4236 volHeader.inUse = 0;
4237 volHeader.needsSalvaged = 0;
4238 volHeader.inService = 1;
4239 volHeader.dontSalvage = DONT_SALVAGE;
4241 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4242 opr_Assert(nBytes == sizeof(volHeader));
4247 * Possibly delete the volume.
4249 * deleteMe - Always do so, only a partial volume.
/*
 * MaybeZapVolume: delete a volume that cannot be salvaged, or abort.
 *
 * When the volume is read-only (readOnly(isp)) or deleteMe is set, and a
 * volSummary exists that is not already marked deleted:
 *  - an explanation is logged (partial volume, or read-only volume that
 *    should be recloned);
 *  - the header file path is built from salvinfo->fileSysPath and the
 *    external volume name;
 *  - VDestroyVolumeDiskHeader() is called and a failure is logged;
 *  - the header file is unlinked, with ENOENT tolerated;
 *  - AskDelete() is called when salvinfo->useFSYNC is set;
 *  - volSummary->deleted is set to 1.
 * Otherwise, when check is zero, the failure is logged and the salvage is
 * aborted with Abort().
 *
 * NOTE(review): path is filled with sprintf(), which has no length bound;
 * the declaration of path is not visible here, so its size relative to
 * fileSysPath plus filename should be confirmed.
 */
4252 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4253 char *message, int deleteMe, int check)
4255 if (readOnly(isp) || deleteMe) {
4256 if (isp->volSummary && !isp->volSummary->deleted) {
4259 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
4261 Log("It will be deleted on this server (you may find it elsewhere)\n");
4264 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
4266 Log("it will be deleted instead. It should be recloned.\n");
4271 char filename[VMAXPATHLEN];
4272 VolumeExternalName_r(isp->volumeId, filename, sizeof(filename));
4273 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, filename);
4275 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4277 Log("Error %ld destroying volume disk header for volume %lu\n",
4278 afs_printable_int32_ld(code),
4279 afs_printable_uint32_lu(isp->volumeId));
4282 /* make sure we actually delete the header file; ENOENT
4283 * is fine, since VDestroyVolumeDiskHeader probably already
4285 if (unlink(path) && errno != ENOENT) {
4286 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4288 if (salvinfo->useFSYNC) {
4289 AskDelete(salvinfo, isp->volumeId);
4291 isp->volSummary->deleted = 1;
4294 } else if (!check) {
4295 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
4297 Abort("Salvage of volume %u aborted\n", isp->volumeId);
4301 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4303 * Locks a volume on disk for salvaging.
4305 * @param[in] volumeId volume ID to lock
4307 * @return operation status
4309 * @retval -1 volume lock raced with a fileserver restart; all volumes must be
4310 * checked out and locked again
/*
 * LockVolume: take the on-disk lock for volumeId before salvaging it.
 *
 * Steps visible here:
 *  1. The lock type comes from VVolLockType(V_VOLUPD, 1) and the lock is
 *     requested without blocking via VLockVolumeByIdNB(). EBUSY aborts with
 *     a message that someone else is using the volume; the other failure
 *     message aborts with the error code.
 *  2. FSYNC_VerifyCheckout() confirms the volume is still checked out.
 *     SYNC_DENIED is treated as a need to retry checking out volumes; any
 *     other result that is not SYNC_OK aborts.
 *  3. The volume disk header is read, converted with DiskToVolumeHeader(),
 *     and the VolumeDiskData is read through an inode handle. If that read
 *     succeeds with the right magic, inUse is set to programType and the
 *     header is written back under opr_Verify, so a crash mid-salvage
 *     leaves the volume marked as needing a salvage.
 */
4315 LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
4320 /* should always be WRITE_LOCK, but keep the lock-type logic all
4321 * in one place, in VVolLockType. Params will be ignored, but
4322 * try to provide what we're logically doing. */
4323 locktype = VVolLockType(V_VOLUPD, 1);
4325 code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
4327 if (code == EBUSY) {
4328 Abort("Someone else appears to be using volume %lu; Aborted\n",
4329 afs_printable_uint32_lu(volumeId));
4331 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4332 afs_printable_int32_ld(code),
4333 afs_printable_uint32_lu(volumeId));
4336 code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPartition->name, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4337 if (code == SYNC_DENIED) {
4338 /* need to retry checking out volumes */
4341 if (code != SYNC_OK) {
4342 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4343 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4346 /* set inUse = programType in the volume header to ensure that nobody
4347 * tries to use this volume again without salvaging, if we somehow crash
4348 * or otherwise exit before finishing the salvage.
4352 struct VolumeHeader header;
4353 struct VolumeDiskHeader diskHeader;
4354 struct VolumeDiskData volHeader;
4356 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
4361 DiskToVolumeHeader(&header, &diskHeader);
4363 IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
4364 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4365 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4371 volHeader.inUse = programType;
4373 /* If we can't re-write the header, bail out and error. We don't
4374 * assert when reading the header, since it's possible the
4375 * header isn't really there (when there's no data associated
4376 * with the volume; we just delete the vol header file in that
4377 * case). But if it's there enough that we can read it, but
4378 * somehow we cannot write to it to signify we're salvaging it,
4379 * we've got a big problem and we cannot continue. */
4380 opr_Verify(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader))
4381 == sizeof(volHeader));
4388 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
/*
 * AskError: ask the fileserver to force volumeId into an error state.
 *
 * The body is compiled only when AFS_DEMAND_ATTACH_FS or
 * AFS_DEMAND_ATTACH_UTIL is defined. It sends FSYNC_VOL_FORCE_ERROR for the
 * volume on salvinfo->fileSysPartition and logs the SYNC result code and
 * its string form when the result is not SYNC_OK. No retry is made.
 */
4391 AskError(struct SalvInfo *salvinfo, VolumeId volumeId)
4393 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4395 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4396 FSYNC_VOL_FORCE_ERROR, FSYNC_WHATEVER, NULL);
4397 if (code != SYNC_OK) {
4398 Log("AskError: failed to force volume %lu into error state; "
4399 "SYNC error code %ld (%s)\n", (long unsigned)volumeId,
4400 (long)code, SYNC_res2string(code));
4402 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
/*
 * AskOffline: ask the fileserver to take volumeId offline for salvaging.
 *
 * Sends FSYNC_VOL_OFF up to three times. Outcomes per attempt:
 *  - SYNC_OK: success;
 *  - SYNC_DENIED: a message is logged (two wordings exist; the condition
 *    choosing between them is not visible here) and the salvage is aborted;
 *  - SYNC_BAD_COMMAND: a protocol-mismatch message is logged together with
 *    a hint about DAFS versus non-DAFS binaries, and the salvage is aborted;
 *  - anything else: a retry message is logged and the FSYNC client
 *    connection is shut down with FSYNC_clientFinis() before the next
 *    attempt.
 * If the final code is still not SYNC_OK the salvage is aborted.
 */
4406 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
4411 memset(&res, 0, sizeof(res));
4413 for (i = 0; i < 3; i++) {
4414 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4415 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4417 if (code == SYNC_OK) {
4419 } else if (code == SYNC_DENIED) {
4421 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4423 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4424 Abort("Salvage aborted\n");
4425 } else if (code == SYNC_BAD_COMMAND) {
4426 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
4429 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4430 Log("AskOffline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4432 Log("AskOffline: fileserver is DAFS but we are not.\n");
4435 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4436 Log("AskOffline: fileserver is not DAFS but we are.\n");
4438 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4441 Abort("Salvage aborted\n");
4444 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4445 FSYNC_clientFinis();
4449 if (code != SYNC_OK) {
4450 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4451 Abort("Salvage aborted\n");
/*
 * isDAFS / AskDAFS: find out whether the fileserver has the DAFS
 * extensions. (The function name is taken from the log prefix below --
 * its signature line is not visible here, confirm.)
 *
 * isDAFS starts at -1 and is used to remember the answer between calls.
 * The query sends FSYNC_VOL_LISTVOLUMES up to three times while code is
 * non-zero; each failure is logged with the code and the response reason,
 * and the FSYNC client connection is shut down before retrying. If the
 * answer cannot be determined the fileserver is assumed not to be DAFS.
 * Otherwise the decision comes from the SYNC_FLAG_DAFS_EXTENSIONS bit of
 * the response header flags.
 */
4455 /* don't want to pass around state; remember it here */
4456 static int isDAFS = -1;
4461 afs_int32 code = 1, i;
4463 /* we don't care if we race. the answer shouldn't change */
4467 memset(&res, 0, sizeof(res));
4469 for (i = 0; code && i < 3; i++) {
4470 code = FSYNC_VolOp(0, NULL, FSYNC_VOL_LISTVOLUMES, FSYNC_SALVAGE, &res);
4472 Log("AskDAFS: FSYNC_VOL_LISTVOLUMES failed with code %ld reason "
4473 "%ld (%s); trying again...\n", (long)code, (long)res.hdr.reason,
4474 FSYNC_reason2string(res.hdr.reason));
4475 FSYNC_clientFinis();
4481 Log("AskDAFS: could not determine DAFS-ness, assuming not DAFS\n");
4485 if ((res.hdr.flags & SYNC_FLAG_DAFS_EXTENSIONS)) {
/*
 * MaybeAskOnline: bring volumeId back online only if it still has a header.
 *
 * The volume disk header is read with VReadVolumeDiskHeader(). A failed
 * read is taken to mean the volume probably no longer exists, in which case
 * nothing is requested; otherwise AskOnline() is called for the volume.
 */
4495 MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4497 struct VolumeDiskHeader diskHdr;
4499 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHdr);
4501 /* volume probably does not exist; no need to bring back online */
4504 AskOnline(salvinfo, volumeId);
/*
 * AskOnline: ask the fileserver to put volumeId back online.
 *
 * Sends FSYNC_VOL_ON up to three times. SYNC_OK ends the attempts.
 * SYNC_DENIED is logged as a denial that will be retried. SYNC_BAD_COMMAND
 * is logged as a protocol mismatch with a hint to check binary versions.
 * Any other result logs a retry message. FSYNC_clientFinis() shuts the
 * client connection down before a further attempt. Unlike AskOffline(), no
 * Abort() call is visible in this function, so failure here appears to be
 * non-fatal -- NOTE(review): confirm against the full body.
 */
4508 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4512 for (i = 0; i < 3; i++) {
4513 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4514 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4516 if (code == SYNC_OK) {
4518 } else if (code == SYNC_DENIED) {
4519 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4520 } else if (code == SYNC_BAD_COMMAND) {
4521 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4523 Log("AskOnline: please make sure file server binaries are same version.\n");
4527 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
4528 FSYNC_clientFinis();
/*
 * AskDelete: tell the fileserver that volumeId has been deleted.
 *
 * Sends FSYNC_VOL_DONE up to three times, clearing the response structure
 * before each attempt. Outcomes per attempt:
 *  - SYNC_OK: done;
 *  - SYNC_DENIED: logged, then retried;
 *  - SYNC_BAD_COMMAND: logged as a protocol mismatch together with a hint
 *    about DAFS versus non-DAFS binaries;
 *  - SYNC_FAILED with reason FSYNC_UNKNOWN_VOLID or FSYNC_WRONG_PART: the
 *    volume is treated as already effectively deleted;
 *  - anything else: a retry message is logged and the FSYNC client
 *    connection is shut down with FSYNC_clientFinis().
 *
 * NOTE(review): every log message in this function is prefixed AskOnline:
 * although it is emitted from AskDelete, which makes salvager logs
 * misleading; the prefix looks copied from AskOnline().
 */
4535 AskDelete(struct SalvInfo *salvinfo, VolumeId volumeId)
4540 for (i = 0; i < 3; i++) {
4541 memset(&res, 0, sizeof(res));
4542 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4543 FSYNC_VOL_DONE, FSYNC_SALVAGE, &res);
4545 if (code == SYNC_OK) {
4547 } else if (code == SYNC_DENIED) {
4548 Log("AskOnline: file server denied DONE request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4549 } else if (code == SYNC_BAD_COMMAND) {
4550 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4553 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4554 Log("AskOnline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4556 Log("AskOnline: fileserver is DAFS but we are not.\n");
4559 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4560 Log("AskOnline: fileserver is not DAFS but we are.\n");
4562 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4566 } else if (code == SYNC_FAILED &&
4567 (res.hdr.reason == FSYNC_UNKNOWN_VOLID ||
4568 res.hdr.reason == FSYNC_WRONG_PART)) {
4569 /* volume is already effectively 'deleted' */
4573 Log("AskOnline: request for fileserver to delete volume failed; trying again...\n");
4574 FSYNC_clientFinis();
4581 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4583 /* Volume parameter is passed in case iopen is upgraded in future to
4584 * require a volume Id to be passed
4587 IHandle_t *srcH, *destH;
4588 FdHandle_t *srcFdP, *destFdP;
4590 afs_foff_t size = 0;
4592 IH_INIT(srcH, device, rwvolume, inode1);
4593 srcFdP = IH_OPEN(srcH);
4594 opr_Assert(srcFdP != NULL);
4595 IH_INIT(destH, device, rwvolume, inode2);
4596 destFdP = IH_OPEN(destH);
4597 while ((nBytes = FDH_PREAD(srcFdP, buf, sizeof(buf), size)) > 0) {
4598 opr_Verify(FDH_PWRITE(destFdP, buf, nBytes, size) == nBytes);
4601 opr_Assert(nBytes == 0);
4602 FDH_REALLYCLOSE(srcFdP);
4603 FDH_REALLYCLOSE(destFdP);
/*
 * PrintInodeList - log one line for every struct ViceInodeInfo record read
 * from salvinfo->inodeFd.
 *
 * salvinfo  salvage state; inodeFd is the descriptor that is sized and read
 */
4610 PrintInodeList(struct SalvInfo *salvinfo)
4612 struct ViceInodeInfo *ip;
4613 struct ViceInodeInfo *buf;
4616 afs_sfsize_t st_size;
/* Size the whole file so it can be read in a single call. */
4618 st_size = OS_SIZE(salvinfo->inodeFd);
4619 opr_Assert(st_size >= 0);
/* NOTE(review): when st_size is 0, malloc(0) is allowed to return NULL, which
 * would trip the assertion below -- confirm an empty file cannot reach here. */
4620 buf = malloc(st_size);
4621 opr_Assert(buf != NULL);
/* Number of whole records in the file. */
4622 nInodes = st_size / sizeof(struct ViceInodeInfo);
/* opr_Verify: a short read is fatal. */
4623 opr_Verify(OS_READ(salvinfo->inodeFd, buf, st_size) == st_size);
/* Walk the records in file order, counting nInodes down to zero. */
4624 for (ip = buf; nInodes--; ip++) {
4625 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
4626 PrintInode(stmp, ip->inodeNumber), ip->linkCount,
4627 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
4628 ip->u.param[2], ip->u.param[3]);
/*
 * PrintInodeSummary - log one line for each entry of salvinfo->inodeSummary.
 *
 * salvinfo  salvage state; nVolumesInInodeFile gives the number of entries
 */
4634 PrintInodeSummary(struct SalvInfo *salvinfo)
4637 struct InodeSummary *isp;
4639 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4640 isp = &salvinfo->inodeSummary[i];
/* NOTE(review): the format ends with the literal text ", volSummary" and has
 * no conversion or argument for it -- confirm whether a value was meant to
 * be printed there. */
4641 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/* This assertion always fires; per its own comment the routine must never be
 * reached in the NT build. */
4651 opr_Assert(0); /* Fork is never executed in the NT code path */
4655 #ifdef AFS_DEMAND_ATTACH_FS
/* NOTE(review): f is presumably the fork() result, so f == 0 selects the
 * child process -- confirm against the lines above. */
4656 if ((f == 0) && (programType == salvageServer)) {
4657 /* we are a salvageserver child */
4658 #ifdef FSSYNC_BUILD_CLIENT
/* Only built when the FSSYNC client is compiled in. */
4659 VChildProcReconnectFS_r();
4661 #ifdef SALVSYNC_BUILD_CLIENT
4665 #endif /* AFS_DEMAND_ATTACH_FS */
4666 #endif /* !AFS_NT40_ENV */
/* Demand-attach builds only: extra teardown performed when running as the
 * salvageserver. */
4676 #ifdef AFS_DEMAND_ATTACH_FS
4677 if (programType == salvageServer) {
4678 /* release all volume locks before closing down our SYNC channels.
4679 * the fileserver may try to online volumes we have checked out when
4680 * we close down FSSYNC, so we should make sure we don't have those
4681 * volumes locked when it does */
4682 struct DiskPartition64 *dp;
/* Visit every partition id from 0 through VOLMAXPARTS inclusive. */
4684 for (i = 0; i <= VOLMAXPARTS; i++) {
4685 dp = VGetPartitionById(i, 0);
/* NOTE(review): dp is dereferenced here; a NULL check is presumably made
 * just before this call -- confirm. */
4687 VLockFileReinit(&dp->volLockFile);
4690 # ifdef SALVSYNC_BUILD_CLIENT
4693 # ifdef FSSYNC_BUILD_CLIENT
4697 #endif /* AFS_DEMAND_ATTACH_FS */
/* A thread other than main_thread ends only itself, passing `code` as its
 * thread exit value.
 * NOTE(review): POSIX specifies pthread_equal() for comparing thread ids,
 * and the cast converts an integer straight to a pointer -- confirm both are
 * acceptable on every supported platform. */
4700 if (main_thread != pthread_self())
4701 pthread_exit((void *)code);
/* Block until a child changes state; a failing wait() is treated as fatal. */
4714 pid = wait(&status);
4715 opr_Assert(pid != -1);
/* Report a core dump, naming the program passed in as `prog`. */
4716 if (WCOREDUMP(status))
4717 Log("\"%s\" core dumped!\n", prog);
/* True when the child was killed by a signal or its exit status is non-zero. */
4718 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp - format a time, in the local time zone, as "MM/DD/YYYY HH:MM:SS"
 * or "MM/DD/YYYY HH:MM".
 *
 * clock      time to format
 * precision  NOTE(review): presumably selects the form with seconds when
 *            non-zero -- confirm against the branch between the two
 *            strftime calls.
 *
 * The text is written into a function-static 20-byte buffer, so every call
 * overwrites the result of the previous one.
 */
4724 TimeStamp(time_t clock, int precision)
4727 static char timestamp[20];
4728 lt = localtime(&clock);
/* The longer form is 19 characters, which with the terminator exactly fills
 * the buffer. */
4730 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M:%S", lt);
4732 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M", lt);
/*
 * CheckLogFile - move any existing log aside to "<log_path>.old" and open
 * log_path for appending as the global logFile.
 *
 * log_path  path of the salvager log file
 */
4737 CheckLogFile(char * log_path)
4739 char oldSlvgLog[AFSDIR_PATH_MAX];
4741 #ifndef AFS_NT40_ENV
/* Build the ".old" name with a bounded write: a log_path close to
 * AFSDIR_PATH_MAX is truncated instead of overrunning the buffer. */
4748 snprintf(oldSlvgLog, sizeof(oldSlvgLog), "%s.old", log_path);
/* The result of the rename is not checked here. */
4751 rk_rename(log_path, oldSlvgLog);
4752 logFile = afs_fopen(log_path, "a");
4754 if (!logFile) { /* still nothing, use stdout */
4758 #ifndef AFS_NAMEI_ENV
4759 AFS_DEBUG_IOPS_LOG(logFile);
4764 #ifndef AFS_NT40_ENV
/*
 * TimeStampLogFile - give the log file a second, timestamped name
 * "<log_path>.YYYY-MM-DD.HH:MM:SS" by hard-linking it.
 *
 * log_path  path of the existing log file
 */
4766 TimeStampLogFile(char * log_path)
4768 char stampSlvgLog[AFSDIR_PATH_MAX];
4773 lt = localtime(&now);
/* tm_year counts from 1900 and tm_mon from 0, hence the adjustments. */
4774 snprintf(stampSlvgLog, sizeof stampSlvgLog,
4775 "%s.%04d-%02d-%02d.%02d:%02d:%02d", log_path,
4776 lt->tm_year + 1900, lt->tm_mon + 1, lt->tm_mday, lt->tm_hour,
4777 lt->tm_min, lt->tm_sec);
4779 /* try to link the logfile to a timestamped filename */
4780 /* if it fails, oh well, nothing we can do */
4781 link(log_path, stampSlvgLog);
4790 #ifndef AFS_NT40_ENV
/* When logging goes to syslog there is no log file to show. */
4792 printf("Can't show log since using syslog.\n");
/* Open the salvager log read-only; it is consumed one line at a time below. */
4803 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
4806 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
/* fgets never stores more than sizeof(line) bytes, so longer lines arrive in
 * several pieces. */
4809 while (fgets(line, sizeof(line), logFile))
/*
 * Log - printf-style logging routine for the salvager.
 *
 * format  printf-style format string, followed by its arguments
 *
 * The message is first formatted into the local buffer tmp (vsnprintf bounds
 * it to sizeof tmp).  It is then either sent to syslog at LOG_INFO, or
 * written to logFile prefixed with a TimeStamp() of the current time.
 */
4816 Log(const char *format, ...)
4822 va_start(args, format);
4823 vsnprintf(tmp, sizeof tmp, format, args);
4825 #ifndef AFS_NT40_ENV
/* Passed through "%s" so the formatted text is never reinterpreted as a
 * format string. */
4827 syslog(LOG_INFO, "%s", tmp);
4831 gettimeofday(&now, NULL);
4832 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort - printf-style message routine.
 *
 * format  printf-style format string, followed by its arguments
 *
 * The message is formatted into the local buffer tmp (vsnprintf bounds it to
 * sizeof tmp) and then sent to syslog at LOG_INFO or written to logFile;
 * unlike Log(), no timestamp is prepended on the logFile path.
 */
4838 Abort(const char *format, ...)
4843 va_start(args, format);
4844 vsnprintf(tmp, sizeof tmp, format, args);
4846 #ifndef AFS_NT40_ENV
4848 syslog(LOG_INFO, "%s", tmp);
4852 fprintf(logFile, "%s", tmp);
/*
 * ToString - takes a C string s.
 *
 * NOTE(review): p is presumably a freshly allocated copy of s -- confirm.
 * A NULL p is treated as fatal.
 */
4864 ToString(const char *s)
4868 opr_Assert(p != NULL);
4872 /* Remove the FORCESALVAGE file */
/*
 * path  directory in which the FORCESALVAGE file is looked for
 *
 * The file is removed only when Testing is off and ForceSalvage is set.
 */
4874 RemoveTheForce(char *path)
4877 struct afs_stat_st force; /* so we can use afs_stat to find it */
/* Build "<path>/FORCESALVAGE".
 * NOTE(review): both copies are unbounded -- confirm `target` can always
 * hold path plus the 13-character suffix and its terminator. */
4878 strcpy(target,path);
4879 strcat(target,"/FORCESALVAGE");
4880 if (!Testing && ForceSalvage) {
/* Unlink only when the stat succeeds; the result of unlink() is ignored. */
4881 if (afs_stat(target,&force) == 0) unlink(target);
4885 #ifndef AFS_AIX32_ENV
/*
 * Non-AIX variant.
 *
 * path  directory in which the FORCESALVAGE file is looked for
 *
 * Returns non-zero when "<path>/FORCESALVAGE" can be stat'ed, 0 otherwise.
 */
4887 * UseTheForceLuke - see if we can use the force
4890 UseTheForceLuke(char *path)
4892 struct afs_stat_st force;
/* NOTE(review): both copies are unbounded -- confirm `target` can always
 * hold path plus the 13-character suffix and its terminator. */
4894 strcpy(target,path);
4895 strcat(target,"/FORCESALVAGE");
4897 return (afs_stat(target, &force) == 0);
/*
 * AIX variant: no FORCESALVAGE file is consulted and the function returns 0.
 * According to the text below, ForceSalvage is set as a side effect of
 * ListViceInodes() instead.
 */
4901 * UseTheForceLuke - see if we can use the force
4904 * The VRMIX fsck will not muck with the filesystem it is supposedly
4905 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4906 * muck directly with the root inode, which is within the normal
4908 * ListViceInodes() has a side effect of setting ForceSalvage if
4909 * it detects a need, based on root inode examination.
4912 UseTheForceLuke(char *path)
4915 return 0; /* sorry OB1 */
4920 /* NT support routines */
/* Path of the running executable; filled in on first use and reused after. */
4922 static char execpathname[MAX_PATH];
/*
 * nt_SalvagePartition - spawn a copy of this executable to salvage one
 * partition (NT only).
 *
 * partName  partition name, copied into the job record given to the child
 * jobn      job number recorded in the job record
 */
4924 nt_SalvagePartition(char *partName, int jobn)
/* An empty first byte means the path has not been looked up yet. */
4929 if (!*execpathname) {
4930 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
/* GetModuleFileName returns 0 on failure and the size it was given when the
 * path had to be truncated, so compare against the size actually passed
 * instead of a hard-coded 1023. */
4931 if (!n || n == MAX_PATH - 1)
/* Fill in the job record the child receives; the magic value is what
 * nt_SetupPartitionSalvage checks on the other side. */
4934 job.cj_magic = SALVAGER_MAGIC;
4935 job.cj_number = jobn;
/* NOTE(review): unbounded copy -- confirm cj_part can hold any partName. */
4936 (void)strcpy(job.cj_part, partName);
4937 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage - child-side setup from the job record handed
 * over by the parent (NT only).
 *
 * datap  data block to be interpreted as a childJob_t
 * len    size of that block in bytes
 *
 * The block is rejected unless its size and magic value both match.
 */
4942 nt_SetupPartitionSalvage(void *datap, int len)
4944 childJob_t *jobp = (childJob_t *) datap;
4945 char logname[AFSDIR_PATH_MAX];
4947 if (len != sizeof(childJob_t))
4949 if (jobp->cj_magic != SALVAGER_MAGIC)
/* The log name is the salvager log path with "." and an integer appended.
 * NOTE(review): sprintf is unbounded -- confirm the result always fits in
 * logname. */
4954 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
/* "w" creates the file, or empties it if it already exists. */
4956 logFile = afs_fopen(logname, "w");
4964 #endif /* AFS_NT40_ENV */