2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
89 #include <afs/procmgmt.h>
92 #ifdef HAVE_SYS_FILE_H
93 # include <sys/file.h>
97 #include <WINNT/afsevent.h>
100 #define WCOREDUMP(x) ((x) & 0200)
103 #ifdef AFS_PTHREAD_ENV
104 # include <opr/lock.h>
107 #include <afs/afsint.h>
108 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
109 #if defined(AFS_VFSINCL_ENV)
110 #include <sys/vnode.h>
112 #include <sys/fs/ufs_inode.h>
114 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
115 #include <ufs/ufs/dinode.h>
116 #include <ufs/ffs/fs.h>
118 #include <ufs/inode.h>
121 #else /* AFS_VFSINCL_ENV */
123 #include <ufs/inode.h>
124 #else /* AFS_OSF_ENV */
125 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_DARWIN_ENV)
126 #include <sys/inode.h>
129 #endif /* AFS_VFSINCL_ENV */
130 #endif /* AFS_SGI_ENV */
133 #include <sys/lockf.h>
136 #include <checklist.h>
138 #if defined(AFS_SGI_ENV)
141 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
143 #include <sys/mnttab.h>
144 #include <sys/mntent.h>
149 #endif /* AFS_SGI_ENV */
150 #endif /* AFS_HPUX_ENV */
154 #include <afs/osi_inode.h>
158 #include <afs/afsutil.h>
159 #include <afs/fileutil.h>
160 #include <rx/rx_queue.h>
165 #include <afs/afssyscalls.h>
169 #include "partition.h"
170 #include "daemon_com.h"
171 #include "daemon_com_inline.h"
173 #include "fssync_inline.h"
174 #include "volume_inline.h"
175 #include "salvsync.h"
176 #include "viceinode.h"
178 #include "volinodes.h" /* header magic number, etc. stuff */
179 #include "vol-salvage.h"
181 #include "vol_internal.h"
183 #include <afs/prs_fs.h>
185 #ifdef FSSYNC_BUILD_CLIENT
186 #include "vg_cache.h"
193 #define SALV_BUFFER_SIZE 1024
196 extern void *calloc();
198 static char *TimeStamp(char *buffer, size_t size, time_t clock, int precision);
201 int debug; /* -d flag */
202 extern int Testing; /* -n flag */
203 int ListInodeOption; /* -i flag */
204 int ShowRootFiles; /* -r flag */
205 int RebuildDirs; /* -sal flag */
206 int Parallel = 4; /* -para X flag */
207 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
208 int forceR = 0; /* -b flag */
209 int ShowLog = 0; /* -showlog flag */
210 char *ShowLogFilename = NULL; /* log file name for -showlog */
211 int ShowSuid = 0; /* -showsuid flag */
212 int ShowMounts = 0; /* -showmounts flag */
213 int orphans = ORPH_IGNORE; /* -orphans option */
215 int ClientMode = 0; /* running as salvager server client */
223 #define MAXPARALLEL 32
225 int OKToZap; /* -o flag */
226 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
227 * in the volume header */
229 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
232 * information that is 'global' to a particular salvage job.
235 Device fileSysDevice; /**< The device number of the current partition
237 char fileSysPath[9]; /**< The path of the mounted partition currently
238 * being salvaged, i.e. the directory containing
239 * the volume headers */
240 char *fileSysPathName; /**< NT needs this to make name pretty log. */
241 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
242 int VGLinkH_cnt; /**< # of references to lnk handle. */
243 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
246 char *fileSysDeviceName; /**< The block device where the file system being
247 * salvaged was mounted */
248 char *filesysfulldev;
250 int VolumeChanged; /**< Set by any routine which would change the
251 * volume in a way which would require callbacks
252 * to be broken if the volume was put back on
253 * line by an active file server */
255 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
256 * header dealt with */
258 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
259 FD_t inodeFd; /**< File descriptor for inode file */
261 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
262 int nVolumes; /**< Number of volumes (read-write and read-only)
263 * in volume summary */
264 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
267 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
268 * vnodes in the volume that
269 * we are currently looking
271 int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
272 * to contact the fileserver over FSYNC */
279 /* Forward declarations */
280 static void QuietExit(int) AFS_NORETURN;
281 static void SalvageShowLog(void);
282 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
283 static int AskVolumeSummary(struct SalvInfo *salvinfo,
284 VolumeId singleVolumeNumber);
285 static void MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId);
286 static void AskError(struct SalvInfo *salvinfo, VolumeId volumeId);
288 #ifdef AFS_DEMAND_ATTACH_FS
289 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
290 #endif /* AFS_DEMAND_ATTACH_FS */
292 /* Uniquifier stored in the Inode */
297 return (u & 0x3fffff);
299 #if defined(AFS_SGI_EXMAG)
300 return (u & SGI_UNIQMASK);
303 #endif /* AFS_SGI_EXMAG */
310 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
312 return 0; /* otherwise may be transient, e.g. EMFILE */
317 char *save_args[MAX_ARGS];
319 extern pthread_t main_thread;
320 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
324 * Get the salvage lock if not already held. Hold until process exits.
326 * @param[in] locktype READ_LOCK or WRITE_LOCK
329 _ObtainSalvageLock(int locktype)
331 struct VLockFile salvageLock;
336 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
338 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
341 "salvager: There appears to be another salvager running! "
346 "salvager: Error %d trying to acquire salvage lock! "
352 ObtainSalvageLock(void)
354 _ObtainSalvageLock(WRITE_LOCK);
357 ObtainSharedSalvageLock(void)
359 _ObtainSalvageLock(READ_LOCK);
363 #ifdef AFS_SGI_XFS_IOPS_ENV
364 /* Check if the given partition is mounted. For XFS, the root inode is not a
365 * constant. So we check the hard way.
368 IsPartitionMounted(char *part)
371 struct mntent *mntent;
373 opr_Verify(mntfp = setmntent(MOUNTED, "r"));
374 while (mntent = getmntent(mntfp)) {
375 if (!strcmp(part, mntent->mnt_dir))
380 return mntent ? 1 : 1;
383 /* Check if the given inode is the root of the filesystem. */
384 #ifndef AFS_SGI_XFS_IOPS_ENV
386 IsRootInode(struct afs_stat_st *status)
389 * The root inode is not a fixed value in XFS partitions. So we need to
390 * see if the partition is in the list of mounted partitions. This only
391 * affects the SalvageFileSys path, so we check there.
393 return (status->st_ino == ROOTINODE);
398 #ifndef AFS_NAMEI_ENV
399 /* We don't want to salvage big files filesystems, since we can't put volumes on
403 CheckIfBigFilesFS(char *mountPoint, char *devName)
405 struct superblock fs;
408 if (strncmp(devName, "/dev/", 5)) {
409 (void)sprintf(name, "/dev/%s", devName);
411 (void)strcpy(name, devName);
414 if (ReadSuper(&fs, name) < 0) {
415 Log("Unable to read superblock. Not salvaging partition %s.\n",
419 if (IsBigFilesFileSystem(&fs)) {
420 Log("Partition %s is a big files filesystem, not salvaging.\n",
430 #define HDSTR "\\Device\\Harddisk"
431 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
433 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
439 static int dowarn = 1;
441 if (!QueryDosDevice(p1->devName, res1, RES_LEN - 1))
443 if (strncmp(res1, HDSTR, HDLEN)) {
446 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
447 res1, HDSTR, p1->devName);
450 if (!QueryDosDevice(p2->devName, res2, RES_LEN - 1))
452 if (strncmp(res2, HDSTR, HDLEN)) {
455 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
456 res2, HDSTR, p2->devName);
460 return (0 == _strnicmp(res1, res2, RES_LEN - 1));
463 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
466 /* This assumes that two partitions with the same device number divided by
467 * PartsPerDisk are on the same disk.
470 SalvageFileSysParallel(struct DiskPartition64 *partP)
473 struct DiskPartition64 *partP;
474 int pid; /* Pid for this job */
475 int jobnumb; /* Log file job number */
476 struct job *nextjob; /* Next partition on disk to salvage */
478 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
479 struct job *thisjob = 0;
480 static int numjobs = 0;
481 static int jobcount = 0;
489 /* We have a partition to salvage. Copy it into thisjob */
490 thisjob = calloc(1, sizeof(struct job));
492 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
495 thisjob->partP = partP;
496 thisjob->jobnumb = jobcount;
498 } else if (jobcount == 0) {
499 /* We are asking to wait for all jobs (partP == 0), yet we never
502 Log("No file system partitions named %s* found; not salvaged\n",
503 VICE_PARTITION_PREFIX);
507 if (debug || Parallel == 1) {
509 SalvageFileSys(thisjob->partP, 0);
516 /* Check to see if thisjob is for a disk that we are already
517 * salvaging. If it is, link it in as the next job to do. The
518 * jobs array has 1 entry per disk being salvaged. numjobs is
519 * the total number of disks currently being salvaged. In
520 * order to keep the jobs array compact, when a disk is
521 * completed, the highest element in the jobs array is moved
522 * down to the now-open slot.
524 for (j = 0; j < numjobs; j++) {
525 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
526 /* On same disk, add it to this list and return */
527 thisjob->nextjob = jobs[j]->nextjob;
528 jobs[j]->nextjob = thisjob;
535 /* Loop until we start thisjob or until all existing jobs are finished */
536 while (thisjob || (!partP && (numjobs > 0))) {
537 startjob = -1; /* No new job to start */
539 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
540 /* Either the max jobs are running or we have to wait for all
541 * the jobs to finish. In either case, we wait for at least one
542 * job to finish. When it's done, clean up after it.
544 pid = wait(&wstatus);
545 opr_Assert(pid != -1);
546 for (j = 0; j < numjobs; j++) { /* Find which job it is */
547 if (pid == jobs[j]->pid)
550 opr_Assert(j < numjobs);
551 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
552 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
555 numjobs--; /* job no longer running */
556 oldjob = jobs[j]; /* remember */
557 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
558 free(oldjob); /* free the old job */
560 /* If there is another partition on the disk to salvage, then
561 * say we will start it (startjob). If not, then put thisjob there
562 * and say we will start it.
564 if (jobs[j]) { /* Another partitions to salvage */
565 startjob = j; /* Will start it */
566 } else { /* There is not another partition to salvage */
568 jobs[j] = thisjob; /* Add thisjob */
570 startjob = j; /* Will start it */
572 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
573 startjob = -1; /* Don't start it - already running */
577 /* We don't have to wait for a job to complete */
579 jobs[numjobs] = thisjob; /* Add this job */
581 startjob = numjobs; /* Will start it */
585 /* Start up a new salvage job on a partition in job slot "startjob" */
586 if (startjob != -1) {
588 Log("Starting salvage of file system partition %s\n",
589 jobs[startjob]->partP->name);
591 /* For NT, we not only fork, but re-exec the salvager. Pass in the
592 * commands and pass the child job number via the data path.
595 nt_SalvagePartition(jobs[startjob]->partP->name,
596 jobs[startjob]->jobnumb);
597 jobs[startjob]->pid = pid;
602 jobs[startjob]->pid = pid;
608 for (fd = 0; fd < 16; fd++)
614 ShowLog = 0; /* Child processes do not display. */
615 if (asprintf(&logFileName, "%s.%d",
616 AFSDIR_SERVER_SLVGLOG_FILEPATH,
617 jobs[startjob]->jobnumb) >= 0) {
618 OpenLog(logFileName);
622 SalvageFileSys1(jobs[startjob]->partP, 0);
627 } /* while ( thisjob || (!partP && numjobs > 0) ) */
629 /* If waited for all jobs to complete, now collect log files and return */
631 if (!serverLogSyslog) /* if syslogging - no need to collect */
634 char *buf = calloc(1, SALV_BUFFER_SIZE);
638 Log("out of memory");
640 for (i = 0; i < jobcount; i++) {
641 if (asprintf(&logFileName, "%s.%d",
642 AFSDIR_SERVER_SLVGLOG_FILEPATH, i) < 0) {
643 Log("out of memory");
646 if ((passLog = afs_fopen(logFileName, "r"))) {
647 while (fgets(buf, SALV_BUFFER_SIZE, passLog)) {
648 WriteLogBuffer(buf, strlen(buf));
652 (void)unlink(logFileName);
664 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
666 if (!canfork || debug || Fork() == 0) {
667 SalvageFileSys1(partP, singleVolumeNumber);
668 if (canfork && !debug) {
672 Wait("SalvageFileSys");
676 get_DevName(char *pbuffer, char *wpath)
678 char pbuf[128], *ptr;
679 strcpy(pbuf, pbuffer);
680 ptr = (char *)strrchr(pbuf, OS_DIRSEPC);
686 ptr = (char *)strrchr(pbuffer, OS_DIRSEPC);
688 strcpy(pbuffer, ptr + 1);
695 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
698 char inodeListPath[256];
699 FD_t inodeFile = INVALID_FD;
700 static char tmpDevName[100];
701 static char wpath[100];
702 struct VolumeSummary *vsp, *esp;
706 struct SalvInfo l_salvinfo;
707 struct SalvInfo *salvinfo = &l_salvinfo;
710 memset(salvinfo, 0, sizeof(*salvinfo));
713 if (inodeFile != INVALID_FD) {
715 inodeFile = INVALID_FD;
717 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
718 Abort("Raced too many times with fileserver restarts while trying to "
719 "checkout/lock volumes; Aborted\n");
721 #ifdef AFS_DEMAND_ATTACH_FS
723 /* unlock all previous volume locks, since we're about to lock them
725 VLockFileReinit(&partP->volLockFile);
727 #endif /* AFS_DEMAND_ATTACH_FS */
729 salvinfo->fileSysPartition = partP;
730 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
731 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
734 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
735 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
736 name = partP->devName;
738 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
739 strcpy(tmpDevName, partP->devName);
740 name = get_DevName(tmpDevName, wpath);
741 salvinfo->fileSysDeviceName = name;
742 salvinfo->filesysfulldev = wpath;
745 if (singleVolumeNumber) {
746 #ifndef AFS_DEMAND_ATTACH_FS
747 /* only non-DAFS locks the partition when salvaging a single volume;
748 * DAFS will lock the individual volumes in the VG */
749 VLockPartition(partP->name);
750 #endif /* !AFS_DEMAND_ATTACH_FS */
754 /* salvageserver already setup fssync conn for us */
755 if ((programType != salvageServer) && !VConnectFS()) {
756 Abort("Couldn't connect to file server\n");
759 salvinfo->useFSYNC = 1;
760 AskOffline(salvinfo, singleVolumeNumber);
761 #ifdef AFS_DEMAND_ATTACH_FS
762 if (LockVolume(salvinfo, singleVolumeNumber)) {
765 #endif /* AFS_DEMAND_ATTACH_FS */
768 salvinfo->useFSYNC = 0;
769 VLockPartition(partP->name);
773 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
776 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
777 partP->name, name, (Testing ? "(READONLY mode)" : ""));
779 Log("***Forced salvage of all volumes on this partition***\n");
784 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
791 opr_Verify((dirp = opendir(salvinfo->fileSysPath)) != NULL);
792 while ((dp = readdir(dirp))) {
793 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
794 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
796 Log("Removing old salvager temp files %s\n", dp->d_name);
797 strcpy(npath, salvinfo->fileSysPath);
798 strcat(npath, OS_DIRSEP);
799 strcat(npath, dp->d_name);
805 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
807 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
808 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
810 snprintf(inodeListPath, 255, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
814 inodeFile = OS_OPEN(inodeListPath, O_RDWR|O_TRUNC|O_CREAT, 0666);
815 if (inodeFile == INVALID_FD) {
816 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
819 /* Using nt_unlink here since we're really using the delete on close
820 * semantics of unlink. In most places in the salvager, we really do
821 * mean to unlink the file at that point. Those places have been
822 * modified to actually do that so that the NT crt can be used there.
824 * jaltman - On NT delete on close cannot be applied to a file while the
825 * process has an open file handle that does not have DELETE file
826 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
827 * delete privileges. As a result the nt_unlink() call will always
830 code = nt_unlink(inodeListPath);
832 code = unlink(inodeListPath);
835 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
838 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
842 salvinfo->inodeFd = inodeFile;
843 if (salvinfo->inodeFd == INVALID_FD)
844 Abort("Temporary file %s is missing...\n", inodeListPath);
845 OS_SEEK(salvinfo->inodeFd, 0L, SEEK_SET);
846 if (ListInodeOption) {
847 PrintInodeList(salvinfo);
848 if (singleVolumeNumber) {
849 /* We've checked out the volume from the fileserver, and we need
850 * to give it back. We don't know if the volume exists or not,
851 * so we don't know whether to AskOnline or not. Try to determine
852 * if the volume exists by trying to read the volume header, and
853 * AskOnline if it is readable. */
854 MaybeAskOnline(salvinfo, singleVolumeNumber);
858 /* enumerate volumes in the partition.
859 * figure out sets of read-only + rw volumes.
860 * salvage each set, read-only volumes first, then read-write.
861 * Fix up inodes on last volume in set (whether it is read-write
864 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
868 if (singleVolumeNumber) {
869 /* If we delete a volume during the salvage, we indicate as such by
870 * setting the volsummary->deleted field. We need to know if we
871 * deleted a volume or not in order to know which volumes to bring
872 * back online after the salvage. If we fork, we will lose this
873 * information, since volsummary->deleted will not get set in the
874 * parent. So, don't fork. */
878 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
879 i < salvinfo->nVolumesInInodeFile; i = j) {
880 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
882 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
884 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
885 struct VolumeSummary *tsp;
886 /* Scan volume list (from partition root directory) looking for the
887 * current rw volume number in the volume list from the inode scan.
888 * If there is one here that is not in the inode volume list,
890 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
892 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
894 /* Now match up the volume summary info from the root directory with the
895 * entry in the volume list obtained from scanning inodes */
896 salvinfo->inodeSummary[j].volSummary = NULL;
897 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
898 if (tsp->header.id == vid) {
899 salvinfo->inodeSummary[j].volSummary = tsp;
905 /* Salvage the group of volumes (several read-only + 1 read/write)
906 * starting with the current read-only volume we're looking at.
909 nt_SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
911 DoSalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
912 #endif /* AFS_NT40_ENV */
916 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
917 for (; vsp < esp; vsp++) {
919 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
922 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
923 RemoveTheForce(salvinfo->fileSysPath);
925 if (!Testing && singleVolumeNumber) {
927 #ifdef AFS_DEMAND_ATTACH_FS
928 /* unlock vol headers so the fs can attach them when we AskOnline */
929 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
930 #endif /* AFS_DEMAND_ATTACH_FS */
932 /* Step through the volumeSummary list and set all volumes on-line.
933 * Most volumes were taken off-line in GetVolumeSummary.
934 * If a volume was deleted, don't tell the fileserver anything, since
935 * we already told the fileserver the volume was deleted back when we
936 * destroyed the volume header.
937 * Also, make sure we bring the singleVolumeNumber back online first.
940 for (j = 0; j < salvinfo->nVolumes; j++) {
941 if (salvinfo->volumeSummaryp[j].header.id == singleVolumeNumber) {
943 if (!salvinfo->volumeSummaryp[j].deleted) {
944 AskOnline(salvinfo, singleVolumeNumber);
950 /* If singleVolumeNumber is not in our volumeSummary, it means that
951 * at least one other volume in the VG is on the partition, but the
952 * RW volume is not. We've already AskOffline'd it by now, though,
953 * so make sure we don't still have the volume checked out. */
954 AskDelete(salvinfo, singleVolumeNumber);
957 for (j = 0; j < salvinfo->nVolumes; j++) {
958 if (salvinfo->volumeSummaryp[j].header.id != singleVolumeNumber) {
959 if (!salvinfo->volumeSummaryp[j].deleted) {
960 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
966 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
967 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
970 OS_CLOSE(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
974 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
977 char filename[VMAXPATHLEN];
983 VolumeExternalName_r(vsp->header.id, filename, sizeof(filename));
984 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, filename);
987 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
990 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
992 Log("Error %ld destroying volume disk header for volume %" AFS_VOLID_FMT "\n",
993 afs_printable_int32_ld(code),
994 afs_printable_VolumeId_lu(vsp->header.id));
997 /* make sure we actually delete the header file; ENOENT
998 * is fine, since VDestroyVolumeDiskHeader probably already
1000 if (unlink(path) && errno != ENOENT) {
1001 Log("Unable to unlink %s (errno = %d)\n", path, errno);
1003 if (salvinfo->useFSYNC) {
1004 AskDelete(salvinfo, vsp->header.id);
1011 CompareInodes(const void *_p1, const void *_p2)
1013 const struct ViceInodeInfo *p1 = _p1;
1014 const struct ViceInodeInfo *p2 = _p2;
1015 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1016 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
1017 VolumeId p1rwid, p2rwid;
1019 (p1->u.vnode.vnodeNumber ==
1020 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
1022 (p2->u.vnode.vnodeNumber ==
1023 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1024 if (p1rwid < p2rwid)
1026 if (p1rwid > p2rwid)
1028 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1029 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
1030 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1031 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1032 if (p1->u.vnode.volumeId == p1rwid)
1034 if (p2->u.vnode.volumeId == p2rwid)
1036 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1038 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1039 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1040 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1042 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1044 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1046 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1048 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1050 /* The following tests are reversed, so that the most desirable
1051 * of several similar inodes comes first */
1052 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1053 #ifdef AFS_3DISPARES
1054 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1055 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1058 #ifdef AFS_SGI_EXMAG
1059 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1060 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1065 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1066 #ifdef AFS_3DISPARES
1067 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1068 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1071 #ifdef AFS_SGI_EXMAG
1072 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1073 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1078 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1079 #ifdef AFS_3DISPARES
1080 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1081 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1084 #ifdef AFS_SGI_EXMAG
1085 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1086 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1091 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1092 #ifdef AFS_3DISPARES
1093 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1094 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1097 #ifdef AFS_SGI_EXMAG
1098 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1099 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1108 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1109 struct InodeSummary *summary)
1111 VolumeId volume = ip->u.vnode.volumeId;
1112 VolumeId rwvolume = volume;
1117 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1119 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1121 rwvolume = ip->u.special.parentId;
1122 /* This isn't quite right, as there could (in error) be different
1123 * parent inodes in different special vnodes */
1125 if (maxunique < ip->u.vnode.vnodeUniquifier)
1126 maxunique = ip->u.vnode.vnodeUniquifier;
1130 summary->volumeId = volume;
1131 summary->RWvolumeId = rwvolume;
1132 summary->nInodes = n;
1133 summary->nSpecialInodes = nSpecial;
1134 summary->maxUniquifier = maxunique;
1138 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, VolumeId singleVolumeNumber, void *rock)
1140 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1141 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1142 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1147 * Collect list of inodes in file named by path. If a truly fatal error,
1148 * unlink the file and abort. For lesser errors, return -1. The file will
1149 * be unlinked by the caller.
1152 GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolumeNumber)
1156 struct ViceInodeInfo *ip, *ip_save;
1157 struct InodeSummary summary;
1158 char summaryFileName[50];
1159 FD_t summaryFile = INVALID_FD;
1161 char *dev = salvinfo->fileSysPath;
1162 char *wpath = salvinfo->fileSysPath;
1164 char *dev = salvinfo->fileSysDeviceName;
1165 char *wpath = salvinfo->filesysfulldev;
1167 char *part = salvinfo->fileSysPath;
1172 afs_sfsize_t st_size;
1174 /* This file used to come from vfsck; cobble it up ourselves now... */
1176 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1177 singleVolumeNumber ? OnlyOneVolume : 0,
1178 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1180 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1184 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1186 if (forceSal && !ForceSalvage) {
1187 Log("***Forced salvage of all volumes on this partition***\n");
1190 OS_SEEK(inodeFile, 0L, SEEK_SET);
1191 salvinfo->inodeFd = inodeFile;
1192 if (salvinfo->inodeFd == INVALID_FD ||
1193 (st_size = OS_SIZE(salvinfo->inodeFd)) == -1) {
1194 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1196 tdir = (tmpdir ? tmpdir : part);
1198 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1199 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp."));
1201 snprintf(summaryFileName, sizeof summaryFileName,
1202 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1204 summaryFile = OS_OPEN(summaryFileName, O_RDWR|O_APPEND|O_CREAT, 0666);
1205 if (summaryFile == INVALID_FD) {
1206 Abort("Unable to create inode summary file\n");
1210 /* Using nt_unlink here since we're really using the delete on close
1211 * semantics of unlink. In most places in the salvager, we really do
1212 * mean to unlink the file at that point. Those places have been
1213 * modified to actually do that so that the NT crt can be used there.
1215 * jaltman - As commented elsewhere, this cannot work because fopen()
1216 * does not open files with DELETE and FILE_SHARE_DELETE.
1218 code = nt_unlink(summaryFileName);
1220 code = unlink(summaryFileName);
1223 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1226 if (!canfork || debug || Fork() == 0) {
1227 int nInodes = st_size / sizeof(struct ViceInodeInfo);
1229 OS_CLOSE(summaryFile);
1230 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1231 RemoveTheForce(salvinfo->fileSysPath);
1233 struct VolumeSummary *vsp;
1237 GetVolumeSummary(salvinfo, singleVolumeNumber);
1239 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1241 if (vsp->header.id == singleVolumeNumber) {
1244 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1250 MaybeAskOnline(salvinfo, singleVolumeNumber);
1252 /* make sure we get rid of stray .vol headers, even if
1253 * they're not in our volume summary (might happen if
1254 * e.g. something else created them and they're not in the
1255 * fileserver VGC) */
1256 VDestroyVolumeDiskHeader(salvinfo->fileSysPartition,
1257 singleVolumeNumber, 0 /*parent*/);
1258 AskDelete(salvinfo, singleVolumeNumber);
1262 Log("%s vice inodes on %s; not salvaged\n",
1263 singleVolumeNumber ? "No applicable" : "No", dev);
1268 ip = malloc(nInodes*sizeof(struct ViceInodeInfo));
1270 OS_CLOSE(summaryFile);
1272 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1275 if (OS_READ(salvinfo->inodeFd, ip, st_size) != st_size) {
1276 OS_CLOSE(summaryFile);
1277 Abort("Unable to read inode table; %s not salvaged\n", dev);
1279 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1280 if (OS_SEEK(salvinfo->inodeFd, 0, SEEK_SET) == -1
1281 || OS_WRITE(salvinfo->inodeFd, ip, st_size) != st_size) {
1282 OS_CLOSE(summaryFile);
1283 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1288 CountVolumeInodes(ip, nInodes, &summary);
1289 if (OS_WRITE(summaryFile, &summary, sizeof(summary)) != sizeof(summary)) {
1290 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1291 OS_CLOSE(summaryFile);
1295 summary.index += (summary.nInodes);
1296 nInodes -= summary.nInodes;
1297 ip += summary.nInodes;
1300 ip = ip_save = NULL;
1301 /* Following fflush is not fclose, because if it was debug mode would not work */
1302 if (OS_SYNC(summaryFile) == -1) {
1303 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1304 OS_CLOSE(summaryFile);
1308 if (canfork && !debug) {
1312 if (Wait("Inode summary") == -1) {
1313 OS_CLOSE(summaryFile);
1314 Exit(1); /* salvage of this partition aborted */
1318 st_size = OS_SIZE(summaryFile);
1319 opr_Assert(st_size >= 0);
1322 salvinfo->inodeSummary = malloc(st_size);
1323 opr_Assert(salvinfo->inodeSummary != NULL);
1324 /* For GNU we need to do lseek to get the file pointer moved. */
1325 opr_Assert(OS_SEEK(summaryFile, 0, SEEK_SET) == 0);
1326 ret = OS_READ(summaryFile, salvinfo->inodeSummary, st_size);
1327 opr_Assert(ret == st_size);
1329 salvinfo->nVolumesInInodeFile = st_size / sizeof(struct InodeSummary);
1330 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1331 salvinfo->inodeSummary[i].volSummary = NULL;
1333 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)st_size);
1334 OS_CLOSE(summaryFile);
1337 if (retcode && singleVolumeNumber && !deleted) {
1338 AskError(salvinfo, singleVolumeNumber);
1344 /* Comparison routine for the volume sort (qsort-style const void * arguments,
1345 each pointing at a struct VolumeSummary). Entries are ordered by
1346 header.parent first, so members of one volume group end up adjacent.
   Within a group this is set up so that a read-write volume
   (header.id == header.parent) comes immediately before any read-only
   clones of that volume; clones are ordered among themselves by header.id. */
1348 CompareVolumes(const void *_p1, const void *_p2)
1350 const struct VolumeSummary *p1 = _p1;
1351 const struct VolumeSummary *p2 = _p2;
/* different volume groups: order by the group (parent) id */
1352 if (p1->header.parent != p2->header.parent)
1353 return p1->header.parent < p2->header.parent ? -1 : 1;
1354 if (p1->header.id == p1->header.parent) /* p1 is the rw volume of the group */
1356 if (p2->header.id == p2->header.parent) /* p2 is the rw volume of the group */
1358 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only: order by id */
1362 * Gleans volumeSummary information by asking the fileserver
1364 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1365 * salvaging a whole partition
1367 * @return whether we obtained the volume summary information or not
1368 * @retval 0 success; we obtained the volume summary information
1369 * @retval -1 we raced with a fileserver restart; volume locks and checkout must be retried
1371 * @retval 1 we did not get the volume summary information; either the
1372 * fileserver responded with an error, or we are not supposed to
1373 * ask the fileserver for the information (e.g. we are salvaging
1374 * the entire partition or we are not the salvageserver)
1376 * @note for non-DAFS, always returns 1
/* Reviewer summary of the flow visible in this function:
 * - The fileserver is only consulted when both FSSYNC_BUILD_CLIENT and
 *   AFS_DEMAND_ATTACH_FS are defined, programType is salvageServer, and a
 *   non-zero singleVolumeNumber was supplied.
 * - FSYNC_VGCQuery is re-issued, sleeping between attempts, for as long as
 *   it fails with reason FSYNC_PART_SCANNING.
 * - A failure with reason FSYNC_UNKNOWN_VOLID is treated as an empty volume
 *   group whose RW id is singleVolumeNumber.
 * - If the fileserver reports an RW id different from singleVolumeNumber,
 *   a salvage of that volume group is requested through SALVSYNC_LinkVolume
 *   (when SALVSYNC_BUILD_CLIENT is defined) and the process exits with
 *   SALSRV_EXIT_VOLGROUP_LINK.
 * - Otherwise room for VOL_VG_MAX_VOLS summaries is allocated in
 *   salvinfo->volumeSummaryp, the reported children are taken offline and
 *   locked (except singleVolumeNumber itself), their disk headers are read,
 *   and the recorded summaries are sorted with qsort.
 * NOTE(review): the "Cannot read header" message prints the volume id with
 * %lu and afs_printable_uint32_lu, while the other messages in this function
 * use AFS_VOLID_FMT and afs_printable_VolumeId_lu -- confirm this is
 * intended. */
1379 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1382 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1383 if (programType == salvageServer) {
1384 if (singleVolumeNumber) {
1385 FSSYNC_VGQry_response_t q_res;
1387 struct VolumeSummary *vsp;
1389 struct VolumeDiskHeader diskHdr;
1391 memset(&res, 0, sizeof(res));
1393 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1396 * We must wait for the partition to finish scanning before
1397 * can continue, since we will not know if we got the entire
1398 * VG membership unless the partition is fully scanned.
1399 * We could, in theory, just scan the partition ourselves if
1400 * the VG cache is not ready, but we would be doing the exact
1401 * same scan the fileserver is doing; it will almost always
1402 * be faster to wait for the fileserver. The only exceptions
1403 * are if the partition does not take very long to scan, and
1404 * in that case it's fast either way, so who cares?
1406 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1407 Log("waiting for fileserver to finish scanning partition %s...\n",
1408 salvinfo->fileSysPartition->name);
1410 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1411 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1412 * just so small partitions don't need to wait over 10
1413 * seconds every time, and large partitions are generally
1414 * polled only once every ten seconds. */
1415 sleep((i > 10) ? (i = 10) : i);
1417 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1421 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1422 /* This can happen if there's no header for the volume
1423 * we're salvaging, or no headers exist for the VG (if
1424 * we're salvaging an RW). Act as if we got a response
1425 * with no VG members. The headers may be created during
1426 * salvaging, if there are inodes in this VG. */
1428 memset(&q_res, 0, sizeof(q_res));
1429 q_res.rw = singleVolumeNumber;
1433 Log("fileserver refused VGCQuery request for volume %" AFS_VOLID_FMT " on "
1434 "partition %s, code %ld reason %ld\n",
1435 afs_printable_VolumeId_lu(singleVolumeNumber),
1436 salvinfo->fileSysPartition->name,
1437 afs_printable_int32_ld(code),
1438 afs_printable_int32_ld(res.hdr.reason));
1442 if (q_res.rw != singleVolumeNumber) {
1443 Log("fileserver requested salvage of clone %" AFS_VOLID_FMT "; scheduling salvage of volume group %" AFS_VOLID_FMT "...\n",
1444 afs_printable_VolumeId_lu(singleVolumeNumber),
1445 afs_printable_VolumeId_lu(q_res.rw));
1446 #ifdef SALVSYNC_BUILD_CLIENT
1447 if (SALVSYNC_LinkVolume(q_res.rw,
1449 salvinfo->fileSysPartition->name,
1451 Log("schedule request failed\n");
1453 #endif /* SALVSYNC_BUILD_CLIENT */
1454 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1457 salvinfo->volumeSummaryp = calloc(VOL_VG_MAX_VOLS, sizeof(struct VolumeSummary));
1458 opr_Assert(salvinfo->volumeSummaryp != NULL);
1460 salvinfo->nVolumes = 0;
1461 vsp = salvinfo->volumeSummaryp;
1463 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1464 char name[VMAXPATHLEN];
1466 if (!q_res.children[i]) {
1470 /* AskOffline for singleVolumeNumber was called much earlier */
1471 if (q_res.children[i] != singleVolumeNumber) {
1472 AskOffline(salvinfo, q_res.children[i]);
1473 if (LockVolume(salvinfo, q_res.children[i])) {
1479 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1481 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1482 afs_printable_uint32_lu(q_res.children[i]));
1487 DiskToVolumeHeader(&vsp->header, &diskHdr);
1488 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1490 salvinfo->nVolumes++;
1494 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1499 Log("Cannot get volume summary from fileserver; falling back to scanning "
1500 "entire partition\n");
1503 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
1508 * count how many volume headers are found by VWalkVolumeHeaders.
1510 * @param[in] dp the disk partition (unused)
1511 * @param[in] name full path to the .vol header (unused)
1512 * @param[in] hdr the header data (unused)
1513 * @param[in] last whether this is the last try or not (unused)
1514 * @param[in] rock actually an afs_int32*; the running count of how many
1515 * volumes we have found
/* Header-walk callback used for counting; GetVolumeSummary passes it to
 * VWalkVolumeHeaders. Of its arguments, only rock is read in the visible
 * code. */
1520 CountHeader(struct DiskPartition64 *dp, const char *name,
1521 struct VolumeDiskHeader *hdr, int last, void *rock)
/* recover the typed counter from the opaque callback argument */
1523 afs_int32 *nvols = (afs_int32 *)rock;
1529 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
/* Shared scan state. GetVolumeSummary fills one of these in and hands its
 * address to VWalkVolumeHeaders; RecordHeader and UnlinkHeader receive it
 * back as their opaque rock argument. RecordHeader aborts the salvage when
 * nVolumes exceeds totalVolumes. */
1532 struct SalvageScanParams {
1533 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1534 * vol id of the VG we're salvaging */
1535 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1536 * we're filling in */
1537 afs_int32 nVolumes; /**< # of vols we've encountered */
1538 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1539 * # of vols we've alloc'd memory for) */
1540 int retry; /**< do we need to retry vol lock/checkout? */
1541 struct SalvInfo *salvinfo; /**< salvage job info */
1545 * records volume summary info found from VWalkVolumeHeaders.
1547 * Found volumes are also taken offline if they are in the specific volume
1548 * group we are looking for.
1550 * @param[in] dp the disk partition
1551 * @param[in] name full path to the .vol header
1552 * @param[in] hdr the header data
1553 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1554 * @param[in] rock actually a struct SalvageScanParams*, containing the
1555 * information needed to record the volume summary data
1557 * @return operation status
1559 * @retval -1 volume locking raced with fileserver restart; checking out
1560 * and locking volumes needs to be retried
1561 * @retval 1 volume header is mis-named and should be deleted
/* Reviewer summary of the flow visible in this callback (GetVolumeSummary
 * passes it to VWalkVolumeHeaders with a struct SalvageScanParams rock):
 * - the on-disk header is converted with DiskToVolumeHeader into a local,
 *   zero-initialized VolumeSummary;
 * - if the volume we were asked to salvage turns out to be a clone, the
 *   salvageserver schedules a salvage of the parent volume group and exits,
 *   while other program types log that the volume is read-only;
 * - for headers relevant to this salvage, the header file name is compared
 *   against the name generated from VFORMAT, other group members are taken
 *   offline (and locked under AFS_DEMAND_ATTACH_FS), and the summary is
 *   copied into params->vsp once nVolumes has been checked against
 *   totalVolumes. */
1564 RecordHeader(struct DiskPartition64 *dp, const char *name,
1565 struct VolumeDiskHeader *hdr, int last, void *rock)
1567 char nameShouldBe[64];
1568 struct SalvageScanParams *params;
1569 struct VolumeSummary summary;
1570 VolumeId singleVolumeNumber;
1571 struct SalvInfo *salvinfo;
1573 params = (struct SalvageScanParams *)rock;
1575 memset(&summary, 0, sizeof(summary));
1577 singleVolumeNumber = params->singleVolumeNumber;
1578 salvinfo = params->salvinfo;
1580 DiskToVolumeHeader(&summary.header, hdr);
/* This header is the volume we were asked to salvage, but it is not its
 * own parent: singleVolumeNumber names a clone rather than the RW. */
1582 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1583 && summary.header.parent != singleVolumeNumber) {
1585 if (programType == salvageServer) {
1586 #ifdef SALVSYNC_BUILD_CLIENT
1587 Log("fileserver requested salvage of clone %" AFS_VOLID_FMT "; scheduling salvage of volume group %" AFS_VOLID_FMT "...\n",
1588 afs_printable_VolumeId_lu(summary.header.id),
1589 afs_printable_VolumeId_lu(summary.header.parent));
1590 if (SALVSYNC_LinkVolume(summary.header.parent,
1594 Log("schedule request failed\n");
1597 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1600 Log("%" AFS_VOLID_FMT " is a read-only volume; not salvaged\n",
1601 afs_printable_VolumeId_lu(singleVolumeNumber));
/* Only headers relevant to this salvage are examined further: every header
 * for a partition salvage, otherwise the target volume and its children. */
1606 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1607 || summary.header.parent == singleVolumeNumber) {
1609 /* check if the header file is incorrectly named */
1611 const char *base = strrchr(name, OS_DIRSEPC);
1618 snprintf(nameShouldBe, sizeof nameShouldBe,
1619 VFORMAT, afs_printable_VolumeId_lu(summary.header.id));
1622 if (strcmp(nameShouldBe, base)) {
1623 /* .vol file has wrong name; retry/delete */
1627 if (!badname || last) {
1628 /* only offline the volume if the header is good, or if this is
1629 * the last try looking at it; avoid AskOffline'ing the same vol
1632 if (singleVolumeNumber
1633 && summary.header.id != singleVolumeNumber) {
1634 /* don't offline singleVolumeNumber; we already did that
1637 AskOffline(salvinfo, summary.header.id);
1639 #ifdef AFS_DEMAND_ATTACH_FS
1641 /* don't lock the volume if the header is bad, since we're
1642 * about to delete it anyway. */
1643 if (LockVolume(salvinfo, summary.header.id)) {
1648 #endif /* AFS_DEMAND_ATTACH_FS */
1652 if (last && !Showmode) {
1653 Log("Volume header file %s is incorrectly named (should be %s "
1654 "not %s); %sdeleted (it will be recreated later, if "
1655 "necessary)\n", name, nameShouldBe, base,
1656 (Testing ? "it would have been " : ""));
1664 if (params->nVolumes > params->totalVolumes) {
1665 /* We found more volumes than we found on the first partition walk;
1666 * apparently something created a volume while we were
1667 * partition-salvaging, or we found more than 20 vols when salvaging a
1668 * particular volume. Abort if we detect this, since other programs are
1669 * supposed to not touch the partition while it is partition-salvaging,
1670 * and we shouldn't find more than 20 vols in a VG.
1672 Abort("Found %ld vol headers, but should have found at most %ld! "
1673 "Make sure the volserver/fileserver are not running at the "
1674 "same time as a partition salvage\n",
1675 afs_printable_int32_ld(params->nVolumes),
1676 afs_printable_int32_ld(params->totalVolumes));
1679 memcpy(params->vsp, &summary, sizeof(summary));
1687 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1689 * If the header could not be read in at all, the header is always unlinked.
1690 * If instead RecordHeader said the header was bad (that is, the header file
1691 * is mis-named), we only unlink if we are doing a partition salvage, as
1692 * opposed to salvaging a specific volume group.
1694 * @param[in] dp the disk partition
1695 * @param[in] name full path to the .vol header
1696 * @param[in] hdr header data, or NULL if the header could not be read
1697 * @param[in] rock actually a struct SalvageScanParams*, with some information
/* Reviewer summary of the visible flow: GetVolumeSummary passes this
 * callback to VWalkVolumeHeaders together with RecordHeader. The rock is a
 * struct SalvageScanParams. An unreadable header is logged as "not a
 * legitimate volume header file"; a readable but rejected header is only
 * considered for removal when params->singleVolumeNumber is 0 (partition
 * salvage). A failing unlink(name) is logged with errno and not otherwise
 * acted on in the lines shown here. */
1701 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1702 struct VolumeDiskHeader *hdr, void *rock)
1704 struct SalvageScanParams *params;
1707 params = (struct SalvageScanParams *)rock;
1710 /* no header; header is too bogus to read in at all */
1712 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1718 } else if (!params->singleVolumeNumber) {
1719 /* We were able to read in a header, but RecordHeader said something
1720 * was wrong with it. We only unlink those if we are doing a partition
1727 if (dounlink && unlink(name)) {
1728 Log("Error %d while trying to unlink %s\n", errno, name);
1733 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1734 * the fileserver for VG information, or by scanning the /vicepX partition.
1736 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1737 * are salvaging, or 0 if this is a partition salvage
1740 * @return operation status
1742 * @retval -1 we raced with a fileserver restart; checking out and locking
1743 * volumes must be retried
/* Reviewer summary of the visible flow:
 * - AskVolumeSummary is tried first; its result decides whether the
 *   partition has to be scanned at all or the checkout must be retried.
 * - For a partition salvage (singleVolumeNumber == 0) the headers are first
 *   counted with VWalkVolumeHeaders/CountHeader; for a single volume group
 *   the summary array is sized at VOL_VG_MAX_VOLS.
 * - A second VWalkVolumeHeaders pass uses RecordHeader to fill
 *   salvinfo->volumeSummaryp and UnlinkHeader to deal with bad headers; both
 *   callbacks receive the address of the local params structure.
 * - salvinfo->nVolumes is taken from params.nVolumes and the array is
 *   sorted with qsort. */
1746 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1748 afs_int32 nvols = 0;
1749 struct SalvageScanParams params;
1752 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1754 /* we successfully got the vol information from the fileserver; no
1755 * need to scan the partition */
1759 /* we need to retry volume checkout */
1763 if (!singleVolumeNumber) {
1764 /* Count how many volumes we have in /vicepX */
1765 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1768 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
/* single volume group: size the array for the largest possible group */
1773 nvols = VOL_VG_MAX_VOLS;
1776 salvinfo->volumeSummaryp = calloc(nvols, sizeof(struct VolumeSummary));
1777 opr_Assert(salvinfo->volumeSummaryp != NULL);
1779 params.singleVolumeNumber = singleVolumeNumber;
1780 params.vsp = salvinfo->volumeSummaryp;
1781 params.nVolumes = 0;
1782 params.totalVolumes = nvols;
1784 params.salvinfo = salvinfo;
1786 /* walk the partition directory of volume headers and record the info
1787 * about them; unlinking invalid headers */
1788 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1789 UnlinkHeader, &params);
1791 /* we apparently need to retry checking-out/locking volumes */
1795 Abort("Failed to get volume header summary\n");
1797 salvinfo->nVolumes = params.nVolumes;
1799 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1805 #ifdef AFS_NAMEI_ENV
1806 /* Find the link table. This should be associated with the RW volume, even
1807 * if there is only an RO volume at this site.
/* Search the special inodes of each of the nVols inode summaries for a
 * VI_LINKTABLE inode whose volumeId and parentId both equal
 * isp->RWvolumeId, and return its inode number.
 *
 * isp:       first inode summary of the volume group
 * nVols:     number of summaries starting at isp
 * allInodes: base that isp[i].index is added to when locating a volume's
 *            inodes
 *
 * NOTE(review): the comparison always uses isp->RWvolumeId (the first
 * summary) rather than isp[i].RWvolumeId; presumably every member of the
 * group carries the same RW id -- confirm. */
1810 FindLinkHandle(struct InodeSummary *isp, int nVols,
1811 struct ViceInodeInfo *allInodes)
1814 struct ViceInodeInfo *ip;
1816 for (i = 0; i < nVols; i++) {
1817 ip = allInodes + isp[i].index;
1818 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1819 if (ip[j].u.special.volumeId == isp->RWvolumeId &&
1820 ip[j].u.special.parentId == isp->RWvolumeId &&
1821 ip[j].u.special.type == VI_LINKTABLE) {
1822 return ip[j].inodeNumber;
/* Decide what to do with an inode that may be an extra link table.
 * DoSalvageVolumeGroup calls this for inodes other than the volume group's
 * own link table and leaves the inode alone when the result is non-zero.
 *
 * Visible flow:
 * - inodes that are not INODESPECIAL, or not of type VI_LINKTABLE, are to
 *   be processed as normal files;
 * - a link table whose volumeId equals its parentId looks valid but is not
 *   ours; it is reported and left alone;
 * - otherwise the link table is reported as invalid and either a "Would
 *   have deleted" message is logged or the inode's file is removed with
 *   unlink(), using the path produced by namei_HandleToName. An unlink
 *   failure is logged with errno. */
1830 CheckDupLinktable(struct SalvInfo *salvinfo, struct InodeSummary *isp, struct ViceInodeInfo *ip)
1833 if (ip->u.vnode.vnodeNumber != INODESPECIAL) {
1834 /* not a linktable; process as a normal file */
1837 if (ip->u.special.type != VI_LINKTABLE) {
1838 /* not a linktable; process as a normal file */
1842 /* make sure nothing inc/decs it */
1845 if (ip->u.special.volumeId == ip->u.special.parentId) {
1846 /* This is a little weird, but shouldn't break anything, and there is
1847 * no known way that this can happen; just do nothing, in case deleting
1848 * it would screw something up. */
1849 Log("Inode %s appears to be a valid linktable for id (%u), but it's not\n",
1850 PrintInode(stmp, ip->inodeNumber), ip->u.special.parentId);
1851 Log("the linktable for our volume group (%u). This is unusual, since\n",
1853 Log("there should only be one linktable per volume group. I'm leaving\n");
1854 Log("it alone, just to be safe.\n");
1858 Log("Linktable %s appears to be invalid (parentid/volumeid mismatch: %u != %u)\n",
1859 PrintInode(stmp, ip->inodeNumber), ip->u.special.parentId, ip->u.special.volumeId);
1861 Log("Would have deleted linktable inode %s\n", PrintInode(stmp, ip->inodeNumber));
1866 Log("Deleting linktable inode %s\n", PrintInode(stmp, ip->inodeNumber));
1867 IH_INIT(tmpH, salvinfo->fileSysDevice, isp->RWvolumeId, ip->inodeNumber);
1868 namei_HandleToName(&ufs_name, tmpH);
1869 if (unlink(ufs_name.n_path) < 0) {
1870 Log("Error %d unlinking path %s\n", errno, ufs_name.n_path);
/* Create (if needed) and initialize the link table of a volume group.
 *
 * Visible flow: when ino is not a valid inode, a new INODESPECIAL /
 * VI_LINKTABLE inode is requested with IH_CREATE for isp->RWvolumeId;
 * salvinfo->VGLinkH is then initialized for the inode and opened, the file
 * is truncated to sizeof(struct versionStamp) + sizeof(short), a
 * LINKTABLEMAGIC / LINKTABLEVERSION stamp is written at offset 0, and the
 * handle is closed with FDH_REALLYCLOSE. If the volume has a summary, the
 * inode is also recorded in its header.linkTable field. The visible failure
 * paths call Abort.
 *
 * NOTE(review): the message used when FDH_PWRITE fails says "Can't truncate
 * link table", the same text as the FDH_TRUNC failure just before it; it
 * looks like it was meant to say "write" -- confirm. */
1879 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1881 struct versionStamp version;
1884 if (!VALID_INO(ino))
1886 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->RWvolumeId,
1887 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1888 if (!VALID_INO(ino))
1890 ("Unable to allocate link table inode for volume %" AFS_VOLID_FMT " (error = %d)\n",
1891 afs_printable_VolumeId_lu(isp->RWvolumeId), errno);
1892 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1893 fdP = IH_OPEN(salvinfo->VGLinkH);
1895 Abort("Can't open link table for volume %" AFS_VOLID_FMT " (error = %d)\n",
1896 afs_printable_VolumeId_lu(isp->RWvolumeId), errno);
1898 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1899 Abort("Can't truncate link table for volume %" AFS_VOLID_FMT " (error = %d)\n",
1900 afs_printable_VolumeId_lu(isp->RWvolumeId), errno);
1902 version.magic = LINKTABLEMAGIC;
1903 version.version = LINKTABLEVERSION;
1905 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1907 Abort("Can't truncate link table for volume %" AFS_VOLID_FMT " (error = %d)\n",
1908 afs_printable_VolumeId_lu(isp->RWvolumeId), errno);
1910 FDH_REALLYCLOSE(fdP);
1912 /* If the volume summary exists (i.e., the V*.vol header file exists),
1913 * then set this inode there as well.
1915 if (isp->volSummary)
1916 isp->volSummary->header.linkTable = ino;
/* Body of a thread entry point -- presumably nt_SVG, the routine that
 * nt_SalvageVolumeGroup hands to pthread_create; its signature is not
 * visible here. Unpacks the SVGParms_t argument and runs
 * DoSalvageVolumeGroup with the salvage info, inode summary and volume
 * count it carries. */
1925 SVGParms_t *parms = (SVGParms_t *) arg;
1926 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
/* Salvage one volume group on a separate thread and wait for it to finish.
 *
 * Visible flow: the per-volume fields of salvinfo (VolumeChanged, VGLinkH,
 * VGLinkH_cnt, VolInfo) are reset, the arguments are packed into parms, a
 * joinable thread running nt_SVG is created, and the caller blocks in
 * pthread_join. Each pthread setup step has a corresponding "Failed to ..."
 * log message. The result of pthread_join is deliberately discarded.
 *
 * NOTE(review): the failure messages print isp->RWvolumeId with %u, whereas
 * other messages in this file use AFS_VOLID_FMT with
 * afs_printable_VolumeId_lu -- confirm whether these should match. */
1931 nt_SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1934 pthread_attr_t tattr;
1938 /* Initialize per volume global variables, even if later code does so */
1939 salvinfo->VolumeChanged = 0;
1940 salvinfo->VGLinkH = NULL;
1941 salvinfo->VGLinkH_cnt = 0;
1942 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1944 parms.svgp_inodeSummaryp = isp;
1945 parms.svgp_count = nVols;
1946 parms.svgp_salvinfo = salvinfo;
1947 code = pthread_attr_init(&tattr);
1949 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1953 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1955 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1958 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1960 Log("Failed to create thread to salvage volume group %u\n",
1964 (void)pthread_join(tid, NULL);
1966 #endif /* AFS_NT40_ENV */
/* Salvage one volume group: the nVols inode summaries starting at isp.
 *
 * Reviewer summary of the visible flow:
 * - haveRWvolume is set when the first summary is the RW volume
 *   (volumeId == RWvolumeId) and has special inodes;
 * - unless ForceSalvage is set, QuickCheck is consulted (when ShowMounts is
 *   off, or when there is no RW volume);
 * - when canfork is set and debug is not, the work is done in a forked
 *   child while the parent waits in Wait("Salvage volume group");
 * - the group's ViceInodeInfo records are read from salvinfo->inodeFd,
 *   starting at record isp->index;
 * - under AFS_NAMEI_ENV the link table is located with FindLinkHandle, its
 *   header is validated, and it is recreated (or reported as "Would have
 *   recreated" when Testing) if missing or bad;
 * - volumes are salvaged in reverse order, each in two passes (check == 1,
 *   then check == 0), using SalvageVolumeHeaderFile and SalvageVnodes;
 * - remaining link count discrepancies are corrected with IH_DEC / IH_INC,
 *   with the link table inode itself handled last;
 * - SalvageVolume runs the directory consistency checks on the RW volume
 *   and the link handle is released. */
1969 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1971 struct ViceInodeInfo *inodes, *allInodes, *ip;
1972 int i, totalInodes, size, salvageTo;
1976 int dec_VGLinkH = 0;
1978 FdHandle_t *fdP = NULL;
1980 salvinfo->VGLinkH_cnt = 0;
1981 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1982 && isp->nSpecialInodes > 0);
1983 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1984 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1987 if (ShowMounts && !haveRWvolume)
1989 if (canfork && !debug && Fork() != 0) {
1990 (void)Wait("Salvage volume group");
1993 for (i = 0, totalInodes = 0; i < nVols; i++)
1994 totalInodes += isp[i].nInodes;
1995 size = totalInodes * sizeof(struct ViceInodeInfo);
/* NOTE(review): no NULL check on this malloc result is visible in this
 * excerpt before it is used for pointer arithmetic and as the OS_READ
 * buffer -- confirm whether one is needed. */
1996 inodes = malloc(size);
1997 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1998 * for the partition, if all the inodes
1999 * had been read into memory */
2001 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
2003 opr_Verify(OS_READ(salvinfo->inodeFd, inodes, size) == size);
2005 /* Don't try to salvage a read write volume if there isn't one on this
2007 salvageTo = haveRWvolume ? 0 : 1;
2009 #ifdef AFS_NAMEI_ENV
2010 ino = FindLinkHandle(isp, nVols, allInodes);
2011 if (VALID_INO(ino)) {
2012 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
2013 fdP = IH_OPEN(salvinfo->VGLinkH);
2015 if (VALID_INO(ino) && fdP != NULL) {
2016 struct versionStamp header;
2017 afs_sfsize_t nBytes;
2019 nBytes = FDH_PREAD(fdP, (char *)&header, sizeof(struct versionStamp), 0);
2020 if (nBytes != sizeof(struct versionStamp)
2021 || header.magic != LINKTABLEMAGIC) {
2022 Log("Bad linktable header for volume %" AFS_VOLID_FMT ".\n", afs_printable_VolumeId_lu(isp->RWvolumeId));
2023 FDH_REALLYCLOSE(fdP);
2027 if (!VALID_INO(ino) || fdP == NULL) {
2028 Log("%s link table for volume %" AFS_VOLID_FMT ".\n",
2029 Testing ? "Would have recreated" : "Recreating", afs_printable_VolumeId_lu(isp->RWvolumeId));
2031 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
2034 struct ViceInodeInfo *ip;
2035 CreateLinkTable(salvinfo, isp, ino);
2036 fdP = IH_OPEN(salvinfo->VGLinkH);
2037 /* Sync fake 1 link counts to the link table, now that it exists */
2039 for (i = 0; i < nVols; i++) {
2040 ip = allInodes + isp[i].index;
2041 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
2042 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 0);
2043 ip[j].linkCount = 1;
2050 FDH_REALLYCLOSE(fdP);
2052 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
2055 /* Salvage in reverse order--read/write volume last; this way any
2056 * Inodes not referenced by the time we salvage the read/write volume
2057 * can be picked up by the read/write volume */
2058 /* ACTUALLY, that's not done right now--the inodes just vanish */
2059 for (i = nVols - 1; i >= salvageTo; i--) {
2061 struct InodeSummary *lisp = &isp[i];
2062 #ifdef AFS_NAMEI_ENV
2063 if (rw && (nVols > 1 || isp[i].nSpecialInodes == isp[i].nInodes)) {
2064 /* If nVols > 1, we have more than one vol in this volgroup, so
2065 * the RW inodes we detected may just be for the linktable, and
2066 * there is no actual RW volume.
2068 * Additionally, if we only have linktable inodes (no other
2069 * special inodes, no data inodes), there is also no actual RW
2070 * volume to salvage; this is just cruft left behind by something
2071 * else. In that case nVols will only be 1, though, so also
2072 * perform this linktables-only check if we don't have any
2073 * non-special inodes. */
2075 int all_linktables = 1;
2076 for (inode_i = 0; inode_i < isp[i].nSpecialInodes; inode_i++) {
2077 if (inodes[inode_i].u.special.type != VI_LINKTABLE) {
2082 if (all_linktables) {
2083 /* All we have are linktable special inodes, so skip salvaging
2084 * the RW; there was never an RW volume here. If we don't do
2085 * this, we risk creating a new "phantom" RW that the VLDB
2086 * doesn't know about, which is confusing and can cause
2094 Log("%s VOLUME %" AFS_VOLID_FMT "%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
2095 afs_printable_VolumeId_lu(lisp->volumeId), (Testing ? "(READONLY mode)" : ""));
2096 /* Check inodes twice. The second time do things seriously. This
2097 * way the whole RO volume can be deleted, below, if anything goes wrong */
2098 for (check = 1; check >= 0; check--) {
2100 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
2102 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
2103 if (rw && deleteMe) {
2104 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
2105 * volume won't be called */
2111 if (rw && check == 1)
2113 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
2114 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
2120 /* Fix actual inode counts */
2123 Log("totalInodes %d\n",totalInodes);
2124 for (ip = inodes; totalInodes; ip++, totalInodes--) {
2125 static int TraceBadLinkCounts = 0;
2126 #ifdef AFS_NAMEI_ENV
2127 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
2128 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
2129 VGLinkH_p1 = ip->u.param[0];
2130 continue; /* Deal with this last. */
2131 } else if (CheckDupLinktable(salvinfo, isp, ip)) {
2132 /* Don't touch this inode; CheckDupLinktable has handled it */
2136 if (ip->linkCount != 0 && TraceBadLinkCounts) {
2137 TraceBadLinkCounts--; /* Limit reports, per volume */
2138 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]); /* VolumeId in param */
2141 /* If ip->linkCount is non-zero at this point, then the linkcount
2142 * for the inode on disk is wrong. Initially linkCount is set to
2143 * the actual link count of the inode on disk, and then we (the
2144 * salvager) decrement it for every reference to that inode that we
2145 * find. So if linkCount is still positive by this point, it means
2146 * that the linkcount on disk is too high, so we should DEC the
2147 * inode. If linkCount is negative, it means the linkcount is too
2148 * low, so we should INC the inode.
2150 * If we get an error while INC'ing or DEC'ing, that's a little
2151 * odd and indicates a bug, but try to continue anyway, so the
2152 * volume may still be made accessible. */
2153 while (ip->linkCount > 0) {
2155 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2156 Log("idec failed. inode %s errno %d\n",
2157 PrintInode(stmp, ip->inodeNumber), errno);
2163 while (ip->linkCount < 0) {
2165 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2166 Log("iinc failed. inode %s errno %d\n",
2167 PrintInode(stmp, ip->inodeNumber), errno);
2174 #ifdef AFS_NAMEI_ENV
2175 while (dec_VGLinkH > 0) {
2176 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2177 Log("idec failed on link table, errno = %d\n", errno);
2181 while (dec_VGLinkH < 0) {
2182 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2183 Log("iinc failed on link table, errno = %d\n", errno);
2190 /* Directory consistency checks on the rw volume */
2192 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2193 IH_RELEASE(salvinfo->VGLinkH);
2195 if (canfork && !debug) {
/* Inspect the volume info headers of a volume group before any forking.
 * DoSalvageVolumeGroup calls this when ForceSalvage is off; a non-zero
 * result presumably means the group needs no salvage -- confirm against
 * the return statements, which are not visible here.
 *
 * Visible flow, per volume: a leading RW summary with no special inodes is
 * treated as a phantom when the group has more than one volume; otherwise
 * the volume info inode named in the volume summary header is read, and the
 * header is accepted when its magic is VOLUMEINFOMAGIC, dontSalvage is
 * DONT_SALVAGE, and needsSalvaged and destroyMe are both 0. An accepted
 * header that is still marked inUse has inUse cleared and inService set,
 * and is written back with IH_IWRITE. */
2201 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2203 /* Check headers BEFORE forking */
2207 for (i = 0; i < nVols; i++) {
2208 struct VolumeSummary *vs = isp[i].volSummary;
2209 VolumeDiskData volHeader;
2211 /* Don't salvage just because phantom rw volume is there... */
2212 /* (If a read-only volume exists, read/write inodes must also exist) */
2213 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2217 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2218 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2219 == sizeof(volHeader)
2220 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2221 && volHeader.dontSalvage == DONT_SALVAGE
2222 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2223 if (volHeader.inUse != 0) {
2224 volHeader.inUse = 0;
2225 volHeader.inService = 1;
2227 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2228 != sizeof(volHeader)) {
2244 /* SalvageVolumeHeaderFile
2246 * Salvage the top level V*.vol header file. Make sure the special files
2247 * exist and that there are no duplicates.
2249 * Calls SalvageHeader for each possible type of volume special file.
/*
 * SalvageVolumeHeaderFile: rebuild the volume header for the volume
 * summarized by isp from the special inodes found on disk, and compare it
 * with the header already recorded in isp->volSummary (if there is one).
 *
 *  salvinfo  salvage job state; VGLinkH, VGLinkH_cnt, fileSysPath,
 *            fileSysPartition and fileSysDevice are used here
 *  isp       inode summary of the volume; index and nSpecialInodes select
 *            this volume's special inodes inside inodes[]
 *  inodes    inode array for the volume group
 *  RW        nonzero for the read/write volume; for other volumes
 *            ClearROInUseBit is applied to the summary when not checking
 *  check     check-only mode: problems are logged as found rather than
 *            repaired, and link counts are left alone
 *  deleteMe  passed through to SalvageHeader
 *
 * Outline:
 *  1. If a header and the skip array are available, remember which special
 *     inodes the existing header references (goodspecial[]).
 *  2. Start a fresh tempHeader (magic, version, id, parent).
 *  3. Walk adjacent special inodes looking for two of the same type. When
 *     the existing header references one of them, that one is preferred;
 *     otherwise the salvage of this volume is abandoned.
 *  4. Record every usable special inode in tempHeader through stuff[], log
 *     inodes whose type is out of range, and decrement linkCount so the
 *     inode is kept (not in check mode, and not for the link table).
 *  5. Run SalvageHeader for each special inode type.
 *  6. Create the header file when none exists (VCreateVolumeDiskHeader), or
 *     rewrite it when it differs from tempHeader (VWriteVolumeDiskHeader),
 *     then initialize volumeInfoHandle.
 *
 * NOTE(review): the return convention is not restated here; confirm it
 * against the callers before relying on specific values.
 */
2253 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2254 struct ViceInodeInfo *inodes, int RW,
2255 int check, int *deleteMe)
2258 struct ViceInodeInfo *ip;
2259 int allinodesobsolete = 1;
2260 struct VolumeDiskHeader diskHeader;
2261 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2263 struct VolumeHeader tempHeader;
2264 struct afs_inode_info stuff[MAXINODETYPE];
2266 /* keeps track of special inodes that are probably 'good'; they are
2267 * referenced in the vol header, and are included in the given inodes
2272 } goodspecial[MAXINODETYPE];
2277 memset(goodspecial, 0, sizeof(goodspecial));
2279 skip = calloc(isp->nSpecialInodes, sizeof(*skip));
2281 Log("cannot allocate memory for inode skip array when salvaging "
2282 "volume %lu; not performing duplicate special inode recovery\n",
2283 afs_printable_uint32_lu(isp->volumeId));
2284 /* still try to perform the salvage; the skip array only does anything
2285 * if we detect duplicate special inodes */
2288 init_inode_info(&tempHeader, stuff);
2291 * First, look at the special inodes and see if any are referenced by
2292 * the existing volume header. If we find duplicate special inodes, we
2293 * can use this information to use the referenced inode (it's more
2294 * likely to be the 'good' one), and throw away the duplicates.
2296 if (isp->volSummary && skip) {
2297 /* use tempHeader, so we can use the stuff[] array to easily index
2298 * into the isp->volSummary special inodes */
2299 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2301 for (i = 0; i < isp->nSpecialInodes; i++) {
2302 ip = &inodes[isp->index + i];
2303 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2304 /* will get taken care of in a later loop */
2307 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2308 goodspecial[ip->u.special.type-1].valid = 1;
2309 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2314 memset(&tempHeader, 0, sizeof(tempHeader));
2315 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2316 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2317 tempHeader.id = isp->volumeId;
2318 tempHeader.parent = isp->RWvolumeId;
2320 /* Check for duplicates (inodes are sorted by type field) */
2321 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2322 ip = &inodes[isp->index + i];
2323 if (ip->u.special.type == (ip + 1)->u.special.type) {
2324 afs_ino_str_t stmp1, stmp2;
2326 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2327 /* Will be caught in the loop below */
2331 Log("Duplicate special %d inodes for volume %" AFS_VOLID_FMT " found (%s, %s);\n",
2332 ip->u.special.type, afs_printable_VolumeId_lu(isp->volumeId),
2333 PrintInode(stmp1, ip->inodeNumber),
2334 PrintInode(stmp2, (ip+1)->inodeNumber));
2336 if (skip && goodspecial[ip->u.special.type-1].valid) {
2337 Inode gi = goodspecial[ip->u.special.type-1].inode;
2340 Log("using special inode referenced by vol header (%s)\n",
2341 PrintInode(stmp1, gi));
2344 /* the volume header references some special inode of
2345 * this type in the inodes array; are we it? */
2346 if (ip->inodeNumber != gi) {
2348 } else if ((ip+1)->inodeNumber != gi) {
2349 /* in case this is the last iteration; we need to
2350 * make sure we check ip+1, too */
2355 Log("cannot determine which is correct; salvage of volume %" AFS_VOLID_FMT " aborted\n", afs_printable_VolumeId_lu(isp->volumeId));
2363 for (i = 0; i < isp->nSpecialInodes; i++) {
2365 ip = &inodes[isp->index + i];
2366 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2368 Log("Rubbish header inode %s of type %d\n",
2369 PrintInode(stmp, ip->inodeNumber),
2370 ip->u.special.type);
2376 Log("Rubbish header inode %s of type %d; deleted\n",
2377 PrintInode(stmp, ip->inodeNumber),
2378 ip->u.special.type);
2379 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2380 if (skip && skip[i]) {
2381 if (orphans == ORPH_REMOVE) {
2382 Log("Removing orphan special inode %s of type %d\n",
2383 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2386 Log("Ignoring orphan special inode %s of type %d\n",
2387 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2388 /* fall through to the ip->linkCount--; line below */
2391 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2392 allinodesobsolete = 0;
2394 if (!check && ip->u.special.type != VI_LINKTABLE)
2395 ip->linkCount--; /* Keep the inode around */
2403 if (allinodesobsolete) {
2410 salvinfo->VGLinkH_cnt++; /* one for every header. */
2412 if (!RW && !check && isp->volSummary) {
2413 ClearROInUseBit(isp->volSummary);
2417 for (i = 0; i < MAXINODETYPE; i++) {
2418 if (stuff[i].inodeType == VI_LINKTABLE) {
2419 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2420 * And we may have recreated the link table earlier, so set the
2421 * RW header as well. The header magic was already checked.
2423 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2424 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2428 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2432 if (isp->volSummary == NULL) {
2434 char headerName[64];
2435 snprintf(headerName, sizeof headerName, VFORMAT,
2436 afs_printable_VolumeId_lu(isp->volumeId));
2437 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2438 salvinfo->fileSysPath, headerName);
2440 Log("No header file for volume %" AFS_VOLID_FMT "\n", afs_printable_VolumeId_lu(isp->volumeId));
2444 Log("No header file for volume %" AFS_VOLID_FMT "; %screating %s\n",
2445 afs_printable_VolumeId_lu(isp->volumeId), (Testing ? "it would have been " : ""),
2447 isp->volSummary = calloc(1, sizeof(struct VolumeSummary));
2449 writefunc = VCreateVolumeDiskHeader;
2452 char headerName[64];
2453 /* hack: these two fields are obsolete... */
2454 isp->volSummary->header.volumeAcl = 0;
2455 isp->volSummary->header.volumeMountTable = 0;
2458 (&isp->volSummary->header, &tempHeader,
2459 sizeof(struct VolumeHeader))) {
2460 VolumeExternalName_r(isp->volumeId, headerName, sizeof(headerName));
2461 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2462 salvinfo->fileSysPath, headerName);
2464 Log("Header file %s is damaged or no longer valid%s\n", path,
2465 (check ? "" : "; repairing"));
2469 writefunc = VWriteVolumeDiskHeader;
2473 memcpy(&isp->volSummary->header, &tempHeader,
2474 sizeof(struct VolumeHeader));
2477 Log("It would have written a new header file for volume %" AFS_VOLID_FMT "\n",
2478 afs_printable_VolumeId_lu(isp->volumeId));
2481 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2482 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2484 Log("Error %ld writing volume header file for volume %" AFS_VOLID_FMT "\n",
2485 afs_printable_int32_ld(code),
2486 afs_printable_VolumeId_lu(diskHeader.id));
2491 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2492 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader: verify, and when necessary recreate, one special inode of
 * a volume header.
 *
 *  salvinfo  salvage job state; VolInfo is filled in when sp describes the
 *            volume info file (VI_VOLINFO)
 *  sp        descriptor of the special inode: inodeType, description, the
 *            expected stamp, the number of bytes to read (size) and a
 *            pointer to the header slot that holds the inode number
 *  isp       inode summary of the volume (volumeId, RWvolumeId,
 *            maxUniquifier)
 *  check     check-only mode; recreating anything in this mode is treated
 *            as an internal error
 *  deleteMe  NOTE(review): presumably set when the volume is to be
 *            destroyed; the assignments are not restated here, confirm
 *            before relying on it
 *
 * Outline:
 *  - Without AFS_NAMEI_ENV the link table is not examined here.
 *  - A zero inode number is reported and the inode is created with
 *    IH_CREATE.
 *  - The inode is opened and sp->size bytes are read; a short read or a
 *    magic number different from sp->stamp.magic marks it as corrupted.
 *  - With AFS_NAMEI_ENV, namei_FixSpecialOGM is applied to the open file.
 *  - For VI_VOLINFO the destroyMe field is compared with DESTROY_ME.
 *  - When recreating (and not Testing) the file is truncated and rewritten:
 *    VI_VOLINFO gets a fresh VolumeDiskData named "bogus.<volume id>" that
 *    is out of service and not blessed, with uniquifier
 *    maxUniquifier + 1 + 1000; every other type gets only its version
 *    stamp.
 *  - For VI_VOLINFO the header is copied to salvinfo->VolInfo and its
 *    update (or creation) time is logged.
 */
2497 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2498 struct InodeSummary *isp, int check, int *deleteMe)
2501 VolumeDiskData volumeInfo;
2502 struct versionStamp fileHeader;
2511 #ifndef AFS_NAMEI_ENV
2512 if (sp->inodeType == VI_LINKTABLE)
2513 return 0; /* header magic was already checked */
2515 if (*(sp->inode) == 0) {
2517 Log("Missing inode in volume header (%s)\n", sp->description);
2521 Log("Missing inode in volume header (%s); %s\n", sp->description,
2522 (Testing ? "it would have recreated it" : "recreating"));
2525 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2526 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2527 if (!VALID_INO(*(sp->inode)))
2529 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2530 sp->description, errno);
2535 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2536 fdP = IH_OPEN(specH);
2537 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2538 /* bail out early and destroy the volume */
2540 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2547 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2548 sp->description, errno);
2551 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2552 || header.fileHeader.magic != sp->stamp.magic)) {
2554 Log("Part of the header (%s) is corrupted\n", sp->description);
2555 FDH_REALLYCLOSE(fdP);
2559 Log("Part of the header (%s) is corrupted; recreating\n",
2562 /* header can be garbage; make sure we don't read garbage data from
2564 memset(&header, 0, sizeof(header));
2566 #ifdef AFS_NAMEI_ENV
2567 if (namei_FixSpecialOGM(fdP, check)) {
2568 Log("Error with namei header OGM data (%s)\n", sp->description);
2569 FDH_REALLYCLOSE(fdP);
2574 if (sp->inodeType == VI_VOLINFO
2575 && header.volumeInfo.destroyMe == DESTROY_ME) {
2578 FDH_REALLYCLOSE(fdP);
2582 if (recreate && !Testing) {
2585 ("Internal error: recreating volume header (%s) in check mode\n",
2587 nBytes = FDH_TRUNC(fdP, 0);
2589 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2590 sp->description, errno);
2592 /* The following code should be moved into vutil.c */
2593 if (sp->inodeType == VI_VOLINFO) {
2595 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2596 header.volumeInfo.stamp = sp->stamp;
2597 header.volumeInfo.id = isp->volumeId;
2598 header.volumeInfo.parentId = isp->RWvolumeId;
2599 sprintf(header.volumeInfo.name, "bogus.%" AFS_VOLID_FMT, afs_printable_VolumeId_lu(isp->volumeId));
2600 Log("Warning: the name of volume %" AFS_VOLID_FMT " is now \"bogus.%" AFS_VOLID_FMT "\"\n",
2601 afs_printable_VolumeId_lu(isp->volumeId), afs_printable_VolumeId_lu(isp->volumeId));
2602 header.volumeInfo.inService = 0;
2603 header.volumeInfo.blessed = 0;
2604 /* The + 1000 is a hack in case there are any files out in venus caches */
2605 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2606 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2607 header.volumeInfo.needsCallback = 0;
2608 gettimeofday(&tp, NULL);
2609 header.volumeInfo.creationDate = tp.tv_sec;
2611 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2612 sizeof(header.volumeInfo), 0);
2613 if (nBytes != sizeof(header.volumeInfo)) {
2616 ("Unable to write volume header file (%s) (errno = %d)\n",
2617 sp->description, errno);
2618 Abort("Unable to write entire volume header file (%s)\n",
2622 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2623 if (nBytes != sizeof(sp->stamp)) {
2626 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2627 sp->description, errno);
2629 ("Unable to write entire version stamp in volume header file (%s)\n",
2634 FDH_REALLYCLOSE(fdP);
2636 if (sp->inodeType == VI_VOLINFO) {
2637 salvinfo->VolInfo = header.volumeInfo;
2642 if (salvinfo->VolInfo.updateDate) {
2643 strcpy(update, TimeStamp(buffer, sizeof(buffer), salvinfo->VolInfo.updateDate, 0));
2645 Log("%s (%" AFS_VOLID_FMT ") %supdated %s\n", salvinfo->VolInfo.name,
2646 afs_printable_VolumeId_lu(salvinfo->VolInfo.id),
2647 (Testing ? "it would have been " : ""), update);
2649 strcpy(update, TimeStamp(buffer, sizeof(buffer), salvinfo->VolInfo.creationDate, 0));
2651 Log("%s (%" AFS_VOLID_FMT ") not updated (created %s)\n",
2652 salvinfo->VolInfo.name, afs_printable_VolumeId_lu(salvinfo->VolInfo.id), update);
2662 SalvageVnodes(struct SalvInfo *salvinfo,
2663 struct InodeSummary *rwIsp,
2664 struct InodeSummary *thisIsp,
2665 struct ViceInodeInfo *inodes, int check)
2667 int ilarge, ismall, ioffset, RW, nInodes;
2668 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2671 RW = (rwIsp == thisIsp);
2672 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2674 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2675 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2676 if (check && ismall == -1)
2679 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2680 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2681 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex: reconcile one vnode index file with the data inodes that
 * were found for the volume group.
 *
 *  salvinfo    salvage job state; VolumeChanged is set when a vnode is
 *              rewritten
 *  ino         inode of the vnode index file
 *  class       vnode class of the index; selects the VnodeClassInfo entry
 *              and therefore the on-disk vnode size and magic number
 *  RW          nonzero when the index belongs to the read/write volume
 *  ip,nInodes  the group's data inodes, walked in step with the index by
 *              vnode number (NOTE(review): this relies on the array being
 *              sorted by vnode number; confirm against the caller)
 *  volSummary  summary of the volume; header.parent is used to open the
 *              index
 *  check       check-only mode; a vnode must never be changed in this mode
 *              (asserted before the write-back)
 *
 * For every vnode whose type is not vNull:
 *  - a zero inode number, or a magic number that differs from the class
 *    magic, gets the vnode cleared;
 *  - otherwise the matching inode is looked up, preferring the one whose
 *    inode number the vnode already records;
 *  - with a match, the uniquifier, data version, inode number and length
 *    stored in the vnode are brought in line with the inode, and the
 *    inode's linkCount is decremented so that it is kept;
 *  - without a match, a vnode that names an inode, or is a directory, is
 *    cleared;
 *  - a changed vnode is written back with IH_IWRITE unless Testing is set.
 *
 * NOTE(review): the return convention is not restated here; SalvageVnodes
 * treats 0 as success and tests for -1 in check mode.
 */
2685 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2686 struct ViceInodeInfo *ip, int nInodes,
2687 struct VolumeSummary *volSummary, int check)
2689 char buf[SIZEOF_LARGEDISKVNODE];
2690 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2692 StreamHandle_t *file;
2693 struct VnodeClassInfo *vcp;
2695 afs_sfsize_t nVnodes;
2696 afs_fsize_t vnodeLength;
2698 afs_ino_str_t stmp1, stmp2;
2702 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2703 fdP = IH_OPEN(handle);
2704 opr_Assert(fdP != NULL);
2705 file = FDH_FDOPEN(fdP, "r+");
2706 opr_Assert(file != NULL);
2707 vcp = &VnodeClassInfo[class];
2708 size = OS_SIZE(fdP->fd_fd);
2709 opr_Assert(size != -1);
2710 nVnodes = (size / vcp->diskSize) - 1;
2712 opr_Assert((nVnodes + 1) * vcp->diskSize == size);
2713 opr_Verify(STREAM_ASEEK(file, vcp->diskSize) == 0);
2717 for (vnodeIndex = 0;
2718 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2719 nVnodes--, vnodeIndex++) {
2720 if (vnode->type != vNull) {
2721 int vnodeChanged = 0;
2722 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2723 if (VNDISK_GET_INO(vnode) == 0) {
2725 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2726 memset(vnode, 0, vcp->diskSize);
2730 if (vcp->magic != vnode->vnodeMagic) {
2731 /* bad magic #, probably partially created vnode */
2733 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2734 vnodeNumber, afs_printable_uint32_lu(vnode->vnodeMagic),
2735 afs_printable_uint32_lu(vcp->magic));
2736 memset(vnode, 0, vcp->diskSize);
2740 Log("Partially allocated vnode %d deleted.\n",
2742 memset(vnode, 0, vcp->diskSize);
2746 /* ****** Should do a bit more salvage here: e.g. make sure
2747 * vnode type matches what it should be given the index */
2748 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2749 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2750 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2751 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2758 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2759 /* The following doesn't work, because the version number
2760 * is not maintained correctly by the file server */
2761 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2762 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2764 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2770 /* For RW volume, look for vnode with matching inode number;
2771 * if no such match, take the first determined by our sort
2773 struct ViceInodeInfo *lip = ip;
2774 int lnInodes = nInodes;
2776 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2777 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2786 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2787 /* "Matching" inode */
2791 vu = vnode->uniquifier;
2792 iu = ip->u.vnode.vnodeUniquifier;
2793 vd = vnode->dataVersion;
2794 id = ip->u.vnode.inodeDataVersion;
2796 * Because of the possibility of the uniquifier overflows (> 4M)
2797 * we compare them modulo the low 22-bits; we shouldn't worry
2798 * about mismatching since they shouldn't to many old
2799 * uniquifiers of the same vnode...
2801 if (IUnique(vu) != IUnique(iu)) {
2803 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2806 vnode->uniquifier = iu;
2807 #ifdef AFS_3DISPARES
2808 vnode->dataVersion = (id >= vd ?
2811 1887437 ? vd : id) :
2814 1887437 ? id : vd));
2816 #if defined(AFS_SGI_EXMAG)
2817 vnode->dataVersion = (id >= vd ?
2820 15099494 ? vd : id) :
2823 15099494 ? id : vd));
2825 vnode->dataVersion = (id > vd ? id : vd);
2826 #endif /* AFS_SGI_EXMAG */
2827 #endif /* AFS_3DISPARES */
2830 /* don't bother checking for vd > id any more, since
2831 * partial file transfers always result in this state,
2832 * and you can't do much else anyway (you've already
2833 * found the best data you can) */
2834 #ifdef AFS_3DISPARES
2835 if (!vnodeIsDirectory(vnodeNumber)
2836 && ((vd < id && (id - vd) < 1887437)
2837 || ((vd > id && (vd - id) > 1887437)))) {
2839 #if defined(AFS_SGI_EXMAG)
2840 if (!vnodeIsDirectory(vnodeNumber)
2841 && ((vd < id && (id - vd) < 15099494)
2842 || ((vd > id && (vd - id) > 15099494)))) {
2844 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2845 #endif /* AFS_SGI_EXMAG */
2848 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2849 vnode->dataVersion = id;
2854 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2857 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2859 VNDISK_SET_INO(vnode, ip->inodeNumber);
2864 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2866 VNDISK_SET_INO(vnode, ip->inodeNumber);
2869 VNDISK_GET_LEN(vnodeLength, vnode);
2870 if (ip->byteCount != vnodeLength) {
2873 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2878 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2879 VNDISK_SET_LEN(vnode, ip->byteCount);
2883 ip->linkCount--; /* Keep the inode around */
2886 } else { /* no matching inode */
2888 if (VNDISK_GET_INO(vnode) != 0
2889 || vnode->type == vDirectory) {
2890 /* No matching inode--get rid of the vnode */
2892 if (VNDISK_GET_INO(vnode)) {
2894 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2898 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2903 if (VNDISK_GET_INO(vnode)) {
2905 time_t serverModifyTime = vnode->serverModifyTime;
2906 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2910 time_t serverModifyTime = vnode->serverModifyTime;
2911 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2914 memset(vnode, 0, vcp->diskSize);
2917 /* Should not reach here because we checked for
2918 * (inodeNumber == 0) above. And where we zero the vnode,
2919 * we also goto vnodeDone.
2923 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2927 } /* VNDISK_GET_INO(vnode) != 0 */
2929 opr_Assert(!(vnodeChanged && check));
2930 if (vnodeChanged && !Testing) {
2931 opr_Verify(IH_IWRITE(handle,
2932 vnodeIndexOffset(vcp, vnodeNumber),
2933 (char *)vnode, vcp->diskSize)
2935 salvinfo->VolumeChanged = 1; /* For break call back */
2946 struct VnodeEssence *
2947 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2950 struct VnodeInfo *vip;
2953 class = vnodeIdToClass(vnodeNumber);
2954 vip = &salvinfo->vnodeInfo[class];
2955 offset = vnodeIdToBitNumber(vnodeNumber);
2956 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
2960 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2962 /* Copy the directory unconditionally if we are going to change it:
2963 * not just if was cloned.
2965 struct VnodeDiskObject vnode;
2966 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2967 Inode oldinode, newinode;
2970 if (dir->copied || Testing)
2972 DFlush(); /* Well justified paranoia... */
2975 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2976 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2978 opr_Assert(code == sizeof(vnode));
2979 oldinode = VNDISK_GET_INO(&vnode);
2980 /* Increment the version number by a whole lot to avoid problems with
2981 * clients that were promised new version numbers--but the file server
2982 * crashed before the versions were written to disk.
2985 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2986 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2988 opr_Assert(VALID_INO(newinode));
2989 opr_Verify(CopyInode(salvinfo->fileSysDevice, oldinode, newinode,
2992 VNDISK_SET_INO(&vnode, newinode);
2994 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2995 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2997 opr_Assert(code == sizeof(vnode));
2999 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
3000 salvinfo->fileSysDevice, newinode,
3001 &salvinfo->VolumeChanged);
3002 /* Don't delete the original inode right away, because the directory is
3003 * still being scanned.
3009 * This function should either successfully create a new dir, or give up
3010 * and leave things the way they were. In particular, if it fails to write
3011 * the new dir properly, it should return w/o changing the reference to the
3015 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
3017 struct VnodeDiskObject vnode;
3018 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
3019 Inode oldinode, newinode;
3024 afs_int32 parentUnique = 1;
3025 struct VnodeEssence *vnodeEssence;
3030 Log("Salvaging directory %u...\n", dir->vnodeNumber);
3032 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
3033 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
3035 opr_Assert(lcode == sizeof(vnode));
3036 oldinode = VNDISK_GET_INO(&vnode);
3037 /* Increment the version number by a whole lot to avoid problems with
3038 * clients that were promised new version numbers--but the file server
3039 * crashed before the versions were written to disk.
3042 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
3043 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
3045 opr_Assert(VALID_INO(newinode));
3046 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
3047 &salvinfo->VolumeChanged);
3049 /* Assign . and .. vnode numbers from dir and vnode.parent.
3050 * The uniquifier for . is in the vnode.
3051 * The uniquifier for .. might be set to a bogus value of 1 and
3052 * the salvager will later clean it up.
3054 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
3055 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
3058 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
3060 (vnode.parent ? vnode.parent : dir->vnodeNumber),
3065 /* didn't really build the new directory properly, let's just give up. */
3066 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
3067 Log("Directory salvage returned code %d, continuing.\n", code);
3069 Log("also failed to decrement link count on new inode");
3073 Log("Checking the results of the directory salvage...\n");
3074 if (!DirOK(&newdir)) {
3075 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
3076 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
3077 opr_Assert(code == 0);
3081 VNDISK_SET_INO(&vnode, newinode);
3082 length = afs_dir_Length(&newdir);
3083 VNDISK_SET_LEN(&vnode, length);
3085 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3086 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
3088 opr_Assert(lcode == sizeof(vnode));
3089 IH_CONDSYNC(salvinfo->vnodeInfo[vLarge].handle);
3091 /* make sure old directory file is really closed */
3092 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
3093 FDH_REALLYCLOSE(fdP);
3095 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
3096 opr_Assert(code == 0);
3097 dir->dirHandle = newdir;
/**
 * Arguments handed to JudgeEntry through its opaque rock pointer.
 */
struct judgeEntry_params {
    /** directory whose entries are being examined */
    struct DirSummary *dir;
    /** SalvInfo of the salvage job currently running */
    struct SalvInfo *salvinfo;
};
/*
 * JudgeEntry: examine a single directory entry and repair or remove it
 * when it is inconsistent with the vnode it names.
 *
 *  arock        a struct judgeEntry_params carrying the directory summary
 *               and the SalvInfo of the current job
 *  name         name of the entry
 *  vnodeNumber  vnode number stored in the entry
 *  unique       uniquifier stored in the entry
 *
 * Checks performed on the entry:
 *  - the vnode number must resolve through CheckVnodeNumber, otherwise the
 *    entry is deleted;
 *  - without AFS_NAMEI_ENV, an entry whose vnode has inode number 0 is
 *    deleted;
 *  - an entry for a small (even-numbered) vnode with no link count,
 *    uniquifier or mode bits is reported as invalid;
 *  - a uniquifier that differs from the vnode's is rewritten, or the entry
 *    is deleted when the vnode's uniquifier is zero or the directory is
 *    orphaned ("." and ".." of an orphaned directory are left alone);
 *  - "." must name this directory and ".." its parent, and both are
 *    recreated when wrong;
 *  - names starting with ".__afs" are removed and their vnode is marked for
 *    deletion;
 *  - otherwise suid/sgid files, root-owned files and mount points are
 *    reported when the corresponding options are set; a symlink without
 *    execute bits whose contents do not look like a mount point gets mode
 *    bits 0111 added; and the vnode is claimed by this directory, or the
 *    entry is deleted when another directory already claimed it.
 *
 * The directory is copied with CopyOnWrite before any modification, and
 * the vnode's remaining link count is decremented for an accepted entry.
 *
 * NOTE(review): the return convention is not restated here; the one return
 * shown with a value yields 0.
 */
3109 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
3112 struct judgeEntry_params *params = arock;
3113 struct DirSummary *dir = params->dir;
3114 struct SalvInfo *salvinfo = params->salvinfo;
3115 struct VnodeEssence *vnodeEssence;
3116 afs_int32 dirOrphaned, todelete;
3118 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
3120 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3121 if (vnodeEssence == NULL) {
3123 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
3126 CopyOnWrite(salvinfo, dir);
3127 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3132 #ifndef AFS_NAMEI_ENV
3133 /* On AIX machines, don't allow entries to point to inode 0. That is a special
3134 * mount inode for the partition. If this inode were deleted, it would crash
3137 if (vnodeEssence->InodeNumber == 0) {
3138 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
3140 CopyOnWrite(salvinfo, dir);
3141 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3148 if (!(vnodeNumber & 1) && !Showmode
3149 && !(vnodeEssence->count || vnodeEssence->unique
3150 || vnodeEssence->modeBits)) {
3151 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
3152 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3153 vnodeNumber, unique,
3154 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3158 CopyOnWrite(salvinfo, dir);
3159 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3165 /* Check if the Uniquifiers match. If not, change the directory entry
3166 * so its unique matches the vnode unique. Delete if the unique is zero
3167 * or if the directory is orphaned.
3169 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3170 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3173 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3175 /* This is an orphaned directory. Don't delete the . or ..
3176 * entry. Otherwise, it will get created in the next
3177 * salvage and deleted again here. So Just skip it.
3181 /* (vnodeEssence->unique == 0 && ('.' || '..'));
3182 * Entries arriving here should be deleted, but the directory
3183 * is not orphaned. Therefore, the entry must be pointing at
3184 * the wrong vnode. Skip the 'else' clause and fall through;
3185 * the code below will repair the entry so it correctly points
3186 * at the vnode of the current directory (if '.') or the parent
3187 * directory (if '..'). */
3190 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n",
3191 dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique,
3192 vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3196 fid.Vnode = vnodeNumber;
3197 fid.Unique = vnodeEssence->unique;
3198 CopyOnWrite(salvinfo, dir);
3199 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3201 opr_Verify(afs_dir_Create(&dir->dirHandle, name, &fid) == 0);
3204 return 0; /* no need to continue */
3208 if (strcmp(name, ".") == 0) {
3209 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3211 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3214 CopyOnWrite(salvinfo, dir);
3215 opr_Verify(afs_dir_Delete(&dir->dirHandle, ".") == 0);
3216 fid.Vnode = dir->vnodeNumber;
3217 fid.Unique = dir->unique;
3218 opr_Verify(afs_dir_Create(&dir->dirHandle, ".", &fid) == 0);
3219 vnodeNumber = fid.Vnode; /* Get the new Essence */
3220 unique = fid.Unique;
3221 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3225 } else if (strcmp(name, "..") == 0) {
3228 struct VnodeEssence *dotdot;
3229 pa.Vnode = dir->parent;
3230 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3231 opr_Assert(dotdot != NULL); /* XXX Should not be assert */
3232 pa.Unique = dotdot->unique;
3234 pa.Vnode = dir->vnodeNumber;
3235 pa.Unique = dir->unique;
3237 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3239 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3241 CopyOnWrite(salvinfo, dir);
3242 opr_Verify(afs_dir_Delete(&dir->dirHandle, "..") == 0);
3243 opr_Verify(afs_dir_Create(&dir->dirHandle, "..", &pa) == 0);
3246 vnodeNumber = pa.Vnode; /* Get the new Essence */
3248 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3250 dir->haveDotDot = 1;
3251 } else if (strncmp(name, ".__afs", 6) == 0) {
3253 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3256 CopyOnWrite(salvinfo, dir);
3257 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3259 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3260 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3263 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3264 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3265 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3266 && !(vnodeEssence->modeBits & 0111)) {
3267 afs_sfsize_t nBytes;
3273 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3274 vnodeEssence->InodeNumber);
3277 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3281 size = FDH_SIZE(fdP);
3283 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3284 FDH_REALLYCLOSE(fdP);
3291 nBytes = FDH_PREAD(fdP, buf, size, 0);
3292 if (nBytes == size) {
3294 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3295 Log("Volume %" AFS_VOLID_FMT " (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3296 afs_printable_VolumeId_lu(dir->dirHandle.dirh_handle->ih_vid), dir->vname, dir->name ? dir->name : "??", name, buf,
3297 Testing ? "would convert" : "converted");
3298 vnodeEssence->modeBits |= 0111;
3299 vnodeEssence->changed = 1;
3300 } else if (ShowMounts)
3301 Log("In volume %" AFS_VOLID_FMT " (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3302 afs_printable_VolumeId_lu(dir->dirHandle.dirh_handle->ih_vid),
3303 dir->vname, dir->name ? dir->name : "??", name, buf);
3305 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3306 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3308 FDH_REALLYCLOSE(fdP);
3311 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3312 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3313 if (vnodeIdToClass(vnodeNumber) == vLarge
3314 && vnodeEssence->name == NULL) {
3315 vnodeEssence->name = strdup(name);
3318 /* The directory entry points to the vnode. Check to see if the
3319 * vnode points back to the directory. If not, then let the
3320 * directory claim it (else it might end up orphaned). Vnodes
3321 * already claimed by another directory are deleted from this
3322 * directory: hardlinks to the same vnode are not allowed
3323 * from different directories.
3325 if (vnodeEssence->parent != dir->vnodeNumber) {
3326 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3327 /* Vnode does not point back to this directory.
3328 * Orphaned dirs cannot claim a file (it may belong to
3329 * another non-orphaned dir).
3332 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3334 vnodeEssence->parent = dir->vnodeNumber;
3335 vnodeEssence->changed = 1;
3337 /* Vnode was claimed by another directory */
3340 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3341 } else if (vnodeNumber == 1) {
3342 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3344 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3348 CopyOnWrite(salvinfo, dir);
3349 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3354 /* This directory claims the vnode */
3355 vnodeEssence->claimed = 1;
3357 vnodeEssence->count--;
/*
 * DistilVnodeEssence: scan the vnode index inode `ino` of volume `rwVId`
 * for vnode class `class` and condense every in-use vnode into an
 * in-memory struct VnodeEssence in salvinfo->vnodeInfo[class].
 *
 * salvinfo  salvage job state; vnodeInfo[class] is (re)initialised here
 * rwVId     volume id passed to IH_INIT for the index handle
 * class     vnode class being scanned (vLarge or vSmall)
 * ino       inode of the vnode index file
 * maxu      in/out: raised to the largest uniquifier seen
 *
 * The number of vnodes is derived from the index size: size / diskSize - 1.
 * Reading starts one record (vcp->diskSize bytes) into the file.
 * A directory vnode found in a non-large index is zeroed on disk and the
 * volume is flagged as changed.
 */
3362 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3363 VnodeClass class, Inode ino, Unique * maxu)
3365 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3366 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
/* buf is sized for the larger record so it can hold either class. */
3367 char buf[SIZEOF_LARGEDISKVNODE];
3368 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3370 StreamHandle_t *file;
3375 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3376 fdP = IH_OPEN(vip->handle);
3377 opr_Assert(fdP != NULL);
3378 file = FDH_FDOPEN(fdP, "r+");
3379 opr_Assert(file != NULL);
3380 size = OS_SIZE(fdP->fd_fd);
3381 opr_Assert(size != -1);
3382 vip->nVnodes = (size / vcp->diskSize) - 1;
3383 if (vip->nVnodes > 0) {
/* The index must be a whole number of records. */
3384 opr_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3385 opr_Verify(STREAM_ASEEK(file, vcp->diskSize) == 0);
3386 opr_Verify((vip->vnodes = calloc(vip->nVnodes,
3387 sizeof(struct VnodeEssence)))
/* Only the large (directory) class keeps a parallel inode array. */
3389 if (class == vLarge) {
3390 opr_Verify((vip->inodes = calloc(vip->nVnodes, sizeof(Inode)))
3400 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3401 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3402 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3403 nVnodes--, vnodeIndex++) {
3404 if (vnode->type != vNull) {
3405 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3406 afs_fsize_t vnodeLength;
3407 vip->nAllocatedVnodes++;
3408 vep->count = vnode->linkCount;
3409 VNDISK_GET_LEN(vnodeLength, vnode);
3410 vep->blockCount = nBlocks(vnodeLength);
3411 vip->volumeBlockCount += vep->blockCount;
3412 vep->parent = vnode->parent;
3413 vep->unique = vnode->uniquifier;
3414 if (*maxu < vnode->uniquifier)
3415 *maxu = vnode->uniquifier;
3416 vep->modeBits = vnode->modeBits;
3417 vep->InodeNumber = VNDISK_GET_INO(vnode);
3418 vep->type = vnode->type;
3419 vep->author = vnode->author;
3420 vep->owner = vnode->owner;
3421 vep->group = vnode->group;
3422 if (vnode->type == vDirectory) {
3423 if (class != vLarge) {
/* A directory vnode does not belong in this index: un-count it and
 * clear its on-disk record. */
3424 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3425 vip->nAllocatedVnodes--;
3426 memset(vnode, 0, sizeof(*vnode));
/* Write the zeroed record itself.  vnode is a pointer into buf, so pass
 * the pointer and the record size (matching the memset above) rather
 * than the address and size of the pointer variable. */
3427 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3428 vnodeIndexOffset(vcp, vnodeNumber),
3429 (char *)vnode, sizeof(*vnode));
3430 salvinfo->VolumeChanged = 1;
3432 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName: build the path name of directory vnode `vnode` into `path`.
 *
 * The branch visible here handles a vnode that has both a parent and a
 * recorded name: the parent's VnodeEssence is looked up with
 * CheckVnodeNumber(), the parent's path is built first by recursion, and
 * then OS_DIRSEP and vp->name are appended to it.
 *
 * NOTE(review): strcat() is unbounded and this function takes no buffer
 * size; presumably the caller guarantees `path` is large enough -- confirm.
 */
3441 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3444 struct VnodeEssence *parentvp;
3450 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3451 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3452 strcat(path, OS_DIRSEP);
3453 strcat(path, vp->name);
3459 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3460 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
/*
 * IsVnodeOrphaned: return 1 if `vnode` is orphaned, 0 if it is attached.
 *
 * A vnode whose VnodeEssence cannot be found, or which is not marked
 * claimed, is orphaned.  A claimed vnode is orphaned exactly when its
 * parent is, so the check recurses up the parent chain.
 */
3463 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3465 struct VnodeEssence *vep;
3468 return (1); /* Vnode zero does not exist */
3470 return (0); /* The root dir vnode is always claimed */
3471 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3472 if (!vep || !vep->claimed)
3473 return (1); /* Vnode is not claimed - it is orphaned */
/* This vnode is claimed; the answer is whatever holds for its parent. */
3475 return (IsVnodeOrphaned(salvinfo, vep->parent));
/*
 * SalvageDir: salvage the directory held by large vnode index `i`.
 *
 * salvinfo      salvage job state
 * name          volume name (SalvageVolume passes volHeader.name)
 * rwVid         read-write volume id
 * dirVnodeInfo  VnodeInfo for the large (directory) vnode class
 * alinkH        link table handle, stored in dir.ds_linkH
 * i             bit number (array index) of the directory vnode
 * rootdir       out: receives a copy of the DirSummary when the directory
 *               is vnode 1 with uniquifier 1
 * rootdirfound  out: root-directory-found flag (its assignment is not in
 *               the visible lines)
 *
 * The directory is checked with DirOK(), passed to CopyAndSalvage(), and
 * every entry is then handed to JudgeEntry() via afs_dir_EnumerateDir().
 */
3479 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3480 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3481 struct DirSummary *rootdir, int *rootdirfound)
/* dir, dirHandle and path are static: one instance is shared by every
 * invocation, including recursive ones. */
3483 static struct DirSummary dir;
3484 static struct DirHandle dirHandle;
3485 struct VnodeEssence *parent;
3486 static char path[MAXPATHLEN];
3489 if (dirVnodeInfo->vnodes[i].salvaged)
3490 return; /* already salvaged */
/* Marked before the parent recursion below, so a vnode reached again
 * through its parent chain returns immediately. */
3493 dirVnodeInfo->vnodes[i].salvaged = 1;
3495 if (dirVnodeInfo->inodes[i] == 0)
3496 return; /* Not allocated to a directory */
/* Vnode 1 must have no parent; a nonzero parent is cleared. */
3498 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3499 if (dirVnodeInfo->vnodes[i].parent) {
3500 Log("Bad parent, vnode 1; %s...\n",
3501 (Testing ? "skipping" : "salvaging"));
3502 dirVnodeInfo->vnodes[i].parent = 0;
3503 dirVnodeInfo->vnodes[i].changed = 1;
/* Salvage the parent directory first if that has not happened yet. */
3506 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3507 if (parent && parent->salvaged == 0)
3508 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3509 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3510 rootdir, rootdirfound);
/* The static summary is filled in only after the recursive call above,
 * so the parent's use of it is finished by now. */
3513 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3514 dir.unique = dirVnodeInfo->vnodes[i].unique;
3517 dir.parent = dirVnodeInfo->vnodes[i].parent;
3518 dir.haveDot = dir.haveDotDot = 0;
3519 dir.ds_linkH = alinkH;
3520 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3521 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
/* DirOK() is skipped (dirok forced to 0) when RebuildDirs is set and we
 * are not in Testing mode. */
3523 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3526 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3527 (Testing ? "skipping" : "salvaging"));
3530 CopyAndSalvage(salvinfo, &dir);
3532 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
/* Keep a copy of the handle; it is the one enumerated below, and its
 * inode is the one IH_DEC'd further down when dir.copied is set. */
3535 dirHandle = dir.dirHandle;
3538 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3539 &dirVnodeInfo->vnodes[i], path);
3542 /* If enumeration failed for random reasons, we will probably delete
3543 * too much stuff, so we guard against this instead.
3545 struct judgeEntry_params judge_params;
3546 judge_params.salvinfo = salvinfo;
3547 judge_params.dir = &dir;
3549 opr_Verify(afs_dir_EnumerateDir(&dirHandle, JudgeEntry,
3550 &judge_params) == 0);
3553 /* Delete the old directory if it was copied in order to salvage.
3554 * CopyOnWrite has written the new inode # to the disk, but we still
3555 * have the old one in our local structure here. Thus, we idec the
3559 if (dir.copied && !Testing) {
3560 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3561 opr_Assert(code == 0);
3562 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3565 /* Remember rootdir DirSummary _after_ it has been judged */
3566 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3567 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3575 * Get a new FID that can be used to create a new file.
3577 * @param[in] volHeader vol header for the volume
3578 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3579 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3580 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3581 * updated to the new max unique if we create a new
/*
 * GetNewFID: pick a vnode number and uniquifier for a vnode that is about
 * to be created in class `class`; only afid->Vnode and afid->Unique are set.
 */
3585 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3586 VnodeClass class, AFSFid *afid, Unique *maxunique)
/* Look for the first unused (vNull) slot in this class. */
3589 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3590 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
/* i == nVnodes means the scan above found no free slot. */
3594 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3595 /* no free vnodes; make a new one */
3596 salvinfo->vnodeInfo[class].nVnodes++;
/* NOTE(review): the realloc() result is stored straight back into the
 * array pointer and no failure check is visible here; realloc() also does
 * not zero the new entry, and only its .type is set below.  For vLarge
 * the parallel inodes[] array is not visibly grown either -- confirm. */
3597 salvinfo->vnodeInfo[class].vnodes =
3598 realloc(salvinfo->vnodeInfo[class].vnodes,
3599 sizeof(struct VnodeEssence) * (i+1));
3601 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
3604 afid->Vnode = bitNumberToVnodeNumber(i, class);
/* Pick a uniquifier above everything already in use in the volume. */
3606 if (volHeader->uniquifier < (*maxunique + 1)) {
3607 /* header uniq is bad; it will get bumped by 2000 later */
3608 afid->Unique = *maxunique + 1 + 2000;
3611 /* header uniq seems okay; just use that */
3612 afid->Unique = *maxunique = volHeader->uniquifier++;
3617 * Create a vnode for a README file explaining not to use a recreated-root vol.
3619 * @param[in] volHeader vol header for the volume
3620 * @param[in] alinkH ihandle for i/o for the volume
3621 * @param[in] vid volume id
3622 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3623 * updated to the new max unique if we create a new
3625 * @param[out] afid FID for the new readme vnode
3626 * @param[out] ainode the inode for the new readme file
3628 * @return operation status
/*
 * CreateReadme: create the small vnode and the data inode for the README
 * file placed in a recreated root directory.
 *
 * A FID is obtained from GetNewFID(), the inode is created and filled with
 * the text below, then a vFile disk vnode is written to the small vnode
 * index and the matching in-memory VnodeEssence is updated.  On success
 * *ainode receives the new inode; the recovery path drops the inode's
 * link with IH_DEC().
 */
3633 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3634 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3638 struct VnodeDiskObject *rvnode = NULL;
3640 IHandle_t *readmeH = NULL;
3641 struct VnodeEssence *vep;
3643 time_t now = time(NULL);
3645 /* Try to make the note brief, but informative. Only administrators should
3646 * be able to read this file at first, so we can hopefully assume they
3647 * know what AFS is, what a volume is, etc. */
3649 "This volume has been salvaged, but has lost its original root directory.\n"
3650 "The root directory that exists now has been recreated from orphan files\n"
3651 "from the rest of the volume. This recreated root directory may interfere\n"
3652 "with old cached data on clients, and there is no way the salvager can\n"
3653 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3654 "use this volume, but only copy the salvaged data to a new volume.\n"
3655 "Continuing to use this volume as it exists now may cause some clients to\n"
3656 "behave oddly when accessing this volume.\n"
3657 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3658 /* ^ the person reading this probably just lost some data, so they could
3659 * use some cheering up. */
3661 /* -1 for the trailing NUL */
3662 length = sizeof(readme) - 1;
3664 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
/* GetNewFID() may have grown the vnode array, so the entry pointer is
 * taken only after it returns. */
3666 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3668 /* create the inode and write the contents */
3669 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3670 salvinfo->fileSysPath, 0, vid,
3671 afid->Vnode, afid->Unique, 1);
3672 if (!VALID_INO(readmeinode)) {
3673 Log("CreateReadme: readme IH_CREATE failed\n");
3677 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3678 bytes = IH_IWRITE(readmeH, 0, readme, length);
3679 IH_RELEASE(readmeH);
3681 if (bytes != length) {
/* Report the byte count that was actually requested (length), not
 * sizeof(readme), which also counts the trailing NUL. */
3682 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3683 (int)length);
3687 /* create the vnode and write it out */
3688 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
3690 Log("CreateReadme: error alloc'ing memory\n");
3694 rvnode->type = vFile;
3696 rvnode->modeBits = 0777;
3697 rvnode->linkCount = 1;
3698 VNDISK_SET_LEN(rvnode, length);
3699 rvnode->uniquifier = afid->Unique;
3700 rvnode->dataVersion = 1;
3701 VNDISK_SET_INO(rvnode, readmeinode);
3702 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3707 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
3709 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3710 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3711 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3713 if (bytes != SIZEOF_SMALLDISKVNODE) {
3714 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3715 (int)SIZEOF_SMALLDISKVNODE);
3719 /* update VnodeEssence for new readme vnode */
3720 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
3722 vep->blockCount = nBlocks(length);
3723 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3724 vep->parent = rvnode->parent;
3725 vep->unique = rvnode->uniquifier;
3726 vep->modeBits = rvnode->modeBits;
3727 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3728 vep->type = rvnode->type;
3729 vep->author = rvnode->author;
3730 vep->owner = rvnode->owner;
3731 vep->group = rvnode->group;
3741 *ainode = readmeinode;
/* Recovery: drop the link on the README inode created above. */
3746 if (IH_DEC(alinkH, readmeinode, vid)) {
3747 Log("CreateReadme (recovery): IH_DEC failed\n");
3759 * create a root dir for a volume that lacks one.
3761 * @param[in] volHeader vol header for the volume
3762 * @param[in] alinkH ihandle for disk access for this volume group
3763 * @param[in] vid volume id we're dealing with
3764 * @param[out] rootdir populated with info about the new root dir
3765 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3766 * updated to the new max unique if we create a new
3769 * @return operation status
/*
 * CreateRootDir: build a replacement root directory (vnode 1, uniquifier 1)
 * for a volume whose root directory could not be found.
 *
 * The new directory holds a single README.ROOTDIR entry created through
 * CreateReadme().  The directory inode is created, its contents are built
 * with afs_dir_MakeDir()/afs_dir_Create(), a large disk vnode is written
 * for it, and both the in-memory VnodeEssence and *rootdir are filled in.
 * Nothing is created when the volume has no vnodes at all or when vnode 1
 * is already non-null.
 */
3774 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3775 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
/* decroot/decreadme gate the IH_DEC recovery calls at the end. */
3779 int decroot = 0, decreadme = 0;
3780 AFSFid did, readmeid;
3783 struct VnodeDiskObject *rootvnode = NULL;
3784 struct acl_accessList *ACL;
3787 struct VnodeEssence *vep;
3788 Inode readmeinode = 0;
3789 time_t now = time(NULL);
3791 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3792 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3796 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3797 /* We don't have any large vnodes in the volume; allocate room
3798 * for one so we can recreate the root dir */
3799 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3800 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3801 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3803 opr_Assert(salvinfo->vnodeInfo[vLarge].vnodes);
3804 opr_Assert(salvinfo->vnodeInfo[vLarge].inodes);
/* Slots for vnode 1 in the large-vnode essence and inode arrays. */
3807 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3808 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3809 if (vep->type != vNull) {
3810 Log("Not creating new root dir; existing vnode 1 is non-null\n");
3814 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3815 &readmeinode) != 0) {
3820 /* set the DV to a very high number, so it is unlikely that we collide
3821 * with a cached DV */
3824 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3826 if (!VALID_INO(rootinode)) {
3827 Log("CreateRootDir: IH_CREATE failed\n");
3832 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3833 rootinode, &salvinfo->VolumeChanged);
/* The same FID is passed for both the directory itself and its parent. */
3837 if (afs_dir_MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3838 Log("CreateRootDir: MakeDir failed\n");
3841 if (afs_dir_Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3842 Log("CreateRootDir: Create failed\n");
/* Record the directory length for the vnode written below. */
3846 length = afs_dir_Length(&rootdir->dirHandle);
3847 DZap(&rootdir->dirHandle);
3849 /* create the new root dir vnode */
3850 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
3852 Log("CreateRootDir: malloc failed\n");
3856 /* only give 'rl' permissions to 'system:administrators'. We do this to
3857 * try to catch the attention of an administrator, that they should not
3858 * be writing to this directory or continue to use it. */
3859 ACL = VVnodeDiskACL(rootvnode);
3860 ACL->size = sizeof(struct acl_accessList);
3861 ACL->version = ACL_ACLVERSION;
3865 ACL->entries[0].id = -204; /* system:administrators */
3866 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
3868 rootvnode->type = vDirectory;
3869 rootvnode->cloned = 0;
3870 rootvnode->modeBits = 0777;
3871 rootvnode->linkCount = 2;
3872 VNDISK_SET_LEN(rootvnode, length);
3873 rootvnode->uniquifier = 1;
3874 rootvnode->dataVersion = dv;
3875 VNDISK_SET_INO(rootvnode, rootinode);
3876 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3877 rootvnode->author = 0;
3878 rootvnode->owner = 0;
3879 rootvnode->parent = 0;
3880 rootvnode->group = 0;
3881 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3883 /* write it out to disk */
3884 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3885 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3886 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3888 if (bytes != SIZEOF_LARGEDISKVNODE) {
3889 /* just cast to int and don't worry about printing real 64-bit ints;
3890 * a large disk vnode isn't anywhere near the 32-bit limit */
3891 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3892 (int)SIZEOF_LARGEDISKVNODE);
3896 /* update VnodeEssence for the new root vnode */
3897 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3899 vep->blockCount = nBlocks(length);
3900 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3901 vep->parent = rootvnode->parent;
3902 vep->unique = rootvnode->uniquifier;
3903 vep->modeBits = rootvnode->modeBits;
3904 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3905 vep->type = rootvnode->type;
3906 vep->author = rootvnode->author;
3907 vep->owner = rootvnode->owner;
3908 vep->group = rootvnode->group;
3918 /* update DirSummary for the new root vnode */
3919 rootdir->vnodeNumber = 1;
3920 rootdir->unique = 1;
3921 rootdir->haveDot = 1;
3922 rootdir->haveDotDot = 1;
3923 rootdir->rwVid = vid;
3924 rootdir->copied = 0;
3925 rootdir->parent = 0;
/* NOTE(review): the strdup() result is stored without a visible NULL
 * check -- confirm whether callers tolerate a NULL name. */
3926 rootdir->name = strdup(".");
3927 rootdir->vname = volHeader->name;
3928 rootdir->ds_linkH = alinkH;
/* Recovery: drop the links on whichever inodes were flagged above. */
3935 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3936 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3938 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3939 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3949 * salvage a volume group.
3951 * @param[in] salvinfo information for the current salvage job
3952 * @param[in] rwIsp inode summary for rw volume
3953 * @param[in] alinkH link table inode handle
3955 * @return operation status
/*
 * SalvageVolume: salvage the directory structure and vnode indexes of one
 * read-write volume.
 *
 * Order of the visible work:
 *   1. read and sanity-check the volume header;
 *   2. distil both vnode indexes (DistilVnodeEssence) and salvage every
 *      directory vnode (SalvageDir);
 *   3. when no root directory was found and orphans are to be attached,
 *      try to create one (CreateRootDir);
 *   4. find unclaimed vnodes and attach them to the root directory under
 *      __ORPHANDIR__ / __ORPHANFILE__ names;
 *   5. write back every vnode whose essence changed or whose link count
 *      needs correcting, removing orphans when requested;
 *   6. fix up the header statistics and uniquifier, notify the fileserver
 *      about changed contents, and rewrite the volume header.
 */
3959 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3961 /* This routine, for now, will only be called for read-write volumes */
3963 int BlocksInVolume = 0, FilesInVolume = 0;
3965 struct DirSummary rootdir, oldrootdir;
3966 struct VnodeInfo *dirVnodeInfo;
3967 struct VnodeDiskObject vnode;
3968 VolumeDiskData volHeader;
3970 int orphaned, rootdirfound = 0;
3971 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3972 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3973 struct VnodeEssence *vep;
3976 afs_sfsize_t nBytes;
3978 VnodeId LFVnode, ThisVnode;
3979 Unique LFUnique, ThisUnique;
/* Step 1: load the volume header and check that it is usable. */
3983 vid = rwIsp->volSummary->header.id;
3984 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3985 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3986 opr_Assert(nBytes == sizeof(volHeader));
3987 opr_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3988 opr_Assert(volHeader.destroyMe != DESTROY_ME);
3989 /* (should not have gotten this far with DESTROY_ME flag still set!) */
/* Step 2: summarise both vnode classes; maxunique accumulates across both. */
3991 DistilVnodeEssence(salvinfo, vid, vLarge,
3992 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
3993 DistilVnodeEssence(salvinfo, vid, vSmall,
3994 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
/* Salvage each directory; SalvageDir fills rootdir for vnode 1. */
3996 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
3997 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3998 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
3999 &rootdir, &rootdirfound);
4002 nt_sync(salvinfo->fileSysDevice);
4004 sync(); /* This used to be done lower level, for every dir */
/* Step 3: a root directory is recreated only when orphans are to be
 * attached and we are not in Testing mode. */
4011 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
4013 Log("Cannot find root directory for volume %lu; attempting to create "
4014 "a new one\n", afs_printable_uint32_lu(vid));
4016 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
4021 salvinfo->VolumeChanged = 1;
4025 /* Parse each vnode looking for orphaned vnodes and
4026 * connect them to the tree as orphaned (if requested).
4028 oldrootdir = rootdir;
4029 for (class = 0; class < nVNODECLASSES; class++) {
4030 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
4031 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
4032 ThisVnode = bitNumberToVnodeNumber(v, class);
4033 ThisUnique = vep->unique;
4035 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
4036 continue; /* Ignore unused, claimed, and root vnodes */
4038 /* This vnode is orphaned. If it is a directory vnode, then the '..'
4039 * entry in this vnode had incremented the parent link count (In
4040 * JudgeEntry()). We need to go to the parent and decrement that
4041 * link count. But if the parent's unique is zero, then the parent
4042 * link count was not incremented in JudgeEntry().
4044 if (class == vLarge) { /* directory vnode */
4045 pv = vnodeIdToBitNumber(vep->parent);
4046 if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
4047 if (vep->parent == 1 && newrootdir) {
4048 /* this vnode's parent was the volume root, and
4049 * we just created the volume root. So, the parent
4050 * dir didn't exist during JudgeEntry, so the link
4051 * count was not inc'd there, so don't dec it here.
4057 salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
4063 continue; /* If no rootdir, can't attach orphaned files */
4065 /* Here we attach orphaned files and directories into the
4066 * root directory, LVVnode, making sure link counts stay correct.
4068 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
4069 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
4070 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
4072 /* Update this orphaned vnode's info. Its parent info and
4073 * link count (do for orphaned directories and files).
4075 vep->parent = LFVnode; /* Parent is the root dir */
4076 vep->unique = LFUnique;
/* count is later subtracted from the on-disk linkCount when the vnode
 * is written back, so decrementing it here raises the link count. */
4079 vep->count--; /* Inc link count (root dir will pt to it) */
4081 /* If this orphaned vnode is a directory, change '..'.
4082 * The name of the orphaned dir/file is unknown, so we
4083 * build a unique name. No need to CopyOnWrite the directory
4084 * since it is not connected to tree in BK or RO volume and
4085 * won't be visible there.
4087 if (class == vLarge) {
4091 /* Remove and recreate the ".." entry in this orphaned directory */
4092 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
4093 salvinfo->vnodeInfo[class].inodes[v],
4094 &salvinfo->VolumeChanged);
4096 pa.Unique = LFUnique;
4097 opr_Verify(afs_dir_Delete(&dh, "..") == 0);
4098 opr_Verify(afs_dir_Create(&dh, "..", &pa) == 0);
4100 /* The original parent's link count was decremented above.
4101 * Here we increment the new parent's link count.
4103 pv = vnodeIdToBitNumber(LFVnode);
4104 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
4108 /* Go to the root dir and add this entry. The link count of the
4109 * root dir was incremented when ".." was created. Try 10 times.
4111 for (j = 0; j < 10; j++) {
4112 pa.Vnode = ThisVnode;
4113 pa.Unique = ThisUnique;
4115 snprintf(npath, sizeof npath, "%s.%u.%u",
4116 ((class == vLarge) ? "__ORPHANDIR__"
4117 : "__ORPHANFILE__"),
4118 ThisVnode, ThisUnique);
4120 CopyOnWrite(salvinfo, &rootdir);
4121 code = afs_dir_Create(&rootdir.dirHandle, npath, &pa);
4125 ThisUnique += 50; /* Try creating a different file */
4127 opr_Assert(code == 0);
4128 Log("Attaching orphaned %s to volume's root dir as %s\n",
4129 ((class == vLarge) ? "directory" : "file"), npath);
4131 } /* for each vnode in the class */
4132 } /* for each class of vnode */
4134 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
4136 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
4138 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
4140 opr_Assert(code == 0);
4141 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
4144 DFlush(); /* Flush the changes */
/* Without a root directory nothing can be attached; fall back to
 * ignoring orphans for the write-out pass below. */
4145 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
4146 Log("Cannot attach orphaned files and directories: Root directory not found\n");
4147 orphans = ORPH_IGNORE;
4150 /* Write out all changed vnodes. Orphaned files and directories
4151 * will get removed here also (if requested).
4153 for (class = 0; class < nVNODECLASSES; class++) {
4154 afs_sfsize_t nVnodes = salvinfo->vnodeInfo[class].nVnodes;
4155 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
4156 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
4157 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
4158 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
4159 for (i = 0; i < nVnodes; i++) {
4160 struct VnodeEssence *vnp = &vnodes[i];
4161 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
4163 /* If the vnode is good but is unclaimed (not listed in
4164 * any directory entries), then it is orphaned.
4167 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
4168 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
/* A nonzero count is the outstanding link-count correction; it is
 * applied to the disk vnode as linkCount - count below. */
4172 if (vnp->changed || vnp->count) {
4175 IH_IREAD(salvinfo->vnodeInfo[class].handle,
4176 vnodeIndexOffset(vcp, vnodeNumber),
4177 (char *)&vnode, sizeof(vnode));
4178 opr_Assert(nBytes == sizeof(vnode));
4180 vnode.parent = vnp->parent;
4181 oldCount = vnode.linkCount;
4182 vnode.linkCount = vnode.linkCount - vnp->count;
4185 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4187 if (!vnp->todelete) {
4188 /* Orphans should have already been attached (if requested) */
4189 opr_Assert(orphans != ORPH_ATTACH);
4190 oblocks += vnp->blockCount;
4193 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4195 BlocksInVolume -= vnp->blockCount;
4197 if (VNDISK_GET_INO(&vnode)) {
4199 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4200 opr_Assert(code == 0);
/* Clearing the whole record marks the vnode as unused on disk. */
4202 memset(&vnode, 0, sizeof(vnode));
4204 } else if (vnp->count) {
4206 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4209 vnode.modeBits = vnp->modeBits;
4212 vnode.dataVersion++;
4215 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4216 vnodeIndexOffset(vcp, vnodeNumber),
4217 (char *)&vnode, sizeof(vnode));
4218 opr_Assert(nBytes == sizeof(vnode));
4220 salvinfo->VolumeChanged = 1;
4224 if (!Showmode && ofiles) {
4225 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4227 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
/* Release the per-vnode name strings of both classes. */
4231 for (class = 0; class < nVNODECLASSES; class++) {
4232 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4233 for (i = 0; i < vip->nVnodes; i++)
4234 if (vip->vnodes[i].name)
4235 free(vip->vnodes[i].name);
4242 /* Set correct resource utilization statistics */
4243 volHeader.filecount = FilesInVolume;
4244 volHeader.diskused = BlocksInVolume;
4246 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
4247 if (volHeader.uniquifier < (maxunique + 1)) {
4249 Log("Volume uniquifier %u is too low (max uniq %u); fixed\n", volHeader.uniquifier, maxunique);
4250 /* Plus 2,000 in case there are workstations out there with
4251 * cached vnodes that have since been deleted
4253 volHeader.uniquifier = (maxunique + 1 + 2000);
4257 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4258 "Only use this salvaged volume to copy data to another volume; "
4259 "do not continue to use this volume (%lu) as-is.\n",
4260 afs_printable_uint32_lu(vid));
4263 if (!Testing && salvinfo->VolumeChanged) {
4264 #ifdef FSSYNC_BUILD_CLIENT
4265 if (salvinfo->useFSYNC) {
4266 afs_int32 fsync_code;
4268 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4270 Log("Error trying to tell the fileserver to break callbacks for "
4271 "changed volume %lu; error code %ld\n",
4272 afs_printable_uint32_lu(vid),
4273 afs_printable_int32_ld(fsync_code));
4275 salvinfo->VolumeChanged = 0;
4278 #endif /* FSSYNC_BUILD_CLIENT */
4280 #ifdef AFS_DEMAND_ATTACH_FS
4281 if (!salvinfo->useFSYNC) {
4282 /* A volume's contents have changed, but the fileserver will not
4283 * break callbacks on the volume until it tries to load the vol
4284 * header. So, to reduce the amount of time a client could have
4285 * stale data, remove fsstate.dat, so the fileserver will init
4286 * callback state with all clients. This is a very coarse hammer,
4287 * and in the future we should just record which volumes have
4289 code = unlink(AFSDIR_SERVER_FSSTATE_FILEPATH);
4290 if (code && errno != ENOENT) {
4291 Log("Error %d when trying to unlink FS state file %s\n", errno,
4292 AFSDIR_SERVER_FSSTATE_FILEPATH);
4298 /* Turn off the inUse bit; the volume's been salvaged! */
4299 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4300 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4301 volHeader.inService = 1; /* allow service again */
/* needsCallback records whether the change still has to be announced;
 * it must be read before VolumeChanged is reset on the line after next. */
4302 volHeader.needsCallback = (salvinfo->VolumeChanged != 0);
4303 volHeader.dontSalvage = DONT_SALVAGE;
4304 salvinfo->VolumeChanged = 0;
4306 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4307 opr_Assert(nBytes == sizeof(volHeader));
4310 Log("%sSalvaged %s (%" AFS_VOLID_FMT "): %d files, %d blocks\n",
4311 (Testing ? "It would have " : ""), volHeader.name, afs_printable_VolumeId_lu(volHeader.id),
4312 FilesInVolume, BlocksInVolume);
/* Release the index handles opened by DistilVnodeEssence(). */
4315 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4316 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
/*
 * ClearROInUseBit: reset the state flags in the volume header reached
 * through summary->volumeInfoHandle.
 *
 * The header is read and validated (size and magic), inUse and
 * needsSalvaged are cleared, inService is set, dontSalvage is set to
 * DONT_SALVAGE, and the header is written back in full.
 */
4322 ClearROInUseBit(struct VolumeSummary *summary)
4324 IHandle_t *h = summary->volumeInfoHandle;
4325 afs_sfsize_t nBytes;
4327 VolumeDiskData volHeader;
4329 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4330 opr_Assert(nBytes == sizeof(volHeader));
4331 opr_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
4332 volHeader.inUse = 0;
4333 volHeader.needsSalvaged = 0;
4334 volHeader.inService = 1;
4335 volHeader.dontSalvage = DONT_SALVAGE;
/* NOTE(review): confirm whether this write-back is meant to be skipped
 * in Testing mode, as other header writes in this file appear to be. */
4337 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4338 opr_Assert(nBytes == sizeof(volHeader));
4343 * Possibly delete the volume.
4345 * deleteMe - Always do so, only a partial volume.
/*
 * MaybeZapVolume: delete a volume that could not be salvaged, when that
 * is permissible.
 *
 * Read-only volumes, and any volume when `deleteMe` is set, have their
 * disk header destroyed and the header file unlinked; the fileserver is
 * asked to delete the volume when FSYNC is in use, and the summary is
 * marked deleted.  Otherwise, when `check` is zero, the failure is logged
 * with `message` and the salvage is aborted.
 */
4348 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4349 char *message, int deleteMe, int check)
4351 if (readOnly(isp) || deleteMe) {
/* Skip volumes without a summary or already marked deleted. */
4352 if (isp->volSummary && !isp->volSummary->deleted) {
4355 Log("Volume %" AFS_VOLID_FMT " (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", afs_printable_VolumeId_lu(isp->volumeId));
4357 Log("It will be deleted on this server (you may find it elsewhere)\n");
4360 Log("Volume %" AFS_VOLID_FMT " needs to be salvaged. Since it is read-only, however,\n", afs_printable_VolumeId_lu(isp->volumeId));
4362 Log("it will be deleted instead. It should be recloned.\n");
4367 char filename[VMAXPATHLEN];
4368 VolumeExternalName_r(isp->volumeId, filename, sizeof(filename));
/* NOTE(review): sprintf() is unbounded and the declaration of `path` is
 * not visible here -- confirm it can hold fileSysPath, OS_DIRSEP and a
 * VMAXPATHLEN-sized file name, or switch to snprintf(). */
4369 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, filename);
4371 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4373 Log("Error %ld destroying volume disk header for volume %" AFS_VOLID_FMT "\n",
4374 afs_printable_int32_ld(code),
4375 afs_printable_VolumeId_lu(isp->volumeId));
4378 /* make sure we actually delete the header file; ENOENT
4379 * is fine, since VDestroyVolumeDiskHeader probably already
4381 if (unlink(path) && errno != ENOENT) {
4382 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4384 if (salvinfo->useFSYNC) {
4385 AskDelete(salvinfo, isp->volumeId);
4387 isp->volSummary->deleted = 1;
4390 } else if (!check) {
4391 Log("%s salvage was unsuccessful: read-write volume %" AFS_VOLID_FMT "\n", message,
4392 afs_printable_VolumeId_lu(isp->volumeId));
4393 Abort("Salvage of volume %" AFS_VOLID_FMT " aborted\n", afs_printable_VolumeId_lu(isp->volumeId));
4397 #ifdef AFS_DEMAND_ATTACH_FS
4399 * Locks a volume on disk for salvaging.
4401 * @param[in] volumeId volume ID to lock
4403 * @return operation status
4405 * @retval -1 volume lock raced with a fileserver restart; all volumes must
4406 * be checked out and locked again
/*
 * LockVolume: take the on-disk lock for `volumeId`, verify that the
 * fileserver still considers the volume checked out to the salvager, and
 * stamp the volume header as in use by this program.
 *
 * A busy lock or any other locking error aborts the salvage.  A
 * SYNC_DENIED answer from FSYNC_VerifyCheckout() means the checkout has
 * to be retried; any other failure aborts.
 */
4411 LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
4416 /* should always be WRITE_LOCK, but keep the lock-type logic all
4417 * in one place, in VVolLockType. Params will be ignored, but
4418 * try to provide what we're logically doing. */
4419 locktype = VVolLockType(V_VOLUPD, 1);
/* Non-blocking lock attempt: a held lock is reported as EBUSY. */
4421 code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
4423 if (code == EBUSY) {
4424 Abort("Someone else appears to be using volume %lu; Aborted\n",
4425 afs_printable_uint32_lu(volumeId));
4427 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4428 afs_printable_int32_ld(code),
4429 afs_printable_uint32_lu(volumeId));
/* Confirm the volume is still offline and checked out for salvage. */
4432 code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPartition->name, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4433 if (code == SYNC_DENIED) {
4434 /* need to retry checking out volumes */
4437 if (code != SYNC_OK) {
4438 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4439 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4442 /* set inUse = programType in the volume header to ensure that nobody
4443 * tries to use this volume again without salvaging, if we somehow crash
4444 * or otherwise exit before finishing the salvage.
4448 struct VolumeHeader header;
4449 struct VolumeDiskHeader diskHeader;
4450 struct VolumeDiskData volHeader;
4452 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
4457 DiskToVolumeHeader(&header, &diskHeader);
4459 IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
4460 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4461 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4467 volHeader.inUse = programType;
4469 /* If we can't re-write the header, bail out and error. We don't
4470 * assert when reading the header, since it's possible the
4471 * header isn't really there (when there's no data associated
4472 * with the volume; we just delete the vol header file in that
4473 * case). But if it's there enough that we can read it, but
4474 * somehow we cannot write to it to signify we're salvaging it,
4475 * we've got a big problem and we cannot continue. */
4476 opr_Verify(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader))
4477 == sizeof(volHeader));
4484 #endif /* AFS_DEMAND_ATTACH_FS */
/*
 * AskError - ask the fileserver to force a volume into the error state.
 *
 * salvinfo  salvage state; the partition name is taken from it
 * volumeId  id of the volume to put into the error state
 *
 * The FSYNC_VOL_FORCE_ERROR request is only issued when
 * AFS_DEMAND_ATTACH_FS or AFS_DEMAND_ATTACH_UTIL is defined.  A result
 * other than SYNC_OK is logged together with its SYNC_res2string() text.
 */
4487 AskError(struct SalvInfo *salvinfo, VolumeId volumeId)
4489 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4491 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4492 FSYNC_VOL_FORCE_ERROR, FSYNC_WHATEVER, NULL);
4493 if (code != SYNC_OK) {
4494 Log("AskError: failed to force volume %lu into error state; "
4495 "SYNC error code %ld (%s)\n", (long unsigned)volumeId,
4496 (long)code, SYNC_res2string(code));
4498 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
/*
 * AskOffline - ask the fileserver to take a volume offline for salvaging.
 *
 * salvinfo  salvage state; the partition name is taken from it
 * volumeId  id of the volume to take offline
 *
 * Sends FSYNC_VOL_OFF with reason FSYNC_SALVAGE, making at most three
 * attempts.  A SYNC_DENIED answer is logged with a hint that a general
 * salvage may be (or is) required.  A SYNC_BAD_COMMAND answer is treated as
 * an fssync protocol mismatch: the log explains which side is or is not
 * DAFS and the salvage is aborted.  Other failures are logged and
 * FSYNC_clientFinis() is called before the next attempt.  If the final
 * result is still not SYNC_OK the salvage is aborted.
 */
4502 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
4507 memset(&res, 0, sizeof(res));
4509 for (i = 0; i < 3; i++) {
4510 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4511 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4513 if (code == SYNC_OK) {
4515 } else if (code == SYNC_DENIED) {
4517 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4519 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4520 Abort("Salvage aborted\n");
4521 } else if (code == SYNC_BAD_COMMAND) {
4522 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
4525 #ifdef AFS_DEMAND_ATTACH_FS
4526 Log("AskOffline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4528 Log("AskOffline: fileserver is DAFS but we are not.\n");
4531 #ifdef AFS_DEMAND_ATTACH_FS
4532 Log("AskOffline: fileserver is not DAFS but we are.\n");
4534 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4537 Abort("Salvage aborted\n");
4540 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4541 FSYNC_clientFinis();
4545 if (code != SYNC_OK) {
4546 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4547 Abort("Salvage aborted\n");
/*
 * DAFS detection.
 *
 * isDAFS is file-scope state that remembers the answer between calls; it
 * starts out as -1 (presumably "not determined yet" - TODO confirm).
 *
 * The probe sends FSYNC_VOL_LISTVOLUMES (reason FSYNC_SALVAGE) and repeats
 * it while the result code is non-zero, for at most three attempts; each
 * failed attempt is logged with the response reason and followed by
 * FSYNC_clientFinis().  The answer is read from the
 * SYNC_FLAG_DAFS_EXTENSIONS bit of the response header flags.  When the
 * fileserver cannot be queried, the log states that "not DAFS" is assumed.
 */
4551 /* don't want to pass around state; remember it here */
4552 static int isDAFS = -1;
4557 afs_int32 code = 1, i;
4559 /* we don't care if we race. the answer shouldn't change */
4563 memset(&res, 0, sizeof(res));
4565 for (i = 0; code && i < 3; i++) {
4566 code = FSYNC_VolOp(0, NULL, FSYNC_VOL_LISTVOLUMES, FSYNC_SALVAGE, &res);
4568 Log("AskDAFS: FSYNC_VOL_LISTVOLUMES failed with code %ld reason "
4569 "%ld (%s); trying again...\n", (long)code, (long)res.hdr.reason,
4570 FSYNC_reason2string(res.hdr.reason));
4571 FSYNC_clientFinis();
4577 Log("AskDAFS: could not determine DAFS-ness, assuming not DAFS\n");
4581 if ((res.hdr.flags & SYNC_FLAG_DAFS_EXTENSIONS)) {
/*
 * MaybeAskOnline - bring a volume back online only if it still exists.
 *
 * salvinfo  salvage state; fileSysPartition is read
 * volumeId  id of the volume to bring online
 *
 * The volume's disk header is read first.  A failure to read it is taken
 * to mean the volume is gone, in which case no online request is needed;
 * otherwise the request is delegated to AskOnline().
 */
4591 MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4593 struct VolumeDiskHeader diskHdr;
4595 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHdr);
4597 /* volume probably does not exist; no need to bring back online */
4600 AskOnline(salvinfo, volumeId);
/*
 * AskOnline - ask the fileserver to put a volume back online.
 *
 * salvinfo  salvage state; the partition name is taken from it
 * volumeId  id of the volume to bring online
 *
 * Sends FSYNC_VOL_ON with reason FSYNC_WHATEVER, making at most three
 * attempts.  SYNC_DENIED is logged with the volume id and partition name.
 * SYNC_BAD_COMMAND is logged as an fssync protocol mismatch together with
 * a hint to check the file server binary versions.  Other failures are
 * logged and FSYNC_clientFinis() is called before the next attempt.
 */
4604 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4608 for (i = 0; i < 3; i++) {
4609 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4610 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4612 if (code == SYNC_OK) {
4614 } else if (code == SYNC_DENIED) {
4615 Log("AskOnline: file server denied online request to volume %" AFS_VOLID_FMT " partition %s; trying again...\n", afs_printable_VolumeId_lu(volumeId), salvinfo->fileSysPartition->name);
4616 } else if (code == SYNC_BAD_COMMAND) {
4617 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4619 Log("AskOnline: please make sure file server binaries are same version.\n");
4623 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
4624 FSYNC_clientFinis();
/*
 * AskDelete - tell the fileserver that a volume is done with (deleted).
 *
 * salvinfo  salvage state; the partition name is taken from it
 * volumeId  id of the volume that was deleted
 *
 * Sends FSYNC_VOL_DONE with reason FSYNC_SALVAGE, making at most three
 * attempts; the response buffer is cleared before each attempt.
 * SYNC_DENIED is logged with the volume id and partition name.
 * SYNC_BAD_COMMAND is logged as an fssync protocol mismatch, with a hint
 * about which side is or is not DAFS.  SYNC_FAILED with reason
 * FSYNC_UNKNOWN_VOLID or FSYNC_WRONG_PART means the fileserver no longer
 * knows the volume, i.e. it is already effectively deleted.  Other
 * failures are logged and FSYNC_clientFinis() is called before the next
 * attempt.
 *
 * Log messages are prefixed with "AskDelete:" so that failures in this
 * routine are not mistaken for AskOnline() failures in the salvage log.
 */
4631 AskDelete(struct SalvInfo *salvinfo, VolumeId volumeId)
4636 for (i = 0; i < 3; i++) {
4637 memset(&res, 0, sizeof(res));
4638 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4639 FSYNC_VOL_DONE, FSYNC_SALVAGE, &res);
4641 if (code == SYNC_OK) {
4643 } else if (code == SYNC_DENIED) {
4644 Log("AskDelete: file server denied DONE request to volume %" AFS_VOLID_FMT " partition %s; trying again...\n", afs_printable_VolumeId_lu(volumeId), salvinfo->fileSysPartition->name);
4645 } else if (code == SYNC_BAD_COMMAND) {
4646 Log("AskDelete: fssync protocol mismatch (bad command word '%d')\n",
4649 #ifdef AFS_DEMAND_ATTACH_FS
4650 Log("AskDelete: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4652 Log("AskDelete: fileserver is DAFS but we are not.\n");
4655 #ifdef AFS_DEMAND_ATTACH_FS
4656 Log("AskDelete: fileserver is not DAFS but we are.\n");
4658 Log("AskDelete: please make sure fileserver, volserver and salvager binaries are same version.\n");
4662 } else if (code == SYNC_FAILED &&
4663 (res.hdr.reason == FSYNC_UNKNOWN_VOLID ||
4664 res.hdr.reason == FSYNC_WRONG_PART)) {
4665 /* volume is already effectively 'deleted' */
4669 Log("AskDelete: request for fileserver to delete volume failed; trying again...\n");
4670 FSYNC_clientFinis();
/*
 * CopyInode - copy the contents of one inode into another.
 *
 * device    device both inodes live on
 * inode1    source inode
 * inode2    destination inode
 * rwvolume  volume id used to initialise both inode handles
 *
 * Both inodes are opened through inode handles and the data is copied in
 * buffer-sized pieces with FDH_PREAD()/FDH_PWRITE() at a running offset.
 * A short write trips opr_Verify(), and the read loop must end with a
 * clean end-of-file (nBytes == 0).  Both descriptors are closed with
 * FDH_REALLYCLOSE() afterwards.
 *
 * The destination handle is asserted to be open, exactly like the source
 * handle, so that a failed open is reported as an assertion failure
 * instead of being passed on to FDH_PWRITE()/FDH_REALLYCLOSE() as NULL.
 */
4677 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4679 /* Volume parameter is passed in case iopen is upgraded in future to
4680 * require a volume Id to be passed
4683 IHandle_t *srcH, *destH;
4684 FdHandle_t *srcFdP, *destFdP;
4686 afs_foff_t size = 0;
4688 IH_INIT(srcH, device, rwvolume, inode1);
4689 srcFdP = IH_OPEN(srcH);
4690 opr_Assert(srcFdP != NULL);
4691 IH_INIT(destH, device, rwvolume, inode2);
4692 destFdP = IH_OPEN(destH);
opr_Assert(destFdP != NULL);
4693 while ((nBytes = FDH_PREAD(srcFdP, buf, sizeof(buf), size)) > 0) {
4694 opr_Verify(FDH_PWRITE(destFdP, buf, nBytes, size) == nBytes);
4697 opr_Assert(nBytes == 0);
4698 FDH_REALLYCLOSE(srcFdP);
4699 FDH_REALLYCLOSE(destFdP);
/*
 * PrintInodeList - log every record of the salvager's inode file.
 *
 * salvinfo  salvage state; inodeFd is the file that gets dumped
 *
 * The whole file is read into a heap buffer whose size comes from
 * OS_SIZE(), and is then walked as an array of struct ViceInodeInfo.  One
 * log line is written per record: inode number, link count, byte count
 * and the four u.param values (the first one printed as a volume id).
 *
 * NOTE(review): a zero-length inode file leads to malloc(0), which may
 * legally return NULL and would then trip the assertion on buf - confirm
 * that callers never get here with an empty file.
 */
4706 PrintInodeList(struct SalvInfo *salvinfo)
4708 struct ViceInodeInfo *ip;
4709 struct ViceInodeInfo *buf;
4712 afs_sfsize_t st_size;
4714 st_size = OS_SIZE(salvinfo->inodeFd);
4715 opr_Assert(st_size >= 0);
4716 buf = malloc(st_size);
4717 opr_Assert(buf != NULL);
4718 nInodes = st_size / sizeof(struct ViceInodeInfo);
4719 opr_Verify(OS_READ(salvinfo->inodeFd, buf, st_size) == st_size);
4720 for (ip = buf; nInodes--; ip++) {
4721 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%" AFS_VOLID_FMT ",%u,%u,%u)\n", /* VolumeId in param */
4722 PrintInode(stmp, ip->inodeNumber), ip->linkCount,
4723 (afs_uintmax_t) ip->byteCount,
4724 afs_printable_VolumeId_lu(ip->u.param[0]), ip->u.param[1],
4725 ip->u.param[2], ip->u.param[3]);
/*
 * PrintInodeSummary - log one line per entry of salvinfo->inodeSummary.
 *
 * salvinfo  salvage state; nVolumesInInodeFile entries of inodeSummary
 *           are printed
 *
 * Each line shows the volume id, the RW volume id, the index, the inode
 * counts and the maximum uniquifier of the entry.
 *
 * NOTE(review): the format string ends with the label "volSummary" but no
 * matching value is passed - looks like a leftover; confirm before
 * relying on the output format.
 */
4731 PrintInodeSummary(struct SalvInfo *salvinfo)
4734 struct InodeSummary *isp;
4736 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4737 isp = &salvinfo->inodeSummary[i];
4738 Log("VID:%" AFS_VOLID_FMT ", RW:%" AFS_VOLID_FMT ", index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", afs_printable_VolumeId_lu(isp->volumeId), afs_printable_VolumeId_lu(isp->RWvolumeId), isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/*
 * Process-forking helper.  The NT code path must never get here, hence
 * the unconditional assertion.  In demand-attach builds, a salvageserver
 * child (f == 0 with programType == salvageServer) calls
 * VChildProcReconnectFS_r() when FSSYNC_BUILD_CLIENT is defined; a
 * further step is guarded by SALVSYNC_BUILD_CLIENT.
 */
4748 opr_Assert(0); /* Fork is never executed in the NT code path */
4752 #ifdef AFS_DEMAND_ATTACH_FS
4753 if ((f == 0) && (programType == salvageServer)) {
4754 /* we are a salvageserver child */
4755 #ifdef FSSYNC_BUILD_CLIENT
4756 VChildProcReconnectFS_r();
4758 #ifdef SALVSYNC_BUILD_CLIENT
4762 #endif /* AFS_DEMAND_ATTACH_FS */
4763 #endif /* !AFS_NT40_ENV */
/*
 * Exit-time cleanup.  In a demand-attach salvageserver, the volume lock
 * file of every partition id from 0 through VOLMAXPARTS is reinitialised
 * with VLockFileReinit() before the SYNC channels are shut down, so that
 * no volume the fileserver may try to bring online is still locked by us.
 * A thread other than main_thread ends through pthread_exit(), passing
 * the exit code as the thread's result.
 */
4770 #ifdef AFS_DEMAND_ATTACH_FS
4771 if (programType == salvageServer) {
4772 /* release all volume locks before closing down our SYNC channels.
4773 * the fileserver may try to online volumes we have checked out when
4774 * we close down FSSYNC, so we should make sure we don't have those
4775 * volumes locked when it does */
4776 struct DiskPartition64 *dp;
4778 for (i = 0; i <= VOLMAXPARTS; i++) {
4779 dp = VGetPartitionById(i, 0);
4781 VLockFileReinit(&dp->volLockFile);
4784 # ifdef SALVSYNC_BUILD_CLIENT
4787 # ifdef FSSYNC_BUILD_CLIENT
4791 #endif /* AFS_DEMAND_ATTACH_FS */
4794 if (main_thread != pthread_self())
4795 pthread_exit((void *)code);
/*
 * Reap one child process with wait(); a failing wait() trips the
 * assertion.  A child that dumped core is reported in the log under the
 * name given in prog.  Termination by a signal or a non-zero exit status
 * is detected by the final test.
 */
4816 pid = wait(&status);
4817 opr_Assert(pid != -1);
4818 if (WCOREDUMP(status))
4819 Log("\"%s\" core dumped!\n", prog);
4820 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp - format a time as local time into a caller-supplied buffer.
 *
 * buffer     destination buffer
 * size       size of buffer in bytes
 * clock      time to format
 * precision  selects between the two formats below (presumably non-zero
 *            asks for seconds - TODO confirm)
 *
 * Two strftime() formats are used: "MM/DD/YYYY HH:MM:SS" and
 * "MM/DD/YYYY HH:MM".  The buffer is cleared with memset() on a fallback
 * path (presumably when strftime() produced no output - TODO confirm).
 *
 * NOTE(review): localtime() may return NULL and uses static storage; no
 * NULL check on its result is visible here - confirm.
 */
4826 TimeStamp(char *buffer, size_t size, time_t clock, int precision)
4831 lt = localtime(&clock);
4833 nbytes = strftime(buffer, size, "%m/%d/%Y %H:%M:%S", lt);
4835 nbytes = strftime(buffer, size, "%m/%d/%Y %H:%M", lt);
4837 memset(buffer, 0, size);
/*
 * SalvageShowLog - read the salvage log file back, line by line.
 *
 * Nothing is shown when ShowLog is 0 or when running in ClientMode.  If
 * no file name has been configured, ShowLogFilename defaults to a copy of
 * AFSDIR_SERVER_SLVGLOG_FILEPATH.  A file that cannot be opened is
 * reported on stdout.
 *
 * NOTE(review): the strdup() result does not appear to be checked before
 * it is handed to afs_fopen() - confirm.
 */
4842 SalvageShowLog(void)
4847 if (ShowLog == 0 || ClientMode) {
4851 if (ShowLogFilename == NULL) {
4852 ShowLogFilename = strdup(AFSDIR_SERVER_SLVGLOG_FILEPATH);
4855 logFile = afs_fopen(ShowLogFilename, "r");
4857 printf("Can't read %s, exiting\n", ShowLogFilename);
4859 while (fgets(line, sizeof(line), logFile))
/*
 * vLog - va_list flavour of the salvager's logging routine.
 *
 * format  printf-style format string
 * args    arguments for format
 *
 * There are two output paths: the message is either handed to vFSLog(),
 * or written to stderr prefixed with the current time as formatted by
 * TimeStamp() with seconds precision.
 */
4866 vLog(const char *format, va_list args)
4869 vFSLog(format, args);
4874 gettimeofday(&now, NULL);
4875 fprintf(stderr, "%s ", TimeStamp(buffer, sizeof(buffer), now.tv_sec, 1));
4876 vfprintf(stderr, format, args);
/*
 * Log - printf-style logging entry point.
 *
 * format  printf-style format string, followed by its arguments
 *
 * Collects the variable arguments with va_start(); presumably they are
 * forwarded to vLog() - TODO confirm.
 */
4882 Log(const char *format, ...)
4886 va_start(args, format);
/*
 * Abort - printf-style reporting of a fatal salvage error.
 *
 * format  printf-style format string, followed by its arguments
 *
 * Collects the variable arguments with va_start().  Callers in this file
 * use it for conditions they cannot recover from; presumably it logs the
 * message and ends the salvage - TODO confirm.
 */
4892 Abort(const char *format, ...)
4896 va_start(args, format);
/*
 * ToString - derive a string from s; the result pointer p is asserted to
 * be non-NULL (presumably p is a heap copy of s - TODO confirm).
 *
 * s  source string
 */
4906 ToString(const char *s)
4910 opr_Assert(p != NULL);
/*
 * RemoveTheForce - delete "<path>/FORCESALVAGE" if it exists.
 *
 * path  directory in which to look for the FORCESALVAGE file
 *
 * The file is only removed when not in Testing mode and ForceSalvage is
 * set.
 *
 * NOTE(review): target is filled with strcpy()/strcat() without a length
 * check - confirm that path always fits into target.
 */
4914 /* Remove the FORCESALVAGE file */
4916 RemoveTheForce(char *path)
4919 struct afs_stat_st force; /* so we can use afs_stat to find it */
4920 strcpy(target,path);
4921 strcat(target,"/FORCESALVAGE");
4922 if (!Testing && ForceSalvage) {
4923 if (afs_stat(target,&force) == 0) unlink(target);
4927 #ifndef AFS_AIX32_ENV
4929 * UseTheForceLuke - see if we can use the force
/*
 * Non-AIX variant: reports whether "<path>/FORCESALVAGE" exists.
 *
 * path  directory in which to look for the FORCESALVAGE file
 *
 * Returns non-zero when afs_stat() succeeds on the file, 0 otherwise.
 *
 * NOTE(review): target is filled with strcpy()/strcat() without a length
 * check - confirm that path always fits into target.
 */
4932 UseTheForceLuke(char *path)
4934 struct afs_stat_st force;
4936 strcpy(target,path);
4937 strcat(target,"/FORCESALVAGE");
4939 return (afs_stat(target, &force) == 0);
4943 * UseTheForceLuke - see if we can use the force
4946 * The VRMIX fsck will not muck with the filesystem it is supposedly
4947 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4948 * muck directly with the root inode, which is within the normal
4950 * ListViceInodes() has a side effect of setting ForceSalvage if
4951 * it detects a need, based on root inode examination.
/*
 * AIX variant: always returns 0, so a FORCESALVAGE file never forces a
 * salvage on this platform.
 *
 * path  not consulted for the result
 */
4954 UseTheForceLuke(char *path)
4957 return 0; /* sorry OB1 */
4962 /* NT support routines */
/* Path of the running executable; filled in on first use and then reused. */
4964 static char execpathname[MAX_PATH];
/*
 * nt_SalvagePartition - spawn a child salvager for one partition (NT).
 *
 * partName  name of the partition to salvage; copied into the job record
 * jobn      job number stored in the job record
 *
 * On first use the path of the running executable is fetched with
 * GetModuleFileName().  The job record (magic, job number, partition
 * name) is then handed to a freshly spawned copy of the executable via
 * spawnprocveb().
 *
 * GetModuleFileName() returns the buffer size that was passed in when the
 * path had to be truncated, so the result is compared against
 * MAX_PATH - 1 (the size passed) rather than a fixed number.
 *
 * NOTE(review): partName is copied into job.cj_part with strcpy() and no
 * length check - confirm that partition names always fit.
 */
4966 nt_SalvagePartition(char *partName, int jobn)
4971 if (!*execpathname) {
4972 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
4973 if (!n || n >= MAX_PATH - 1)
4976 job.cj_magic = SALVAGER_MAGIC;
4977 job.cj_number = jobn;
4978 (void)strcpy(job.cj_part, partName);
4979 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage - child-side setup for a partition salvage (NT).
 *
 * datap  job record handed over by the parent; read as a childJob_t
 * len    size of that record in bytes
 *
 * The record is validated first: its length must equal
 * sizeof(childJob_t) and its magic must be SALVAGER_MAGIC.  A per-job log
 * file name of the form "<AFSDIR_SERVER_SLVGLOG_FILEPATH>.<job number>"
 * is then built with asprintf(), whose failure is checked.
 */
4984 nt_SetupPartitionSalvage(void *datap, int len)
4986 childJob_t *jobp = (childJob_t *) datap;
4989 if (len != sizeof(childJob_t))
4991 if (jobp->cj_magic != SALVAGER_MAGIC)
4996 if (asprintf(&logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
4997 myjob.cj_number) < 0)
5006 #endif /* AFS_NT40_ENV */