2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
89 #include <afs/procmgmt.h>
93 #include <sys/param.h>
97 #endif /* ITIMER_REAL */
103 #include <sys/stat.h>
108 #include <WINNT/afsevent.h>
111 #define WCOREDUMP(x) ((x) & 0200)
114 #include <afs/afsint.h>
115 #include <afs/afs_assert.h>
116 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
117 #if defined(AFS_VFSINCL_ENV)
118 #include <sys/vnode.h>
120 #include <sys/fs/ufs_inode.h>
122 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
123 #include <ufs/ufs/dinode.h>
124 #include <ufs/ffs/fs.h>
126 #include <ufs/inode.h>
129 #else /* AFS_VFSINCL_ENV */
131 #include <ufs/inode.h>
132 #else /* AFS_OSF_ENV */
133 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_ARM_DARWIN_ENV)
134 #include <sys/inode.h>
137 #endif /* AFS_VFSINCL_ENV */
138 #endif /* AFS_SGI_ENV */
141 #include <sys/lockf.h>
145 #include <checklist.h>
147 #if defined(AFS_SGI_ENV)
152 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
155 #include <sys/mnttab.h>
156 #include <sys/mntent.h>
161 #endif /* AFS_SGI_ENV */
162 #endif /* AFS_HPUX_ENV */
167 #include <afs/osi_inode.h>
171 #include <afs/afsutil.h>
172 #include <afs/fileutil.h>
173 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
181 #include <afs/afssyscalls.h>
185 #include "partition.h"
186 #include "daemon_com.h"
188 #include "volume_inline.h"
189 #include "salvsync.h"
190 #include "viceinode.h"
192 #include "volinodes.h" /* header magic number, etc. stuff */
193 #include "vol-salvage.h"
195 #include "vol_internal.h"
197 #include <afs/prs_fs.h>
199 #ifdef FSSYNC_BUILD_CLIENT
200 #include "vg_cache.h"
208 extern void *calloc();
210 static char *TimeStamp(time_t clock, int precision);
213 int debug; /* -d flag */
214 extern int Testing; /* -n flag */
215 int ListInodeOption; /* -i flag */
216 int ShowRootFiles; /* -r flag */
217 int RebuildDirs; /* -sal flag */
218 int Parallel = 4; /* -para X flag */
219 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
220 int forceR = 0; /* -b flag */
221 int ShowLog = 0; /* -showlog flag */
222 int ShowSuid = 0; /* -showsuid flag */
223 int ShowMounts = 0; /* -showmounts flag */
224 int orphans = ORPH_IGNORE; /* -orphans option */
229 int useSyslog = 0; /* -syslog flag */
230 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
239 #define MAXPARALLEL 32
241 int OKToZap; /* -o flag */
242 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
243 * in the volume header */
245 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
247 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
250 * information that is 'global' to a particular salvage job.
253 Device fileSysDevice; /**< The device number of the current partition
255 char fileSysPath[8]; /**< The path of the mounted partition currently
256 * being salvaged, i.e. the directory containing
257 * the volume headers */
258 char *fileSysPathName; /**< NT needs this to make name pretty log. */
259 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
260 int VGLinkH_cnt; /**< # of references to lnk handle. */
261 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
264 char *fileSysDeviceName; /**< The block device where the file system being
265 * salvaged was mounted */
266 char *filesysfulldev;
268 int VolumeChanged; /**< Set by any routine which would change the
269 * volume in a way which would require callbacks
270 * to be broken if the volume was put back on
271 * on line by an active file server */
273 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
274 * header dealt with */
276 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
277 FD_t inodeFd; /**< File descriptor for inode file */
279 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
280 int nVolumes; /**< Number of volumes (read-write and read-only)
281 * in volume summary */
282 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
285 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
286 * vnodes in the volume that
287 * we are currently looking
289 int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
290 * to contact the fileserver over FSYNC */
297 /* Forward declarations */
298 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
299 static int AskVolumeSummary(struct SalvInfo *salvinfo,
300 VolumeId singleVolumeNumber);
302 #ifdef AFS_DEMAND_ATTACH_FS
303 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
304 #endif /* AFS_DEMAND_ATTACH_FS */
306 /* Uniquifier stored in the Inode */
311 return (u & 0x3fffff);
313 #if defined(AFS_SGI_EXMAG)
314 return (u & SGI_UNIQMASK);
317 #endif /* AFS_SGI_EXMAG */
324 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
326 return 0; /* otherwise may be transient, e.g. EMFILE */
331 char *save_args[MAX_ARGS];
333 extern pthread_t main_thread;
334 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
338 * Get the salvage lock if not already held. Hold until process exits.
340 * @param[in] locktype READ_LOCK or WRITE_LOCK
343 _ObtainSalvageLock(int locktype)
345 struct VLockFile salvageLock;
350 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
352 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
355 "salvager: There appears to be another salvager running! "
360 "salvager: Error %d trying to acquire salvage lock! "
366 ObtainSalvageLock(void)
368 _ObtainSalvageLock(WRITE_LOCK);
371 ObtainSharedSalvageLock(void)
373 _ObtainSalvageLock(READ_LOCK);
377 #ifdef AFS_SGI_XFS_IOPS_ENV
/**
 * Check whether the given partition is currently mounted.
 *
 * For XFS the root inode is not a constant, so the mount table is scanned
 * for an entry whose mount directory equals @p part.
 *
 * @param[in] part  mount directory of the partition to look for
 *
 * @return 1 if the mount table holds an entry for @p part, 0 otherwise
 */
int
IsPartitionMounted(char *part)
{
    FILE *mntfp;
    struct mntent *mntent;

    /* keep the side effect outside the assertion macro */
    mntfp = setmntent(MOUNTED, "r");
    osi_Assert(mntfp != NULL);

    while ((mntent = getmntent(mntfp)) != NULL) {
        if (!strcmp(part, mntent->mnt_dir))
            break;
    }
    endmntent(mntfp);

    /* mntent is NULL when the scan reached the end without a match */
    return mntent ? 1 : 0;
}
397 /* Check if the given inode is the root of the filesystem. */
398 #ifndef AFS_SGI_XFS_IOPS_ENV
400 IsRootInode(struct afs_stat_st *status)
403 * The root inode is not a fixed value in XFS partitions. So we need to
404 * see if the partition is in the list of mounted partitions. This only
405 * affects the SalvageFileSys path, so we check there.
407 return (status->st_ino == ROOTINODE);
412 #ifndef AFS_NAMEI_ENV
413 /* We don't want to salvage big files filesystems, since we can't put volumes on
417 CheckIfBigFilesFS(char *mountPoint, char *devName)
419 struct superblock fs;
422 if (strncmp(devName, "/dev/", 5)) {
423 (void)sprintf(name, "/dev/%s", devName);
425 (void)strcpy(name, devName);
428 if (ReadSuper(&fs, name) < 0) {
429 Log("Unable to read superblock. Not salvaging partition %s.\n",
433 if (IsBigFilesFileSystem(&fs)) {
434 Log("Partition %s is a big files filesystem, not salvaging.\n",
444 #define HDSTR "\\Device\\Harddisk"
445 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
447 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
453 static int dowarn = 1;
455 if (!QueryDosDevice(p1->devName, res1, RES_LEN - 1))
457 if (strncmp(res1, HDSTR, HDLEN)) {
460 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
461 res1, HDSTR, p1->devName);
464 if (!QueryDosDevice(p2->devName, res2, RES_LEN - 1))
466 if (strncmp(res2, HDSTR, HDLEN)) {
469 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
470 res2, HDSTR, p2->devName);
474 return (0 == _strnicmp(res1, res2, RES_LEN - 1));
477 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
480 /* This assumes that two partitions with the same device number divided by
481 * PartsPerDisk are on the same disk.
484 SalvageFileSysParallel(struct DiskPartition64 *partP)
487 struct DiskPartition64 *partP;
488 int pid; /* Pid for this job */
489 int jobnumb; /* Log file job number */
490 struct job *nextjob; /* Next partition on disk to salvage */
492 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
493 struct job *thisjob = 0;
494 static int numjobs = 0;
495 static int jobcount = 0;
501 char logFileName[256];
505 /* We have a partition to salvage. Copy it into thisjob */
506 thisjob = (struct job *)malloc(sizeof(struct job));
508 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
511 memset(thisjob, 0, sizeof(struct job));
512 thisjob->partP = partP;
513 thisjob->jobnumb = jobcount;
515 } else if (jobcount == 0) {
516 /* We are asking to wait for all jobs (partp == 0), yet we never
519 Log("No file system partitions named %s* found; not salvaged\n",
520 VICE_PARTITION_PREFIX);
524 if (debug || Parallel == 1) {
526 SalvageFileSys(thisjob->partP, 0);
533 /* Check to see if thisjob is for a disk that we are already
534 * salvaging. If it is, link it in as the next job to do. The
535 * jobs array has 1 entry per disk being salvaged. numjobs is
536 * the total number of disks currently being salvaged. In
537 * order to keep the jobs array compact, when a disk is
538 * completed, the highest element in the jobs array is moved
539 * down to the now open slot.
541 for (j = 0; j < numjobs; j++) {
542 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
543 /* On same disk, add it to this list and return */
544 thisjob->nextjob = jobs[j]->nextjob;
545 jobs[j]->nextjob = thisjob;
552 /* Loop until we start thisjob or until all existing jobs are finished */
553 while (thisjob || (!partP && (numjobs > 0))) {
554 startjob = -1; /* No new job to start */
556 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
557 /* Either the max jobs are running or we have to wait for all
558 * the jobs to finish. In either case, we wait for at least one
559 * job to finish. When it's done, clean up after it.
561 pid = wait(&wstatus);
562 osi_Assert(pid != -1);
563 for (j = 0; j < numjobs; j++) { /* Find which job it is */
564 if (pid == jobs[j]->pid)
567 osi_Assert(j < numjobs);
568 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
569 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
572 numjobs--; /* job no longer running */
573 oldjob = jobs[j]; /* remember */
574 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
575 free(oldjob); /* free the old job */
577 /* If there is another partition on the disk to salvage, then
578 * say we will start it (startjob). If not, then put thisjob there
579 * and say we will start it.
581 if (jobs[j]) { /* Another partitions to salvage */
582 startjob = j; /* Will start it */
583 } else { /* There is not another partition to salvage */
585 jobs[j] = thisjob; /* Add thisjob */
587 startjob = j; /* Will start it */
589 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
590 startjob = -1; /* Don't start it - already running */
594 /* We don't have to wait for a job to complete */
596 jobs[numjobs] = thisjob; /* Add this job */
598 startjob = numjobs; /* Will start it */
602 /* Start up a new salvage job on a partition in job slot "startjob" */
603 if (startjob != -1) {
605 Log("Starting salvage of file system partition %s\n",
606 jobs[startjob]->partP->name);
608 /* For NT, we not only fork, but re-exec the salvager. Pass in the
609 * commands and pass the child job number via the data path.
612 nt_SalvagePartition(jobs[startjob]->partP->name,
613 jobs[startjob]->jobnumb);
614 jobs[startjob]->pid = pid;
619 jobs[startjob]->pid = pid;
625 for (fd = 0; fd < 16; fd++)
632 openlog("salvager", LOG_PID, useSyslogFacility);
636 (void)afs_snprintf(logFileName, sizeof logFileName,
638 AFSDIR_SERVER_SLVGLOG_FILEPATH,
639 jobs[startjob]->jobnumb);
640 logFile = afs_fopen(logFileName, "w");
645 SalvageFileSys1(jobs[startjob]->partP, 0);
650 } /* while ( thisjob || (!partP && numjobs > 0) ) */
652 /* If waited for all jobs to complete, now collect log files and return */
654 if (!useSyslog) /* if syslogging - no need to collect */
657 for (i = 0; i < jobcount; i++) {
658 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
659 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
660 if ((passLog = afs_fopen(logFileName, "r"))) {
661 while (fgets(buf, sizeof(buf), passLog)) {
666 (void)unlink(logFileName);
675 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
677 if (!canfork || debug || Fork() == 0) {
678 SalvageFileSys1(partP, singleVolumeNumber);
679 if (canfork && !debug) {
684 Wait("SalvageFileSys");
688 get_DevName(char *pbuffer, char *wpath)
690 char pbuf[128], *ptr;
691 strcpy(pbuf, pbuffer);
692 ptr = (char *)strrchr(pbuf, OS_DIRSEPC);
698 ptr = (char *)strrchr(pbuffer, OS_DIRSEPC);
700 strcpy(pbuffer, ptr + 1);
707 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
710 char inodeListPath[256];
711 FD_t inodeFile = INVALID_FD;
712 static char tmpDevName[100];
713 static char wpath[100];
714 struct VolumeSummary *vsp, *esp;
718 struct SalvInfo l_salvinfo;
719 struct SalvInfo *salvinfo = &l_salvinfo;
722 memset(salvinfo, 0, sizeof(*salvinfo));
725 if (inodeFile != INVALID_FD) {
727 inodeFile = INVALID_FD;
729 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
730 Abort("Raced too many times with fileserver restarts while trying to "
731 "checkout/lock volumes; Aborted\n");
733 #ifdef AFS_DEMAND_ATTACH_FS
735 /* unlock all previous volume locks, since we're about to lock them
737 VLockFileReinit(&partP->volLockFile);
739 #endif /* AFS_DEMAND_ATTACH_FS */
741 salvinfo->fileSysPartition = partP;
742 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
743 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
746 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
747 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
748 name = partP->devName;
750 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
751 strcpy(tmpDevName, partP->devName);
752 name = get_DevName(tmpDevName, wpath);
753 salvinfo->fileSysDeviceName = name;
754 salvinfo->filesysfulldev = wpath;
757 if (singleVolumeNumber) {
758 #ifndef AFS_DEMAND_ATTACH_FS
759 /* only non-DAFS locks the partition when salvaging a single volume;
760 * DAFS will lock the individual volumes in the VG */
761 VLockPartition(partP->name);
762 #endif /* !AFS_DEMAND_ATTACH_FS */
766 /* salvageserver already setup fssync conn for us */
767 if ((programType != salvageServer) && !VConnectFS()) {
768 Abort("Couldn't connect to file server\n");
771 salvinfo->useFSYNC = 1;
772 AskOffline(salvinfo, singleVolumeNumber);
773 #ifdef AFS_DEMAND_ATTACH_FS
774 if (LockVolume(salvinfo, singleVolumeNumber)) {
777 #endif /* AFS_DEMAND_ATTACH_FS */
780 salvinfo->useFSYNC = 0;
781 VLockPartition(partP->name);
785 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
788 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
789 partP->name, name, (Testing ? "(READONLY mode)" : ""));
791 Log("***Forced salvage of all volumes on this partition***\n");
796 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
803 osi_Assert((dirp = opendir(salvinfo->fileSysPath)) != NULL);
804 while ((dp = readdir(dirp))) {
805 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
806 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
808 Log("Removing old salvager temp files %s\n", dp->d_name);
809 strcpy(npath, salvinfo->fileSysPath);
810 strcat(npath, OS_DIRSEP);
811 strcat(npath, dp->d_name);
817 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
819 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
820 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
822 snprintf(inodeListPath, 255, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
826 inodeFile = OS_OPEN(inodeListPath, O_RDWR|O_TRUNC, 0666);
827 if (inodeFile == INVALID_FD) {
828 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
831 /* Using nt_unlink here since we're really using the delete on close
832 * semantics of unlink. In most places in the salvager, we really do
833 * mean to unlink the file at that point. Those places have been
834 * modified to actually do that so that the NT crt can be used there.
836 * jaltman - On NT delete on close cannot be applied to a file while the
837 * process has an open file handle that does not have DELETE file
838 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
839 * delete privileges. As a result the nt_unlink() call will always
842 code = nt_unlink(inodeListPath);
844 code = unlink(inodeListPath);
847 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
850 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
854 salvinfo->inodeFd = inodeFile;
855 if (salvinfo->inodeFd == INVALID_FD)
856 Abort("Temporary file %s is missing...\n", inodeListPath);
857 OS_SEEK(salvinfo->inodeFd, 0L, SEEK_SET);
858 if (ListInodeOption) {
859 PrintInodeList(salvinfo);
862 /* enumerate volumes in the partition.
863 * figure out sets of read-only + rw volumes.
864 * salvage each set, read-only volumes first, then read-write.
865 * Fix up inodes on last volume in set (whether it is read-write
868 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
872 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
873 i < salvinfo->nVolumesInInodeFile; i = j) {
874 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
876 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
878 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
879 struct VolumeSummary *tsp;
880 /* Scan volume list (from partition root directory) looking for the
881 * current rw volume number in the volume list from the inode scan.
882 * If there is one here that is not in the inode volume list,
884 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
886 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
888 /* Now match up the volume summary info from the root directory with the
889 * entry in the volume list obtained from scanning inodes */
890 salvinfo->inodeSummary[j].volSummary = NULL;
891 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
892 if (tsp->header.id == vid) {
893 salvinfo->inodeSummary[j].volSummary = tsp;
899 /* Salvage the group of volumes (several read-only + 1 read/write)
900 * starting with the current read-only volume we're looking at.
902 SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
905 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
906 for (; vsp < esp; vsp++) {
908 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
911 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
912 RemoveTheForce(salvinfo->fileSysPath);
914 if (!Testing && singleVolumeNumber) {
916 #ifdef AFS_DEMAND_ATTACH_FS
917 /* unlock vol headers so the fs can attach them when we AskOnline */
918 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
919 #endif /* AFS_DEMAND_ATTACH_FS */
921 /* Step through the volumeSummary list and set all volumes on-line.
922 * Most volumes were taken off-line in GetVolumeSummary.
923 * If a volume was deleted, don't tell the fileserver anything, since
924 * we already told the fileserver the volume was deleted back when we
925 * destroyed the volume header.
926 * Also, make sure we bring the singleVolumeNumber back online first.
929 for (j = 0; j < salvinfo->nVolumes; j++) {
930 if (salvinfo->volumeSummaryp[j].header.id == singleVolumeNumber) {
932 if (!salvinfo->volumeSummaryp[j].deleted) {
933 AskOnline(salvinfo, singleVolumeNumber);
939 /* singleVolumeNumber generally should always be in the constructed
940 * volumeSummary, but just in case it's not... */
941 AskOnline(salvinfo, singleVolumeNumber);
944 for (j = 0; j < salvinfo->nVolumes; j++) {
945 if (salvinfo->volumeSummaryp[j].header.id != singleVolumeNumber) {
946 if (!salvinfo->volumeSummaryp[j].deleted) {
947 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
953 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
954 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
957 OS_CLOSE(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
961 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
964 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, vsp->fileName);
967 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
970 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
972 Log("Error %ld destroying volume disk header for volume %lu\n",
973 afs_printable_int32_ld(code),
974 afs_printable_uint32_lu(vsp->header.id));
977 /* make sure we actually delete the fileName file; ENOENT
978 * is fine, since VDestroyVolumeDiskHeader probably already
980 if (unlink(path) && errno != ENOENT) {
981 Log("Unable to unlink %s (errno = %d)\n", path, errno);
983 if (salvinfo->useFSYNC) {
984 AskDelete(salvinfo, vsp->header.id);
992 CompareInodes(const void *_p1, const void *_p2)
994 const struct ViceInodeInfo *p1 = _p1;
995 const struct ViceInodeInfo *p2 = _p2;
996 if (p1->u.vnode.vnodeNumber == INODESPECIAL
997 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
998 VolumeId p1rwid, p2rwid;
1000 (p1->u.vnode.vnodeNumber ==
1001 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
1003 (p2->u.vnode.vnodeNumber ==
1004 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1005 if (p1rwid < p2rwid)
1007 if (p1rwid > p2rwid)
1009 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1010 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
1011 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1012 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1013 if (p1->u.vnode.volumeId == p1rwid)
1015 if (p2->u.vnode.volumeId == p2rwid)
1017 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1019 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1020 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1021 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1023 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1025 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1027 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1029 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1031 /* The following tests are reversed, so that the most desirable
1032 * of several similar inodes comes first */
1033 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1034 #ifdef AFS_3DISPARES
1035 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1036 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1039 #ifdef AFS_SGI_EXMAG
1040 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1041 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1046 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1047 #ifdef AFS_3DISPARES
1048 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1049 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1052 #ifdef AFS_SGI_EXMAG
1053 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1054 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1059 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1060 #ifdef AFS_3DISPARES
1061 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1062 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1065 #ifdef AFS_SGI_EXMAG
1066 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1067 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1072 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1073 #ifdef AFS_3DISPARES
1074 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1075 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1078 #ifdef AFS_SGI_EXMAG
1079 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1080 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1089 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1090 struct InodeSummary *summary)
1092 VolumeId volume = ip->u.vnode.volumeId;
1093 VolumeId rwvolume = volume;
1098 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1100 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1102 rwvolume = ip->u.special.parentId;
1103 /* This isn't quite right, as there could (in error) be different
1104 * parent inodes in different special vnodes */
1106 if (maxunique < ip->u.vnode.vnodeUniquifier)
1107 maxunique = ip->u.vnode.vnodeUniquifier;
1111 summary->volumeId = volume;
1112 summary->RWvolumeId = rwvolume;
1113 summary->nInodes = n;
1114 summary->nSpecialInodes = nSpecial;
1115 summary->maxUniquifier = maxunique;
1119 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1121 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1122 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1123 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1128 * Collect list of inodes in file named by path. If a truly fatal error,
1129 * unlink the file and abort. For lesser errors, return -1. The file will
1130 * be unlinked by the caller.
1133 GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolumeNumber)
1137 struct ViceInodeInfo *ip, *ip_save;
1138 struct InodeSummary summary;
1139 char summaryFileName[50];
1140 FD_t summaryFile = INVALID_FD;
1142 char *dev = salvinfo->fileSysPath;
1143 char *wpath = salvinfo->fileSysPath;
1145 char *dev = salvinfo->fileSysDeviceName;
1146 char *wpath = salvinfo->filesysfulldev;
1148 char *part = salvinfo->fileSysPath;
1151 afs_sfsize_t st_size;
1153 /* This file used to come from vfsck; cobble it up ourselves now... */
1155 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1156 singleVolumeNumber ? OnlyOneVolume : 0,
1157 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1159 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1162 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1164 if (forceSal && !ForceSalvage) {
1165 Log("***Forced salvage of all volumes on this partition***\n");
1168 OS_SEEK(inodeFile, 0L, SEEK_SET);
1169 salvinfo->inodeFd = inodeFile;
1170 if (salvinfo->inodeFd == INVALID_FD ||
1171 (st_size = OS_SIZE(salvinfo->inodeFd)) == -1) {
1172 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1174 tdir = (tmpdir ? tmpdir : part);
1176 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1177 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp."));
1179 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1180 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1182 summaryFile = OS_OPEN(summaryFileName, O_RDWR|O_APPEND, 0666);
1183 if (summaryFile == INVALID_FD) {
1184 Abort("Unable to create inode summary file\n");
1188 /* Using nt_unlink here since we're really using the delete on close
1189 * semantics of unlink. In most places in the salvager, we really do
1190 * mean to unlink the file at that point. Those places have been
1191 * modified to actually do that so that the NT crt can be used there.
1193 * jaltman - As commented elsewhere, this cannot work because fopen()
1194 * does not open files with DELETE and FILE_SHARE_DELETE.
1196 code = nt_unlink(summaryFileName);
1198 code = unlink(summaryFileName);
1201 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1204 if (!canfork || debug || Fork() == 0) {
1205 int nInodes = st_size / sizeof(struct ViceInodeInfo);
1207 OS_CLOSE(summaryFile);
1208 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1209 RemoveTheForce(salvinfo->fileSysPath);
1211 struct VolumeSummary *vsp;
1214 GetVolumeSummary(salvinfo, singleVolumeNumber);
1216 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1218 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1221 Log("%s vice inodes on %s; not salvaged\n",
1222 singleVolumeNumber ? "No applicable" : "No", dev);
1225 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1227 OS_CLOSE(summaryFile);
1229 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1232 if (OS_READ(salvinfo->inodeFd, ip, st_size) != st_size) {
1233 OS_CLOSE(summaryFile);
1234 Abort("Unable to read inode table; %s not salvaged\n", dev);
1236 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1237 if (OS_SEEK(salvinfo->inodeFd, 0, SEEK_SET) == -1
1238 || OS_WRITE(salvinfo->inodeFd, ip, st_size) != st_size) {
1239 OS_CLOSE(summaryFile);
1240 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1245 CountVolumeInodes(ip, nInodes, &summary);
1246 if (OS_WRITE(summaryFile, &summary, sizeof(summary)) != sizeof(summary)) {
1247 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1248 OS_CLOSE(summaryFile);
1251 summary.index += (summary.nInodes);
1252 nInodes -= summary.nInodes;
1253 ip += summary.nInodes;
1256 ip = ip_save = NULL;
1257 /* The following sync is deliberately not a close: if it were, debug mode would not work */
1258 if (OS_SYNC(summaryFile) == -1) {
1259 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1260 OS_CLOSE(summaryFile);
1263 if (canfork && !debug) {
1268 if (Wait("Inode summary") == -1) {
1269 OS_CLOSE(summaryFile);
1270 Exit(1); /* salvage of this partition aborted */
1274 st_size = OS_SIZE(summaryFile);
1275 osi_Assert(st_size >= 0);
1278 salvinfo->inodeSummary = (struct InodeSummary *)malloc(st_size);
1279 osi_Assert(salvinfo->inodeSummary != NULL);
1280 /* For GNU we need to do lseek to get the file pointer moved. */
1281 osi_Assert(OS_SEEK(summaryFile, 0, SEEK_SET) == 0);
1282 ret = OS_READ(summaryFile, salvinfo->inodeSummary, st_size);
1283 osi_Assert(ret == st_size);
1285 salvinfo->nVolumesInInodeFile = st_size / sizeof(struct InodeSummary);
1286 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1287 salvinfo->inodeSummary[i].volSummary = NULL;
1289 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)st_size);
1290 OS_CLOSE(summaryFile);
/*
 * CompareVolumes: ordering callback over two struct VolumeSummary entries,
 * received as const void * in the style of a qsort() comparator.
 * Entries whose header.parent differ are ordered by parent; entries that
 * reach the last line are ordered by header.id.
 * NOTE(review): the results of the two rw-volume tests are on lines not
 * shown here, and the last line never yields 0 -- confirm that entries
 * which compare equal are handled as the sort routine expects.
 */
1294 /* Comparison routine for volume sort.
1295 This is set up so that a read-write volume comes immediately before
1296 any read-only clones of that volume */
1298 CompareVolumes(const void *_p1, const void *_p2)
1300 const struct VolumeSummary *p1 = _p1;
1301 const struct VolumeSummary *p2 = _p2;
1302 if (p1->header.parent != p2->header.parent)
1303 return p1->header.parent < p2->header.parent ? -1 : 1;
1304 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1306 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1308 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
/*
 * AskVolumeSummary: obtains the membership of one volume group from the
 * fileserver instead of scanning the partition.
 *
 * The query path is compiled only when both FSSYNC_BUILD_CLIENT and
 * AFS_DEMAND_ATTACH_FS are defined, and is taken only when programType is
 * salvageServer and singleVolumeNumber is non-zero.  Visible steps:
 *  - FSYNC_VGCQuery() is issued and re-issued, sleeping 1..10 seconds
 *    between attempts, while the reply is SYNC_FAILED with reason
 *    FSYNC_PART_SCANNING;
 *  - SYNC_FAILED with FSYNC_UNKNOWN_VOLID is turned into an empty reply
 *    whose rw field is singleVolumeNumber;
 *  - if the reply names a different rw volume, the volume group salvage is
 *    scheduled through SALVSYNC_LinkVolume() (when SALVSYNC_BUILD_CLIENT is
 *    defined) and the process exits with SALSRV_EXIT_VOLGROUP_LINK;
 *  - otherwise salvinfo->volumeSummaryp is allocated for VOL_VG_MAX_VOLS
 *    entries and filled from q_res.children[]: each child other than
 *    singleVolumeNumber is taken offline and locked, its disk header is
 *    read and converted, and salvinfo->nVolumes is incremented;
 *  - the collected entries are then sorted with qsort().
 * NOTE(review): several lines (returns, braces, error branches) are not
 * shown here; the return values are described only by the existing
 * @retval text below.
 */
1312 * Gleans volumeSummary information by asking the fileserver
1314 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1315 * salvaging a whole partition
1317 * @return whether we obtained the volume summary information or not
1318 * @retval 0 success; we obtained the volume summary information
1319 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1321 * @retval 1 we did not get the volume summary information; either the
1322 * fileserver responded with an error, or we are not supposed to
1323 * ask the fileserver for the information (e.g. we are salvaging
1324 * the entire partition or we are not the salvageserver)
1326 * @note for non-DAFS, always returns 1
1329 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1332 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1333 if (programType == salvageServer) {
1334 if (singleVolumeNumber) {
1335 FSSYNC_VGQry_response_t q_res;
1337 struct VolumeSummary *vsp;
1339 struct VolumeDiskHeader diskHdr;
1341 memset(&res, 0, sizeof(res));
1343 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1346 * We must wait for the partition to finish scanning before
1347 * can continue, since we will not know if we got the entire
1348 * VG membership unless the partition is fully scanned.
1349 * We could, in theory, just scan the partition ourselves if
1350 * the VG cache is not ready, but we would be doing the exact
1351 * same scan the fileserver is doing; it will almost always
1352 * be faster to wait for the fileserver. The only exceptions
1353 * are if the partition does not take very long to scan, and
1354 * in that case it's fast either way, so who cares?
1356 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1357 Log("waiting for fileserver to finish scanning partition %s...\n",
1358 salvinfo->fileSysPartition->name);
1360 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1361 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1362 * just so small partitions don't need to wait over 10
1363 * seconds every time, and large partitions are generally
1364 * polled only once every ten seconds. */
1365 sleep((i > 10) ? (i = 10) : i);
1367 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1371 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1372 /* This can happen if there's no header for the volume
1373 * we're salvaging, or no headers exist for the VG (if
1374 * we're salvaging an RW). Act as if we got a response
1375 * with no VG members. The headers may be created during
1376 * salvaging, if there are inodes in this VG. */
1378 memset(&q_res, 0, sizeof(q_res));
1379 q_res.rw = singleVolumeNumber;
1383 Log("fileserver refused VGCQuery request for volume %lu on "
1384 "partition %s, code %ld reason %ld\n",
1385 afs_printable_uint32_lu(singleVolumeNumber),
1386 salvinfo->fileSysPartition->name,
1387 afs_printable_int32_ld(code),
1388 afs_printable_int32_ld(res.hdr.reason));
1392 if (q_res.rw != singleVolumeNumber) {
1393 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1394 afs_printable_uint32_lu(singleVolumeNumber),
1395 afs_printable_uint32_lu(q_res.rw));
1396 #ifdef SALVSYNC_BUILD_CLIENT
1397 if (SALVSYNC_LinkVolume(q_res.rw,
1399 salvinfo->fileSysPartition->name,
1401 Log("schedule request failed\n");
1403 #endif /* SALVSYNC_BUILD_CLIENT */
1404 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1407 salvinfo->volumeSummaryp = calloc(VOL_VG_MAX_VOLS, sizeof(struct VolumeSummary));
1408 osi_Assert(salvinfo->volumeSummaryp != NULL);
1410 salvinfo->nVolumes = 0;
1411 vsp = salvinfo->volumeSummaryp;
1413 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1414 char name[VMAXPATHLEN];
1416 if (!q_res.children[i]) {
1420 /* AskOffline for singleVolumeNumber was called much earlier */
1421 if (q_res.children[i] != singleVolumeNumber) {
1422 AskOffline(salvinfo, q_res.children[i]);
1423 if (LockVolume(salvinfo, q_res.children[i])) {
1429 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1431 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1432 afs_printable_uint32_lu(q_res.children[i]));
1437 DiskToVolumeHeader(&vsp->header, &diskHdr);
1438 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1439 vsp->fileName = ToString(name);
1440 salvinfo->nVolumes++;
1444 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1449 Log("Cannot get volume summary from fileserver; falling back to scanning "
1450 "entire partition\n");
1453 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
/*
 * CountHeader: header callback handed to VWalkVolumeHeaders() by
 * GetVolumeSummary() for the counting pass over a partition.
 * rock is treated as a pointer to an afs_int32 counter; the statements
 * that update the counter and produce the return value are on lines not
 * shown here.
 */
1458 * count how many volume headers are found by VWalkVolumeHeaders.
1460 * @param[in] dp the disk partition (unused)
1461 * @param[in] name full path to the .vol header (unused)
1462 * @param[in] hdr the header data (unused)
1463 * @param[in] last whether this is the last try or not (unused)
1464 * @param[in] rock actually an afs_int32*; the running count of how many
1465 * volumes we have found
1470 CountHeader(struct DiskPartition64 *dp, const char *name,
1471 struct VolumeDiskHeader *hdr, int last, void *rock)
1473 afs_int32 *nvols = (afs_int32 *)rock;
/*
 * struct SalvageScanParams: state shared between GetVolumeSummary() and the
 * RecordHeader()/UnlinkHeader() callbacks, passed through the void *rock
 * argument of VWalkVolumeHeaders().  GetVolumeSummary() fills in
 * singleVolumeNumber, vsp, nVolumes, totalVolumes and salvinfo before the
 * walk and reads nVolumes back afterwards.
 */
1479 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
1482 struct SalvageScanParams {
1483 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1484 * vol id of the VG we're salvaging */
1485 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1486 * we're filling in */
1487 afs_int32 nVolumes; /**< # of vols we've encountered */
1488 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1489 * # of vols we've alloc'd memory for) */
1490 int retry; /**< do we need to retry vol lock/checkout? */
1491 struct SalvInfo *salvinfo; /**< salvage job info */
/*
 * RecordHeader: header callback handed to VWalkVolumeHeaders() by
 * GetVolumeSummary(); rock is a struct SalvageScanParams *.
 *
 * Visible steps:
 *  - the on-disk header is converted into a local struct VolumeSummary with
 *    DiskToVolumeHeader();
 *  - if the header found is the requested volume itself but its parent is a
 *    different id (a clone was requested), the salvageserver schedules the
 *    parent volume group via SALVSYNC_LinkVolume() and exits with
 *    SALSRV_EXIT_VOLGROUP_LINK; the other branch logs that the read-only
 *    volume is not salvaged;
 *  - for a partition salvage, or a header whose id or parent matches the
 *    requested volume, the base name of the file is compared with the name
 *    formatted from VFORMAT and the header id;
 *  - when the name is good, or this is the last attempt, volumes other than
 *    singleVolumeNumber are taken offline with AskOffline(), and under
 *    AFS_DEMAND_ATTACH_FS LockVolume() is called;
 *  - the summary is copied into params->vsp after checking that
 *    params->nVolumes does not exceed params->totalVolumes (Abort
 *    otherwise).
 * NOTE(review): the lines that set badname, advance params->vsp and
 * params->nVolumes, and return are not shown here; see the existing
 * @retval text for the documented results.
 */
1495 * records volume summary info found from VWalkVolumeHeaders.
1497 * Found volumes are also taken offline if they are in the specific volume
1498 * group we are looking for.
1500 * @param[in] dp the disk partition
1501 * @param[in] name full path to the .vol header
1502 * @param[in] hdr the header data
1503 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1504 * @param[in] rock actually a struct SalvageScanParams*, containing the
1505 * information needed to record the volume summary data
1507 * @return operation status
1509 * @retval -1 volume locking raced with fileserver restart; checking out
1510 * and locking volumes needs to be retried
1511 * @retval 1 volume header is mis-named and should be deleted
1514 RecordHeader(struct DiskPartition64 *dp, const char *name,
1515 struct VolumeDiskHeader *hdr, int last, void *rock)
1517 char nameShouldBe[64];
1518 struct SalvageScanParams *params;
1519 struct VolumeSummary summary;
1520 VolumeId singleVolumeNumber;
1521 struct SalvInfo *salvinfo;
1523 params = (struct SalvageScanParams *)rock;
1525 singleVolumeNumber = params->singleVolumeNumber;
1526 salvinfo = params->salvinfo;
1528 DiskToVolumeHeader(&summary.header, hdr);
1530 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1531 && summary.header.parent != singleVolumeNumber) {
1533 if (programType == salvageServer) {
1534 #ifdef SALVSYNC_BUILD_CLIENT
1535 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1536 summary.header.id, summary.header.parent);
1537 if (SALVSYNC_LinkVolume(summary.header.parent,
1541 Log("schedule request failed\n");
1544 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1547 Log("%u is a read-only volume; not salvaged\n",
1548 singleVolumeNumber);
1553 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1554 || summary.header.parent == singleVolumeNumber) {
1556 /* check if the header file is incorrectly named */
1558 const char *base = strrchr(name, OS_DIRSEPC);
1565 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1566 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1569 if (strcmp(nameShouldBe, base)) {
1570 /* .vol file has wrong name; retry/delete */
1574 if (!badname || last) {
1575 /* only offline the volume if the header is good, or if this is
1576 * the last try looking at it; avoid AskOffline'ing the same vol
1579 if (singleVolumeNumber
1580 && summary.header.id != singleVolumeNumber) {
1581 /* don't offline singleVolumeNumber; we already did that
1584 AskOffline(salvinfo, summary.header.id);
1586 #ifdef AFS_DEMAND_ATTACH_FS
1588 /* don't lock the volume if the header is bad, since we're
1589 * about to delete it anyway. */
1590 if (LockVolume(salvinfo, summary.header.id)) {
1595 #endif /* AFS_DEMAND_ATTACH_FS */
1599 if (last && !Showmode) {
1600 Log("Volume header file %s is incorrectly named (should be %s "
1601 "not %s); %sdeleted (it will be recreated later, if "
1602 "necessary)\n", name, nameShouldBe, base,
1603 (Testing ? "it would have been " : ""));
1608 summary.fileName = ToString(base);
1611 if (params->nVolumes > params->totalVolumes) {
1612 /* We found more volumes than we found on the first partition walk;
1613 * apparently something created a volume while we were
1614 * partition-salvaging, or we found more than 20 vols when salvaging a
1615 * particular volume. Abort if we detect this, since other programs
1616 * supposed to not touch the partition while it is partition-salvaging,
1617 * and we shouldn't find more than 20 vols in a VG.
1619 Abort("Found %ld vol headers, but should have found at most %ld! "
1620 "Make sure the volserver/fileserver are not running at the "
1621 "same time as a partition salvage\n",
1622 afs_printable_int32_ld(params->nVolumes),
1623 afs_printable_int32_ld(params->totalVolumes));
1626 memcpy(params->vsp, &summary, sizeof(summary));
/*
 * UnlinkHeader: second callback handed to VWalkVolumeHeaders() by
 * GetVolumeSummary(); rock is a struct SalvageScanParams *.
 * Two cases are visible: a header that could not be read at all (logged as
 * not a legitimate volume header file), and a readable header that is only
 * considered for removal when params->singleVolumeNumber is 0, i.e. during
 * a partition salvage.  The file is removed with unlink(name) only when
 * dounlink is set, and a failing unlink() is logged with errno.
 * NOTE(review): the assignments to dounlink are on lines not shown here;
 * the log text suggests Testing mode suppresses the removal -- confirm.
 */
1634 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1636 * If the header could not be read in at all, the header is always unlinked.
1637 * If instead RecordHeader said the header was bad (that is, the header file
1638 * is mis-named), we only unlink if we are doing a partition salvage, as
1639 * opposed to salvaging a specific volume group.
1641 * @param[in] dp the disk partition
1642 * @param[in] name full path to the .vol header
1643 * @param[in] hdr header data, or NULL if the header could not be read
1644 * @param[in] rock actually a struct SalvageScanParams*, with some information
1648 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1649 struct VolumeDiskHeader *hdr, void *rock)
1651 struct SalvageScanParams *params;
1654 params = (struct SalvageScanParams *)rock;
1657 /* no header; header is too bogus to read in at all */
1659 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1665 } else if (!params->singleVolumeNumber) {
1666 /* We were able to read in a header, but RecordHeader said something
1667 * was wrong with it. We only unlink those if we are doing a partition
1674 if (dounlink && unlink(name)) {
1675 Log("Error %d while trying to unlink %s\n", errno, name);
/*
 * GetVolumeSummary: fills salvinfo->volumeSummaryp and salvinfo->nVolumes.
 *
 * AskVolumeSummary() is tried first.  When the partition has to be scanned
 * instead, the summary array is sized either by a counting walk with
 * CountHeader (partition salvage, singleVolumeNumber == 0) or to
 * VOL_VG_MAX_VOLS (single volume group), and a second walk with
 * RecordHeader/UnlinkHeader records each header through a
 * struct SalvageScanParams.  The recorded entries are then sorted with
 * qsort().
 *
 * Fix: the rock argument of the recording walk read as a mis-decoded
 * paragraph sign followed by "ms"; it is the address of the local params
 * structure that is filled in just above and read back just below.
 * NOTE(review): the branches on the AskVolumeSummary()/walk results and the
 * return statements are on lines not shown here.
 */
1680 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1681 * the fileserver for VG information, or by scanning the /vicepX partition.
1683 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1684 * are salvaging, or 0 if this is a partition
1687 * @return operation status
1689 * @retval -1 we raced with a fileserver restart; checking out and locking
1690 * volumes must be retried
1693 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1695 afs_int32 nvols = 0;
1696 struct SalvageScanParams params;
1699 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1701 /* we successfully got the vol information from the fileserver; no
1702 * need to scan the partition */
1706 /* we need to retry volume checkout */
1710 if (!singleVolumeNumber) {
1711 /* Count how many volumes we have in /vicepX */
1712 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1715 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1720 nvols = VOL_VG_MAX_VOLS;
1723 salvinfo->volumeSummaryp = calloc(nvols, sizeof(struct VolumeSummary));
1724 osi_Assert(salvinfo->volumeSummaryp != NULL);
1726 params.singleVolumeNumber = singleVolumeNumber;
1727 params.vsp = salvinfo->volumeSummaryp;
1728 params.nVolumes = 0;
1729 params.totalVolumes = nvols;
1731 params.salvinfo = salvinfo;
1733 /* walk the partition directory of volume headers and record the info
1734 * about them; unlinking invalid headers */
1735 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1736 UnlinkHeader, &params);
1738 /* we apparently need to retry checking-out/locking volumes */
1742 Abort("Failed to get volume header summary\n");
1744 salvinfo->nVolumes = params.nVolumes;
1746 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
/*
 * FindLinkHandle: searches the special inodes of each of the nVols volume
 * summaries in isp for one whose special type is VI_LINKTABLE and returns
 * its inodeNumber.  allInodes is indexed with each summary's index field,
 * and only the first nSpecialInodes entries of a volume are examined.
 * NOTE(review): the value returned when no link table inode is found is on
 * a line not shown here.
 */
1752 /* Find the link table. This should be associated with the RW volume or, if
1753 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
1756 FindLinkHandle(struct InodeSummary *isp, int nVols,
1757 struct ViceInodeInfo *allInodes)
1760 struct ViceInodeInfo *ip;
1762 for (i = 0; i < nVols; i++) {
1763 ip = allInodes + isp[i].index;
1764 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1765 if (ip[j].u.special.type == VI_LINKTABLE)
1766 return ip[j].inodeNumber;
/*
 * CreateLinkTable: sets up the link table for the volume group of isp.
 *
 * When ino is not a valid inode, a special inode of type VI_LINKTABLE is
 * requested with IH_CREATE().  The handle salvinfo->VGLinkH is initialised
 * for the inode, the file is opened, truncated to the size of a version
 * stamp plus one short, and the version stamp (LINKTABLEMAGIC,
 * LINKTABLEVERSION) is stored at offset 0.  If isp has a volume summary,
 * its header.linkTable is set to the inode as well.
 *
 * Fix: a failed write of the version stamp used to be reported with the
 * truncate message; it is now reported as a write failure.
 * NOTE(review): the return type, the local fdP declaration and the final
 * return are on lines not shown here.
 */
1773 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1775 struct versionStamp version;
1778 if (!VALID_INO(ino))
1780 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
1781 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1782 if (!VALID_INO(ino))
1784 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1785 isp->RWvolumeId, errno);
1786 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1787 fdP = IH_OPEN(salvinfo->VGLinkH);
1789 Abort("Can't open link table for volume %u (error = %d)\n",
1790 isp->RWvolumeId, errno);
/* Size the table to hold the version stamp plus one short. */
1792 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1793 Abort("Can't truncate link table for volume %u (error = %d)\n",
1794 isp->RWvolumeId, errno);
1796 version.magic = LINKTABLEMAGIC;
1797 version.version = LINKTABLEVERSION;
/* Store the version stamp at offset 0 of the link table. */
1799 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1801 Abort("Can't write link table for volume %u (error = %d)\n",
1802 isp->RWvolumeId, errno);
1804 FDH_REALLYCLOSE(fdP);
1806 /* If the volume summary exists (i.e., the V*.vol header file exists),
1807 * then set this inode there as well.
1809 if (isp->volSummary)
1810 isp->volSummary->header.linkTable = ino;
/*
 * Body of a thread start routine: arg is an SVGParms_t * whose three fields
 * are forwarded to DoSalvageVolumeGroup().
 * NOTE(review): the function header is on lines not shown here; presumably
 * this is nt_SVG, the start routine SalvageVolumeGroup() passes to
 * pthread_create() -- confirm.
 */
1819 SVGParms_t *parms = (SVGParms_t *) arg;
1820 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * SalvageVolumeGroup: entry point for salvaging the nVols volumes described
 * by isp.  It first resets the per-volume-group state kept in salvinfo
 * (VolumeChanged, VGLinkH, VGLinkH_cnt, VolInfo).  In the visible code the
 * work is then handed to a joinable pthread started on nt_SVG with an
 * SVGParms_t describing the job, and the caller waits for it with
 * pthread_join(); each pthread setup failure is logged with the RW volume
 * id.
 * NOTE(review): the trailing #endif names AFS_NT40_ENV, so part of this body
 * is conditional; the matching #if/#else lines are not shown here.
 */
1825 SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1828 pthread_attr_t tattr;
1832 /* Initialize per volume global variables, even if later code does so */
1833 salvinfo->VolumeChanged = 0;
1834 salvinfo->VGLinkH = NULL;
1835 salvinfo->VGLinkH_cnt = 0;
1836 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1838 parms.svgp_inodeSummaryp = isp;
1839 parms.svgp_count = nVols;
1840 parms.svgp_salvinfo = salvinfo;
1841 code = pthread_attr_init(&tattr);
1843 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1847 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1849 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1852 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1854 Log("Failed to create thread to salvage volume group %u\n",
1858 (void)pthread_join(tid, NULL);
1860 #endif /* AFS_NT40_ENV */
/*
 * DoSalvageVolumeGroup: salvages the nVols volumes of one volume group,
 * described by consecutive InodeSummary entries starting at isp.
 *
 * Visible steps:
 *  - unless ForceSalvage is set, QuickCheck() is consulted first (the
 *    ShowMounts condition also gates this);
 *  - when canfork is set and debug is not, the work runs in a forked child
 *    and the parent waits for it;
 *  - the inode records of all volumes in the group are read from
 *    salvinfo->inodeFd into one buffer; allInodes is that buffer rebased
 *    so it can be indexed with the partition-wide index fields;
 *  - under AFS_NAMEI_ENV the link table inode is looked up with
 *    FindLinkHandle(); if it is missing or cannot be opened it is
 *    recreated with CreateLinkTable() and every non-special inode gets a
 *    link count of 1 via namei_SetLinkCount();
 *  - volumes are processed from the last entry down to salvageTo, each in
 *    two passes (check == 1, then check == 0), calling
 *    SalvageVolumeHeaderFile() and SalvageVnodes();
 *  - remaining link-count differences are applied with IH_DEC()/IH_INC(),
 *    with the link table inode itself handled last;
 *  - finally SalvageVolume() runs on the first summary and the link table
 *    handle is released.
 */
1863 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1865 struct ViceInodeInfo *inodes, *allInodes, *ip;
1866 int i, totalInodes, size, salvageTo;
1870 int dec_VGLinkH = 0;
1872 FdHandle_t *fdP = NULL;
1874 salvinfo->VGLinkH_cnt = 0;
1875 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1876 && isp->nSpecialInodes > 0);
1877 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1878 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1881 if (ShowMounts && !haveRWvolume)
1883 if (canfork && !debug && Fork() != 0) {
1884 (void)Wait("Salvage volume group");
1887 for (i = 0, totalInodes = 0; i < nVols; i++)
1888 totalInodes += isp[i].nInodes;
1889 size = totalInodes * sizeof(struct ViceInodeInfo);
/* NOTE(review): no NULL check on this allocation is visible before inodes is used below. */
1890 inodes = (struct ViceInodeInfo *)malloc(size);
1891 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1892 * for the partition, if all the inodes
1893 * had been read into memory */
1895 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1897 osi_Assert(OS_READ(salvinfo->inodeFd, inodes, size) == size);
1899 /* Don't try to salvage a read write volume if there isn't one on this
1901 salvageTo = haveRWvolume ? 0 : 1;
1903 #ifdef AFS_NAMEI_ENV
1904 ino = FindLinkHandle(isp, nVols, allInodes);
1905 if (VALID_INO(ino)) {
1906 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1907 fdP = IH_OPEN(salvinfo->VGLinkH);
1909 if (!VALID_INO(ino) || fdP == NULL) {
1910 Log("%s link table for volume %u.\n",
1911 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1913 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1916 struct ViceInodeInfo *ip;
1917 CreateLinkTable(salvinfo, isp, ino);
1918 fdP = IH_OPEN(salvinfo->VGLinkH);
1919 /* Sync fake 1 link counts to the link table, now that it exists */
1921 for (i = 0; i < nVols; i++) {
1922 ip = allInodes + isp[i].index;
1923 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1924 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1931 FDH_REALLYCLOSE(fdP);
1933 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1936 /* Salvage in reverse order--read/write volume last; this way any
1937 * Inodes not referenced by the time we salvage the read/write volume
1938 * can be picked up by the read/write volume */
1939 /* ACTUALLY, that's not done right now--the inodes just vanish */
1940 for (i = nVols - 1; i >= salvageTo; i--) {
1942 struct InodeSummary *lisp = &isp[i];
1943 #ifdef AFS_NAMEI_ENV
1944 /* If only the RO is present on this partition, the link table
1945 * shows up as a RW volume special file. Need to make sure the
1946 * salvager doesn't try to salvage the non-existent RW.
1948 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1949 /* If this only special inode is the link table, continue */
1950 if (inodes->u.special.type == VI_LINKTABLE) {
1957 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1958 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1959 /* Check inodes twice. The second time do things seriously. This
1960 * way the whole RO volume can be deleted, below, if anything goes wrong */
1961 for (check = 1; check >= 0; check--) {
1963 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
1965 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
1966 if (rw && deleteMe) {
1967 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1968 * volume won't be called */
1974 if (rw && check == 1)
1976 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
1977 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
1983 /* Fix actual inode counts */
1986 Log("totalInodes %d\n",totalInodes);
1987 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1988 static int TraceBadLinkCounts = 0;
1989 #ifdef AFS_NAMEI_ENV
1990 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
1991 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
1992 VGLinkH_p1 = ip->u.param[0];
1993 continue; /* Deal with this last. */
1996 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1997 TraceBadLinkCounts--; /* Limit reports, per volume */
1998 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
2000 while (ip->linkCount > 0) {
2001 /* below used to assert, not break */
2003 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2004 Log("idec failed. inode %s errno %d\n",
2005 PrintInode(stmp, ip->inodeNumber), errno);
2011 while (ip->linkCount < 0) {
2012 /* these used to be asserts */
2014 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2015 Log("iinc failed. inode %s errno %d\n",
2016 PrintInode(stmp, ip->inodeNumber), errno);
2023 #ifdef AFS_NAMEI_ENV
2024 while (dec_VGLinkH > 0) {
2025 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2026 Log("idec failed on link table, errno = %d\n", errno);
2030 while (dec_VGLinkH < 0) {
2031 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2032 Log("iinc failed on link table, errno = %d\n", errno);
2039 /* Directory consistency checks on the rw volume */
2041 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2042 IH_RELEASE(salvinfo->VGLinkH);
2044 if (canfork && !debug) {
/*
 * QuickCheck: inspects the volume info header of each of the nVols
 * summaries in isp to decide whether a full salvage can be avoided; it is
 * consulted by DoSalvageVolumeGroup() when ForceSalvage is not set.
 * For each volume the VolumeDiskData is read with IH_IREAD() and accepted
 * only if the read is complete, the magic is VOLUMEINFOMAGIC, dontSalvage
 * equals DONT_SALVAGE, and needsSalvaged and destroyMe are both 0.  An
 * accepted header that still has inUse set gets inUse cleared and
 * inService set, and is written back with IH_IWRITE().
 * The first entry is treated specially when it has no special inodes and
 * nVols > 1 (a phantom rw volume).
 * NOTE(review): the return statements and the handling of a missing
 * volSummary are on lines not shown here.
 */
2051 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2053 /* Check headers BEFORE forking */
2057 for (i = 0; i < nVols; i++) {
2058 struct VolumeSummary *vs = isp[i].volSummary;
2059 VolumeDiskData volHeader;
2061 /* Don't salvage just because phantom rw volume is there... */
2062 /* (If a read-only volume exists, read/write inodes must also exist) */
2063 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2067 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2068 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2069 == sizeof(volHeader)
2070 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2071 && volHeader.dontSalvage == DONT_SALVAGE
2072 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2073 if (volHeader.inUse != 0) {
2074 volHeader.inUse = 0;
2075 volHeader.inService = 1;
2077 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2078 != sizeof(volHeader)) {
/*
 * SalvageVolumeHeaderFile: rebuilds the contents of a V*.vol header
 * (tempHeader) from the special inodes recorded for isp and reconciles it
 * with isp->volSummary.
 *
 * Visible steps:
 *  - when a volume summary exists and the skip array could be allocated,
 *    special inodes that the existing header already references are noted
 *    in goodspecial[];
 *  - adjacent special inodes of the same type are reported as duplicates;
 *    if the header references one of them, that one is used, otherwise the
 *    salvage of the volume is reported as aborted;
 *  - special inodes whose type is outside 1..MAXINODETYPE are reported as
 *    rubbish; non-obsolete ones are stored into tempHeader through the
 *    stuff[] table, and outside check mode their linkCount is decremented
 *    (except for VI_LINKTABLE) so the inode is kept;
 *  - SalvageHeader() is called for every entry of stuff[], after the link
 *    table slot is set from salvinfo->VGLinkH when that inode is valid;
 *  - if no header file exists one is created (VCreateVolumeDiskHeader);
 *    an existing header is rewritten (VWriteVolumeDiskHeader) after a
 *    comparison against tempHeader whose call is not shown here;
 *  - finally the volumeInfoHandle of the summary is initialised.
 * NOTE(review): the calloc() result stored in isp->volSummary is
 * dereferenced on the next visible line with no check shown, and the
 * strcpy() into the 64-byte headerName is unbounded -- confirm both.
 * The return statements are on lines not shown here.
 */
2094 /* SalvageVolumeHeaderFile
2096 * Salvage the top level V*.vol header file. Make sure the special files
2097 * exist and that there are no duplicates.
2099 * Calls SalvageHeader for each possible type of volume special file.
2103 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2104 struct ViceInodeInfo *inodes, int RW,
2105 int check, int *deleteMe)
2108 struct ViceInodeInfo *ip;
2109 int allinodesobsolete = 1;
2110 struct VolumeDiskHeader diskHeader;
2111 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2113 struct VolumeHeader tempHeader;
2114 struct afs_inode_info stuff[MAXINODETYPE];
2116 /* keeps track of special inodes that are probably 'good'; they are
2117 * referenced in the vol header, and are included in the given inodes
2122 } goodspecial[MAXINODETYPE];
2127 memset(goodspecial, 0, sizeof(goodspecial));
2129 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2131 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2133 Log("cannot allocate memory for inode skip array when salvaging "
2134 "volume %lu; not performing duplicate special inode recovery\n",
2135 afs_printable_uint32_lu(isp->volumeId));
2136 /* still try to perform the salvage; the skip array only does anything
2137 * if we detect duplicate special inodes */
2140 init_inode_info(&tempHeader, stuff);
2143 * First, look at the special inodes and see if any are referenced by
2144 * the existing volume header. If we find duplicate special inodes, we
2145 * can use this information to use the referenced inode (it's more
2146 * likely to be the 'good' one), and throw away the duplicates.
2148 if (isp->volSummary && skip) {
2149 /* use tempHeader, so we can use the stuff[] array to easily index
2150 * into the isp->volSummary special inodes */
2151 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2153 for (i = 0; i < isp->nSpecialInodes; i++) {
2154 ip = &inodes[isp->index + i];
2155 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2156 /* will get taken care of in a later loop */
2159 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2160 goodspecial[ip->u.special.type-1].valid = 1;
2161 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2166 memset(&tempHeader, 0, sizeof(tempHeader));
2167 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2168 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2169 tempHeader.id = isp->volumeId;
2170 tempHeader.parent = isp->RWvolumeId;
2172 /* Check for duplicates (inodes are sorted by type field) */
2173 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2174 ip = &inodes[isp->index + i];
2175 if (ip->u.special.type == (ip + 1)->u.special.type) {
2176 afs_ino_str_t stmp1, stmp2;
2178 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2179 /* Will be caught in the loop below */
2183 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2184 ip->u.special.type, isp->volumeId,
2185 PrintInode(stmp1, ip->inodeNumber),
2186 PrintInode(stmp2, (ip+1)->inodeNumber));
2188 if (skip && goodspecial[ip->u.special.type-1].valid) {
2189 Inode gi = goodspecial[ip->u.special.type-1].inode;
2192 Log("using special inode referenced by vol header (%s)\n",
2193 PrintInode(stmp1, gi));
2196 /* the volume header references some special inode of
2197 * this type in the inodes array; are we it? */
2198 if (ip->inodeNumber != gi) {
2200 } else if ((ip+1)->inodeNumber != gi) {
2201 /* in case this is the last iteration; we need to
2202 * make sure we check ip+1, too */
2207 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2215 for (i = 0; i < isp->nSpecialInodes; i++) {
2217 ip = &inodes[isp->index + i];
2218 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2220 Log("Rubbish header inode %s of type %d\n",
2221 PrintInode(stmp, ip->inodeNumber),
2222 ip->u.special.type);
2228 Log("Rubbish header inode %s of type %d; deleted\n",
2229 PrintInode(stmp, ip->inodeNumber),
2230 ip->u.special.type);
2231 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2232 if (skip && skip[i]) {
2233 if (orphans == ORPH_REMOVE) {
2234 Log("Removing orphan special inode %s of type %d\n",
2235 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2238 Log("Ignoring orphan special inode %s of type %d\n",
2239 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2240 /* fall through to the ip->linkCount--; line below */
2243 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2244 allinodesobsolete = 0;
2246 if (!check && ip->u.special.type != VI_LINKTABLE)
2247 ip->linkCount--; /* Keep the inode around */
2255 if (allinodesobsolete) {
2262 salvinfo->VGLinkH_cnt++; /* one for every header. */
2264 if (!RW && !check && isp->volSummary) {
2265 ClearROInUseBit(isp->volSummary);
2269 for (i = 0; i < MAXINODETYPE; i++) {
2270 if (stuff[i].inodeType == VI_LINKTABLE) {
2271 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2272 * And we may have recreated the link table earlier, so set the
2273 * RW header as well.
2275 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2276 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2280 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2284 if (isp->volSummary == NULL) {
2286 char headerName[64];
2287 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2288 (void)afs_snprintf(path, sizeof path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, headerName);
2290 Log("No header file for volume %u\n", isp->volumeId);
2294 Log("No header file for volume %u; %screating %s\n",
2295 isp->volumeId, (Testing ? "it would have been " : ""),
2297 isp->volSummary = calloc(1, sizeof(struct VolumeSummary));
2298 isp->volSummary->fileName = ToString(headerName);
2300 writefunc = VCreateVolumeDiskHeader;
2303 char headerName[64];
2304 /* hack: these two fields are obsolete... */
2305 isp->volSummary->header.volumeAcl = 0;
2306 isp->volSummary->header.volumeMountTable = 0;
2309 (&isp->volSummary->header, &tempHeader,
2310 sizeof(struct VolumeHeader))) {
2311 /* We often remove the name before calling us, so we make a fake one up */
2312 if (isp->volSummary->fileName) {
2313 strcpy(headerName, isp->volSummary->fileName);
2315 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2316 isp->volSummary->fileName = ToString(headerName);
2318 (void)afs_snprintf(path, sizeof path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, headerName);
2320 Log("Header file %s is damaged or no longer valid%s\n", path,
2321 (check ? "" : "; repairing"));
2325 writefunc = VWriteVolumeDiskHeader;
2329 memcpy(&isp->volSummary->header, &tempHeader,
2330 sizeof(struct VolumeHeader));
2333 Log("It would have written a new header file for volume %u\n",
2337 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2338 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2340 Log("Error %ld writing volume header file for volume %lu\n",
2341 afs_printable_int32_ld(code),
2342 afs_printable_uint32_lu(diskHeader.id));
2347 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2348 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader: examine one special (header) inode of a volume and, when
 * allowed, recreate it if it is missing or corrupted.
 *
 *   salvinfo  salvage job state; supplies fileSysDevice / fileSysPath and
 *             receives a copy of the volume info in salvinfo->VolInfo when
 *             sp describes the VI_VOLINFO inode
 *   sp        descriptor of the special inode being checked: inodeType,
 *             pointer to the inode number, expected size, version stamp
 *             and a human readable description used in log messages
 *   isp       inode summary of the volume (volumeId, RWvolumeId,
 *             maxUniquifier are read here)
 *   check     check-only mode; the "Internal error: recreating volume
 *             header ... in check mode" message shows that no recreation
 *             is expected while it is set
 *   deleteMe  NOTE(review): presumably set when the volume should be
 *             destroyed (OKToZap / destroyMe paths); the assignment itself
 *             is not visible in this chunk -- confirm
 *
 * Visible steps:
 *   - a zero inode number is logged and a new INODESPECIAL inode is
 *     requested with IH_CREATE;
 *   - the inode is opened and sp->size bytes are read; a short read or a
 *     magic mismatch against sp->stamp.magic marks the header as corrupted
 *     and the in-memory copy is zeroed so no garbage is used later;
 *   - when recreating outside Testing mode the file is truncated and either
 *     a fresh VolumeDiskData (VI_VOLINFO: name "bogus.<volumeId>", not in
 *     service, not blessed, uniquifier = maxUniquifier + 1 + 1000) or just
 *     the version stamp is written back;
 *   - for VI_VOLINFO the update or creation time of the volume is logged.
 */
2353 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2354 struct InodeSummary *isp, int check, int *deleteMe)
2357 VolumeDiskData volumeInfo;
2358 struct versionStamp fileHeader;
2367 #ifndef AFS_NAMEI_ENV
2368 if (sp->inodeType == VI_LINKTABLE)
2371 if (*(sp->inode) == 0) {
2373 Log("Missing inode in volume header (%s)\n", sp->description);
2377 Log("Missing inode in volume header (%s); %s\n", sp->description,
2378 (Testing ? "it would have recreated it" : "recreating"));
2381 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2382 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2383 if (!VALID_INO(*(sp->inode)))
2385 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2386 sp->description, errno);
2391 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2392 fdP = IH_OPEN(specH);
2393 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2394 /* bail out early and destroy the volume */
2396 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2403 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2404 sp->description, errno);
2407 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2408 || header.fileHeader.magic != sp->stamp.magic)) {
2410 Log("Part of the header (%s) is corrupted\n", sp->description);
2411 FDH_REALLYCLOSE(fdP);
2415 Log("Part of the header (%s) is corrupted; recreating\n",
2418 /* header can be garbage; make sure we don't read garbage data from
2420 memset(&header, 0, sizeof(header));
2422 if (sp->inodeType == VI_VOLINFO
2423 && header.volumeInfo.destroyMe == DESTROY_ME) {
2426 FDH_REALLYCLOSE(fdP);
2430 if (recreate && !Testing) {
2433 ("Internal error: recreating volume header (%s) in check mode\n",
2435 nBytes = FDH_TRUNC(fdP, 0);
2437 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2438 sp->description, errno);
2440 /* The following code should be moved into vutil.c */
2441 if (sp->inodeType == VI_VOLINFO) {
2443 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2444 header.volumeInfo.stamp = sp->stamp;
2445 header.volumeInfo.id = isp->volumeId;
2446 header.volumeInfo.parentId = isp->RWvolumeId;
2447 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2448 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2449 isp->volumeId, isp->volumeId);
2450 header.volumeInfo.inService = 0;
2451 header.volumeInfo.blessed = 0;
2452 /* The + 1000 is a hack in case there are any files out in venus caches */
2453 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2454 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2455 header.volumeInfo.needsCallback = 0;
2456 gettimeofday(&tp, 0);
2457 header.volumeInfo.creationDate = tp.tv_sec;
2459 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2460 sizeof(header.volumeInfo), 0);
2461 if (nBytes != sizeof(header.volumeInfo)) {
2464 ("Unable to write volume header file (%s) (errno = %d)\n",
2465 sp->description, errno);
2466 Abort("Unable to write entire volume header file (%s)\n",
2470 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2471 if (nBytes != sizeof(sp->stamp)) {
2474 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2475 sp->description, errno);
2477 ("Unable to write entire version stamp in volume header file (%s)\n",
2482 FDH_REALLYCLOSE(fdP);
2484 if (sp->inodeType == VI_VOLINFO) {
2485 salvinfo->VolInfo = header.volumeInfo;
2489 if (salvinfo->VolInfo.updateDate) {
2490 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2492 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2493 salvinfo->VolInfo.id,
2494 (Testing ? "it would have been " : ""), update);
2496 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2498 Log("%s (%u) not updated (created %s)\n",
2499 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
/*
 * SalvageVnodes: salvage the small and then the large vnode index of one
 * volume of a volume group.
 *
 *   salvinfo  salvage job state, passed through to SalvageIndex
 *   rwIsp     inode summary of the read/write volume; its index,
 *             nSpecialInodes and nInodes select the slice of `inodes`
 *             that holds the non-special inodes
 *   thisIsp   inode summary of the volume being salvaged; RW is true when
 *             it is the same summary as rwIsp
 *   inodes    inode table that the slice is taken from
 *   check     check-only flag, passed through to SalvageIndex; in check
 *             mode a small-index result of -1 is tested before the large
 *             index is processed
 *
 * Returns 0 only when both ismall and ilarge are 0, otherwise -1.
 * NOTE(review): ismall / ilarge are presumably the SalvageIndex results for
 * the small and large index; the assignments are not visible here.
 */
2509 SalvageVnodes(struct SalvInfo *salvinfo,
2510 struct InodeSummary *rwIsp,
2511 struct InodeSummary *thisIsp,
2512 struct ViceInodeInfo *inodes, int check)
2514 int ilarge, ismall, ioffset, RW, nInodes;
2515 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2518 RW = (rwIsp == thisIsp);
2519 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2521 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2522 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2523 if (check && ismall == -1)
2526 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2527 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2528 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex: walk one vnode index file and reconcile every allocated
 * vnode with the list of inodes found on disk for the volume group.
 *
 *   salvinfo    salvage job state (device, VolumeChanged flag)
 *   ino         inode of the vnode index file to process
 *   class       vnode class of that index (selects VnodeClassInfo[class])
 *   RW          non-zero when the index belongs to the read/write volume;
 *               the comment in the body describes the RW-only search for
 *               an inode whose number matches the vnode
 *   ip, nInodes cursor into the inode list and the number of entries left;
 *               the loops compare ip->u.vnode.vnodeNumber against the
 *               current vnode number, so the list is expected to be
 *               ordered by vnode number
 *   volSummary  volume summary; header.parent is used to open the index
 *   check       check-only mode; osi_Assert(!(vnodeChanged && check))
 *               enforces that nothing was modified while it is set
 *
 * The index size must be an exact multiple of vcp->diskSize; the first
 * record is skipped (STREAM_ASEEK to vcp->diskSize) and the remaining
 * nVnodes records are read one by one.  For each vnode whose type is not
 * vNull the visible code:
 *   - clears vnodes that have inode number 0 or a bad magic number;
 *   - on a matching inode, repairs uniquifier, data version, inode number
 *     and length from the inode and decrements ip->linkCount to keep the
 *     inode;
 *   - without a matching inode, logs the problem and zeroes the vnode;
 *   - writes a changed vnode back with IH_IWRITE (not in Testing mode)
 *     and sets salvinfo->VolumeChanged.
 */
2532 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2533 struct ViceInodeInfo *ip, int nInodes,
2534 struct VolumeSummary *volSummary, int check)
2536 char buf[SIZEOF_LARGEDISKVNODE];
2537 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2539 StreamHandle_t *file;
2540 struct VnodeClassInfo *vcp;
2542 afs_sfsize_t nVnodes;
2543 afs_fsize_t vnodeLength;
2545 afs_ino_str_t stmp1, stmp2;
2549 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2550 fdP = IH_OPEN(handle);
2551 osi_Assert(fdP != NULL);
2552 file = FDH_FDOPEN(fdP, "r+");
2553 osi_Assert(file != NULL);
2554 vcp = &VnodeClassInfo[class];
2555 size = OS_SIZE(fdP->fd_fd);
2556 osi_Assert(size != -1);
2557 nVnodes = (size / vcp->diskSize) - 1;
2559 osi_Assert((nVnodes + 1) * vcp->diskSize == size);
2560 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
2564 for (vnodeIndex = 0;
2565 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2566 nVnodes--, vnodeIndex++) {
2567 if (vnode->type != vNull) {
2568 int vnodeChanged = 0;
2569 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2570 if (VNDISK_GET_INO(vnode) == 0) {
2572 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2573 memset(vnode, 0, vcp->diskSize);
2577 if (vcp->magic != vnode->vnodeMagic) {
2578 /* bad magic #, probably partially created vnode */
2580 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2581 vnodeNumber, afs_printable_uint32_lu(vnode->vnodeMagic),
2582 afs_printable_uint32_lu(vcp->magic));
2583 memset(vnode, 0, vcp->diskSize);
2587 Log("Partially allocated vnode %d deleted.\n",
2589 memset(vnode, 0, vcp->diskSize);
2593 /* ****** Should do a bit more salvage here: e.g. make sure
2594 * vnode type matches what it should be given the index */
2595 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2596 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2597 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2598 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2605 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2606 /* The following doesn't work, because the version number
2607 * is not maintained correctly by the file server */
2608 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2609 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2611 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2617 /* For RW volume, look for vnode with matching inode number;
2618 * if no such match, take the first determined by our sort
2620 struct ViceInodeInfo *lip = ip;
2621 int lnInodes = nInodes;
2623 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2624 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2633 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2634 /* "Matching" inode */
2638 vu = vnode->uniquifier;
2639 iu = ip->u.vnode.vnodeUniquifier;
2640 vd = vnode->dataVersion;
2641 id = ip->u.vnode.inodeDataVersion;
2643 * Because of the possibility of the uniquifier overflows (> 4M)
2644 * we compare them modulo the low 22-bits; we shouldn't worry
2645 * about mismatching since they shouldn't to many old
2646 * uniquifiers of the same vnode...
2648 if (IUnique(vu) != IUnique(iu)) {
2650 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2653 vnode->uniquifier = iu;
2654 #ifdef AFS_3DISPARES
2655 vnode->dataVersion = (id >= vd ?
2658 1887437 ? vd : id) :
2661 1887437 ? id : vd));
2663 #if defined(AFS_SGI_EXMAG)
2664 vnode->dataVersion = (id >= vd ?
2667 15099494 ? vd : id) :
2670 15099494 ? id : vd));
2672 vnode->dataVersion = (id > vd ? id : vd);
2673 #endif /* AFS_SGI_EXMAG */
2674 #endif /* AFS_3DISPARES */
2677 /* don't bother checking for vd > id any more, since
2678 * partial file transfers always result in this state,
2679 * and you can't do much else anyway (you've already
2680 * found the best data you can) */
2681 #ifdef AFS_3DISPARES
2682 if (!vnodeIsDirectory(vnodeNumber)
2683 && ((vd < id && (id - vd) < 1887437)
2684 || ((vd > id && (vd - id) > 1887437)))) {
2686 #if defined(AFS_SGI_EXMAG)
2687 if (!vnodeIsDirectory(vnodeNumber)
2688 && ((vd < id && (id - vd) < 15099494)
2689 || ((vd > id && (vd - id) > 15099494)))) {
2691 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2692 #endif /* AFS_SGI_EXMAG */
2695 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2696 vnode->dataVersion = id;
2701 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2704 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2706 VNDISK_SET_INO(vnode, ip->inodeNumber);
2711 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2713 VNDISK_SET_INO(vnode, ip->inodeNumber);
2716 VNDISK_GET_LEN(vnodeLength, vnode);
2717 if (ip->byteCount != vnodeLength) {
2720 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2725 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2726 VNDISK_SET_LEN(vnode, ip->byteCount);
2730 ip->linkCount--; /* Keep the inode around */
2733 } else { /* no matching inode */
2735 if (VNDISK_GET_INO(vnode) != 0
2736 || vnode->type == vDirectory) {
2737 /* No matching inode--get rid of the vnode */
2739 if (VNDISK_GET_INO(vnode)) {
2741 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2745 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2750 if (VNDISK_GET_INO(vnode)) {
2752 time_t serverModifyTime = vnode->serverModifyTime;
2753 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2757 time_t serverModifyTime = vnode->serverModifyTime;
2758 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2761 memset(vnode, 0, vcp->diskSize);
2764 /* Should not reach here because we checked for
2765 * (inodeNumber == 0) above. And where we zero the vnode,
2766 * we also goto vnodeDone.
2770 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2774 } /* VNDISK_GET_INO(vnode) != 0 */
2776 osi_Assert(!(vnodeChanged && check));
2777 if (vnodeChanged && !Testing) {
2778 osi_Assert(IH_IWRITE
2779 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2780 (char *)vnode, vcp->diskSize)
2782 salvinfo->VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber: map a vnode number to its in-memory VnodeEssence.
 *
 * The vnode class is derived from the number (vnodeIdToClass) and selects
 * salvinfo->vnodeInfo[class]; the slot index comes from vnodeIdToBitNumber.
 *
 * Returns a pointer into vip->vnodes, or NULL when the slot index is not
 * below vip->nVnodes, i.e. the number does not refer to a known vnode slot.
 */
2793 struct VnodeEssence *
2794 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2797 struct VnodeInfo *vip;
2800 class = vnodeIdToClass(vnodeNumber);
2801 vip = &salvinfo->vnodeInfo[class];
2802 offset = vnodeIdToBitNumber(vnodeNumber);
2803 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite: give the directory being salvaged a private copy of its
 * data inode before the salvager modifies it.
 *
 *   salvinfo  salvage job state (device, path, large vnode index handle,
 *             VolumeChanged flag)
 *   dir       directory summary; dirHandle is switched to the new inode
 *
 * Nothing is done when dir->copied is already set or when running in
 * Testing mode.  Otherwise the directory vnode is read from the large
 * vnode index, a new inode is created with a bumped data version, the old
 * inode's contents are copied into it (CopyInode), the vnode is rewritten
 * to point at the new inode and the directory handle is re-targeted.
 * Every step is enforced with osi_Assert.  The old inode is deliberately
 * left in place here because the directory is still being scanned;
 * SalvageDir releases it after enumeration when dir.copied is set.
 */
2807 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2809 /* Copy the directory unconditionally if we are going to change it:
2810 * not just if was cloned.
2812 struct VnodeDiskObject vnode;
2813 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2814 Inode oldinode, newinode;
2817 if (dir->copied || Testing)
2819 DFlush(); /* Well justified paranoia... */
2822 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2823 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2825 osi_Assert(code == sizeof(vnode));
2826 oldinode = VNDISK_GET_INO(&vnode);
2827 /* Increment the version number by a whole lot to avoid problems with
2828 * clients that were promised new version numbers--but the file server
2829 * crashed before the versions were written to disk.
2832 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2833 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2835 osi_Assert(VALID_INO(newinode));
2836 osi_Assert(CopyInode(salvinfo->fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2838 VNDISK_SET_INO(&vnode, newinode);
2840 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2841 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2843 osi_Assert(code == sizeof(vnode));
2845 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2846 salvinfo->fileSysDevice, newinode,
2847 &salvinfo->VolumeChanged);
2848 /* Don't delete the original inode right away, because the directory is
2849 * still being scanned.
/*
 * CopyAndSalvage: rebuild a damaged directory into a freshly created inode
 * and switch the directory vnode over to it.
 *
 *   salvinfo  salvage job state (device, path, large vnode index handle)
 *   dir       directory summary; on success dir->dirHandle refers to the
 *             rebuilt directory
 *
 * Visible steps: read the directory vnode, create a new inode with a
 * bumped data version, run DirSalvage from the old handle into the new
 * one ("." keeps the vnode's own uniquifier; ".." uses the parent's
 * uniquifier from CheckVnodeNumber, or 1 when that is unknown), verify the
 * result with DirOK, update the vnode's inode number and length, write the
 * vnode back, sync, close the old directory file and drop the link on the
 * old inode.
 *
 * If DirSalvage fails or the rebuilt directory does not pass DirOK, the
 * new inode's link count is dropped again with IH_DEC.
 * NOTE(review): the early returns on those failure paths are not visible
 * in this chunk.
 */
2855 * This function should either successfully create a new dir, or give up
2856 * and leave things the way they were. In particular, if it fails to write
2857 * the new dir properly, it should return w/o changing the reference to the
2861 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2863 struct VnodeDiskObject vnode;
2864 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2865 Inode oldinode, newinode;
2870 afs_int32 parentUnique = 1;
2871 struct VnodeEssence *vnodeEssence;
2876 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2878 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2879 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2881 osi_Assert(lcode == sizeof(vnode));
2882 oldinode = VNDISK_GET_INO(&vnode);
2883 /* Increment the version number by a whole lot to avoid problems with
2884 * clients that were promised new version numbers--but the file server
2885 * crashed before the versions were written to disk.
2888 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2889 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2891 osi_Assert(VALID_INO(newinode));
2892 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2893 &salvinfo->VolumeChanged);
2895 /* Assign . and .. vnode numbers from dir and vnode.parent.
2896 * The uniquifier for . is in the vnode.
2897 * The uniquifier for .. might be set to a bogus value of 1 and
2898 * the salvager will later clean it up.
2900 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2901 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2904 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2906 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2911 /* didn't really build the new directory properly, let's just give up. */
2912 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2913 Log("Directory salvage returned code %d, continuing.\n", code);
2915 Log("also failed to decrement link count on new inode");
2919 Log("Checking the results of the directory salvage...\n");
2920 if (!DirOK(&newdir)) {
2921 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2922 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2923 osi_Assert(code == 0);
2927 VNDISK_SET_INO(&vnode, newinode);
2928 length = Length(&newdir);
2929 VNDISK_SET_LEN(&vnode, length);
2931 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2932 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2934 osi_Assert(lcode == sizeof(vnode));
2937 nt_sync(salvinfo->fileSysDevice);
2939 sync(); /* this is slow, but hopefully rarely called. We don't have
2940 * an open FD on the file itself to fsync.
2944 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
2946 /* make sure old directory file is really closed */
2947 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2948 FDH_REALLYCLOSE(fdP);
2950 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2951 osi_Assert(code == 0);
2952 dir->dirHandle = newdir;
/*
 * Bundle of state handed to JudgeEntry through its opaque first argument:
 * SalvageDir fills one of these and passes its address to EnumerateDir,
 * and JudgeEntry reads it back from its arock parameter.
 */
2956 * arguments for JudgeEntry.
2958 struct judgeEntry_params {
2959 struct DirSummary *dir; /**< directory we're examining entries in */
2960 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/*
 * JudgeEntry: per-entry callback used while enumerating a directory
 * (SalvageDir passes it to EnumerateDir).  It validates one directory
 * entry against the in-memory vnode table and repairs or removes it.
 *
 *   arock        struct judgeEntry_params * carrying the directory summary
 *                and the salvage job state
 *   name         entry name
 *   vnodeNumber  vnode number stored in the entry
 *   unique       uniquifier stored in the entry (used throughout the body;
 *                its declaration line is not visible in this chunk)
 *
 * Checks visible in the body, in order:
 *   - entry refers to no known vnode slot (CheckVnodeNumber is NULL):
 *     entry is deleted;
 *   - non-namei builds only: vnode has inode number 0: entry is deleted;
 *   - even vnode number with no link count, uniquifier or mode bits:
 *     reported as invalid and deleted;
 *   - uniquifier mismatch: entry is deleted and, unless the vnode's
 *     uniquifier is zero or the directory is orphaned, recreated with the
 *     vnode's uniquifier; "." and ".." of an orphaned directory are left
 *     alone;
 *   - "." and ".." are forced to point at the directory itself and its
 *     parent respectively;
 *   - names starting with ".__afs" are removed and their vnode marked
 *     unclaimed and to-be-deleted;
 *   - other entries: optional suid/root-file/mount-point reporting, a
 *     symlink without execute bits that does not look like a mount point
 *     is converted to a plain symlink, the name is remembered for large
 *     vnodes, and the parent pointer is reconciled -- an unclaimed vnode is
 *     adopted by this directory, a vnode already claimed elsewhere has this
 *     entry deleted;
 *   - finally the vnode is marked claimed and its expected link count is
 *     decremented.
 *
 * Every modification goes through CopyOnWrite first; Delete/Create
 * failures are fatal via osi_Assert.
 */
2964 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
2967 struct judgeEntry_params *params = arock;
2968 struct DirSummary *dir = params->dir;
2969 struct SalvInfo *salvinfo = params->salvinfo;
2970 struct VnodeEssence *vnodeEssence;
2971 afs_int32 dirOrphaned, todelete;
2973 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
2975 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
2976 if (vnodeEssence == NULL) {
2978 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2981 CopyOnWrite(salvinfo, dir);
2982 osi_Assert(Delete(&dir->dirHandle, name) == 0);
2987 #ifndef AFS_NAMEI_ENV
2988 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2989 * mount inode for the partition. If this inode were deleted, it would crash
2992 if (vnodeEssence->InodeNumber == 0) {
2993 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2995 CopyOnWrite(salvinfo, dir);
2996 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3003 if (!(vnodeNumber & 1) && !Showmode
3004 && !(vnodeEssence->count || vnodeEssence->unique
3005 || vnodeEssence->modeBits)) {
3006 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
3007 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3008 vnodeNumber, unique,
3009 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3013 CopyOnWrite(salvinfo, dir);
3014 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3020 /* Check if the Uniquifiers match. If not, change the directory entry
3021 * so its unique matches the vnode unique. Delete if the unique is zero
3022 * or if the directory is orphaned.
3024 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3025 if (!vnodeEssence->unique
3026 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3027 /* This is an orphaned directory. Don't delete the . or ..
3028 * entry. Otherwise, it will get created in the next
3029 * salvage and deleted again here. So Just skip it.
3034 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3037 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3041 fid.Vnode = vnodeNumber;
3042 fid.Unique = vnodeEssence->unique;
3043 CopyOnWrite(salvinfo, dir);
3044 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3046 osi_Assert(Create(&dir->dirHandle, name, &fid) == 0);
3049 return 0; /* no need to continue */
3052 if (strcmp(name, ".") == 0) {
3053 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3056 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3058 CopyOnWrite(salvinfo, dir);
3059 osi_Assert(Delete(&dir->dirHandle, ".") == 0);
3060 fid.Vnode = dir->vnodeNumber;
3061 fid.Unique = dir->unique;
3062 osi_Assert(Create(&dir->dirHandle, ".", &fid) == 0);
3065 vnodeNumber = fid.Vnode; /* Get the new Essence */
3066 unique = fid.Unique;
3067 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3070 } else if (strcmp(name, "..") == 0) {
3073 struct VnodeEssence *dotdot;
3074 pa.Vnode = dir->parent;
3075 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3076 osi_Assert(dotdot != NULL); /* XXX Should not be assert */
3077 pa.Unique = dotdot->unique;
3079 pa.Vnode = dir->vnodeNumber;
3080 pa.Unique = dir->unique;
3082 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3084 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3086 CopyOnWrite(salvinfo, dir);
3087 osi_Assert(Delete(&dir->dirHandle, "..") == 0);
3088 osi_Assert(Create(&dir->dirHandle, "..", &pa) == 0);
3091 vnodeNumber = pa.Vnode; /* Get the new Essence */
3093 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3095 dir->haveDotDot = 1;
3096 } else if (strncmp(name, ".__afs", 6) == 0) {
3098 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3101 CopyOnWrite(salvinfo, dir);
3102 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3104 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3105 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3108 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3109 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3110 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3111 && !(vnodeEssence->modeBits & 0111)) {
3112 afs_sfsize_t nBytes;
3118 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3119 vnodeEssence->InodeNumber);
3122 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3126 size = FDH_SIZE(fdP);
3128 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3129 FDH_REALLYCLOSE(fdP);
3136 nBytes = FDH_PREAD(fdP, buf, size, 0);
3137 if (nBytes == size) {
3139 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3140 Log("Volume %u (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3141 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3142 Testing ? "would convert" : "converted");
3143 vnodeEssence->modeBits |= 0111;
3144 vnodeEssence->changed = 1;
3145 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3146 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3147 dir->name ? dir->name : "??", name, buf);
3149 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3150 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3152 FDH_REALLYCLOSE(fdP);
3155 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3156 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3157 if (vnodeIdToClass(vnodeNumber) == vLarge
3158 && vnodeEssence->name == NULL) {
3160 if ((n = (char *)malloc(strlen(name) + 1)))
3162 vnodeEssence->name = n;
3165 /* The directory entry points to the vnode. Check to see if the
3166 * vnode points back to the directory. If not, then let the
3167 * directory claim it (else it might end up orphaned). Vnodes
3168 * already claimed by another directory are deleted from this
3169 * directory: hardlinks to the same vnode are not allowed
3170 * from different directories.
3172 if (vnodeEssence->parent != dir->vnodeNumber) {
3173 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3174 /* Vnode does not point back to this directory.
3175 * Orphaned dirs cannot claim a file (it may belong to
3176 * another non-orphaned dir).
3179 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3181 vnodeEssence->parent = dir->vnodeNumber;
3182 vnodeEssence->changed = 1;
3184 /* Vnode was claimed by another directory */
3187 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3188 } else if (vnodeNumber == 1) {
3189 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3191 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3195 CopyOnWrite(salvinfo, dir);
3196 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3201 /* This directory claims the vnode */
3202 vnodeEssence->claimed = 1;
3204 vnodeEssence->count--;
3209 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3210 VnodeClass class, Inode ino, Unique * maxu)
3212 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3213 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3214 char buf[SIZEOF_LARGEDISKVNODE];
3215 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3217 StreamHandle_t *file;
3222 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3223 fdP = IH_OPEN(vip->handle);
3224 osi_Assert(fdP != NULL);
3225 file = FDH_FDOPEN(fdP, "r+");
3226 osi_Assert(file != NULL);
3227 size = OS_SIZE(fdP->fd_fd);
3228 osi_Assert(size != -1);
3229 vip->nVnodes = (size / vcp->diskSize) - 1;
3230 if (vip->nVnodes > 0) {
3231 osi_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3232 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
3233 osi_Assert((vip->vnodes = (struct VnodeEssence *)
3234 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3235 if (class == vLarge) {
3236 osi_Assert((vip->inodes = (Inode *)
3237 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3246 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3247 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3248 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3249 nVnodes--, vnodeIndex++) {
3250 if (vnode->type != vNull) {
3251 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3252 afs_fsize_t vnodeLength;
3253 vip->nAllocatedVnodes++;
3254 vep->count = vnode->linkCount;
3255 VNDISK_GET_LEN(vnodeLength, vnode);
3256 vep->blockCount = nBlocks(vnodeLength);
3257 vip->volumeBlockCount += vep->blockCount;
3258 vep->parent = vnode->parent;
3259 vep->unique = vnode->uniquifier;
3260 if (*maxu < vnode->uniquifier)
3261 *maxu = vnode->uniquifier;
3262 vep->modeBits = vnode->modeBits;
3263 vep->InodeNumber = VNDISK_GET_INO(vnode);
3264 vep->type = vnode->type;
3265 vep->author = vnode->author;
3266 vep->owner = vnode->owner;
3267 vep->group = vnode->group;
3268 if (vnode->type == vDirectory) {
3269 if (class != vLarge) {
3270 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3271 vip->nAllocatedVnodes--;
3272 memset(vnode, 0, sizeof(vnode));
3273 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3274 vnodeIndexOffset(vcp, vnodeNumber),
3275 (char *)&vnode, sizeof(vnode));
3276 salvinfo->VolumeChanged = 1;
3278 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName: build the path of a directory vnode into `path` by walking
 * up the parent chain.
 *
 *   salvinfo  salvage job state, used to look up the parent's VnodeEssence
 *   vnode     vnode number of the directory
 *   vp        its VnodeEssence (parent and name are read)
 *   path      output buffer (its declaration line is not visible in this
 *             chunk; SalvageDir passes a MAXPATHLEN-sized static array)
 *
 * When the vnode has a parent and a recorded name and the parent's path
 * can be produced recursively, OS_DIRSEP and vp->name are appended to it.
 * NOTE(review): the base case and the return statements are not visible
 * here.
 * NOTE(review): the strcat calls do not check the remaining space in
 * `path`; a deep or long-named hierarchy could exceed the caller's buffer
 * -- confirm whether callers bound the depth.
 */
3287 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3290 struct VnodeEssence *parentvp;
3296 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3297 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3298 strcat(path, OS_DIRSEP);
3299 strcat(path, vp->name);
/*
 * IsVnodeOrphaned: report whether a vnode is orphaned.
 *
 * Returns 1 (orphaned) when the vnode has no VnodeEssence or is not
 * claimed; otherwise the answer is that of its parent, found by recursing
 * on vep->parent.  The two early returns are annotated in the code as the
 * "vnode zero does not exist" case (1) and the "root dir vnode is always
 * claimed" case (0); their conditions are not visible in this chunk.
 * NOTE(review): the recursion has no visible guard against a cycle in the
 * parent chain -- confirm that claimed vnodes cannot form one.
 */
3305 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3306 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
3309 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3311 struct VnodeEssence *vep;
3314 return (1); /* Vnode zero does not exist */
3316 return (0); /* The root dir vnode is always claimed */
3317 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3318 if (!vep || !vep->claimed)
3319 return (1); /* Vnode is not claimed - it is orphaned */
3321 return (IsVnodeOrphaned(salvinfo, vep->parent));
/*
 * SalvageDir: salvage the directory stored in slot i of the large vnode
 * table, making sure its parent directory has been salvaged first.
 *
 *   salvinfo      salvage job state
 *   name          passed through unchanged to the recursive call
 *   rwVid         id of the read/write volume, used when releasing the
 *                 old directory inode
 *   dirVnodeInfo  large-vnode table (vnodes[] and inodes[] are indexed
 *                 by i)
 *   alinkH        link-table handle stored in dir.ds_linkH
 *   i             slot index of the directory to salvage
 *   rootdir       receives a copy of the DirSummary of vnode 1 / unique 1
 *                 after it has been judged
 *   rootdirfound  NOTE(review): presumably set when rootdir is filled in;
 *                 the assignment is not visible in this chunk
 *
 * A slot is processed once (the salvaged flag is set on entry) and slots
 * without a directory inode are ignored.  For vnode 1 a non-zero parent
 * is reset to 0; for other directories the parent is salvaged first by
 * recursion.  A directory that fails DirOK (or every directory when
 * RebuildDirs is set outside Testing) is reported as bad and rebuilt with
 * CopyAndSalvage.  All entries are then checked with JudgeEntry through
 * EnumerateDir, and if CopyOnWrite made a copy during that pass the
 * original inode is released.
 *
 * dir, dirHandle and path are function-static.
 * NOTE(review): this presumably keeps them off the stack across the
 * recursion; the recursive call happens before they are filled in for the
 * current directory.
 */
3325 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3326 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3327 struct DirSummary *rootdir, int *rootdirfound)
3329 static struct DirSummary dir;
3330 static struct DirHandle dirHandle;
3331 struct VnodeEssence *parent;
3332 static char path[MAXPATHLEN];
3335 if (dirVnodeInfo->vnodes[i].salvaged)
3336 return; /* already salvaged */
3339 dirVnodeInfo->vnodes[i].salvaged = 1;
3341 if (dirVnodeInfo->inodes[i] == 0)
3342 return; /* Not allocated to a directory */
3344 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3345 if (dirVnodeInfo->vnodes[i].parent) {
3346 Log("Bad parent, vnode 1; %s...\n",
3347 (Testing ? "skipping" : "salvaging"));
3348 dirVnodeInfo->vnodes[i].parent = 0;
3349 dirVnodeInfo->vnodes[i].changed = 1;
3352 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3353 if (parent && parent->salvaged == 0)
3354 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3355 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3356 rootdir, rootdirfound);
3359 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3360 dir.unique = dirVnodeInfo->vnodes[i].unique;
3363 dir.parent = dirVnodeInfo->vnodes[i].parent;
3364 dir.haveDot = dir.haveDotDot = 0;
3365 dir.ds_linkH = alinkH;
3366 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3367 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
3369 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3372 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3373 (Testing ? "skipping" : "salvaging"));
3376 CopyAndSalvage(salvinfo, &dir);
3378 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3381 dirHandle = dir.dirHandle;
3384 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3385 &dirVnodeInfo->vnodes[i], path);
3388 /* If enumeration failed for random reasons, we will probably delete
3389 * too much stuff, so we guard against this instead.
3391 struct judgeEntry_params judge_params;
3392 judge_params.salvinfo = salvinfo;
3393 judge_params.dir = &dir;
3395 osi_Assert(EnumerateDir(&dirHandle, JudgeEntry, &judge_params) == 0);
3398 /* Delete the old directory if it was copied in order to salvage.
3399 * CopyOnWrite has written the new inode # to the disk, but we still
3400 * have the old one in our local structure here. Thus, we idec the
3404 if (dir.copied && !Testing) {
3405 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3406 osi_Assert(code == 0);
3407 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3410 /* Remember rootdir DirSummary _after_ it has been judged */
3411 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3412 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3420 * Get a new FID that can be used to create a new file.
3422 * @param[in] volHeader vol header for the volume
3423 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3424 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3425 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3426 * updated to the new max unique if we create a new
3430 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3431 VnodeClass class, AFSFid *afid, Unique *maxunique)
3434 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3435 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
3439 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3440 /* no free vnodes; make a new one */
3441 salvinfo->vnodeInfo[class].nVnodes++;
3442 salvinfo->vnodeInfo[class].vnodes =
3443 realloc(salvinfo->vnodeInfo[class].vnodes,
3444 sizeof(struct VnodeEssence) * (i+1));
3446 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
3449 afid->Vnode = bitNumberToVnodeNumber(i, class);
3451 if (volHeader->uniquifier < (*maxunique + 1)) {
3452 /* header uniq is bad; it will get bumped by 2000 later */
3453 afid->Unique = *maxunique + 1 + 2000;
3456 /* header uniq seems okay; just use that */
3457 afid->Unique = *maxunique = volHeader->uniquifier++;
3462 * Create a vnode for a README file explaining not to use a recreated-root vol.
3464 * @param[in] volHeader vol header for the volume
3465 * @param[in] alinkH ihandle for i/o for the volume
3466 * @param[in] vid volume id
3467 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3468 * updated to the new max unique if we create a new
3470 * @param[out] afid FID for the new readme vnode
3471 * @param[out] ainode the inode for the new readme file
3473 * @return operation status
/*
 * CreateReadme: obtain a small-vnode FID from GetNewFID, create an inode
 * holding the readme text, write a vFile vnode describing it into the small
 * vnode index, and mirror that vnode into the in-memory VnodeEssence entry.
 * The inode number of the readme is handed back through ainode.
 */
3478 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3479 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3483 struct VnodeDiskObject *rvnode = NULL;
3485 IHandle_t *readmeH = NULL;
3486 struct VnodeEssence *vep;
3488 time_t now = time(NULL);
3490 /* Try to make the note brief, but informative. Only administrators should
3491 * be able to read this file at first, so we can hopefully assume they
3492 * know what AFS is, what a volume is, etc. */
3494 "This volume has been salvaged, but has lost its original root directory.\n"
3495 "The root directory that exists now has been recreated from orphan files\n"
3496 "from the rest of the volume. This recreated root directory may interfere\n"
3497 "with old cached data on clients, and there is no way the salvager can\n"
3498 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3499 "use this volume, but only copy the salvaged data to a new volume.\n"
3500 "Continuing to use this volume as it exists now may cause some clients to\n"
3501 "behave oddly when accessing this volume.\n"
3502 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3503 /* ^ the person reading this probably just lost some data, so they could
3504 * use some cheering up. */
3506 /* -1 for the trailing NUL */
3507 length = sizeof(readme) - 1;
/* GetNewFID fills in afid->Vnode and afid->Unique for a small (file) vnode */
3509 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
/* vep is the in-memory entry that corresponds to the FID just handed out */
3511 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3513 /* create the inode and write the contents */
3514 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3515 salvinfo->fileSysPath, 0, vid,
3516 afid->Vnode, afid->Unique, 1);
3517 if (!VALID_INO(readmeinode)) {
3518 Log("CreateReadme: readme IH_CREATE failed\n");
/* the handle is only needed long enough to write the text into the new inode */
3522 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3523 bytes = IH_IWRITE(readmeH, 0, readme, length);
3524 IH_RELEASE(readmeH);
/* NOTE(review): the comparison uses length (sizeof(readme) - 1) but the
 * message prints sizeof(readme), so the reported total is one too high */
3526 if (bytes != length) {
3527 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3528 (int)sizeof(readme));
3532 /* create the vnode and write it out */
3533 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
/* NOTE(review): this message is emitted from CreateReadme but names CreateRootDir */
3535 Log("CreateRootDir: error alloc'ing memory\n");
/* describe a regular file of 'length' bytes whose data lives in readmeinode */
3539 rvnode->type = vFile;
3541 rvnode->modeBits = 0777;
3542 rvnode->linkCount = 1;
3543 VNDISK_SET_LEN(rvnode, length);
3544 rvnode->uniquifier = afid->Unique;
3545 rvnode->dataVersion = 1;
3546 VNDISK_SET_INO(rvnode, readmeinode);
3547 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3552 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
/* store the vnode at the index offset that belongs to afid->Vnode */
3554 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3555 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3556 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3558 if (bytes != SIZEOF_SMALLDISKVNODE) {
3559 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3560 (int)SIZEOF_SMALLDISKVNODE);
3564 /* update VnodeEssence for new readme vnode */
3565 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
3567 vep->blockCount = nBlocks(length);
3568 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3569 vep->parent = rvnode->parent;
3570 vep->unique = rvnode->uniquifier;
3571 vep->modeBits = rvnode->modeBits;
3572 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3573 vep->type = rvnode->type;
3574 vep->author = rvnode->author;
3575 vep->owner = rvnode->owner;
3576 vep->group = rvnode->group;
/* hand the readme's inode number back to the caller */
3586 *ainode = readmeinode;
/* recovery: drop the reference taken on the readme inode.
 * NOTE(review): confirm this is never reached with an invalid readmeinode
 * (for example after the IH_CREATE failure reported above) */
3591 if (IH_DEC(alinkH, readmeinode, vid)) {
3592 Log("CreateReadme (recovery): IH_DEC failed\n");
3604 * create a root dir for a volume that lacks one.
3606 * @param[in] volHeader vol header for the volume
3607 * @param[in] alinkH ihandle for disk access for this volume group
3608 * @param[in] vid volume id we're dealing with
3609 * @param[out] rootdir populated with info about the new root dir
3610 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3611 * updated to the new max unique if we create a new
3614 * @return operation status
/*
 * CreateRootDir: build a replacement root directory (vnode 1, unique 1) for
 * a volume that has none. A readme file is created first via CreateReadme,
 * then a directory inode containing a README.ROOTDIR entry, then the large
 * vnode describing that directory; finally the in-memory VnodeEssence and
 * the caller's DirSummary are filled in to match.
 */
3619 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3620 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
/* decroot / decreadme gate the IH_DEC calls in the recovery code at the end */
3624 int decroot = 0, decreadme = 0;
3625 AFSFid did, readmeid;
3628 struct VnodeDiskObject *rootvnode = NULL;
3629 struct acl_accessList *ACL;
3632 struct VnodeEssence *vep;
3634 time_t now = time(NULL);
/* with neither vnode table present there is nothing to attach to a new root */
3636 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3637 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3641 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3642 /* We don't have any large vnodes in the volume; allocate room
3643 * for one so we can recreate the root dir */
3644 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3645 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3646 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3648 osi_Assert(salvinfo->vnodeInfo[vLarge].vnodes);
3649 osi_Assert(salvinfo->vnodeInfo[vLarge].inodes);
/* vep and ip address the table slots of vnode number 1, the root directory */
3652 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3653 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
/* never overwrite a vnode 1 that is already in use */
3654 if (vep->type != vNull) {
3655 Log("Not creating new root dir; existing vnode 1 is non-null\n");
/* the readme must exist before the directory, which gets an entry naming it */
3659 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3660 &readmeinode) != 0) {
3665 /* set the DV to a very high number, so it is unlikely that we collide
3666 * with a cached DV */
3669 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3671 if (!VALID_INO(rootinode)) {
3672 Log("CreateRootDir: IH_CREATE failed\n");
/* point the caller's directory handle at the freshly created inode */
3677 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3678 rootinode, &salvinfo->VolumeChanged);
/* did is passed for both FID arguments of MakeDir */
3682 if (MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3683 Log("CreateRootDir: MakeDir failed\n");
/* the only entry added to the new directory is the readme */
3686 if (Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3687 Log("CreateRootDir: Create failed\n");
/* remember the directory's length for the vnode written below */
3691 length = Length(&rootdir->dirHandle);
3692 DZap((void *)&rootdir->dirHandle);
3694 /* create the new root dir vnode */
3695 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
3697 Log("CreateRootDir: malloc failed\n");
3701 /* only give 'rl' permissions to 'system:administrators'. We do this to
3702 * try to catch the attention of an administrator, that they should not
3703 * be writing to this directory or continue to use it. */
3704 ACL = VVnodeDiskACL(rootvnode);
3705 ACL->size = sizeof(struct acl_accessList);
3706 ACL->version = ACL_ACLVERSION;
3710 ACL->entries[0].id = -204; /* system:administrators */
3711 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
/* describe a directory of 'length' bytes backed by rootinode, with unique 1 */
3713 rootvnode->type = vDirectory;
3714 rootvnode->cloned = 0;
3715 rootvnode->modeBits = 0777;
3716 rootvnode->linkCount = 2;
3717 VNDISK_SET_LEN(rootvnode, length);
3718 rootvnode->uniquifier = 1;
3719 rootvnode->dataVersion = dv;
3720 VNDISK_SET_INO(rootvnode, rootinode);
3721 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3722 rootvnode->author = 0;
3723 rootvnode->owner = 0;
3724 rootvnode->parent = 0;
3725 rootvnode->group = 0;
3726 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3728 /* write it out to disk */
3729 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3730 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3731 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3733 if (bytes != SIZEOF_LARGEDISKVNODE) {
3734 /* just cast to int and don't worry about printing real 64-bit ints;
3735 * a large disk vnode isn't anywhere near the 32-bit limit */
3736 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3737 (int)SIZEOF_LARGEDISKVNODE);
3741 /* update VnodeEssence for the new root vnode */
3742 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3744 vep->blockCount = nBlocks(length);
3745 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3746 vep->parent = rootvnode->parent;
3747 vep->unique = rootvnode->uniquifier;
3748 vep->modeBits = rootvnode->modeBits;
3749 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3750 vep->type = rootvnode->type;
3751 vep->author = rootvnode->author;
3752 vep->owner = rootvnode->owner;
3753 vep->group = rootvnode->group;
3763 /* update DirSummary for the new root vnode */
3764 rootdir->vnodeNumber = 1;
3765 rootdir->unique = 1;
3766 rootdir->haveDot = 1;
3767 rootdir->haveDotDot = 1;
3768 rootdir->rwVid = vid;
3769 rootdir->copied = 0;
3770 rootdir->parent = 0;
/* NOTE(review): the strdup result is stored without a NULL check */
3771 rootdir->name = strdup(".");
3772 rootdir->vname = volHeader->name;
3773 rootdir->ds_linkH = alinkH;
/* recovery: release whichever inodes were flagged via decroot / decreadme */
3780 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3781 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3783 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3784 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3794 * salvage a volume group.
3796 * @param[in] salvinfo information for the current salvage job
3797 * @param[in] rwIsp inode summary for rw volume
3798 * @param[in] alinkH link table inode handle
3800 * @return operation status
/*
 * SalvageVolume: salvage one read-write volume. The steps visible here are:
 *   1. read and sanity-check the volume header;
 *   2. distil both vnode indexes into in-memory VnodeEssence tables;
 *   3. salvage every directory vnode (SalvageDir);
 *   4. optionally recreate a missing root directory;
 *   5. find orphaned vnodes and, if requested, attach them to the root dir;
 *   6. write back every changed vnode, removing orphans if requested;
 *   7. fix up the header statistics and uniquifier and write the header out.
 */
3804 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3806 /* This routine, for now, will only be called for read-write volumes */
3808 int BlocksInVolume = 0, FilesInVolume = 0;
3810 struct DirSummary rootdir, oldrootdir;
3811 struct VnodeInfo *dirVnodeInfo;
3812 struct VnodeDiskObject vnode;
3813 VolumeDiskData volHeader;
3815 int orphaned, rootdirfound = 0;
3816 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3817 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3818 struct VnodeEssence *vep;
3821 afs_sfsize_t nBytes;
3823 VnodeId LFVnode, ThisVnode;
3824 Unique LFUnique, ThisUnique;
/* load the volume header; a short read, bad magic or a lingering DESTROY_ME
 * flag is treated as fatal */
3828 vid = rwIsp->volSummary->header.id;
3829 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3830 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3831 osi_Assert(nBytes == sizeof(volHeader));
3832 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3833 osi_Assert(volHeader.destroyMe != DESTROY_ME);
3834 /* (should not have gotten this far with DESTROY_ME flag still set!) */
/* both index passes feed the same maxunique */
3836 DistilVnodeEssence(salvinfo, vid, vLarge,
3837 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
3838 DistilVnodeEssence(salvinfo, vid, vSmall,
3839 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
/* directories live in the large vnode class; salvage each one in turn */
3841 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
3842 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3843 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
3844 &rootdir, &rootdirfound);
3847 nt_sync(salvinfo->fileSysDevice);
3849 sync(); /* This used to be done lower level, for every dir */
/* a replacement root dir is only attempted when orphans are to be attached
 * and we are not in Testing mode */
3856 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
3858 Log("Cannot find root directory for volume %lu; attempting to create "
3859 "a new one\n", afs_printable_uint32_lu(vid));
3861 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
3866 salvinfo->VolumeChanged = 1;
/* oldrootdir (assigned just below) keeps the root dir state from before any
 * CopyOnWrite so the superseded inode can be released after the loop */
3870 /* Parse each vnode looking for orphaned vnodes and
3871 * connect them to the tree as orphaned (if requested).
3873 oldrootdir = rootdir;
3874 for (class = 0; class < nVNODECLASSES; class++) {
3875 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
3876 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
3877 ThisVnode = bitNumberToVnodeNumber(v, class);
3878 ThisUnique = vep->unique;
3880 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3881 continue; /* Ignore unused, claimed, and root vnodes */
3883 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3884 * entry in this vnode had incremented the parent link count (In
3885 * JudgeEntry()). We need to go to the parent and decrement that
3886 * link count. But if the parent's unique is zero, then the parent
3887 * link count was not incremented in JudgeEntry().
3889 if (class == vLarge) { /* directory vnode */
3890 pv = vnodeIdToBitNumber(vep->parent);
3891 if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
3892 if (vep->parent == 1 && newrootdir) {
3893 /* this vnode's parent was the volume root, and
3894 * we just created the volume root. So, the parent
3895 * dir didn't exist during JudgeEntry, so the link
3896 * count was not inc'd there, so don't dec it here.
3902 salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
3908 continue; /* If no rootdir, can't attach orphaned files */
3910 /* Here we attach orphaned files and directories into the
3911 * root directory, LFVnode, making sure link counts stay correct.
3913 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3914 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3915 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3917 /* Update this orphaned vnode's info. Its parent info and
3918 * link count (do for orphaned directories and files).
3920 vep->parent = LFVnode; /* Parent is the root dir */
3921 vep->unique = LFUnique;
/* 'count' is later subtracted from the on-disk link count (see the write-back
 * pass below), so decrementing it here raises the resulting link count */
3924 vep->count--; /* Inc link count (root dir will pt to it) */
3926 /* If this orphaned vnode is a directory, change '..'.
3927 * The name of the orphaned dir/file is unknown, so we
3928 * build a unique name. No need to CopyOnWrite the directory
3929 * since it is not connected to tree in BK or RO volume and
3930 * won't be visible there.
3932 if (class == vLarge) {
3936 /* Remove and recreate the ".." entry in this orphaned directory */
3937 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
3938 salvinfo->vnodeInfo[class].inodes[v],
3939 &salvinfo->VolumeChanged);
3941 pa.Unique = LFUnique;
3942 osi_Assert(Delete(&dh, "..") == 0);
3943 osi_Assert(Create(&dh, "..", &pa) == 0);
3945 /* The original parent's link count was decremented above.
3946 * Here we increment the new parent's link count.
3948 pv = vnodeIdToBitNumber(LFVnode);
3949 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
3953 /* Go to the root dir and add this entry. The link count of the
3954 * root dir was incremented when ".." was created. Try 10 times.
3956 for (j = 0; j < 10; j++) {
3957 pa.Vnode = ThisVnode;
3958 pa.Unique = ThisUnique;
3960 (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
3962 vLarge) ? "__ORPHANDIR__" :
3963 "__ORPHANFILE__"), ThisVnode,
3966 CopyOnWrite(salvinfo, &rootdir);
3967 code = Create(&rootdir.dirHandle, npath, &pa);
3971 ThisUnique += 50; /* Try creating a different file */
/* running out of naming attempts without a successful Create is fatal */
3973 osi_Assert(code == 0);
3974 Log("Attaching orphaned %s to volume's root dir as %s\n",
3975 ((class == vLarge) ? "directory" : "file"), npath);
3977 } /* for each vnode in the class */
3978 } /* for each class of vnode */
3980 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
3982 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
3984 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
3986 osi_Assert(code == 0);
3987 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
3990 DFlush(); /* Flush the changes */
/* with no root dir there is nowhere to attach orphans, so the write-back
 * pass below treats them as ignored instead */
3991 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
3992 Log("Cannot attach orphaned files and directories: Root directory not found\n");
3993 orphans = ORPH_IGNORE;
3996 /* Write out all changed vnodes. Orphaned files and directories
3997 * will get removed here also (if requested).
3999 for (class = 0; class < nVNODECLASSES; class++) {
4000 afs_sfsize_t nVnodes = salvinfo->vnodeInfo[class].nVnodes;
4001 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
4002 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
4003 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
4004 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
4005 for (i = 0; i < nVnodes; i++) {
4006 struct VnodeEssence *vnp = &vnodes[i];
4007 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
4009 /* If the vnode is good but is unclaimed (not listed in
4010 * any directory entries), then it is orphaned.
4013 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
4014 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
/* only vnodes flagged as changed, or carrying a link-count correction, are
 * read back from the index and rewritten */
4018 if (vnp->changed || vnp->count) {
4021 IH_IREAD(salvinfo->vnodeInfo[class].handle,
4022 vnodeIndexOffset(vcp, vnodeNumber),
4023 (char *)&vnode, sizeof(vnode));
4024 osi_Assert(nBytes == sizeof(vnode));
4026 vnode.parent = vnp->parent;
4027 oldCount = vnode.linkCount;
/* count is the correction accumulated during the salvage; subtracting it
 * yields the corrected link count */
4028 vnode.linkCount = vnode.linkCount - vnp->count;
4031 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4033 if (!vnp->todelete) {
4034 /* Orphans should have already been attached (if requested) */
4035 osi_Assert(orphans != ORPH_ATTACH);
4036 oblocks += vnp->blockCount;
4039 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4041 BlocksInVolume -= vnp->blockCount;
/* release the data inode, if the vnode has one, before clearing the vnode */
4043 if (VNDISK_GET_INO(&vnode)) {
4045 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4046 osi_Assert(code == 0);
/* the zeroed vnode is what gets written back for a removed entry */
4048 memset(&vnode, 0, sizeof(vnode));
4050 } else if (vnp->count) {
4052 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4055 vnode.modeBits = vnp->modeBits;
4058 vnode.dataVersion++;
4061 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4062 vnodeIndexOffset(vcp, vnodeNumber),
4063 (char *)&vnode, sizeof(vnode));
4064 osi_Assert(nBytes == sizeof(vnode));
4066 salvinfo->VolumeChanged = 1;
/* summarise orphans unless running in Showmode */
4070 if (!Showmode && ofiles) {
4071 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4073 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
/* release the name strings held by the in-memory vnode entries */
4077 for (class = 0; class < nVNODECLASSES; class++) {
4078 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4079 for (i = 0; i < vip->nVnodes; i++)
4080 if (vip->vnodes[i].name)
4081 free(vip->vnodes[i].name);
4088 /* Set correct resource utilization statistics */
4089 volHeader.filecount = FilesInVolume;
4090 volHeader.diskused = BlocksInVolume;
4092 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
4093 if (volHeader.uniquifier < (maxunique + 1)) {
4095 Log("Volume uniquifier is too low; fixed\n");
4096 /* Plus 2,000 in case there are workstations out there with
4097 * cached vnodes that have since been deleted
4099 volHeader.uniquifier = (maxunique + 1 + 2000);
4103 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4104 "Only use this salvaged volume to copy data to another volume; "
4105 "do not continue to use this volume (%lu) as-is.\n",
4106 afs_printable_uint32_lu(vid));
4109 #ifdef FSSYNC_BUILD_CLIENT
4110 if (!Testing && salvinfo->VolumeChanged && salvinfo->useFSYNC) {
4111 afs_int32 fsync_code;
4113 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4115 Log("Error trying to tell the fileserver to break callbacks for "
4116 "changed volume %lu; error code %ld\n",
4117 afs_printable_uint32_lu(vid),
4118 afs_printable_int32_ld(fsync_code));
4120 salvinfo->VolumeChanged = 0;
4123 #endif /* FSSYNC_BUILD_CLIENT */
4125 /* Turn off the inUse bit; the volume's been salvaged! */
4126 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4127 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4128 volHeader.inService = 1; /* allow service again */
/* needsCallback records whether VolumeChanged is still set at this point */
4129 volHeader.needsCallback = (salvinfo->VolumeChanged != 0);
4130 volHeader.dontSalvage = DONT_SALVAGE;
4131 salvinfo->VolumeChanged = 0;
/* persist the repaired header; a short write is fatal */
4133 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4134 osi_Assert(nBytes == sizeof(volHeader));
4137 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
4138 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
4139 FilesInVolume, BlocksInVolume);
/* done with both vnode index handles */
4142 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4143 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
/*
 * ClearROInUseBit: read the volume header through the summary's
 * volumeInfoHandle, mark it as not in use, not needing salvage, in service
 * and DONT_SALVAGE, and write it straight back. A short read, bad magic or
 * short write trips osi_Assert.
 */
4149 ClearROInUseBit(struct VolumeSummary *summary)
4151 IHandle_t *h = summary->volumeInfoHandle;
4152 afs_sfsize_t nBytes;
4154 VolumeDiskData volHeader;
/* fetch the current header and make sure it really is a volume info block */
4156 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4157 osi_Assert(nBytes == sizeof(volHeader));
4158 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
/* same state flags that SalvageVolume sets on a salvaged volume */
4159 volHeader.inUse = 0;
4160 volHeader.needsSalvaged = 0;
4161 volHeader.inService = 1;
4162 volHeader.dontSalvage = DONT_SALVAGE;
/* write the whole header back in place */
4164 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4165 osi_Assert(nBytes == sizeof(volHeader));
4170 * Possibly delete the volume.
4172 * deleteMe - Always do so, only a partial volume.
/*
 * MaybeZapVolume: when the volume is read-only or deleteMe is set, log why
 * and delete it (disk header, header file, and a delete notification to the
 * fileserver when FSYNC is in use). Otherwise, unless 'check' is set, log
 * the failed read-write salvage and abort.
 */
4175 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4176 char *message, int deleteMe, int check)
4178 if (readOnly(isp) || deleteMe) {
/* nothing can be deleted without a volume summary that names a header file */
4179 if (isp->volSummary && isp->volSummary->fileName) {
4182 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
4184 Log("It will be deleted on this server (you may find it elsewhere)\n");
4187 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
4189 Log("it will be deleted instead. It should be recloned.\n");
/* NOTE(review): sprintf places no bound on the result; confirm that 'path'
 * is large enough for fileSysPath plus the separator plus fileName */
4194 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, isp->volSummary->fileName);
4196 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4198 Log("Error %ld destroying volume disk header for volume %lu\n",
4199 afs_printable_int32_ld(code),
4200 afs_printable_uint32_lu(isp->volumeId));
4203 /* make sure we actually delete the fileName file; ENOENT
4204 * is fine, since VDestroyVolumeDiskHeader probably already
4206 if (unlink(path) && errno != ENOENT) {
4207 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4209 if (salvinfo->useFSYNC) {
4210 AskDelete(salvinfo, isp->volumeId);
4212 isp->volSummary->deleted = 1;
4215 } else if (!check) {
4216 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
4218 Abort("Salvage of volume %u aborted\n", isp->volumeId);
4222 #ifdef AFS_DEMAND_ATTACH_FS
4224 * Locks a volume on disk for salvaging.
4226 * @param[in] volumeId volume ID to lock
4228 * @return operation status
4230 * @retval -1 volume lock raced with a fileserver restart; all volumes must be
4231 * checked out and locked again
4236 LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
4241 /* should always be WRITE_LOCK, but keep the lock-type logic all
4242 * in one place, in VVolLockType. Params will be ignored, but
4243 * try to provide what we're logically doing. */
4244 locktype = VVolLockType(V_VOLUPD, 1);
/* take the on-disk lock for this volume on the partition being salvaged
 * (presumably without blocking, going by the NB suffix -- TODO confirm) */
4246 code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
/* EBUSY gets its own message; the second Abort reports any other error code */
4248 if (code == EBUSY) {
4249 Abort("Someone else appears to be using volume %lu; Aborted\n",
4250 afs_printable_uint32_lu(volumeId));
4252 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4253 afs_printable_int32_ld(code),
4254 afs_printable_uint32_lu(volumeId));
/* with the lock held, have the fileserver confirm the volume is still checked
 * out for salvaging; SYNC_DENIED is handled as a retry case, any other
 * non-OK code is fatal */
4257 code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPathName, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4258 if (code == SYNC_DENIED) {
4259 /* need to retry checking out volumes */
4262 if (code != SYNC_OK) {
4263 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4264 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4267 /* set inUse = programType in the volume header to ensure that nobody
4268 * tries to use this volume again without salvaging, if we somehow crash
4269 * or otherwise exit before finishing the salvage.
4273 struct VolumeHeader header;
4274 struct VolumeDiskHeader diskHeader;
4275 struct VolumeDiskData volHeader;
4277 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
4282 DiskToVolumeHeader(&header, &diskHeader);
4284 IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
4285 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4286 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4292 volHeader.inUse = programType;
4294 /* If we can't re-write the header, bail out and error. We don't
4295 * assert when reading the header, since it's possible the
4296 * header isn't really there (when there's no data associated
4297 * with the volume; we just delete the vol header file in that
4298 * case). But if it's there enough that we can read it, but
4299 * somehow we cannot write to it to signify we're salvaging it,
4300 * we've got a big problem and we cannot continue. */
/* the write-back of the in-use marker must succeed in full */
4301 osi_Assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
4308 #endif /* AFS_DEMAND_ATTACH_FS */
/*
 * AskOffline: ask the fileserver, via an FSYNC_VOL_OFF request, to take the
 * volume offline for salvaging. The request is tried up to three times; a
 * denial, a protocol mismatch, or still not having SYNC_OK after the loop
 * ends in Abort.
 */
4311 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
/* start from a zeroed response so stale fields are never interpreted */
4316 memset(&res, 0, sizeof(res));
4318 for (i = 0; i < 3; i++) {
4319 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4320 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4322 if (code == SYNC_OK) {
4324 } else if (code == SYNC_DENIED) {
/* two wordings exist for a denial: "may be required" and "is required" */
4326 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4328 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4329 Abort("Salvage aborted\n");
4330 } else if (code == SYNC_BAD_COMMAND) {
4331 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
/* the hint logged depends on whether this binary is built for demand attach
 * and on which kind of fileserver answered */
4334 #ifdef AFS_DEMAND_ATTACH_FS
4335 Log("AskOffline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4337 Log("AskOffline: fileserver is DAFS but we are not.\n");
4340 #ifdef AFS_DEMAND_ATTACH_FS
4341 Log("AskOffline: fileserver is not DAFS but we are.\n");
4343 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4346 Abort("Salvage aborted\n");
/* other failures: drop the FSSYNC client connection before the next attempt */
4349 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4350 FSYNC_clientFinis();
/* anything other than SYNC_OK after the retry loop is fatal */
4354 if (code != SYNC_OK) {
4355 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4356 Abort("Salvage aborted\n");
4360 /* don't want to pass around state; remember it here */
/* cached answer to "is the fileserver DAFS?"; -1 presumably means that it has
 * not been determined yet (TODO confirm against the assignments) */
4361 static int isDAFS = -1;
4365 afs_int32 code, i, ret = 0;
4368 /* we don't care if we race. the answer shouldn't change */
4372 memset(&res, 0, sizeof(res));
/* probe the fileserver with an FSYNC_VOL_QUERY_VOP request for volume id 1,
 * up to three times; the branches below classify the response code */
4374 for (i = 0; i < 3; i++) {
4375 code = FSYNC_VolOp(1, NULL,
4376 FSYNC_VOL_QUERY_VOP, FSYNC_SALVAGE, &res);
4378 if (code == SYNC_OK) {
4381 } else if (code == SYNC_DENIED) {
4384 } else if (code == SYNC_BAD_COMMAND) {
4387 } else if (code == SYNC_FAILED) {
/* a failure whose reason is "unknown volume id" is distinguished from other
 * failures */
4388 if (res.hdr.reason == FSYNC_UNKNOWN_VOLID)
/* drop the FSSYNC client connection before trying again */
4394 Log("AskDAFS: request to query fileserver failed; trying again...\n");
4395 FSYNC_clientFinis();
/*
 * AskOnline: ask the fileserver, via an FSYNC_VOL_ON request, to bring the
 * volume back online. Up to three attempts are made; failures are logged
 * but, in the code visible here, never abort the salvage.
 */
4405 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4409 for (i = 0; i < 3; i++) {
4410 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4411 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4413 if (code == SYNC_OK) {
4415 } else if (code == SYNC_DENIED) {
4416 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4417 } else if (code == SYNC_BAD_COMMAND) {
4418 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4420 Log("AskOnline: please make sure file server binaries are same version.\n");
/* other failures: drop the FSSYNC client connection before the next attempt */
4424 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
4425 FSYNC_clientFinis();
4432 AskDelete(struct SalvInfo *salvinfo, VolumeId volumeId)
4436 for (i = 0; i < 3; i++) {
4437 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4438 FSYNC_VOL_DONE, FSYNC_SALVAGE, NULL);
4440 if (code == SYNC_OK) {
4442 } else if (code == SYNC_DENIED) {
4443 Log("AskOnline: file server denied DONE request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4444 } else if (code == SYNC_BAD_COMMAND) {
4445 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4448 #ifdef AFS_DEMAND_ATTACH_FS
4449 Log("AskOnline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4451 Log("AskOnline: fileserver is DAFS but we are not.\n");
4454 #ifdef AFS_DEMAND_ATTACH_FS
4455 Log("AskOnline: fileserver is not DAFS but we are.\n");
4457 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4463 Log("AskOnline: request for fileserver to delete volume failed; trying again...\n");
4464 FSYNC_clientFinis();
4471 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4473 /* Volume parameter is passed in case iopen is upgraded in future to
4474 * require a volume Id to be passed
4477 IHandle_t *srcH, *destH;
4478 FdHandle_t *srcFdP, *destFdP;
4480 afs_foff_t size = 0;
4482 IH_INIT(srcH, device, rwvolume, inode1);
4483 srcFdP = IH_OPEN(srcH);
4484 osi_Assert(srcFdP != NULL);
4485 IH_INIT(destH, device, rwvolume, inode2);
4486 destFdP = IH_OPEN(destH);
4487 while ((nBytes = FDH_PREAD(srcFdP, buf, sizeof(buf), size)) > 0) {
4488 osi_Assert(FDH_PWRITE(destFdP, buf, nBytes, size) == nBytes);
4491 osi_Assert(nBytes == 0);
4492 FDH_REALLYCLOSE(srcFdP);
4493 FDH_REALLYCLOSE(destFdP);
// PrintInodeList: log every ViceInodeInfo record found in the inode file
// that salvinfo->inodeFd refers to.
4500 PrintInodeList(struct SalvInfo *salvinfo)
4502 struct ViceInodeInfo *ip;
4503 struct ViceInodeInfo *buf;
4506 afs_sfsize_t st_size;
// the whole inode file is read into a single heap buffer sized by OS_SIZE
4508 st_size = OS_SIZE(salvinfo->inodeFd);
4509 osi_Assert(st_size >= 0);
4510 buf = (struct ViceInodeInfo *)malloc(st_size);
4511 osi_Assert(buf != NULL);
// the record count is the file size divided by the record size
4512 nInodes = st_size / sizeof(struct ViceInodeInfo);
4513 osi_Assert(OS_READ(salvinfo->inodeFd, buf, st_size) == st_size);
// one log line per record: inode number, link count, byte count, four params
4514 for (ip = buf; nInodes--; ip++) {
4515 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
4516 PrintInode(stmp, ip->inodeNumber), ip->linkCount,
4517 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
4518 ip->u.param[2], ip->u.param[3]);
4524 PrintInodeSummary(struct SalvInfo *salvinfo)
4527 struct InodeSummary *isp;
4529 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4530 isp = &salvinfo->inodeSummary[i];
4531 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
4536 PrintVolumeSummary(struct SalvInfo *salvinfo)
4539 struct VolumeSummary *vsp;
4541 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; vsp++, i++) {
4542 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
4552 osi_Assert(0); /* Fork is never executed in the NT code path */
/* demand-attach builds only: a salvageserver child (f == 0) re-establishes
 * its own sync connection(s), going by the names of the calls guarded below */
4556 #ifdef AFS_DEMAND_ATTACH_FS
4557 if ((f == 0) && (programType == salvageServer)) {
4558 /* we are a salvageserver child */
4559 #ifdef FSSYNC_BUILD_CLIENT
4560 VChildProcReconnectFS_r();
4562 #ifdef SALVSYNC_BUILD_CLIENT
4566 #endif /* AFS_DEMAND_ATTACH_FS */
4567 #endif /* !AFS_NT40_ENV */
/* demand-attach builds only: extra handling applies when running as the
 * salvageserver, separately gated on the SALVSYNC and FSSYNC client builds */
4577 #ifdef AFS_DEMAND_ATTACH_FS
4578 if (programType == salvageServer) {
4579 #ifdef SALVSYNC_BUILD_CLIENT
4582 #ifdef FSSYNC_BUILD_CLIENT
4586 #endif /* AFS_DEMAND_ATTACH_FS */
/* when called from a thread other than main_thread, end only that thread,
 * handing 'code' over as its exit value */
4589 if (main_thread != pthread_self())
4590 pthread_exit((void *)code);
/* reap one child process; wait() failing is treated as fatal */
4603 pid = wait(&status);
4604 osi_Assert(pid != -1);
/* report a core dump by name, then check for death by signal or a nonzero
 * exit status */
4605 if (WCOREDUMP(status))
4606 Log("\"%s\" core dumped!\n", prog);
4607 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp: format a time as local time for log output.
 *
 * clock     - the time to format
 * precision - nonzero for "MM/DD/YYYY HH:MM:SS", zero for "MM/DD/YYYY HH:MM"
 *
 * Returns a pointer to a static buffer, which is overwritten by the next
 * call. If the time cannot be converted or the formatted text does not fit
 * in the buffer, the buffer holds "(invalid time)" instead.
 */
char *
TimeStamp(time_t clock, int precision)
{
    static const char placeholder[] = "(invalid time)";
    static char timestamp[20];
    const char *format;
    struct tm *lt;

    format = precision ? "%m/%d/%Y %H:%M:%S" : "%m/%d/%Y %H:%M";

    /* localtime() can return NULL for values it cannot represent, and
     * strftime() returns 0 (leaving the buffer indeterminate) when the
     * result does not fit; neither may reach the caller as garbage */
    lt = localtime(&clock);
    if (lt == NULL
        || strftime(timestamp, sizeof(timestamp), format, lt) == 0) {
        memcpy(timestamp, placeholder, sizeof(placeholder));
    }
    return timestamp;
}
/*
 * CheckLogFile: make sure logFile is usable. The existing log at log_path
 * is renamed to log_path with ".old" appended, a fresh log is opened for
 * appending, and stdout is the fallback (per the comment below) when that
 * open fails.
 */
4626 CheckLogFile(char * log_path)
4628 char oldSlvgLog[AFSDIR_PATH_MAX];
4630 #ifndef AFS_NT40_ENV
/* NOTE(review): strcpy and strcat place no bound on the result; a log_path
 * within four bytes of AFSDIR_PATH_MAX would overrun oldSlvgLog */
4637 strcpy(oldSlvgLog, log_path);
4638 strcat(oldSlvgLog, ".old");
/* keep the previous log around under the .old name, then start a new one */
4640 renamefile(log_path, oldSlvgLog);
4641 logFile = afs_fopen(log_path, "a");
4643 if (!logFile) { /* still nothing, use stdout */
4647 #ifndef AFS_NAMEI_ENV
4648 AFS_DEBUG_IOPS_LOG(logFile);
4653 #ifndef AFS_NT40_ENV
/*
 * TimeStampLogFile: create an additional hard link to the log file whose
 * name is log_path followed by a local-time stamp of the form
 * .YYYY-MM-DD.HH:MM:SS. Failure to create the link is deliberately ignored.
 */
4655 TimeStampLogFile(char * log_path)
4657 char stampSlvgLog[AFSDIR_PATH_MAX];
/* NOTE(review): the localtime result is dereferenced without a NULL check */
4662 lt = localtime(&now);
/* tm_year counts from 1900 and tm_mon from 0, hence the adjustments */
4663 (void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
4664 "%s.%04d-%02d-%02d.%02d:%02d:%02d",
4665 log_path, lt->tm_year + 1900,
4666 lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
4669 /* try to link the logfile to a timestamped filename */
4670 /* if it fails, oh well, nothing we can do */
4671 link(log_path, stampSlvgLog);
4680 #ifndef AFS_NT40_ENV
/* Message for the case where logging goes to syslog and there is no file
 * to display. (The condition guarding it is not visible in this excerpt.) */
4682 printf("Can't show log since using syslog.\n");
/* Open the salvage log read-only, reusing logFile (the same variable the
 * logging routines write through). */
4693 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
4696 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
/* Read the log back in chunks of at most sizeof(line) - 1 characters. */
4699 while (fgets(line, sizeof(line), logFile))
/*
 * Log: printf-style logging entry point.
 *
 * The message is first rendered into the local buffer tmp (bounded by
 * sizeof tmp), then sent to syslog at LOG_INFO and/or written to logFile
 * prefixed with a timestamp. NOTE(review): the condition choosing between
 * the two sinks is not visible in this excerpt.
 *
 * format - printf-style format string, followed by its arguments.
 */
4706 Log(const char *format, ...)
4712 va_start(args, format);
4713 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4715 #ifndef AFS_NT40_ENV
/* tmp is passed as data under "%s", so '%' characters in the rendered
 * message are not interpreted a second time. */
4717 syslog(LOG_INFO, "%s", tmp);
4721 gettimeofday(&now, 0);
/* File output is prefixed with TimeStamp(now.tv_sec, 1). */
4722 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort: printf-style reporting of a fatal condition.
 *
 * The message is rendered into tmp (bounded by sizeof tmp) and emitted to
 * syslog and/or logFile. NOTE(review): what happens after the message is
 * written is not visible in this excerpt.
 *
 * format - printf-style format string, followed by its arguments.
 */
4728 Abort(const char *format, ...)
4733 va_start(args, format);
4734 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4736 #ifndef AFS_NT40_ENV
/* NOTE(review): logged at LOG_INFO even though this is the abort path;
 * confirm a higher severity is not intended. */
4738 syslog(LOG_INFO, "%s", tmp);
/* Unlike Log(), the file output carries no timestamp prefix. */
4742 fprintf(logFile, "%s", tmp);
/*
 * ToString: allocate heap storage sized for a copy of s.
 *
 * Allocation failure is treated as fatal (assertion), not reported to the
 * caller. NOTE(review): the copy and the return are not visible in this
 * excerpt; presumably s is copied into p and p is returned -- confirm.
 */
4754 ToString(const char *s)
/* strlen(s) + 1: room for the characters plus the terminating NUL. */
4757 p = (char *)malloc(strlen(s) + 1);
4758 osi_Assert(p != NULL);
4763 /* Remove the FORCESALVAGE file */
/*
 * Builds "<path>/FORCESALVAGE" and unlinks it, but only when Testing is
 * off and ForceSalvage is set.
 *
 * path - directory in which to look for the FORCESALVAGE file.
 */
4765 RemoveTheForce(char *path)
4768 struct afs_stat_st force; /* so we can use afs_stat to find it */
/* NOTE(review): unbounded strcpy/strcat; target's declaration (and hence
 * its size) is not visible here, so confirm path plus "/FORCESALVAGE"
 * always fits. */
4769 strcpy(target,path);
4770 strcat(target,"/FORCESALVAGE");
4771 if (!Testing && ForceSalvage) {
/* Unlink only if the file can be stat'ed; unlink's own result is ignored. */
4772 if (afs_stat(target,&force) == 0) unlink(target);
4776 #ifndef AFS_AIX32_ENV
4778 * UseTheForceLuke - see if we can use the force
4781 UseTheForceLuke(char *path)
4783 struct afs_stat_st force;
/* Build "<path>/FORCESALVAGE". NOTE(review): unbounded strcpy/strcat;
 * target's size is not visible in this excerpt. */
4785 strcpy(target,path);
4786 strcat(target,"/FORCESALVAGE");
/* Result is 1 when afs_stat succeeds on that path and 0 otherwise; the
 * stat data placed in force is not otherwise used in the lines shown. */
4788 return (afs_stat(target, &force) == 0);
4792 * UseTheForceLuke - see if we can use the force
4795 * The VRMIX fsck will not muck with the filesystem it is supposedly
4796 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4797 * muck directly with the root inode, which is within the normal
4799 * ListViceInodes() has a side effect of setting ForceSalvage if
4800 * it detects a need, based on root inode examination.
4803 UseTheForceLuke(char *path)
/* This variant (presumably the AFS_AIX32_ENV branch -- confirm) never
 * reports a force request itself: it unconditionally returns 0 and does
 * not use path in the lines shown. */
4806 return 0; /* sorry OB1 */
4811 /* NT support routines */
/* Cached path of the running executable; starts out empty (static
 * storage) and is filled in on first use below. */
4813 static char execpathname[MAX_PATH];
/*
 * nt_SalvagePartition: spawn a child copy of this executable to salvage
 * one partition, passing it a job descriptor.
 *
 * partName - partition name copied into the job descriptor.
 * jobn     - job number stored in the job descriptor.
 */
4815 nt_SalvagePartition(char *partName, int jobn)
/* Look the executable path up only once; later calls reuse the cache. */
4820 if (!*execpathname) {
4821 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
/* NOTE(review): the size passed above is MAX_PATH - 1, but the truncation
 * test compares against the hard-coded value 1023. These do not match
 * unless MAX_PATH is 1024; looks like a stale constant that should track
 * the size argument -- confirm. */
4822 if (!n || n == 1023)
4825 job.cj_magic = SALVAGER_MAGIC;
4826 job.cj_number = jobn;
/* NOTE(review): unbounded strcpy into job.cj_part; the field size is not
 * visible in this excerpt. */
4827 (void)strcpy(job.cj_part, partName);
/* The job descriptor is handed to the child as an opaque blob of
 * sizeof(job) bytes. */
4828 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage: interpret a data blob as a childJob_t and open
 * a per-job log file.
 *
 * datap - blob expected to hold a childJob_t.
 * len   - size of that blob in bytes.
 */
4833 nt_SetupPartitionSalvage(void *datap, int len)
4835 childJob_t *jobp = (childJob_t *) datap;
4836 char logname[AFSDIR_PATH_MAX];
/* Sanity checks: the blob must be exactly one childJob_t and carry the
 * expected magic number. (How a mismatch is handled is not visible in this
 * excerpt.) */
4838 if (len != sizeof(childJob_t))
4840 if (jobp->cj_magic != SALVAGER_MAGIC)
/* Log name is the salvage log path with a numeric ".%d" suffix.
 * NOTE(review): sprintf is unbounded; logname holds AFSDIR_PATH_MAX bytes. */
4845 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
4847 logFile = afs_fopen(logname, "w");
4855 #endif /* AFS_NT40_ENV */