2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
89 #include <afs/procmgmt.h>
93 #include <sys/param.h>
97 #endif /* ITIMER_REAL */
103 #include <sys/stat.h>
108 #include <WINNT/afsevent.h>
111 #define WCOREDUMP(x) ((x) & 0200)
114 #include <afs/afsint.h>
115 #include <afs/afs_assert.h>
116 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
117 #if defined(AFS_VFSINCL_ENV)
118 #include <sys/vnode.h>
120 #include <sys/fs/ufs_inode.h>
122 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
123 #include <ufs/ufs/dinode.h>
124 #include <ufs/ffs/fs.h>
126 #include <ufs/inode.h>
129 #else /* AFS_VFSINCL_ENV */
131 #include <ufs/inode.h>
132 #else /* AFS_OSF_ENV */
133 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_ARM_DARWIN_ENV)
134 #include <sys/inode.h>
137 #endif /* AFS_VFSINCL_ENV */
138 #endif /* AFS_SGI_ENV */
141 #include <sys/lockf.h>
145 #include <checklist.h>
147 #if defined(AFS_SGI_ENV)
152 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
155 #include <sys/mnttab.h>
156 #include <sys/mntent.h>
161 #endif /* AFS_SGI_ENV */
162 #endif /* AFS_HPUX_ENV */
167 #include <afs/osi_inode.h>
171 #include <afs/afsutil.h>
172 #include <afs/fileutil.h>
173 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
181 #include <afs/afssyscalls.h>
185 #include "partition.h"
186 #include "daemon_com.h"
188 #include "volume_inline.h"
189 #include "salvsync.h"
190 #include "viceinode.h"
192 #include "volinodes.h" /* header magic number, etc. stuff */
193 #include "vol-salvage.h"
195 #include "vol_internal.h"
197 #include <afs/prs_fs.h>
199 #ifdef FSSYNC_BUILD_CLIENT
200 #include "vg_cache.h"
208 extern void *calloc();
210 static char *TimeStamp(time_t clock, int precision);
213 int debug; /* -d flag */
214 extern int Testing; /* -n flag */
215 int ListInodeOption; /* -i flag */
216 int ShowRootFiles; /* -r flag */
217 int RebuildDirs; /* -sal flag */
218 int Parallel = 4; /* -para X flag */
219 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
220 int forceR = 0; /* -b flag */
221 int ShowLog = 0; /* -showlog flag */
222 int ShowSuid = 0; /* -showsuid flag */
223 int ShowMounts = 0; /* -showmounts flag */
224 int orphans = ORPH_IGNORE; /* -orphans option */
229 int useSyslog = 0; /* -syslog flag */
230 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
239 #define MAXPARALLEL 32
241 int OKToZap; /* -o flag */
242 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
243 * in the volume header */
245 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
247 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
250 * information that is 'global' to a particular salvage job.
253 Device fileSysDevice; /**< The device number of the current partition
255 char fileSysPath[8]; /**< The path of the mounted partition currently
256 * being salvaged, i.e. the directory containing
257 * the volume headers */
258 char *fileSysPathName; /**< NT needs this to make name pretty log. */
259 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
260 int VGLinkH_cnt; /**< # of references to lnk handle. */
261 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
264 char *fileSysDeviceName; /**< The block device where the file system being
265 * salvaged was mounted */
266 char *filesysfulldev;
268 int VolumeChanged; /**< Set by any routine which would change the
269 * volume in a way which would require callbacks
270 * to be broken if the volume was put back on
271 * line by an active file server */
273 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
274 * header dealt with */
276 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
277 FD_t inodeFd; /**< File descriptor for inode file */
279 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
280 int nVolumes; /**< Number of volumes (read-write and read-only)
281 * in volume summary */
282 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
285 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
286 * vnodes in the volume that
287 * we are currently looking
289 int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
290 * to contact the fileserver over FSYNC */
297 /* Forward declarations */
298 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
299 static int AskVolumeSummary(struct SalvInfo *salvinfo,
300 VolumeId singleVolumeNumber);
302 #ifdef AFS_DEMAND_ATTACH_FS
303 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
304 #endif /* AFS_DEMAND_ATTACH_FS */
306 /* Uniquifier stored in the Inode */
311 return (u & 0x3fffff);
313 #if defined(AFS_SGI_EXMAG)
314 return (u & SGI_UNIQMASK);
317 #endif /* AFS_SGI_EXMAG */
324 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
326 return 0; /* otherwise may be transient, e.g. EMFILE */
331 char *save_args[MAX_ARGS];
333 extern pthread_t main_thread;
334 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
338 * Get the salvage lock if not already held. Hold until process exits.
340 * @param[in] locktype READ_LOCK or WRITE_LOCK
343 _ObtainSalvageLock(int locktype)
345 struct VLockFile salvageLock;
350 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
352 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
355 "salvager: There appears to be another salvager running! "
360 "salvager: Error %d trying to acquire salvage lock! "
366 ObtainSalvageLock(void)
368 _ObtainSalvageLock(WRITE_LOCK);
371 ObtainSharedSalvageLock(void)
373 _ObtainSalvageLock(READ_LOCK);
377 #ifdef AFS_SGI_XFS_IOPS_ENV
378 /* Check if the given partition is mounted. For XFS, the root inode is not a
379 * constant. So we check the hard way.
382 IsPartitionMounted(char *part)
385 struct mntent *mntent;
387 osi_Assert(mntfp = setmntent(MOUNTED, "r"));
388 while (mntent = getmntent(mntfp)) {
389 if (!strcmp(part, mntent->mnt_dir))
394 return mntent ? 1 : 1;
397 /* Check if the given inode is the root of the filesystem. */
398 #ifndef AFS_SGI_XFS_IOPS_ENV
400 IsRootInode(struct afs_stat_st *status)
403 * The root inode is not a fixed value in XFS partitions. So we need to
404 * see if the partition is in the list of mounted partitions. This only
405 * affects the SalvageFileSys path, so we check there.
407 return (status->st_ino == ROOTINODE);
412 #ifndef AFS_NAMEI_ENV
413 /* We don't want to salvage big files filesystems, since we can't put volumes on
417 CheckIfBigFilesFS(char *mountPoint, char *devName)
419 struct superblock fs;
422 if (strncmp(devName, "/dev/", 5)) {
423 (void)sprintf(name, "/dev/%s", devName);
425 (void)strcpy(name, devName);
428 if (ReadSuper(&fs, name) < 0) {
429 Log("Unable to read superblock. Not salvaging partition %s.\n",
433 if (IsBigFilesFileSystem(&fs)) {
434 Log("Partition %s is a big files filesystem, not salvaging.\n",
444 #define HDSTR "\\Device\\Harddisk"
445 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
447 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
453 static int dowarn = 1;
455 if (!QueryDosDevice(p1->devName, res1, RES_LEN - 1))
457 if (strncmp(res1, HDSTR, HDLEN)) {
460 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
461 res1, HDSTR, p1->devName);
464 if (!QueryDosDevice(p2->devName, res2, RES_LEN - 1))
466 if (strncmp(res2, HDSTR, HDLEN)) {
469 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
470 res2, HDSTR, p2->devName);
474 return (0 == _strnicmp(res1, res2, RES_LEN - 1));
477 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
480 /* This assumes that two partitions with the same device number divided by
481 * PartsPerDisk are on the same disk.
484 SalvageFileSysParallel(struct DiskPartition64 *partP)
487 struct DiskPartition64 *partP;
488 int pid; /* Pid for this job */
489 int jobnumb; /* Log file job number */
490 struct job *nextjob; /* Next partition on disk to salvage */
492 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
493 struct job *thisjob = 0;
494 static int numjobs = 0;
495 static int jobcount = 0;
501 char logFileName[256];
505 /* We have a partition to salvage. Copy it into thisjob */
506 thisjob = (struct job *)malloc(sizeof(struct job));
508 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
511 memset(thisjob, 0, sizeof(struct job));
512 thisjob->partP = partP;
513 thisjob->jobnumb = jobcount;
515 } else if (jobcount == 0) {
516 /* We are asking to wait for all jobs (partp == 0), yet we never
519 Log("No file system partitions named %s* found; not salvaged\n",
520 VICE_PARTITION_PREFIX);
524 if (debug || Parallel == 1) {
526 SalvageFileSys(thisjob->partP, 0);
533 /* Check to see if thisjob is for a disk that we are already
534 * salvaging. If it is, link it in as the next job to do. The
535 * jobs array has 1 entry per disk being salvaged. numjobs is
536 * the total number of disks currently being salvaged. In
537 * order to keep the jobs array compact, when a disk is
538 * completed, the highest element in the jobs array is moved
539 * down to the now open slot.
541 for (j = 0; j < numjobs; j++) {
542 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
543 /* On same disk, add it to this list and return */
544 thisjob->nextjob = jobs[j]->nextjob;
545 jobs[j]->nextjob = thisjob;
552 /* Loop until we start thisjob or until all existing jobs are finished */
553 while (thisjob || (!partP && (numjobs > 0))) {
554 startjob = -1; /* No new job to start */
556 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
557 /* Either the max jobs are running or we have to wait for all
558 * the jobs to finish. In either case, we wait for at least one
559 * job to finish. When it's done, clean up after it.
561 pid = wait(&wstatus);
562 osi_Assert(pid != -1);
563 for (j = 0; j < numjobs; j++) { /* Find which job it is */
564 if (pid == jobs[j]->pid)
567 osi_Assert(j < numjobs);
568 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
569 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
572 numjobs--; /* job no longer running */
573 oldjob = jobs[j]; /* remember */
574 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
575 free(oldjob); /* free the old job */
577 /* If there is another partition on the disk to salvage, then
578 * say we will start it (startjob). If not, then put thisjob there
579 * and say we will start it.
581 if (jobs[j]) { /* Another partitions to salvage */
582 startjob = j; /* Will start it */
583 } else { /* There is not another partition to salvage */
585 jobs[j] = thisjob; /* Add thisjob */
587 startjob = j; /* Will start it */
589 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
590 startjob = -1; /* Don't start it - already running */
594 /* We don't have to wait for a job to complete */
596 jobs[numjobs] = thisjob; /* Add this job */
598 startjob = numjobs; /* Will start it */
602 /* Start up a new salvage job on a partition in job slot "startjob" */
603 if (startjob != -1) {
605 Log("Starting salvage of file system partition %s\n",
606 jobs[startjob]->partP->name);
608 /* For NT, we not only fork, but re-exec the salvager. Pass in the
609 * commands and pass the child job number via the data path.
612 nt_SalvagePartition(jobs[startjob]->partP->name,
613 jobs[startjob]->jobnumb);
614 jobs[startjob]->pid = pid;
619 jobs[startjob]->pid = pid;
625 for (fd = 0; fd < 16; fd++)
632 openlog("salvager", LOG_PID, useSyslogFacility);
636 (void)afs_snprintf(logFileName, sizeof logFileName,
638 AFSDIR_SERVER_SLVGLOG_FILEPATH,
639 jobs[startjob]->jobnumb);
640 logFile = afs_fopen(logFileName, "w");
645 SalvageFileSys1(jobs[startjob]->partP, 0);
650 } /* while ( thisjob || (!partP && numjobs > 0) ) */
652 /* If waited for all jobs to complete, now collect log files and return */
654 if (!useSyslog) /* if syslogging - no need to collect */
657 for (i = 0; i < jobcount; i++) {
658 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
659 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
660 if ((passLog = afs_fopen(logFileName, "r"))) {
661 while (fgets(buf, sizeof(buf), passLog)) {
666 (void)unlink(logFileName);
675 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
677 if (!canfork || debug || Fork() == 0) {
678 SalvageFileSys1(partP, singleVolumeNumber);
679 if (canfork && !debug) {
684 Wait("SalvageFileSys");
688 get_DevName(char *pbuffer, char *wpath)
690 char pbuf[128], *ptr;
691 strcpy(pbuf, pbuffer);
692 ptr = (char *)strrchr(pbuf, OS_DIRSEPC);
698 ptr = (char *)strrchr(pbuffer, OS_DIRSEPC);
700 strcpy(pbuffer, ptr + 1);
707 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
710 char inodeListPath[256];
711 FD_t inodeFile = INVALID_FD;
712 static char tmpDevName[100];
713 static char wpath[100];
714 struct VolumeSummary *vsp, *esp;
718 struct SalvInfo l_salvinfo;
719 struct SalvInfo *salvinfo = &l_salvinfo;
722 memset(salvinfo, 0, sizeof(*salvinfo));
725 if (inodeFile != INVALID_FD) {
727 inodeFile = INVALID_FD;
729 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
730 Abort("Raced too many times with fileserver restarts while trying to "
731 "checkout/lock volumes; Aborted\n");
733 #ifdef AFS_DEMAND_ATTACH_FS
735 /* unlock all previous volume locks, since we're about to lock them
737 VLockFileReinit(&partP->volLockFile);
739 #endif /* AFS_DEMAND_ATTACH_FS */
741 salvinfo->fileSysPartition = partP;
742 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
743 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
746 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
747 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
748 name = partP->devName;
750 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
751 strcpy(tmpDevName, partP->devName);
752 name = get_DevName(tmpDevName, wpath);
753 salvinfo->fileSysDeviceName = name;
754 salvinfo->filesysfulldev = wpath;
757 if (singleVolumeNumber) {
758 #ifndef AFS_DEMAND_ATTACH_FS
759 /* only non-DAFS locks the partition when salvaging a single volume;
760 * DAFS will lock the individual volumes in the VG */
761 VLockPartition(partP->name);
762 #endif /* !AFS_DEMAND_ATTACH_FS */
766 /* salvageserver already setup fssync conn for us */
767 if ((programType != salvageServer) && !VConnectFS()) {
768 Abort("Couldn't connect to file server\n");
771 salvinfo->useFSYNC = 1;
772 AskOffline(salvinfo, singleVolumeNumber);
773 #ifdef AFS_DEMAND_ATTACH_FS
774 if (LockVolume(salvinfo, singleVolumeNumber)) {
777 #endif /* AFS_DEMAND_ATTACH_FS */
780 salvinfo->useFSYNC = 0;
781 VLockPartition(partP->name);
785 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
788 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
789 partP->name, name, (Testing ? "(READONLY mode)" : ""));
791 Log("***Forced salvage of all volumes on this partition***\n");
796 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
803 osi_Assert((dirp = opendir(salvinfo->fileSysPath)) != NULL);
804 while ((dp = readdir(dirp))) {
805 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
806 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
808 Log("Removing old salvager temp files %s\n", dp->d_name);
809 strcpy(npath, salvinfo->fileSysPath);
810 strcat(npath, OS_DIRSEP);
811 strcat(npath, dp->d_name);
817 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
819 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
820 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
822 snprintf(inodeListPath, 255, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
826 inodeFile = OS_OPEN(inodeListPath, O_RDWR|O_TRUNC|O_CREAT, 0666);
827 if (inodeFile == INVALID_FD) {
828 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
831 /* Using nt_unlink here since we're really using the delete on close
832 * semantics of unlink. In most places in the salvager, we really do
833 * mean to unlink the file at that point. Those places have been
834 * modified to actually do that so that the NT crt can be used there.
836 * jaltman - On NT delete on close cannot be applied to a file while the
837 * process has an open file handle that does not have DELETE file
838 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
839 * delete privileges. As a result the nt_unlink() call will always
842 code = nt_unlink(inodeListPath);
844 code = unlink(inodeListPath);
847 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
850 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
854 salvinfo->inodeFd = inodeFile;
855 if (salvinfo->inodeFd == INVALID_FD)
856 Abort("Temporary file %s is missing...\n", inodeListPath);
857 OS_SEEK(salvinfo->inodeFd, 0L, SEEK_SET);
858 if (ListInodeOption) {
859 PrintInodeList(salvinfo);
862 /* enumerate volumes in the partition.
863 * figure out sets of read-only + rw volumes.
864 * salvage each set, read-only volumes first, then read-write.
865 * Fix up inodes on last volume in set (whether it is read-write
868 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
872 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
873 i < salvinfo->nVolumesInInodeFile; i = j) {
874 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
876 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
878 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
879 struct VolumeSummary *tsp;
880 /* Scan volume list (from partition root directory) looking for the
881 * current rw volume number in the volume list from the inode scan.
882 * If there is one here that is not in the inode volume list,
884 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
886 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
888 /* Now match up the volume summary info from the root directory with the
889 * entry in the volume list obtained from scanning inodes */
890 salvinfo->inodeSummary[j].volSummary = NULL;
891 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
892 if (tsp->header.id == vid) {
893 salvinfo->inodeSummary[j].volSummary = tsp;
899 /* Salvage the group of volumes (several read-only + 1 read/write)
900 * starting with the current read-only volume we're looking at.
902 SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
905 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
906 for (; vsp < esp; vsp++) {
908 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
911 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
912 RemoveTheForce(salvinfo->fileSysPath);
914 if (!Testing && singleVolumeNumber) {
916 #ifdef AFS_DEMAND_ATTACH_FS
917 /* unlock vol headers so the fs can attach them when we AskOnline */
918 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
919 #endif /* AFS_DEMAND_ATTACH_FS */
921 /* Step through the volumeSummary list and set all volumes on-line.
922 * Most volumes were taken off-line in GetVolumeSummary.
923 * If a volume was deleted, don't tell the fileserver anything, since
924 * we already told the fileserver the volume was deleted back when we
925 * destroyed the volume header.
926 * Also, make sure we bring the singleVolumeNumber back online first.
929 for (j = 0; j < salvinfo->nVolumes; j++) {
930 if (salvinfo->volumeSummaryp[j].header.id == singleVolumeNumber) {
932 if (!salvinfo->volumeSummaryp[j].deleted) {
933 AskOnline(salvinfo, singleVolumeNumber);
939 /* If singleVolumeNumber is not in our volumeSummary, it means that
940 * at least one other volume in the VG is on the partition, but the
941 * RW volume is not. We've already AskOffline'd it by now, though,
942 * so make sure we don't still have the volume checked out. */
943 AskDelete(salvinfo, singleVolumeNumber);
946 for (j = 0; j < salvinfo->nVolumes; j++) {
947 if (salvinfo->volumeSummaryp[j].header.id != singleVolumeNumber) {
948 if (!salvinfo->volumeSummaryp[j].deleted) {
949 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
955 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
956 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
959 OS_CLOSE(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
963 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
966 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, vsp->fileName);
969 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
972 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
974 Log("Error %ld destroying volume disk header for volume %lu\n",
975 afs_printable_int32_ld(code),
976 afs_printable_uint32_lu(vsp->header.id));
979 /* make sure we actually delete the fileName file; ENOENT
980 * is fine, since VDestroyVolumeDiskHeader probably already
982 if (unlink(path) && errno != ENOENT) {
983 Log("Unable to unlink %s (errno = %d)\n", path, errno);
985 if (salvinfo->useFSYNC) {
986 AskDelete(salvinfo, vsp->header.id);
994 CompareInodes(const void *_p1, const void *_p2)
996 const struct ViceInodeInfo *p1 = _p1;
997 const struct ViceInodeInfo *p2 = _p2;
998 if (p1->u.vnode.vnodeNumber == INODESPECIAL
999 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
1000 VolumeId p1rwid, p2rwid;
1002 (p1->u.vnode.vnodeNumber ==
1003 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
1005 (p2->u.vnode.vnodeNumber ==
1006 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1007 if (p1rwid < p2rwid)
1009 if (p1rwid > p2rwid)
1011 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1012 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
1013 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1014 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1015 if (p1->u.vnode.volumeId == p1rwid)
1017 if (p2->u.vnode.volumeId == p2rwid)
1019 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1021 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1022 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1023 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1025 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1027 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1029 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1031 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1033 /* The following tests are reversed, so that the most desirable
1034 * of several similar inodes comes first */
1035 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1036 #ifdef AFS_3DISPARES
1037 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1038 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1041 #ifdef AFS_SGI_EXMAG
1042 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1043 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1048 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1049 #ifdef AFS_3DISPARES
1050 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1051 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1054 #ifdef AFS_SGI_EXMAG
1055 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1056 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1061 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1062 #ifdef AFS_3DISPARES
1063 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1064 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1067 #ifdef AFS_SGI_EXMAG
1068 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1069 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1074 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1075 #ifdef AFS_3DISPARES
1076 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1077 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1080 #ifdef AFS_SGI_EXMAG
1081 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1082 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1091 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1092 struct InodeSummary *summary)
1094 VolumeId volume = ip->u.vnode.volumeId;
1095 VolumeId rwvolume = volume;
1100 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1102 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1104 rwvolume = ip->u.special.parentId;
1105 /* This isn't quite right, as there could (in error) be different
1106 * parent inodes in different special vnodes */
1108 if (maxunique < ip->u.vnode.vnodeUniquifier)
1109 maxunique = ip->u.vnode.vnodeUniquifier;
1113 summary->volumeId = volume;
1114 summary->RWvolumeId = rwvolume;
1115 summary->nInodes = n;
1116 summary->nSpecialInodes = nSpecial;
1117 summary->maxUniquifier = maxunique;
1121 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1123 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1124 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1125 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1130 * Collect list of inodes in file named by path. If a truly fatal error,
1131 * unlink the file and abort. For lesser errors, return -1. The file will
1132 * be unlinked by the caller.
1135 GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolumeNumber)
1139 struct ViceInodeInfo *ip, *ip_save;
1140 struct InodeSummary summary;
1141 char summaryFileName[50];
1142 FD_t summaryFile = INVALID_FD;
1144 char *dev = salvinfo->fileSysPath;
1145 char *wpath = salvinfo->fileSysPath;
1147 char *dev = salvinfo->fileSysDeviceName;
1148 char *wpath = salvinfo->filesysfulldev;
1150 char *part = salvinfo->fileSysPath;
1153 afs_sfsize_t st_size;
1155 /* This file used to come from vfsck; cobble it up ourselves now... */
1157 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1158 singleVolumeNumber ? OnlyOneVolume : 0,
1159 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1161 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1164 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1166 if (forceSal && !ForceSalvage) {
1167 Log("***Forced salvage of all volumes on this partition***\n");
1170 OS_SEEK(inodeFile, 0L, SEEK_SET);
1171 salvinfo->inodeFd = inodeFile;
1172 if (salvinfo->inodeFd == INVALID_FD ||
1173 (st_size = OS_SIZE(salvinfo->inodeFd)) == -1) {
1174 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1176 tdir = (tmpdir ? tmpdir : part);
1178 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1179 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp."));
1181 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1182 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1184 summaryFile = OS_OPEN(summaryFileName, O_RDWR|O_APPEND|O_CREAT, 0666);
1185 if (summaryFile == INVALID_FD) {
1186 Abort("Unable to create inode summary file\n");
1190 /* Using nt_unlink here since we're really using the delete on close
1191 * semantics of unlink. In most places in the salvager, we really do
1192 * mean to unlink the file at that point. Those places have been
1193 * modified to actually do that so that the NT crt can be used there.
1195 * jaltman - As commented elsewhere, this cannot work because fopen()
1196 * does not open files with DELETE and FILE_SHARE_DELETE.
1198 code = nt_unlink(summaryFileName);
1200 code = unlink(summaryFileName);
1203 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1206 if (!canfork || debug || Fork() == 0) {
1207 int nInodes = st_size / sizeof(struct ViceInodeInfo);
1209 OS_CLOSE(summaryFile);
1210 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1211 RemoveTheForce(salvinfo->fileSysPath);
1213 struct VolumeSummary *vsp;
1216 GetVolumeSummary(salvinfo, singleVolumeNumber);
1218 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1220 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1223 Log("%s vice inodes on %s; not salvaged\n",
1224 singleVolumeNumber ? "No applicable" : "No", dev);
1227 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1229 OS_CLOSE(summaryFile);
1231 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1234 if (OS_READ(salvinfo->inodeFd, ip, st_size) != st_size) {
1235 OS_CLOSE(summaryFile);
1236 Abort("Unable to read inode table; %s not salvaged\n", dev);
1238 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1239 if (OS_SEEK(salvinfo->inodeFd, 0, SEEK_SET) == -1
1240 || OS_WRITE(salvinfo->inodeFd, ip, st_size) != st_size) {
1241 OS_CLOSE(summaryFile);
1242 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1247 CountVolumeInodes(ip, nInodes, &summary);
1248 if (OS_WRITE(summaryFile, &summary, sizeof(summary)) != sizeof(summary)) {
1249 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1250 OS_CLOSE(summaryFile);
1253 summary.index += (summary.nInodes);
1254 nInodes -= summary.nInodes;
1255 ip += summary.nInodes;
1258 ip = ip_save = NULL;
1259 /* Following fflush is not fclose, because if it was debug mode would not work */
1260 if (OS_SYNC(summaryFile) == -1) {
1261 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1262 OS_CLOSE(summaryFile);
1265 if (canfork && !debug) {
1270 if (Wait("Inode summary") == -1) {
1271 OS_CLOSE(summaryFile);
1272 Exit(1); /* salvage of this partition aborted */
1276 st_size = OS_SIZE(summaryFile);
1277 osi_Assert(st_size >= 0);
1280 salvinfo->inodeSummary = (struct InodeSummary *)malloc(st_size);
1281 osi_Assert(salvinfo->inodeSummary != NULL);
1282 /* For GNU we need to do lseek to get the file pointer moved. */
1283 osi_Assert(OS_SEEK(summaryFile, 0, SEEK_SET) == 0);
1284 ret = OS_READ(summaryFile, salvinfo->inodeSummary, st_size);
1285 osi_Assert(ret == st_size);
1287 salvinfo->nVolumesInInodeFile = st_size / sizeof(struct InodeSummary);
1288 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1289 salvinfo->inodeSummary[i].volSummary = NULL;
1291 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)st_size);
1292 OS_CLOSE(summaryFile);
/* Takes two pointers that are read as struct VolumeSummary. Summaries are
 * ordered first by header.parent (ascending). For equal parents the code then
 * tests whether either entry has header.id == header.parent (the read-write
 * volume) and finally orders the remaining entries by header.id (ascending).
 * NOTE(review): the statements executed for the two read-write cases are not
 * visible in this excerpt. */
1296 /* Comparison routine for volume sort.
1297 This is set up so that a read-write volume comes immediately before
1298 any read-only clones of that volume */
1300 CompareVolumes(const void *_p1, const void *_p2)
1302 const struct VolumeSummary *p1 = _p1;
1303 const struct VolumeSummary *p2 = _p2;
1304 if (p1->header.parent != p2->header.parent)
1305 return p1->header.parent < p2->header.parent ? -1 : 1;
1306 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1308 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1310 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
/* AskVolumeSummary: only does work when compiled with both
 * FSSYNC_BUILD_CLIENT and AFS_DEMAND_ATTACH_FS, when programType is
 * salvageServer and a single volume number was given. In that case it sends
 * FSYNC_VGCQuery to the fileserver for the volume group, polls while the
 * fileserver reports FSYNC_PART_SCANNING, and then fills
 * salvinfo->volumeSummaryp and salvinfo->nVolumes from the on-disk header of
 * each child id returned in q_res.children[]. */
1314 * Gleans volumeSummary information by asking the fileserver
1316 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1317 * salvaging a whole partition
1319 * @return whether we obtained the volume summary information or not
1320 * @retval 0 success; we obtained the volume summary information
1321 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1323 * @retval 1 we did not get the volume summary information; either the
1324 * fileserver responded with an error, or we are not supposed to
1325 * ask the fileserver for the information (e.g. we are salvaging
1326 * the entire partition or we are not the salvageserver)
1328 * @note for non-DAFS, always returns 1
1331 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1334 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1335 if (programType == salvageServer) {
1336 if (singleVolumeNumber) {
1337 FSSYNC_VGQry_response_t q_res;
1339 struct VolumeSummary *vsp;
1341 struct VolumeDiskHeader diskHdr;
1343 memset(&res, 0, sizeof(res));
1345 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1348 * We must wait for the partition to finish scanning before
1349 * can continue, since we will not know if we got the entire
1350 * VG membership unless the partition is fully scanned.
1351 * We could, in theory, just scan the partition ourselves if
1352 * the VG cache is not ready, but we would be doing the exact
1353 * same scan the fileserver is doing; it will almost always
1354 * be faster to wait for the fileserver. The only exceptions
1355 * are if the partition does not take very long to scan, and
1356 * in that case it's fast either way, so who cares?
1358 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1359 Log("waiting for fileserver to finish scanning partition %s...\n",
1360 salvinfo->fileSysPartition->name);
1362 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1363 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1364 * just so small partitions don't need to wait over 10
1365 * seconds every time, and large partitions are generally
1366 * polled only once every ten seconds. */
/* the assignment inside the conditional resets i itself to 10 once it has
 * grown past 10, so the sleep length never exceeds 10 seconds */
1367 sleep((i > 10) ? (i = 10) : i);
1369 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1373 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1374 /* This can happen if there's no header for the volume
1375 * we're salvaging, or no headers exist for the VG (if
1376 * we're salvaging an RW). Act as if we got a response
1377 * with no VG members. The headers may be created during
1378 * salvaging, if there are inodes in this VG. */
1380 memset(&q_res, 0, sizeof(q_res));
1381 q_res.rw = singleVolumeNumber;
1385 Log("fileserver refused VGCQuery request for volume %lu on "
1386 "partition %s, code %ld reason %ld\n",
1387 afs_printable_uint32_lu(singleVolumeNumber),
1388 salvinfo->fileSysPartition->name,
1389 afs_printable_int32_ld(code),
1390 afs_printable_int32_ld(res.hdr.reason));
/* the query reported a different read-write id than the volume we were asked
 * to salvage: link the request to that id and exit instead of salvaging */
1394 if (q_res.rw != singleVolumeNumber) {
1395 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1396 afs_printable_uint32_lu(singleVolumeNumber),
1397 afs_printable_uint32_lu(q_res.rw));
1398 #ifdef SALVSYNC_BUILD_CLIENT
1399 if (SALVSYNC_LinkVolume(q_res.rw,
1401 salvinfo->fileSysPartition->name,
1403 Log("schedule request failed\n");
1405 #endif /* SALVSYNC_BUILD_CLIENT */
1406 Exit(SALSRV_EXIT_VOLGROUP_LINK);
/* zero-filled array with VOL_VG_MAX_VOLS entries; the loop below visits the
 * same number of child slots */
1409 salvinfo->volumeSummaryp = calloc(VOL_VG_MAX_VOLS, sizeof(struct VolumeSummary));
1410 osi_Assert(salvinfo->volumeSummaryp != NULL);
1412 salvinfo->nVolumes = 0;
1413 vsp = salvinfo->volumeSummaryp;
1415 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1416 char name[VMAXPATHLEN];
1418 if (!q_res.children[i]) {
1422 /* AskOffline for singleVolumeNumber was called much earlier */
1423 if (q_res.children[i] != singleVolumeNumber) {
1424 AskOffline(salvinfo, q_res.children[i]);
1425 if (LockVolume(salvinfo, q_res.children[i])) {
/* read this member's on-disk header; a failure is logged and, according to
 * the message, does not stop the salvage of the group */
1431 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1433 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1434 afs_printable_uint32_lu(q_res.children[i]));
1439 DiskToVolumeHeader(&vsp->header, &diskHdr);
1440 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1441 vsp->fileName = ToString(name);
1442 salvinfo->nVolumes++;
/* sort the nVolumes summaries that were collected */
1446 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1451 Log("Cannot get volume summary from fileserver; falling back to scanning "
1452 "entire partition\n");
1455 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
/* Header callback that GetVolumeSummary passes to VWalkVolumeHeaders. In the
 * visible code only rock is used: it is cast to an afs_int32 pointer, which
 * the existing description below calls the running count. */
1460 * count how many volume headers are found by VWalkVolumeHeaders.
1462 * @param[in] dp the disk partition (unused)
1463 * @param[in] name full path to the .vol header (unused)
1464 * @param[in] hdr the header data (unused)
1465 * @param[in] last whether this is the last try or not (unused)
1466 * @param[in] rock actually an afs_int32*; the running count of how many
1467 * volumes we have found
1472 CountHeader(struct DiskPartition64 *dp, const char *name,
1473 struct VolumeDiskHeader *hdr, int last, void *rock)
1475 afs_int32 *nvols = (afs_int32 *)rock;
/* State shared with the RecordHeader and UnlinkHeader callbacks; both cast
 * their rock argument to a pointer to this struct. GetVolumeSummary fills it
 * in before walking the volume headers. */
1481 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
1484 struct SalvageScanParams {
1485 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1486 * vol id of the VG we're salvaging */
1487 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1488 * we're filling in */
1489 afs_int32 nVolumes; /**< # of vols we've encountered */
1490 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1491 * # of vols we've alloc'd memory for) */
1492 int retry; /**< do we need to retry vol lock/checkout? */
1493 struct SalvInfo *salvinfo; /**< salvage job info */
/* RecordHeader: header callback used by GetVolumeSummary. It converts the
 * on-disk header into a local VolumeSummary, handles the case where the
 * requested single volume turns out to be a clone, checks the header file
 * name against the name derived from the volume id, asks for the volume to be
 * taken offline (and locked under AFS_DEMAND_ATTACH_FS), and finally copies
 * the summary into the slot params->vsp points at. */
1497 * records volume summary info found from VWalkVolumeHeaders.
1499 * Found volumes are also taken offline if they are in the specific volume
1500 * group we are looking for.
1502 * @param[in] dp the disk partition
1503 * @param[in] name full path to the .vol header
1504 * @param[in] hdr the header data
1505 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1506 * @param[in] rock actually a struct SalvageScanParams*, containing the
1507 * information needed to record the volume summary data
1509 * @return operation status
1511 * @retval -1 volume locking raced with fileserver restart; checking out
1512 * and locking volumes needs to be retried
1513 * @retval 1 volume header is mis-named and should be deleted
1516 RecordHeader(struct DiskPartition64 *dp, const char *name,
1517 struct VolumeDiskHeader *hdr, int last, void *rock)
1519 char nameShouldBe[64];
1520 struct SalvageScanParams *params;
1521 struct VolumeSummary summary;
1522 VolumeId singleVolumeNumber;
1523 struct SalvInfo *salvinfo;
1525 params = (struct SalvageScanParams *)rock;
1527 singleVolumeNumber = params->singleVolumeNumber;
1528 salvinfo = params->salvinfo;
/* fill summary.header from the on-disk header handed to the callback */
1530 DiskToVolumeHeader(&summary.header, hdr);
/* the header belongs to the requested volume, but its parent is a different
 * volume id, i.e. we were asked to salvage something that is not the parent */
1532 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1533 && summary.header.parent != singleVolumeNumber) {
1535 if (programType == salvageServer) {
1536 #ifdef SALVSYNC_BUILD_CLIENT
1537 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1538 summary.header.id, summary.header.parent);
1539 if (SALVSYNC_LinkVolume(summary.header.parent,
1543 Log("schedule request failed\n");
1546 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1549 Log("%u is a read-only volume; not salvaged\n",
1550 singleVolumeNumber);
/* the header is of interest when salvaging the whole partition, or when its
 * id or parent matches the requested volume */
1555 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1556 || summary.header.parent == singleVolumeNumber) {
1558 /* check if the header file is incorrectly named */
1560 const char *base = strrchr(name, OS_DIRSEPC);
/* build the file name a header for this volume id is expected to have */
1567 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1568 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1571 if (strcmp(nameShouldBe, base)) {
1572 /* .vol file has wrong name; retry/delete */
1576 if (!badname || last) {
1577 /* only offline the volume if the header is good, or if this is
1578 * the last try looking at it; avoid AskOffline'ing the same vol
1581 if (singleVolumeNumber
1582 && summary.header.id != singleVolumeNumber) {
1583 /* don't offline singleVolumeNumber; we already did that
1586 AskOffline(salvinfo, summary.header.id);
1588 #ifdef AFS_DEMAND_ATTACH_FS
1590 /* don't lock the volume if the header is bad, since we're
1591 * about to delete it anyway. */
1592 if (LockVolume(salvinfo, summary.header.id)) {
1597 #endif /* AFS_DEMAND_ATTACH_FS */
1601 if (last && !Showmode) {
1602 Log("Volume header file %s is incorrectly named (should be %s "
1603 "not %s); %sdeleted (it will be recreated later, if "
1604 "necessary)\n", name, nameShouldBe, base,
1605 (Testing ? "it would have been " : ""));
1610 summary.fileName = ToString(base);
/* totalVolumes is the number of summary slots that were allocated, so a
 * larger nVolumes means there is no room left for this header */
1613 if (params->nVolumes > params->totalVolumes) {
1614 /* We found more volumes than we found on the first partition walk;
1615 * apparently something created a volume while we were
1616 * partition-salvaging, or we found more than 20 vols when salvaging a
1617 * particular volume. Abort if we detect this, since other programs
1618 * supposed to not touch the partition while it is partition-salvaging,
1619 * and we shouldn't find more than 20 vols in a VG.
1621 Abort("Found %ld vol headers, but should have found at most %ld! "
1622 "Make sure the volserver/fileserver are not running at the "
1623 "same time as a partition salvage\n",
1624 afs_printable_int32_ld(params->nVolumes),
1625 afs_printable_int32_ld(params->totalVolumes));
1628 memcpy(params->vsp, &summary, sizeof(summary));
1636 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1638 * If the header could not be read in at all, the header is always unlinked.
1639 * If instead RecordHeader said the header was bad (that is, the header file
1640 * is mis-named), we only unlink if we are doing a partition salvage, as
1641 * opposed to salvaging a specific volume group.
1643 * @param[in] dp the disk partition
1644 * @param[in] name full path to the .vol header
1645 * @param[in] hdr header data, or NULL if the header could not be read
1646 * @param[in] rock actually a struct SalvageScanParams*, with some information
1650 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1651 struct VolumeDiskHeader *hdr, void *rock)
1653 struct SalvageScanParams *params;
1656 params = (struct SalvageScanParams *)rock;
1659 /* no header; header is too bogus to read in at all */
/* report the unreadable header; when Testing is set the message says the file
 * would have been deleted. The actual removal is the unlink(name) call at the
 * end of the function, which is gated on dounlink. */
1661 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1667 } else if (!params->singleVolumeNumber) {
1668 /* We were able to read in a header, but RecordHeader said something
1669 * was wrong with it. We only unlink those if we are doing a partition
1676 if (dounlink && unlink(name)) {
1677 Log("Error %d while trying to unlink %s\n", errno, name);
1682 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1683 * the fileserver for VG information, or by scanning the /vicepX partition.
1685 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1686 * are salvaging, or 0 if this is a partition
1689 * @return operation status
1691 * @retval -1 we raced with a fileserver restart; checking out and locking
1692 * volumes must be retried
1695 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1697 afs_int32 nvols = 0;
1698 struct SalvageScanParams params;
1701 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1703 /* we successfully got the vol information from the fileserver; no
1704 * need to scan the partition */
1708 /* we need to retry volume checkout */
/* Sizing pass: for a partition salvage the headers are first walked with
 * CountHeader; VOL_VG_MAX_VOLS is also assigned to nvols below, presumably on
 * the single-volume path (the branch structure is not visible here). */
1712 if (!singleVolumeNumber) {
1713 /* Count how many volumes we have in /vicepX */
1714 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1717 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1722 nvols = VOL_VG_MAX_VOLS;
/* zero-filled array with room for nvols summaries */
1725 salvinfo->volumeSummaryp = calloc(nvols, sizeof(struct VolumeSummary));
1726 osi_Assert(salvinfo->volumeSummaryp != NULL);
/* state shared with the RecordHeader and UnlinkHeader callbacks */
1728 params.singleVolumeNumber = singleVolumeNumber;
1729 params.vsp = salvinfo->volumeSummaryp;
1730 params.nVolumes = 0;
1731 params.totalVolumes = nvols;
1733 params.salvinfo = salvinfo;
1735 /* walk the partition directory of volume headers and record the info
1736 * about them; unlinking invalid headers */
/* NOTE(review): the last argument two lines below reads "¶ms", which looks
 * like a mis-encoded "&params" -- confirm against the upstream file */
1737 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1738 UnlinkHeader, ¶ms);
1740 /* we apparently need to retry checking-out/locking volumes */
1744 Abort("Failed to get volume header summary\n");
/* publish the number of headers the callbacks recorded, then sort them */
1746 salvinfo->nVolumes = params.nVolumes;
1748 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
/* FindLinkHandle: walks the nVols summaries in isp. For each one it looks at
 * the first nSpecialInodes entries of allInodes starting at isp[i].index and
 * returns the inodeNumber of the first entry whose special type is
 * VI_LINKTABLE.
 * NOTE(review): the value returned when no link table inode is found is not
 * visible in this excerpt. */
1754 /* Find the link table. This should be associated with the RW volume or, if
1755 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
1758 FindLinkHandle(struct InodeSummary *isp, int nVols,
1759 struct ViceInodeInfo *allInodes)
1762 struct ViceInodeInfo *ip;
1764 for (i = 0; i < nVols; i++) {
1765 ip = allInodes + isp[i].index;
1766 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1767 if (ip[j].u.special.type == VI_LINKTABLE)
1768 return ip[j].inodeNumber;
// CreateLinkTable: makes sure the volume group has a usable link table.
// When ino is not a valid inode, IH_CREATE is invoked for a special inode of
// type VI_LINKTABLE. salvinfo->VGLinkH is then initialised for the inode, the
// file is truncated to sizeof(version) + sizeof(short) bytes, and a
// versionStamp carrying LINKTABLEMAGIC / LINKTABLEVERSION is written at
// offset 0. Each visible Abort() call reports a failure of one of these steps.
1775 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1777 struct versionStamp version;
1780 if (!VALID_INO(ino))
1782 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
1783 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1784 if (!VALID_INO(ino))
1786 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1787 isp->RWvolumeId, errno);
1788 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1789 fdP = IH_OPEN(salvinfo->VGLinkH);
1791 Abort("Can't open link table for volume %u (error = %d)\n",
1792 isp->RWvolumeId, errno);
1794 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1795 Abort("Can't truncate link table for volume %u (error = %d)\n",
1796 isp->RWvolumeId, errno);
1798 version.magic = LINKTABLEMAGIC;
1799 version.version = LINKTABLEVERSION;
1801 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
// NOTE(review): this message follows the FDH_PWRITE call but still says
// "truncate"; it looks copied from the FDH_TRUNC check above.
1803 Abort("Can't truncate link table for volume %u (error = %d)\n",
1804 isp->RWvolumeId, errno);
1806 FDH_REALLYCLOSE(fdP);
1808 /* If the volume summary exits (i.e., the V*.vol header file exists),
1809 * then set this inode there as well.
1811 if (isp->volSummary)
1812 isp->volSummary->header.linkTable = ino;
// Presumably the body of nt_SVG, the routine SalvageVolumeGroup hands to
// pthread_create (its signature line is not visible here): arg is read as an
// SVGParms_t and its three fields are forwarded to DoSalvageVolumeGroup.
1821 SVGParms_t *parms = (SVGParms_t *) arg;
1822 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
1827 SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1830 pthread_attr_t tattr;
1834 /* Initialize per volume global variables, even if later code does so */
/* In the code visible here (which ends at the AFS_NT40_ENV #endif), the
 * per-volume-group fields of salvinfo are reset and the salvage itself is run
 * by nt_SVG on a separate thread that this function waits for. */
1835 salvinfo->VolumeChanged = 0;
1836 salvinfo->VGLinkH = NULL;
1837 salvinfo->VGLinkH_cnt = 0;
1838 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
/* bundle the three arguments so they can be passed through the single
 * pointer argument of the thread routine */
1840 parms.svgp_inodeSummaryp = isp;
1841 parms.svgp_count = nVols;
1842 parms.svgp_salvinfo = salvinfo;
1843 code = pthread_attr_init(&tattr);
1845 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
/* joinable, because the thread is waited for with pthread_join below */
1849 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1851 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1854 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1856 Log("Failed to create thread to salvage volume group %u\n",
/* block until the salvage thread is done; its return value is discarded */
1860 (void)pthread_join(tid, NULL);
1862 #endif /* AFS_NT40_ENV */
/* DoSalvageVolumeGroup: salvages the volume group described by the nVols
 * inode summaries starting at isp. Visible steps, in order:
 *  - optional QuickCheck unless ForceSalvage is set;
 *  - when canfork is set and debug is not, Fork() is called and the side
 *    that sees a non-zero result calls Wait();
 *  - read of the inode records for the whole group from salvinfo->inodeFd;
 *  - under AFS_NAMEI_ENV, location or re-creation of the link table;
 *  - per-volume header and vnode salvage, from the last summary backwards;
 *  - correction of inode link counts with IH_DEC / IH_INC;
 *  - SalvageVolume on the group and release of the link table handle. */
1865 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1867 struct ViceInodeInfo *inodes, *allInodes, *ip;
1868 int i, totalInodes, size, salvageTo;
1872 int dec_VGLinkH = 0;
1874 FdHandle_t *fdP = NULL;
1876 salvinfo->VGLinkH_cnt = 0;
/* a read-write volume is considered present when the first summary carries
 * the RW volume id and has at least one special inode */
1877 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1878 && isp->nSpecialInodes > 0);
1879 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1880 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1883 if (ShowMounts && !haveRWvolume)
1885 if (canfork && !debug && Fork() != 0) {
1886 (void)Wait("Salvage volume group");
/* add up the inode counts of every volume in the group and size the buffer
 * for all of their ViceInodeInfo records */
1889 for (i = 0, totalInodes = 0; i < nVols; i++)
1890 totalInodes += isp[i].nInodes;
1891 size = totalInodes * sizeof(struct ViceInodeInfo);
1892 inodes = (struct ViceInodeInfo *)malloc(size);
1893 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1894 * for the partition, if all the inodes
1895 * had been read into memory */
1897 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1899 osi_Assert(OS_READ(salvinfo->inodeFd, inodes, size) == size);
1901 /* Don't try to salvage a read write volume if there isn't one on this
1903 salvageTo = haveRWvolume ? 0 : 1;
1905 #ifdef AFS_NAMEI_ENV
1906 ino = FindLinkHandle(isp, nVols, allInodes);
1907 if (VALID_INO(ino)) {
1908 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1909 fdP = IH_OPEN(salvinfo->VGLinkH);
1911 if (!VALID_INO(ino) || fdP == NULL) {
1912 Log("%s link table for volume %u.\n",
1913 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1915 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1918 struct ViceInodeInfo *ip;
1919 CreateLinkTable(salvinfo, isp, ino);
1920 fdP = IH_OPEN(salvinfo->VGLinkH);
1921 /* Sync fake 1 link counts to the link table, now that it exists */
1923 for (i = 0; i < nVols; i++) {
1924 ip = allInodes + isp[i].index;
1925 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1926 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1933 FDH_REALLYCLOSE(fdP);
1935 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1938 /* Salvage in reverse order--read/write volume last; this way any
1939 * Inodes not referenced by the time we salvage the read/write volume
1940 * can be picked up by the read/write volume */
1941 /* ACTUALLY, that's not done right now--the inodes just vanish */
/* salvageTo is 0 when a read-write volume is present and 1 otherwise, so
 * index 0 is left out of the loop in the latter case */
1942 for (i = nVols - 1; i >= salvageTo; i--) {
1944 struct InodeSummary *lisp = &isp[i];
1945 #ifdef AFS_NAMEI_ENV
1946 /* If only the RO is present on this partition, the link table
1947 * shows up as a RW volume special file. Need to make sure the
1948 * salvager doesn't try to salvage the non-existent RW.
1950 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1951 /* If this only special inode is the link table, continue */
1952 if (inodes->u.special.type == VI_LINKTABLE) {
1959 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1960 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1961 /* Check inodes twice. The second time do things seriously. This
1962 * way the whole RO volume can be deleted, below, if anything goes wrong */
1963 for (check = 1; check >= 0; check--) {
1965 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
1967 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
1968 if (rw && deleteMe) {
1969 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1970 * volume won't be called */
1976 if (rw && check == 1)
1978 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
1979 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
1985 /* Fix actual inode counts */
1988 Log("totalInodes %d\n",totalInodes);
/* totalInodes doubles as the loop counter and is counted down to zero here */
1989 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1990 static int TraceBadLinkCounts = 0;
1991 #ifdef AFS_NAMEI_ENV
1992 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
1993 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
1994 VGLinkH_p1 = ip->u.param[0];
1995 continue; /* Deal with this last. */
1998 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1999 TraceBadLinkCounts--; /* Limit reports, per volume */
2000 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
/* a positive remaining count is worked off with IH_DEC, a negative one with
 * IH_INC; failures are logged */
2002 while (ip->linkCount > 0) {
2003 /* below used to assert, not break */
2005 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2006 Log("idec failed. inode %s errno %d\n",
2007 PrintInode(stmp, ip->inodeNumber), errno);
2013 while (ip->linkCount < 0) {
2014 /* these used to be asserts */
2016 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2017 Log("iinc failed. inode %s errno %d\n",
2018 PrintInode(stmp, ip->inodeNumber), errno);
2025 #ifdef AFS_NAMEI_ENV
/* apply the adjustment for the link table inode itself, which the loop above
 * recorded in dec_VGLinkH and skipped */
2026 while (dec_VGLinkH > 0) {
2027 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2028 Log("idec failed on link table, errno = %d\n", errno);
2032 while (dec_VGLinkH < 0) {
2033 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2034 Log("iinc failed on link table, errno = %d\n", errno);
2041 /* Directory consistency checks on the rw volume */
2043 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2044 IH_RELEASE(salvinfo->VGLinkH);
2046 if (canfork && !debug) {
/* QuickCheck: looks at each of the nVols volumes in isp and reads its
 * VolumeDiskData through an inode handle built from the volume summary
 * header. The read header is tested for the expected magic, dontSalvage ==
 * DONT_SALVAGE, needsSalvaged == 0 and destroyMe == 0. DoSalvageVolumeGroup
 * tests the result for truth when ForceSalvage is not set.
 * NOTE(review): the return statements are not visible in this excerpt. */
2053 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2055 /* Check headers BEFORE forking */
2059 for (i = 0; i < nVols; i++) {
2060 struct VolumeSummary *vs = isp[i].volSummary;
2061 VolumeDiskData volHeader;
2063 /* Don't salvage just because phantom rw volume is there... */
2064 /* (If a read-only volume exists, read/write inodes must also exist) */
2065 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
/* handle on the inode named by header.volumeInfo of this volume summary */
2069 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2070 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2071 == sizeof(volHeader)
2072 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2073 && volHeader.dontSalvage == DONT_SALVAGE
2074 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
/* header passes the checks but is still marked in use: clear inUse, set
 * inService and write the header back */
2075 if (volHeader.inUse != 0) {
2076 volHeader.inUse = 0;
2077 volHeader.inService = 1;
2079 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2080 != sizeof(volHeader)) {
/* Overview of the visible steps in SalvageVolumeHeaderFile:
 *  1. note which special inodes are already referenced by the existing
 *     volume header (goodspecial[]);
 *  2. start a fresh tempHeader holding only the stamp, id and parent;
 *  3. look for duplicate special inodes of the same type, using goodspecial[]
 *     to decide which duplicate to skip where possible;
 *  4. walk the special inodes, logging rubbish types and recording the
 *     remaining inode numbers through stuff[];
 *  5. call SalvageHeader once per special inode type;
 *  6. when there is no header file, or the existing header is reported as
 *     damaged, select VCreateVolumeDiskHeader or VWriteVolumeDiskHeader and
 *     write the rebuilt header;
 *  7. initialise isp->volSummary->volumeInfoHandle. */
2096 /* SalvageVolumeHeaderFile
2098 * Salvage the top level V*.vol header file. Make sure the special files
2099 * exist and that there are no duplicates.
2101 * Calls SalvageHeader for each possible type of volume special file.
2105 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2106 struct ViceInodeInfo *inodes, int RW,
2107 int check, int *deleteMe)
2110 struct ViceInodeInfo *ip;
2111 int allinodesobsolete = 1;
2112 struct VolumeDiskHeader diskHeader;
2113 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2115 struct VolumeHeader tempHeader;
2116 struct afs_inode_info stuff[MAXINODETYPE];
2118 /* keeps track of special inodes that are probably 'good'; they are
2119 * referenced in the vol header, and are included in the given inodes
2124 } goodspecial[MAXINODETYPE];
2129 memset(goodspecial, 0, sizeof(goodspecial));
2131 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2133 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2135 Log("cannot allocate memory for inode skip array when salvaging "
2136 "volume %lu; not performing duplicate special inode recovery\n",
2137 afs_printable_uint32_lu(isp->volumeId));
2138 /* still try to perform the salvage; the skip array only does anything
2139 * if we detect duplicate special inodes */
/* presumably sets up stuff[] so that each entry's inode pointer refers to a
 * field of tempHeader -- TODO confirm in init_inode_info */
2142 init_inode_info(&tempHeader, stuff);
2145 * First, look at the special inodes and see if any are referenced by
2146 * the existing volume header. If we find duplicate special inodes, we
2147 * can use this information to use the referenced inode (it's more
2148 * likely to be the 'good' one), and throw away the duplicates.
2150 if (isp->volSummary && skip) {
2151 /* use tempHeader, so we can use the stuff[] array to easily index
2152 * into the isp->volSummary special inodes */
2153 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2155 for (i = 0; i < isp->nSpecialInodes; i++) {
2156 ip = &inodes[isp->index + i];
2157 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2158 /* will get taken care of in a later loop */
2161 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2162 goodspecial[ip->u.special.type-1].valid = 1;
2163 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
/* start again from a blank header that carries only the stamp and the two
 * volume ids; inode numbers are recorded through stuff[] further down */
2168 memset(&tempHeader, 0, sizeof(tempHeader));
2169 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2170 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2171 tempHeader.id = isp->volumeId;
2172 tempHeader.parent = isp->RWvolumeId;
2174 /* Check for duplicates (inodes are sorted by type field) */
2175 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2176 ip = &inodes[isp->index + i];
2177 if (ip->u.special.type == (ip + 1)->u.special.type) {
2178 afs_ino_str_t stmp1, stmp2;
2180 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2181 /* Will be caught in the loop below */
2185 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2186 ip->u.special.type, isp->volumeId,
2187 PrintInode(stmp1, ip->inodeNumber),
2188 PrintInode(stmp2, (ip+1)->inodeNumber));
2190 if (skip && goodspecial[ip->u.special.type-1].valid) {
2191 Inode gi = goodspecial[ip->u.special.type-1].inode;
2194 Log("using special inode referenced by vol header (%s)\n",
2195 PrintInode(stmp1, gi));
2198 /* the volume header references some special inode of
2199 * this type in the inodes array; are we it? */
2200 if (ip->inodeNumber != gi) {
2202 } else if ((ip+1)->inodeNumber != gi) {
2203 /* in case this is the last iteration; we need to
2204 * make sure we check ip+1, too */
2209 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2217 for (i = 0; i < isp->nSpecialInodes; i++) {
2219 ip = &inodes[isp->index + i];
2220 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2222 Log("Rubbish header inode %s of type %d\n",
2223 PrintInode(stmp, ip->inodeNumber),
2224 ip->u.special.type);
2230 Log("Rubbish header inode %s of type %d; deleted\n",
2231 PrintInode(stmp, ip->inodeNumber),
2232 ip->u.special.type);
2233 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2234 if (skip && skip[i]) {
2235 if (orphans == ORPH_REMOVE) {
2236 Log("Removing orphan special inode %s of type %d\n",
2237 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2240 Log("Ignoring orphan special inode %s of type %d\n",
2241 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2242 /* fall through to the ip->linkCount--; line below */
2245 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2246 allinodesobsolete = 0;
2248 if (!check && ip->u.special.type != VI_LINKTABLE)
2249 ip->linkCount--; /* Keep the inode around */
2257 if (allinodesobsolete) {
2264 salvinfo->VGLinkH_cnt++; /* one for every header. */
2266 if (!RW && !check && isp->volSummary) {
2267 ClearROInUseBit(isp->volSummary);
2271 for (i = 0; i < MAXINODETYPE; i++) {
2272 if (stuff[i].inodeType == VI_LINKTABLE) {
2273 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2274 * And we may have recreated the link table earlier, so set the
2275 * RW header as well.
2277 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2278 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2282 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2286 if (isp->volSummary == NULL) {
2288 char headerName[64];
2289 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2290 (void)afs_snprintf(path, sizeof path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, headerName);
2292 Log("No header file for volume %u\n", isp->volumeId);
2296 Log("No header file for volume %u; %screating %s\n",
2297 isp->volumeId, (Testing ? "it would have been " : ""),
// NOTE(review): no check of the calloc result is visible before the
// dereference on the following line -- confirm against the full source.
2299 isp->volSummary = calloc(1, sizeof(struct VolumeSummary));
2300 isp->volSummary->fileName = ToString(headerName);
2302 writefunc = VCreateVolumeDiskHeader;
2305 char headerName[64];
2306 /* hack: these two fields are obsolete... */
2307 isp->volSummary->header.volumeAcl = 0;
2308 isp->volSummary->header.volumeMountTable = 0;
2311 (&isp->volSummary->header, &tempHeader,
2312 sizeof(struct VolumeHeader))) {
2313 /* We often remove the name before calling us, so we make a fake one up */
2314 if (isp->volSummary->fileName) {
/* NOTE(review): unbounded copy into the 64-byte headerName buffer; confirm
 * that fileName can never be longer than that */
2315 strcpy(headerName, isp->volSummary->fileName);
2317 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2318 isp->volSummary->fileName = ToString(headerName);
2320 (void)afs_snprintf(path, sizeof path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, headerName);
2322 Log("Header file %s is damaged or no longer valid%s\n", path,
2323 (check ? "" : "; repairing"));
2327 writefunc = VWriteVolumeDiskHeader;
/* adopt the rebuilt header as the in-memory summary header */
2331 memcpy(&isp->volSummary->header, &tempHeader,
2332 sizeof(struct VolumeHeader));
2335 Log("It would have written a new header file for volume %u\n",
/* convert to the on-disk layout and write it with whichever function was
 * selected above (create for a missing header, write for a damaged one) */
2339 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2340 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2342 Log("Error %ld writing volume header file for volume %lu\n",
2343 afs_printable_int32_ld(code),
2344 afs_printable_uint32_lu(diskHeader.id));
2349 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2350 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader: validate one special volume-header inode (described by sp)
 * and rebuild it when it is missing or corrupt.
 *
 * salvinfo - state of the current salvage job; when sp is the VI_VOLINFO
 *            inode, salvinfo->VolInfo is loaded from the header that was
 *            read or rebuilt.
 * sp       - description of the special inode: its type, expected version
 *            stamp, on-disk size, printable description and a pointer to
 *            the inode number stored in the volume header.
 * isp      - inode summary of the volume (volumeId, RWvolumeId,
 *            maxUniquifier are read here).
 * check    - when set, problems are only reported; the log messages show
 *            that the "recreating" paths are not expected in this mode.
 * deleteMe - output flag.  NOTE(review): the statements that set it are not
 *            visible in this excerpt; presumably it is raised on the
 *            "destroying volume" and DESTROY_ME paths -- confirm.
 *
 * Steps visible below:
 *  - a zero inode number is reported and, outside check mode, a new special
 *    inode is created with IH_CREATE;
 *  - the inode is opened and sp->size bytes are read; a short read or a
 *    magic number different from sp->stamp.magic marks the header corrupt,
 *    and the in-memory header is zeroed so no garbage is used;
 *  - when recreating (and not Testing) the file is truncated and rewritten:
 *    a VI_VOLINFO header gets the name "bogus.<volumeId>", inService and
 *    blessed cleared, uniquifier = maxUniquifier + 1 + 1000, a type derived
 *    from volumeId == RWvolumeId, and the current time as creation date;
 *    any other special inode gets only its version stamp;
 *  - for VI_VOLINFO the update/creation time of the volume is logged.
 */
2355 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2356 struct InodeSummary *isp, int check, int *deleteMe)
2359 VolumeDiskData volumeInfo;
2360 struct versionStamp fileHeader;
2369 #ifndef AFS_NAMEI_ENV
2370 if (sp->inodeType == VI_LINKTABLE)
2373 if (*(sp->inode) == 0) {
2375 Log("Missing inode in volume header (%s)\n", sp->description);
2379 Log("Missing inode in volume header (%s); %s\n", sp->description,
2380 (Testing ? "it would have recreated it" : "recreating"));
2383 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2384 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2385 if (!VALID_INO(*(sp->inode)))
2387 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2388 sp->description, errno);
2393 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2394 fdP = IH_OPEN(specH);
2395 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2396 /* bail out early and destroy the volume */
2398 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2405 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2406 sp->description, errno);
2409 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2410 || header.fileHeader.magic != sp->stamp.magic)) {
2412 Log("Part of the header (%s) is corrupted\n", sp->description);
2413 FDH_REALLYCLOSE(fdP);
2417 Log("Part of the header (%s) is corrupted; recreating\n",
2420 /* header can be garbage; make sure we don't read garbage data from
2422 memset(&header, 0, sizeof(header));
2424 if (sp->inodeType == VI_VOLINFO
2425 && header.volumeInfo.destroyMe == DESTROY_ME) {
2428 FDH_REALLYCLOSE(fdP);
2432 if (recreate && !Testing) {
2435 ("Internal error: recreating volume header (%s) in check mode\n",
2437 nBytes = FDH_TRUNC(fdP, 0);
2439 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2440 sp->description, errno);
2442 /* The following code should be moved into vutil.c */
2443 if (sp->inodeType == VI_VOLINFO) {
2445 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2446 header.volumeInfo.stamp = sp->stamp;
2447 header.volumeInfo.id = isp->volumeId;
2448 header.volumeInfo.parentId = isp->RWvolumeId;
2449 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2450 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2451 isp->volumeId, isp->volumeId);
2452 header.volumeInfo.inService = 0;
2453 header.volumeInfo.blessed = 0;
2454 /* The + 1000 is a hack in case there are any files out in venus caches */
2455 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2456 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2457 header.volumeInfo.needsCallback = 0;
2458 gettimeofday(&tp, 0);
2459 header.volumeInfo.creationDate = tp.tv_sec;
2461 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2462 sizeof(header.volumeInfo), 0);
2463 if (nBytes != sizeof(header.volumeInfo)) {
2466 ("Unable to write volume header file (%s) (errno = %d)\n",
2467 sp->description, errno);
2468 Abort("Unable to write entire volume header file (%s)\n",
2472 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2473 if (nBytes != sizeof(sp->stamp)) {
2476 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2477 sp->description, errno);
2479 ("Unable to write entire version stamp in volume header file (%s)\n",
2484 FDH_REALLYCLOSE(fdP);
2486 if (sp->inodeType == VI_VOLINFO) {
2487 salvinfo->VolInfo = header.volumeInfo;
2491 if (salvinfo->VolInfo.updateDate) {
2492 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2494 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2495 salvinfo->VolInfo.id,
2496 (Testing ? "it would have been " : ""), update);
2498 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2500 Log("%s (%u) not updated (created %s)\n",
2501 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
/*
 * SalvageVnodes: salvage both vnode indexes (small, then large) of one
 * volume against the inode list of its read/write parent.
 *
 * salvinfo - state of the current salvage job.
 * rwIsp    - inode summary of the read/write volume; it supplies the slice
 *            of `inodes` to use: entries start at index + nSpecialInodes and
 *            there are nInodes - nSpecialInodes of them.
 * thisIsp  - inode summary of the volume being salvaged; RW is true when it
 *            is the same summary as rwIsp.
 * inodes   - the inode array that rwIsp indexes into.
 * check    - passed through to SalvageIndex; when set, a -1 result for the
 *            small index is tested before the large index is processed
 *            (NOTE(review): the statement under that test is not visible
 *            here; presumably an early return).
 *
 * Returns 0 when both SalvageIndex results are 0, otherwise -1.
 */
2511 SalvageVnodes(struct SalvInfo *salvinfo,
2512 struct InodeSummary *rwIsp,
2513 struct InodeSummary *thisIsp,
2514 struct ViceInodeInfo *inodes, int check)
2516 int ilarge, ismall, ioffset, RW, nInodes;
2517 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2520 RW = (rwIsp == thisIsp);
2521 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2523 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2524 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2525 if (check && ismall == -1)
2528 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2529 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2530 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex: walk one vnode index file and reconcile every allocated
 * vnode with the sorted list of inodes that belong to the volume.
 *
 * salvinfo   - state of the current salvage job; VolumeChanged is set when
 *              a vnode is rewritten.
 * ino        - inode number of the vnode index file to process.
 * class      - vnode class (selects the VnodeClassInfo entry, i.e. the
 *              on-disk vnode size and magic number).
 * RW         - non-zero when the volume being salvaged is the read/write
 *              volume (see the "For RW volume" matching rule below).
 * ip/nInodes - cursor into, and remaining length of, the inode list; the
 *              loops below compare ip->u.vnode.vnodeNumber with the current
 *              vnode number, which relies on the list being ordered by
 *              vnode number (presumably sorted by the caller -- confirm).
 * volSummary - volume summary; header.parent is used to open the index.
 * check      - report-only mode: the assertion near the end requires that
 *              no vnode was changed while check is set.
 *
 * The index file size must be an exact multiple of the on-disk vnode size;
 * the first slot is skipped (the stream is positioned at diskSize) and the
 * remaining size/diskSize - 1 slots are read one by one.  For each slot
 * whose type is not vNull:
 *  - a zero inode number or a wrong vnode magic causes the slot to be
 *    zeroed (NOTE(review): the guarding conditions around these memsets are
 *    partly on lines not visible here);
 *  - inodes with a smaller vnode number are skipped, then an inode for this
 *    vnode number is selected, preferring the one whose inode number equals
 *    the one recorded in the vnode;
 *  - with a matching inode, the uniquifier, data version, inode number and
 *    length stored in the vnode are compared with the inode and corrected,
 *    and ip->linkCount is decremented so the inode is kept;
 *  - without a matching inode, a vnode that names an inode, or that is a
 *    directory, is reported and zeroed;
 *  - a changed vnode is written back with IH_IWRITE unless Testing is set.
 */
2534 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2535 struct ViceInodeInfo *ip, int nInodes,
2536 struct VolumeSummary *volSummary, int check)
2538 char buf[SIZEOF_LARGEDISKVNODE];
2539 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2541 StreamHandle_t *file;
2542 struct VnodeClassInfo *vcp;
2544 afs_sfsize_t nVnodes;
2545 afs_fsize_t vnodeLength;
2547 afs_ino_str_t stmp1, stmp2;
2551 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2552 fdP = IH_OPEN(handle);
2553 osi_Assert(fdP != NULL);
2554 file = FDH_FDOPEN(fdP, "r+");
2555 osi_Assert(file != NULL);
2556 vcp = &VnodeClassInfo[class];
2557 size = OS_SIZE(fdP->fd_fd);
2558 osi_Assert(size != -1);
2559 nVnodes = (size / vcp->diskSize) - 1;
2561 osi_Assert((nVnodes + 1) * vcp->diskSize == size);
2562 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
2566 for (vnodeIndex = 0;
2567 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2568 nVnodes--, vnodeIndex++) {
2569 if (vnode->type != vNull) {
2570 int vnodeChanged = 0;
2571 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2572 if (VNDISK_GET_INO(vnode) == 0) {
2574 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2575 memset(vnode, 0, vcp->diskSize);
2579 if (vcp->magic != vnode->vnodeMagic) {
2580 /* bad magic #, probably partially created vnode */
2582 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2583 vnodeNumber, afs_printable_uint32_lu(vnode->vnodeMagic),
2584 afs_printable_uint32_lu(vcp->magic));
2585 memset(vnode, 0, vcp->diskSize);
2589 Log("Partially allocated vnode %d deleted.\n",
2591 memset(vnode, 0, vcp->diskSize);
2595 /* ****** Should do a bit more salvage here: e.g. make sure
2596 * vnode type matches what it should be given the index */
2597 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2598 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2599 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2600 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2607 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2608 /* The following doesn't work, because the version number
2609 * is not maintained correctly by the file server */
2610 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2611 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2613 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2619 /* For RW volume, look for vnode with matching inode number;
2620 * if no such match, take the first determined by our sort
2622 struct ViceInodeInfo *lip = ip;
2623 int lnInodes = nInodes;
2625 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2626 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2635 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2636 /* "Matching" inode */
2640 vu = vnode->uniquifier;
2641 iu = ip->u.vnode.vnodeUniquifier;
2642 vd = vnode->dataVersion;
2643 id = ip->u.vnode.inodeDataVersion;
2645 * Because of the possibility of the uniquifier overflows (> 4M)
2646 * we compare them modulo the low 22-bits; we shouldn't worry
2647 * about mismatching since they shouldn't to many old
2648 * uniquifiers of the same vnode...
2650 if (IUnique(vu) != IUnique(iu)) {
2652 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2655 vnode->uniquifier = iu;
2656 #ifdef AFS_3DISPARES
2657 vnode->dataVersion = (id >= vd ?
2660 1887437 ? vd : id) :
2663 1887437 ? id : vd));
2665 #if defined(AFS_SGI_EXMAG)
2666 vnode->dataVersion = (id >= vd ?
2669 15099494 ? vd : id) :
2672 15099494 ? id : vd));
2674 vnode->dataVersion = (id > vd ? id : vd);
2675 #endif /* AFS_SGI_EXMAG */
2676 #endif /* AFS_3DISPARES */
2679 /* don't bother checking for vd > id any more, since
2680 * partial file transfers always result in this state,
2681 * and you can't do much else anyway (you've already
2682 * found the best data you can) */
2683 #ifdef AFS_3DISPARES
2684 if (!vnodeIsDirectory(vnodeNumber)
2685 && ((vd < id && (id - vd) < 1887437)
2686 || ((vd > id && (vd - id) > 1887437)))) {
2688 #if defined(AFS_SGI_EXMAG)
2689 if (!vnodeIsDirectory(vnodeNumber)
2690 && ((vd < id && (id - vd) < 15099494)
2691 || ((vd > id && (vd - id) > 15099494)))) {
2693 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2694 #endif /* AFS_SGI_EXMAG */
2697 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2698 vnode->dataVersion = id;
2703 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2706 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2708 VNDISK_SET_INO(vnode, ip->inodeNumber);
2713 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2715 VNDISK_SET_INO(vnode, ip->inodeNumber);
2718 VNDISK_GET_LEN(vnodeLength, vnode);
2719 if (ip->byteCount != vnodeLength) {
2722 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2727 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2728 VNDISK_SET_LEN(vnode, ip->byteCount);
2732 ip->linkCount--; /* Keep the inode around */
2735 } else { /* no matching inode */
2737 if (VNDISK_GET_INO(vnode) != 0
2738 || vnode->type == vDirectory) {
2739 /* No matching inode--get rid of the vnode */
2741 if (VNDISK_GET_INO(vnode)) {
2743 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2747 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2752 if (VNDISK_GET_INO(vnode)) {
2754 time_t serverModifyTime = vnode->serverModifyTime;
2755 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2759 time_t serverModifyTime = vnode->serverModifyTime;
2760 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2763 memset(vnode, 0, vcp->diskSize);
2766 /* Should not reach here becuase we checked for
2767 * (inodeNumber == 0) above. And where we zero the vnode,
2768 * we also goto vnodeDone.
2772 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2776 } /* VNDISK_GET_INO(vnode) != 0 */
2778 osi_Assert(!(vnodeChanged && check));
2779 if (vnodeChanged && !Testing) {
2780 osi_Assert(IH_IWRITE
2781 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2782 (char *)vnode, vcp->diskSize)
2784 salvinfo->VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber: look up the in-memory VnodeEssence for a vnode number.
 *
 * The vnode number is split into its class (which selects the per-class
 * salvinfo->vnodeInfo entry) and its bit number (the index into that
 * entry's vnodes array).
 *
 * Returns a pointer to the entry, or NULL when the bit number is not below
 * the number of vnodes recorded for that class.
 */
2795 struct VnodeEssence *
2796 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2799 struct VnodeInfo *vip;
2802 class = vnodeIdToClass(vnodeNumber);
2803 vip = &salvinfo->vnodeInfo[class];
2804 offset = vnodeIdToBitNumber(vnodeNumber);
2805 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite: give the directory its own fresh inode before it is edited.
 *
 * Nothing is done when the directory was already copied (dir->copied) or
 * when running in Testing mode.  Otherwise:
 *  - DFlush() is called first;
 *  - the directory's vnode is read from the large vnode index;
 *  - a new inode is created with IH_CREATE (the vnode's dataVersion is
 *    advanced as part of that call), and the contents of the old inode are
 *    copied into it with CopyInode;
 *  - the vnode is pointed at the new inode and written back to the index;
 *  - dir->dirHandle is re-pointed at the new inode.
 * The original inode is deliberately left in place here because the
 * directory is still being scanned.
 *
 * Failures of the read, create, copy and write steps are fatal (asserted).
 */
2809 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2811 /* Copy the directory unconditionally if we are going to change it:
2812 * not just if was cloned.
2814 struct VnodeDiskObject vnode;
2815 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2816 Inode oldinode, newinode;
2819 if (dir->copied || Testing)
2821 DFlush(); /* Well justified paranoia... */
2824 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2825 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2827 osi_Assert(code == sizeof(vnode));
2828 oldinode = VNDISK_GET_INO(&vnode);
2829 /* Increment the version number by a whole lot to avoid problems with
2830 * clients that were promised new version numbers--but the file server
2831 * crashed before the versions were written to disk.
2834 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2835 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2837 osi_Assert(VALID_INO(newinode));
2838 osi_Assert(CopyInode(salvinfo->fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2840 VNDISK_SET_INO(&vnode, newinode);
2842 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2843 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2845 osi_Assert(code == sizeof(vnode));
2847 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2848 salvinfo->fileSysDevice, newinode,
2849 &salvinfo->VolumeChanged);
2850 /* Don't delete the original inode right away, because the directory is
2851 * still being scanned.
2857 * This function should either successfully create a new dir, or give up
2858 * and leave things the way they were. In particular, if it fails to write
2859 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage: rebuild a damaged directory into a new inode and switch
 * the directory's vnode over to it.
 *
 * Steps visible below:
 *  - read the directory's vnode from the large vnode index and remember the
 *    old inode;
 *  - create a new inode (advancing the vnode's dataVersion) and open a
 *    salvage directory handle (newdir) on it;
 *  - choose the uniquifier for "..": the parent's recorded unique when the
 *    parent vnode exists and has a non-zero unique, otherwise 1;
 *  - call DirSalvage to copy the salvageable entries from the old directory
 *    into newdir; ".." refers to vnode.parent, or to the directory itself
 *    when vnode.parent is zero;
 *  - if DirSalvage fails, or DirOK rejects the result, the new inode's link
 *    count is decremented and the old directory is kept;
 *  - otherwise the vnode gets the new inode number and length and is written
 *    back, the data is synced, the old directory file is really closed, the
 *    old inode's link count is decremented and dir->dirHandle becomes newdir.
 */
2863 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2865 struct VnodeDiskObject vnode;
2866 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2867 Inode oldinode, newinode;
2872 afs_int32 parentUnique = 1;
2873 struct VnodeEssence *vnodeEssence;
2878 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2880 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2881 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2883 osi_Assert(lcode == sizeof(vnode));
2884 oldinode = VNDISK_GET_INO(&vnode);
2885 /* Increment the version number by a whole lot to avoid problems with
2886 * clients that were promised new version numbers--but the file server
2887 * crashed before the versions were written to disk.
2890 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2891 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2893 osi_Assert(VALID_INO(newinode));
2894 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2895 &salvinfo->VolumeChanged);
2897 /* Assign . and .. vnode numbers from dir and vnode.parent.
2898 * The uniquifier for . is in the vnode.
2899 * The uniquifier for .. might be set to a bogus value of 1 and
2900 * the salvager will later clean it up.
2902 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2903 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2906 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2908 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2913 /* didn't really build the new directory properly, let's just give up. */
2914 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2915 Log("Directory salvage returned code %d, continuing.\n", code);
2917 Log("also failed to decrement link count on new inode");
2921 Log("Checking the results of the directory salvage...\n");
2922 if (!DirOK(&newdir)) {
2923 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2924 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2925 osi_Assert(code == 0);
2929 VNDISK_SET_INO(&vnode, newinode);
2930 length = Length(&newdir);
2931 VNDISK_SET_LEN(&vnode, length);
2933 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2934 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2936 osi_Assert(lcode == sizeof(vnode));
2939 nt_sync(salvinfo->fileSysDevice);
2941 sync(); /* this is slow, but hopefully rarely called. We don't have
2942 * an open FD on the file itself to fsync.
2946 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
2948 /* make sure old directory file is really closed */
2949 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2950 FDH_REALLYCLOSE(fdP);
2952 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2953 osi_Assert(code == 0);
2954 dir->dirHandle = newdir;
2958 * arguments for JudgeEntry.
/* Bundle passed as the opaque "rock" argument through EnumerateDir to
 * JudgeEntry: SalvageDir fills it in, JudgeEntry reads both members. */
2960 struct judgeEntry_params {
2961 struct DirSummary *dir; /**< directory we're examining entries in */
2962 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/*
 * JudgeEntry: per-entry callback used while enumerating a directory; it
 * decides whether the entry (name -> vnodeNumber.unique) is valid, repairs
 * it, or removes it.
 *
 * arock       - a struct judgeEntry_params carrying the directory summary
 *               and the salvage job state.
 * name        - entry name.
 * vnodeNumber - vnode number stored in the entry.
 * (The remaining parameter, the entry's uniquifier `unique`, is declared on
 * a line not visible in this excerpt.)
 *
 * Every modification of the directory is preceded by CopyOnWrite so the
 * edit goes to a private copy.  Checks, in order:
 *  - vnode number outside the known range: entry is deleted;
 *  - (non-namei builds) vnode with inode number 0: entry is deleted;
 *  - even vnode number whose essence has zero count, unique and modeBits
 *    (and Showmode off): reported as invalid and deleted;
 *  - uniquifier mismatch: the entry is re-created with the vnode's unique,
 *    or just deleted when the vnode's unique is zero or the directory is
 *    orphaned; "." and ".." of a vnode with zero unique are skipped;
 *  - ".": must name the directory itself, otherwise it is rewritten;
 *  - "..": must name the expected parent fid, otherwise it is rewritten;
 *    dir->haveDotDot is set;
 *  - names starting with ".__afs": the entry is deleted and the vnode is
 *    marked unclaimed and todelete;
 *  - any other name: optional suid/sgid and root-owner reports; a symlink
 *    without any execute bit is read as a mount point and, if its contents
 *    do not start with '#' or '%' or do not end with '.', execute bits are
 *    added so it becomes a plain symbolic link; the name is remembered for
 *    large (directory) vnodes; finally the parent/claim bookkeeping decides
 *    whether this directory may claim the vnode or the entry is deleted.
 * On the normal path the vnode is marked claimed and its remaining link
 * count is decremented.
 *
 * NOTE(review): return statements other than the "no need to continue"
 * return 0 are on lines not visible here.
 */
2966 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
2969 struct judgeEntry_params *params = arock;
2970 struct DirSummary *dir = params->dir;
2971 struct SalvInfo *salvinfo = params->salvinfo;
2972 struct VnodeEssence *vnodeEssence;
2973 afs_int32 dirOrphaned, todelete;
2975 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
2977 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
2978 if (vnodeEssence == NULL) {
2980 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2983 CopyOnWrite(salvinfo, dir);
2984 osi_Assert(Delete(&dir->dirHandle, name) == 0);
2989 #ifndef AFS_NAMEI_ENV
2990 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2991 * mount inode for the partition. If this inode were deleted, it would crash
2994 if (vnodeEssence->InodeNumber == 0) {
2995 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2997 CopyOnWrite(salvinfo, dir);
2998 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3005 if (!(vnodeNumber & 1) && !Showmode
3006 && !(vnodeEssence->count || vnodeEssence->unique
3007 || vnodeEssence->modeBits)) {
3008 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
3009 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3010 vnodeNumber, unique,
3011 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3015 CopyOnWrite(salvinfo, dir);
3016 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3022 /* Check if the Uniquifiers match. If not, change the directory entry
3023 * so its unique matches the vnode unique. Delete if the unique is zero
3024 * or if the directory is orphaned.
3026 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3027 if (!vnodeEssence->unique
3028 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3029 /* This is an orphaned directory. Don't delete the . or ..
3030 * entry. Otherwise, it will get created in the next
3031 * salvage and deleted again here. So Just skip it.
3036 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3039 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3043 fid.Vnode = vnodeNumber;
3044 fid.Unique = vnodeEssence->unique;
3045 CopyOnWrite(salvinfo, dir);
3046 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3048 osi_Assert(Create(&dir->dirHandle, name, &fid) == 0);
3051 return 0; /* no need to continue */
3054 if (strcmp(name, ".") == 0) {
3055 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3058 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3060 CopyOnWrite(salvinfo, dir);
3061 osi_Assert(Delete(&dir->dirHandle, ".") == 0);
3062 fid.Vnode = dir->vnodeNumber;
3063 fid.Unique = dir->unique;
3064 osi_Assert(Create(&dir->dirHandle, ".", &fid) == 0);
3067 vnodeNumber = fid.Vnode; /* Get the new Essence */
3068 unique = fid.Unique;
3069 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3072 } else if (strcmp(name, "..") == 0) {
3075 struct VnodeEssence *dotdot;
3076 pa.Vnode = dir->parent;
3077 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3078 osi_Assert(dotdot != NULL); /* XXX Should not be assert */
3079 pa.Unique = dotdot->unique;
3081 pa.Vnode = dir->vnodeNumber;
3082 pa.Unique = dir->unique;
3084 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3086 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3088 CopyOnWrite(salvinfo, dir);
3089 osi_Assert(Delete(&dir->dirHandle, "..") == 0);
3090 osi_Assert(Create(&dir->dirHandle, "..", &pa) == 0);
3093 vnodeNumber = pa.Vnode; /* Get the new Essence */
3095 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3097 dir->haveDotDot = 1;
3098 } else if (strncmp(name, ".__afs", 6) == 0) {
3100 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3103 CopyOnWrite(salvinfo, dir);
3104 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3106 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3107 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3110 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3111 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3112 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3113 && !(vnodeEssence->modeBits & 0111)) {
3114 afs_sfsize_t nBytes;
3120 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3121 vnodeEssence->InodeNumber);
3124 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3128 size = FDH_SIZE(fdP);
3130 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3131 FDH_REALLYCLOSE(fdP);
3138 nBytes = FDH_PREAD(fdP, buf, size, 0);
3139 if (nBytes == size) {
3141 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3142 Log("Volume %u (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3143 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3144 Testing ? "would convert" : "converted");
3145 vnodeEssence->modeBits |= 0111;
3146 vnodeEssence->changed = 1;
3147 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3148 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3149 dir->name ? dir->name : "??", name, buf);
3151 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3152 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3154 FDH_REALLYCLOSE(fdP);
3157 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3158 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3159 if (vnodeIdToClass(vnodeNumber) == vLarge
3160 && vnodeEssence->name == NULL) {
3162 if ((n = (char *)malloc(strlen(name) + 1)))
3164 vnodeEssence->name = n;
3167 /* The directory entry points to the vnode. Check to see if the
3168 * vnode points back to the directory. If not, then let the
3169 * directory claim it (else it might end up orphaned). Vnodes
3170 * already claimed by another directory are deleted from this
3171 * directory: hardlinks to the same vnode are not allowed
3172 * from different directories.
3174 if (vnodeEssence->parent != dir->vnodeNumber) {
3175 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3176 /* Vnode does not point back to this directory.
3177 * Orphaned dirs cannot claim a file (it may belong to
3178 * another non-orphaned dir).
3181 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3183 vnodeEssence->parent = dir->vnodeNumber;
3184 vnodeEssence->changed = 1;
3186 /* Vnode was claimed by another directory */
3189 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3190 } else if (vnodeNumber == 1) {
3191 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3193 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3197 CopyOnWrite(salvinfo, dir);
3198 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3203 /* This directory claims the vnode */
3204 vnodeEssence->claimed = 1;
3206 vnodeEssence->count--;
/*
 * DistilVnodeEssence: read one vnode index file and build the in-memory
 * summary (struct VnodeEssence per slot) used by the directory checks.
 *
 * salvinfo - state of the current salvage job; vnodeInfo[class] is filled in.
 * rwVId    - id of the read/write volume, used to open the index inode.
 * class    - vnode class (selects the on-disk vnode size).
 * ino      - inode number of the vnode index file.
 * maxu     - in/out: raised to the largest uniquifier seen in the index.
 *
 * The index size must be an exact multiple of the on-disk vnode size; the
 * first slot is skipped.  One VnodeEssence is allocated per remaining slot,
 * plus an inode-number array for the large (directory) class.  For each
 * allocated vnode the link count, block count, parent, unique, mode bits,
 * inode number, type, author, owner and group are recorded.
 *
 * A vnode of type vDirectory found in a non-large index is invalid: it is
 * cleared on disk and not counted as allocated.  The clear must cover the
 * whole on-disk record, so the record size (vcp->diskSize) and the record
 * buffer itself are used.  `vnode` is only a pointer into `buf`, so
 * sizeof(vnode) would be the size of a pointer and &vnode the address of
 * the pointer variable, not the record.
 */
3211 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3212 VnodeClass class, Inode ino, Unique * maxu)
3214 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3215 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3216 char buf[SIZEOF_LARGEDISKVNODE];
3217 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3219 StreamHandle_t *file;
3224 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3225 fdP = IH_OPEN(vip->handle);
3226 osi_Assert(fdP != NULL);
3227 file = FDH_FDOPEN(fdP, "r+");
3228 osi_Assert(file != NULL);
3229 size = OS_SIZE(fdP->fd_fd);
3230 osi_Assert(size != -1);
3231 vip->nVnodes = (size / vcp->diskSize) - 1;
3232 if (vip->nVnodes > 0) {
3233 osi_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3234 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
3235 osi_Assert((vip->vnodes = (struct VnodeEssence *)
3236 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3237 if (class == vLarge) {
3238 osi_Assert((vip->inodes = (Inode *)
3239 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3248 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3249 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3250 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3251 nVnodes--, vnodeIndex++) {
3252 if (vnode->type != vNull) {
3253 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3254 afs_fsize_t vnodeLength;
3255 vip->nAllocatedVnodes++;
3256 vep->count = vnode->linkCount;
3257 VNDISK_GET_LEN(vnodeLength, vnode);
3258 vep->blockCount = nBlocks(vnodeLength);
3259 vip->volumeBlockCount += vep->blockCount;
3260 vep->parent = vnode->parent;
3261 vep->unique = vnode->uniquifier;
3262 if (*maxu < vnode->uniquifier)
3263 *maxu = vnode->uniquifier;
3264 vep->modeBits = vnode->modeBits;
3265 vep->InodeNumber = VNDISK_GET_INO(vnode);
3266 vep->type = vnode->type;
3267 vep->author = vnode->author;
3268 vep->owner = vnode->owner;
3269 vep->group = vnode->group;
3270 if (vnode->type == vDirectory) {
3271 if (class != vLarge) {
3272 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3273 vip->nAllocatedVnodes--;
/* Zero and rewrite the full on-disk record, not pointer-sized bytes. */
3274 memset(vnode, 0, vcp->diskSize);
3275 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3276 vnodeIndexOffset(vcp, vnodeNumber),
3277 (char *)vnode, vcp->diskSize);
3278 salvinfo->VolumeChanged = 1;
3280 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName: build the path of a directory vnode into `path` by walking up
 * the parent links recorded in the VnodeEssence entries.
 *
 * When the vnode has a parent, a recorded name, and the parent's own path
 * can be built by the recursive call, the separator and this vnode's name
 * are appended to what the recursion left in `path`.
 *
 * NOTE(review): the handling of the other cases and the return value are on
 * lines not visible here.  The appends use strcat with no length check, so
 * `path` must be large enough for the whole chain -- confirm the buffer
 * size against the caller.
 */
3289 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3292 struct VnodeEssence *parentvp;
3298 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3299 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3300 strcat(path, OS_DIRSEP);
3301 strcat(path, vp->name);
3307 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3308 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
/*
 * IsVnodeOrphaned: report whether a vnode is cut off from the root.
 *
 * Returns 1 (orphaned) when the vnode has no VnodeEssence entry or is not
 * marked claimed; otherwise the answer is that of its parent, found by
 * recursing on vep->parent.  The two early returns below give 1 for "vnode
 * zero" and 0 for "the root dir vnode", per their comments (the tests
 * guarding them are on lines not visible here).
 */
3311 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3313 struct VnodeEssence *vep;
3316 return (1); /* Vnode zero does not exist */
3318 return (0); /* The root dir vnode is always claimed */
3319 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3320 if (!vep || !vep->claimed)
3321 return (1); /* Vnode is not claimed - it is orphaned */
3323 return (IsVnodeOrphaned(salvinfo, vep->parent));
/*
 * SalvageDir: salvage the directory stored in slot i of the large vnode
 * index, after first salvaging its parent directory.
 *
 * salvinfo     - state of the current salvage job.
 * name         - passed through to the recursive call (its other uses are
 *                on lines not visible here).
 * rwVid        - read/write volume id, used when releasing the old inode.
 * dirVnodeInfo - per-class info for large vnodes (vnodes[] and inodes[]).
 * alinkH       - link-count handle stored in the directory summary.
 * i            - bit number (slot) of the directory to salvage.
 * rootdir      - receives a copy of the DirSummary when the directory is
 *                vnode 1 with unique 1.
 * rootdirfound - output flag for the above (set on a line not visible here).
 *
 * Order of work:
 *  - return at once if the slot is already marked salvaged; otherwise mark
 *    it, and return if no inode is recorded for it;
 *  - vnode 1 must have no parent (a non-zero parent is cleared); for other
 *    directories a not-yet-salvaged parent is salvaged first by recursion;
 *  - fill in the DirSummary and open a salvage handle on the inode;
 *  - unless RebuildDirs forces a rebuild, DirOK decides whether the
 *    directory is intact; a bad directory goes through CopyAndSalvage;
 *  - every entry is then examined by JudgeEntry via EnumerateDir;
 *  - if JudgeEntry copied the directory, the old inode (still referenced by
 *    the local dirHandle copy) has its link count decremented.
 *
 * `dir`, `dirHandle` and `path` are static, so there is only one instance
 * shared by all invocations; the recursive call for the parent is made
 * before they are filled in for this directory.
 */
3327 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3328 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3329 struct DirSummary *rootdir, int *rootdirfound)
3331 static struct DirSummary dir;
3332 static struct DirHandle dirHandle;
3333 struct VnodeEssence *parent;
3334 static char path[MAXPATHLEN];
3337 if (dirVnodeInfo->vnodes[i].salvaged)
3338 return; /* already salvaged */
3341 dirVnodeInfo->vnodes[i].salvaged = 1;
3343 if (dirVnodeInfo->inodes[i] == 0)
3344 return; /* Not allocated to a directory */
3346 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3347 if (dirVnodeInfo->vnodes[i].parent) {
3348 Log("Bad parent, vnode 1; %s...\n",
3349 (Testing ? "skipping" : "salvaging"));
3350 dirVnodeInfo->vnodes[i].parent = 0;
3351 dirVnodeInfo->vnodes[i].changed = 1;
3354 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3355 if (parent && parent->salvaged == 0)
3356 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3357 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3358 rootdir, rootdirfound);
3361 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3362 dir.unique = dirVnodeInfo->vnodes[i].unique;
3365 dir.parent = dirVnodeInfo->vnodes[i].parent;
3366 dir.haveDot = dir.haveDotDot = 0;
3367 dir.ds_linkH = alinkH;
3368 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3369 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
3371 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3374 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3375 (Testing ? "skipping" : "salvaging"));
3378 CopyAndSalvage(salvinfo, &dir);
3380 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3383 dirHandle = dir.dirHandle;
3386 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3387 &dirVnodeInfo->vnodes[i], path);
3390 /* If enumeration failed for random reasons, we will probably delete
3391 * too much stuff, so we guard against this instead.
3393 struct judgeEntry_params judge_params;
3394 judge_params.salvinfo = salvinfo;
3395 judge_params.dir = &dir;
3397 osi_Assert(EnumerateDir(&dirHandle, JudgeEntry, &judge_params) == 0);
3400 /* Delete the old directory if it was copied in order to salvage.
3401 * CopyOnWrite has written the new inode # to the disk, but we still
3402 * have the old one in our local structure here. Thus, we idec the
3406 if (dir.copied && !Testing) {
3407 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3408 osi_Assert(code == 0);
3409 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3412 /* Remember rootdir DirSummary _after_ it has been judged */
3413 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3414 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3422 * Get a new FID that can be used to create a new file.
3424 * @param[in] volHeader vol header for the volume
3425 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3426 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3427 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3428 * updated to the new max unique if we create a new
/*
 * Implementation notes for GetNewFID:
 *  - the vnode number comes from the first slot of the requested class whose
 *    type is vNull; when no slot is free the vnodes array is grown by one
 *    entry with realloc and the new entry's type is set to vNull (the other
 *    members of the new entry are not initialised on the lines visible
 *    here);
 *  - the realloc result is stored straight back into the array pointer.
 *    NOTE(review): a check of that result is not visible in this excerpt;
 *    confirm one exists on the dropped line that follows it;
 *  - the uniquifier is taken from the volume header when the header value is
 *    greater than *maxunique, and the header value is then post-incremented;
 *    otherwise *maxunique + 1 + 2000 is used, per the comment that the
 *    header value will be bumped by 2000 later.
 */
3432 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3433 VnodeClass class, AFSFid *afid, Unique *maxunique)
3436 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3437 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
3441 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3442 /* no free vnodes; make a new one */
3443 salvinfo->vnodeInfo[class].nVnodes++;
3444 salvinfo->vnodeInfo[class].vnodes =
3445 realloc(salvinfo->vnodeInfo[class].vnodes,
3446 sizeof(struct VnodeEssence) * (i+1));
3448 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
3451 afid->Vnode = bitNumberToVnodeNumber(i, class);
3453 if (volHeader->uniquifier < (*maxunique + 1)) {
3454 /* header uniq is bad; it will get bumped by 2000 later */
3455 afid->Unique = *maxunique + 1 + 2000;
3458 /* header uniq seems okay; just use that */
3459 afid->Unique = *maxunique = volHeader->uniquifier++;
3464 * Create a vnode for a README file explaining not to use a recreated-root vol.
3466 * @param[in] volHeader vol header for the volume
3467 * @param[in] alinkH ihandle for i/o for the volume
3468 * @param[in] vid volume id
3469 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3470 * updated to the new max unique if we create a new
3472 * @param[out] afid FID for the new readme vnode
3473 * @param[out] ainode the inode for the new readme file
3475 * @return operation status
/*
 * CreateReadme: create a small-vnode file holding the explanatory text in
 * readme and report its FID and inode to the caller.
 *
 * Visible steps: obtain a FID via GetNewFID(vSmall), IH_CREATE the data
 * inode, write the text with IH_IWRITE, build a zeroed small disk vnode
 * (vFile, mode 0777, link count 1, data version 1) and write it through
 * salvinfo->vnodeInfo[vSmall].handle at the index offset of the new vnode,
 * then mirror the new vnode into its in-memory VnodeEssence and bump
 * nAllocatedVnodes and volumeBlockCount. *ainode receives the new inode.
 * The recovery path at the end drops the inode link with IH_DEC.
 *
 * NOTE(review): the allocation-failure message below is prefixed
 * "CreateRootDir:" although it is logged from CreateReadme.
 * NOTE(review): the short-write message prints sizeof(readme) as the expected
 * size, while the write itself was asked for length (sizeof(readme) - 1).
 */
3480 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3481 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3485 struct VnodeDiskObject *rvnode = NULL;
3487 IHandle_t *readmeH = NULL;
3488 struct VnodeEssence *vep;
3490 time_t now = time(NULL);
3492 /* Try to make the note brief, but informative. Only administrators should
3493 * be able to read this file at first, so we can hopefully assume they
3494 * know what AFS is, what a volume is, etc. */
3496 "This volume has been salvaged, but has lost its original root directory.\n"
3497 "The root directory that exists now has been recreated from orphan files\n"
3498 "from the rest of the volume. This recreated root directory may interfere\n"
3499 "with old cached data on clients, and there is no way the salvager can\n"
3500 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3501 "use this volume, but only copy the salvaged data to a new volume.\n"
3502 "Continuing to use this volume as it exists now may cause some clients to\n"
3503 "behave oddly when accessing this volume.\n"
3504 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3505 /* ^ the person reading this probably just lost some data, so they could
3506 * use some cheering up. */
3508 /* -1 for the trailing NUL */
3509 length = sizeof(readme) - 1;
3511 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
3513 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3515 /* create the inode and write the contents */
3516 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3517 salvinfo->fileSysPath, 0, vid,
3518 afid->Vnode, afid->Unique, 1);
3519 if (!VALID_INO(readmeinode)) {
3520 Log("CreateReadme: readme IH_CREATE failed\n");
3524 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3525 bytes = IH_IWRITE(readmeH, 0, readme, length);
3526 IH_RELEASE(readmeH);
3528 if (bytes != length) {
3529 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3530 (int)sizeof(readme));
3534 /* create the vnode and write it out */
3535 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
3537 Log("CreateRootDir: error alloc'ing memory\n");
3541 rvnode->type = vFile;
3543 rvnode->modeBits = 0777;
3544 rvnode->linkCount = 1;
3545 VNDISK_SET_LEN(rvnode, length);
3546 rvnode->uniquifier = afid->Unique;
3547 rvnode->dataVersion = 1;
3548 VNDISK_SET_INO(rvnode, readmeinode);
3549 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3554 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
3556 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3557 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3558 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3560 if (bytes != SIZEOF_SMALLDISKVNODE) {
3561 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3562 (int)SIZEOF_SMALLDISKVNODE);
3566 /* update VnodeEssence for new readme vnode */
3567 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
3569 vep->blockCount = nBlocks(length);
3570 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3571 vep->parent = rvnode->parent;
3572 vep->unique = rvnode->uniquifier;
3573 vep->modeBits = rvnode->modeBits;
3574 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3575 vep->type = rvnode->type;
3576 vep->author = rvnode->author;
3577 vep->owner = rvnode->owner;
3578 vep->group = rvnode->group;
3588 *ainode = readmeinode;
3593 if (IH_DEC(alinkH, readmeinode, vid)) {
3594 Log("CreateReadme (recovery): IH_DEC failed\n");
3606 * create a root dir for a volume that lacks one.
3608 * @param[in] volHeader vol header for the volume
3609 * @param[in] alinkH ihandle for disk access for this volume group
3610 * @param[in] vid volume id we're dealing with
3611 * @param[out] rootdir populated with info about the new root dir
3612 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3613 * updated to the new max unique if we create a new
3616 * @return operation status
/*
 * CreateRootDir: build a replacement root directory (vnode 1, uniquifier 1)
 * for a volume that has none.
 *
 * Visible steps:
 *  - refuse when neither vnode class has a vnodes array, or when the
 *    existing entry for vnode 1 is not vNull;
 *  - when there are no large vnodes at all, allocate a one-entry vnodes and
 *    inodes array for vLarge (allocation failure trips osi_Assert);
 *  - CreateReadme() supplies a README file, IH_CREATE supplies the directory
 *    inode, MakeDir() initialises it and Create() adds "README.ROOTDIR";
 *  - a zeroed large disk vnode is filled in (vDirectory, mode 0777, link
 *    count 2, ACL entry granting read and lookup to id -204) and written at
 *    the index offset of vnode 1;
 *  - the in-memory VnodeEssence and the caller's DirSummary are updated.
 * The recovery path at the end IH_DECs the root inode when decroot is set
 * and the readme inode when decreadme is set.
 *
 * NOTE(review): the rootvnode allocation uses calloc() but its failure
 * message says "malloc failed".
 * NOTE(review): rootdir->name is set from strdup(".") with no visible check
 * of the result.
 */
3621 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3622 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
3626 int decroot = 0, decreadme = 0;
3627 AFSFid did, readmeid;
3630 struct VnodeDiskObject *rootvnode = NULL;
3631 struct acl_accessList *ACL;
3634 struct VnodeEssence *vep;
3636 time_t now = time(NULL);
3638 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3639 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3643 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3644 /* We don't have any large vnodes in the volume; allocate room
3645 * for one so we can recreate the root dir */
3646 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3647 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3648 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3650 osi_Assert(salvinfo->vnodeInfo[vLarge].vnodes);
3651 osi_Assert(salvinfo->vnodeInfo[vLarge].inodes);
3654 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3655 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3656 if (vep->type != vNull) {
3657 Log("Not creating new root dir; existing vnode 1 is non-null\n");
3661 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3662 &readmeinode) != 0) {
3667 /* set the DV to a very high number, so it is unlikely that we collide
3668 * with a cached DV */
3671 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3673 if (!VALID_INO(rootinode)) {
3674 Log("CreateRootDir: IH_CREATE failed\n");
3679 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3680 rootinode, &salvinfo->VolumeChanged);
3684 if (MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3685 Log("CreateRootDir: MakeDir failed\n");
3688 if (Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3689 Log("CreateRootDir: Create failed\n");
3693 length = Length(&rootdir->dirHandle);
3694 DZap((void *)&rootdir->dirHandle);
3696 /* create the new root dir vnode */
3697 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
3699 Log("CreateRootDir: malloc failed\n");
3703 /* only give 'rl' permissions to 'system:administrators'. We do this to
3704 * try to catch the attention of an administrator, that they should not
3705 * be writing to this directory or continue to use it. */
3706 ACL = VVnodeDiskACL(rootvnode);
3707 ACL->size = sizeof(struct acl_accessList);
3708 ACL->version = ACL_ACLVERSION;
3712 ACL->entries[0].id = -204; /* system:administrators */
3713 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
3715 rootvnode->type = vDirectory;
3716 rootvnode->cloned = 0;
3717 rootvnode->modeBits = 0777;
3718 rootvnode->linkCount = 2;
3719 VNDISK_SET_LEN(rootvnode, length);
3720 rootvnode->uniquifier = 1;
3721 rootvnode->dataVersion = dv;
3722 VNDISK_SET_INO(rootvnode, rootinode);
3723 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3724 rootvnode->author = 0;
3725 rootvnode->owner = 0;
3726 rootvnode->parent = 0;
3727 rootvnode->group = 0;
3728 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3730 /* write it out to disk */
3731 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3732 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3733 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3735 if (bytes != SIZEOF_LARGEDISKVNODE) {
3736 /* just cast to int and don't worry about printing real 64-bit ints;
3737 * a large disk vnode isn't anywhere near the 32-bit limit */
3738 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3739 (int)SIZEOF_LARGEDISKVNODE);
3743 /* update VnodeEssence for the new root vnode */
3744 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3746 vep->blockCount = nBlocks(length);
3747 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3748 vep->parent = rootvnode->parent;
3749 vep->unique = rootvnode->uniquifier;
3750 vep->modeBits = rootvnode->modeBits;
3751 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3752 vep->type = rootvnode->type;
3753 vep->author = rootvnode->author;
3754 vep->owner = rootvnode->owner;
3755 vep->group = rootvnode->group;
3765 /* update DirSummary for the new root vnode */
3766 rootdir->vnodeNumber = 1;
3767 rootdir->unique = 1;
3768 rootdir->haveDot = 1;
3769 rootdir->haveDotDot = 1;
3770 rootdir->rwVid = vid;
3771 rootdir->copied = 0;
3772 rootdir->parent = 0;
3773 rootdir->name = strdup(".");
3774 rootdir->vname = volHeader->name;
3775 rootdir->ds_linkH = alinkH;
3782 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3783 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3785 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3786 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3796 * salvage a volume group.
3798 * @param[in] salvinfo information for the current salvage job
3799 * @param[in] rwIsp inode summary for rw volume
3800 * @param[in] alinkH link table inode handle
3802 * @return operation status
/*
 * SalvageVolume: salvage the read-write volume described by rwIsp.
 *
 * Outline of the statements visible below:
 *  1. Read the volume header through the volumeInfo inode and assert on its
 *     size, magic and destroyMe flag.
 *  2. DistilVnodeEssence() both vnode classes, then SalvageDir() every large
 *     vnode index entry.
 *  3. If no root directory was found, orphans == ORPH_ATTACH and !Testing,
 *     try CreateRootDir().
 *  4. Walk all vnodes; used, unclaimed, non-root ones are orphans. For an
 *     orphaned directory the parent's count delta is adjusted, and with
 *     ORPH_ATTACH the orphan is re-parented to the root dir under a
 *     generated name starting with __ORPHANDIR__ or __ORPHANFILE__
 *     (the Create is attempted up to 10 times).
 *  5. Rewrite each vnode whose essence is marked changed or has a non-zero
 *     count delta; orphans are removed here when requested.
 *  6. Free the cached names, fix filecount, diskused and uniquifier in the
 *     header, ask the fileserver to break callbacks when built with the
 *     FSSYNC client, clear the in-use and needs-salvage flags and write the
 *     header back.
 *
 * VnodeEssence.count is applied as linkCount - count when a vnode is
 * rewritten, so decrementing count raises the stored link count.
 */
3806 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3808 /* This routine, for now, will only be called for read-write volumes */
3810 int BlocksInVolume = 0, FilesInVolume = 0;
3812 struct DirSummary rootdir, oldrootdir;
3813 struct VnodeInfo *dirVnodeInfo;
3814 struct VnodeDiskObject vnode;
3815 VolumeDiskData volHeader;
3817 int orphaned, rootdirfound = 0;
3818 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3819 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3820 struct VnodeEssence *vep;
3823 afs_sfsize_t nBytes;
3825 VnodeId LFVnode, ThisVnode;
3826 Unique LFUnique, ThisUnique;
3830 vid = rwIsp->volSummary->header.id;
3831 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3832 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3833 osi_Assert(nBytes == sizeof(volHeader));
3834 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3835 osi_Assert(volHeader.destroyMe != DESTROY_ME);
3836 /* (should not have gotten this far with DESTROY_ME flag still set!) */
3838 DistilVnodeEssence(salvinfo, vid, vLarge,
3839 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
3840 DistilVnodeEssence(salvinfo, vid, vSmall,
3841 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
3843 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
3844 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3845 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
3846 &rootdir, &rootdirfound);
3849 nt_sync(salvinfo->fileSysDevice);
3851 sync(); /* This used to be done lower level, for every dir */
3858 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
3860 Log("Cannot find root directory for volume %lu; attempting to create "
3861 "a new one\n", afs_printable_uint32_lu(vid));
3863 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
3868 salvinfo->VolumeChanged = 1;
3872 /* Parse each vnode looking for orphaned vnodes and
3873 * connect them to the tree as orphaned (if requested).
3875 oldrootdir = rootdir;
3876 for (class = 0; class < nVNODECLASSES; class++) {
3877 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
3878 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
3879 ThisVnode = bitNumberToVnodeNumber(v, class);
3880 ThisUnique = vep->unique;
3882 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3883 continue; /* Ignore unused, claimed, and root vnodes */
3885 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3886 * entry in this vnode had incremented the parent link count (In
3887 * JudgeEntry()). We need to go to the parent and decrement that
3888 * link count. But if the parent's unique is zero, then the parent
3889 * link count was not incremented in JudgeEntry().
3891 if (class == vLarge) { /* directory vnode */
3892 pv = vnodeIdToBitNumber(vep->parent);
3893 if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
3894 if (vep->parent == 1 && newrootdir) {
3895 /* this vnode's parent was the volume root, and
3896 * we just created the volume root. So, the parent
3897 * dir didn't exist during JudgeEntry, so the link
3898 * count was not inc'd there, so don't dec it here.
3904 salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
3910 continue; /* If no rootdir, can't attach orphaned files */
3912 /* Here we attach orphaned files and directories into the
3913 * root directory, LFVnode, making sure link counts stay correct.
3915 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3916 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3917 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3919 /* Update this orphaned vnode's info. Its parent info and
3920 * link count (do for orphaned directories and files).
3922 vep->parent = LFVnode; /* Parent is the root dir */
3923 vep->unique = LFUnique;
3926 vep->count--; /* Inc link count (root dir will pt to it) */
3928 /* If this orphaned vnode is a directory, change '..'.
3929 * The name of the orphaned dir/file is unknown, so we
3930 * build a unique name. No need to CopyOnWrite the directory
3931 * since it is not connected to tree in BK or RO volume and
3932 * won't be visible there.
3934 if (class == vLarge) {
3938 /* Remove and recreate the ".." entry in this orphaned directory */
3939 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
3940 salvinfo->vnodeInfo[class].inodes[v],
3941 &salvinfo->VolumeChanged);
3943 pa.Unique = LFUnique;
3944 osi_Assert(Delete(&dh, "..") == 0);
3945 osi_Assert(Create(&dh, "..", &pa) == 0);
3947 /* The original parent's link count was decremented above.
3948 * Here we increment the new parent's link count.
3950 pv = vnodeIdToBitNumber(LFVnode);
3951 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
3955 /* Go to the root dir and add this entry. The link count of the
3956 * root dir was incremented when ".." was created. Try 10 times.
3958 for (j = 0; j < 10; j++) {
3959 pa.Vnode = ThisVnode;
3960 pa.Unique = ThisUnique;
3962 (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
3964 vLarge) ? "__ORPHANDIR__" :
3965 "__ORPHANFILE__"), ThisVnode,
3968 CopyOnWrite(salvinfo, &rootdir);
3969 code = Create(&rootdir.dirHandle, npath, &pa);
3973 ThisUnique += 50; /* Try creating a different file */
3975 osi_Assert(code == 0);
3976 Log("Attaching orphaned %s to volume's root dir as %s\n",
3977 ((class == vLarge) ? "directory" : "file"), npath);
3979 } /* for each vnode in the class */
3980 } /* for each class of vnode */
3982 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
3984 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
3986 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
3988 osi_Assert(code == 0);
3989 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
3992 DFlush(); /* Flush the changes */
3993 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
3994 Log("Cannot attach orphaned files and directories: Root directory not found\n");
3995 orphans = ORPH_IGNORE;
3998 /* Write out all changed vnodes. Orphaned files and directories
3999 * will get removed here also (if requested).
4001 for (class = 0; class < nVNODECLASSES; class++) {
4002 afs_sfsize_t nVnodes = salvinfo->vnodeInfo[class].nVnodes;
4003 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
4004 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
4005 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
4006 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
4007 for (i = 0; i < nVnodes; i++) {
4008 struct VnodeEssence *vnp = &vnodes[i];
4009 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
4011 /* If the vnode is good but is unclaimed (not listed in
4012 * any directory entries), then it is orphaned.
4015 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
4016 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
4020 if (vnp->changed || vnp->count) {
4023 IH_IREAD(salvinfo->vnodeInfo[class].handle,
4024 vnodeIndexOffset(vcp, vnodeNumber),
4025 (char *)&vnode, sizeof(vnode));
4026 osi_Assert(nBytes == sizeof(vnode));
4028 vnode.parent = vnp->parent;
4029 oldCount = vnode.linkCount;
4030 vnode.linkCount = vnode.linkCount - vnp->count;
4033 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4035 if (!vnp->todelete) {
4036 /* Orphans should have already been attached (if requested) */
4037 osi_Assert(orphans != ORPH_ATTACH);
4038 oblocks += vnp->blockCount;
4041 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4043 BlocksInVolume -= vnp->blockCount;
4045 if (VNDISK_GET_INO(&vnode)) {
4047 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4048 osi_Assert(code == 0);
4050 memset(&vnode, 0, sizeof(vnode));
4052 } else if (vnp->count) {
4054 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4057 vnode.modeBits = vnp->modeBits;
4060 vnode.dataVersion++;
4063 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4064 vnodeIndexOffset(vcp, vnodeNumber),
4065 (char *)&vnode, sizeof(vnode));
4066 osi_Assert(nBytes == sizeof(vnode));
4068 salvinfo->VolumeChanged = 1;
4072 if (!Showmode && ofiles) {
4073 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4075 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
4079 for (class = 0; class < nVNODECLASSES; class++) {
4080 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4081 for (i = 0; i < vip->nVnodes; i++)
4082 if (vip->vnodes[i].name)
4083 free(vip->vnodes[i].name);
4090 /* Set correct resource utilization statistics */
4091 volHeader.filecount = FilesInVolume;
4092 volHeader.diskused = BlocksInVolume;
4094 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
4095 if (volHeader.uniquifier < (maxunique + 1)) {
4097 Log("Volume uniquifier is too low; fixed\n");
4098 /* Plus 2,000 in case there are workstations out there with
4099 * cached vnodes that have since been deleted
4101 volHeader.uniquifier = (maxunique + 1 + 2000);
4105 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4106 "Only use this salvaged volume to copy data to another volume; "
4107 "do not continue to use this volume (%lu) as-is.\n",
4108 afs_printable_uint32_lu(vid));
4111 #ifdef FSSYNC_BUILD_CLIENT
4112 if (!Testing && salvinfo->VolumeChanged && salvinfo->useFSYNC) {
4113 afs_int32 fsync_code;
4115 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4117 Log("Error trying to tell the fileserver to break callbacks for "
4118 "changed volume %lu; error code %ld\n",
4119 afs_printable_uint32_lu(vid),
4120 afs_printable_int32_ld(fsync_code));
4122 salvinfo->VolumeChanged = 0;
4125 #endif /* FSSYNC_BUILD_CLIENT */
4127 /* Turn off the inUse bit; the volume's been salvaged! */
4128 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4129 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4130 volHeader.inService = 1; /* allow service again */
4131 volHeader.needsCallback = (salvinfo->VolumeChanged != 0);
4132 volHeader.dontSalvage = DONT_SALVAGE;
4133 salvinfo->VolumeChanged = 0;
4135 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4136 osi_Assert(nBytes == sizeof(volHeader));
4139 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
4140 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
4141 FilesInVolume, BlocksInVolume);
4144 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4145 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
/*
 * ClearROInUseBit: read the volume header through summary->volumeInfoHandle,
 * clear inUse and needsSalvaged, set inService and set dontSalvage to
 * DONT_SALVAGE, then write the header back. A short read or write, or a bad
 * magic number, trips osi_Assert.
 */
4151 ClearROInUseBit(struct VolumeSummary *summary)
4153 IHandle_t *h = summary->volumeInfoHandle;
4154 afs_sfsize_t nBytes;
4156 VolumeDiskData volHeader;
4158 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4159 osi_Assert(nBytes == sizeof(volHeader));
4160 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
4161 volHeader.inUse = 0;
4162 volHeader.needsSalvaged = 0;
4163 volHeader.inService = 1;
4164 volHeader.dontSalvage = DONT_SALVAGE;
4166 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4167 osi_Assert(nBytes == sizeof(volHeader));
4172 * Possibly delete the volume.
4174 * deleteMe - Always do so, only a partial volume.
/*
 * MaybeZapVolume: for a read-only volume, or when deleteMe is set, and
 * provided the summary carries a header file name: log the reason, destroy
 * the volume's disk header with VDestroyVolumeDiskHeader(), unlink the
 * header file (ENOENT is tolerated), call AskDelete() when
 * salvinfo->useFSYNC is set, and mark the summary deleted.
 * Otherwise, when check is zero, log the failed salvage and Abort.
 *
 * NOTE(review): path is filled with an unbounded sprintf(); its declaration
 * (and so its size) is not visible here.
 */
4177 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4178 char *message, int deleteMe, int check)
4180 if (readOnly(isp) || deleteMe) {
4181 if (isp->volSummary && isp->volSummary->fileName) {
4184 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
4186 Log("It will be deleted on this server (you may find it elsewhere)\n");
4189 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
4191 Log("it will be deleted instead. It should be recloned.\n");
4196 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, isp->volSummary->fileName);
4198 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4200 Log("Error %ld destroying volume disk header for volume %lu\n",
4201 afs_printable_int32_ld(code),
4202 afs_printable_uint32_lu(isp->volumeId));
4205 /* make sure we actually delete the fileName file; ENOENT
4206 * is fine, since VDestroyVolumeDiskHeader probably already
4208 if (unlink(path) && errno != ENOENT) {
4209 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4211 if (salvinfo->useFSYNC) {
4212 AskDelete(salvinfo, isp->volumeId);
4214 isp->volSummary->deleted = 1;
4217 } else if (!check) {
4218 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
4220 Abort("Salvage of volume %u aborted\n", isp->volumeId);
4224 #ifdef AFS_DEMAND_ATTACH_FS
4226 * Locks a volume on disk for salvaging.
4228 * @param[in] volumeId volume ID to lock
4230 * @return operation status
4232 * @retval -1 volume lock raced with a fileserver restart; all volumes must
4233 * be checked out and locked again
/*
 * LockVolume: take the on-disk lock for volumeId without blocking (lock type
 * from VVolLockType(V_VOLUPD, 1)), confirm the checkout with
 * FSYNC_VerifyCheckout(), and then stamp inUse = programType into the volume
 * header.
 *
 * EBUSY and other lock errors Abort. SYNC_DENIED from the checkout
 * verification means the volumes must be checked out again; any other
 * non-OK code Aborts. A header that cannot be read (short read or bad magic)
 * is not asserted on, but a failed rewrite of a readable header trips
 * osi_Assert.
 */
4238 LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
4243 /* should always be WRITE_LOCK, but keep the lock-type logic all
4244 * in one place, in VVolLockType. Params will be ignored, but
4245 * try to provide what we're logically doing. */
4246 locktype = VVolLockType(V_VOLUPD, 1);
4248 code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
4250 if (code == EBUSY) {
4251 Abort("Someone else appears to be using volume %lu; Aborted\n",
4252 afs_printable_uint32_lu(volumeId));
4254 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4255 afs_printable_int32_ld(code),
4256 afs_printable_uint32_lu(volumeId));
4259 code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPathName, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4260 if (code == SYNC_DENIED) {
4261 /* need to retry checking out volumes */
4264 if (code != SYNC_OK) {
4265 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4266 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4269 /* set inUse = programType in the volume header to ensure that nobody
4270 * tries to use this volume again without salvaging, if we somehow crash
4271 * or otherwise exit before finishing the salvage.
4275 struct VolumeHeader header;
4276 struct VolumeDiskHeader diskHeader;
4277 struct VolumeDiskData volHeader;
4279 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
4284 DiskToVolumeHeader(&header, &diskHeader);
4286 IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
4287 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4288 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4294 volHeader.inUse = programType;
4296 /* If we can't re-write the header, bail out and error. We don't
4297 * assert when reading the header, since it's possible the
4298 * header isn't really there (when there's no data associated
4299 * with the volume; we just delete the vol header file in that
4300 * case). But if it's there enough that we can read it, but
4301 * somehow we cannot write to it to signify we're salvaging it,
4302 * we've got a big problem and we cannot continue. */
4303 osi_Assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
4310 #endif /* AFS_DEMAND_ATTACH_FS */
/*
 * AskOffline: ask the fileserver, over FSYNC, to take volumeId offline
 * (command FSYNC_VOL_OFF, reason FSYNC_SALVAGE). The request is issued up to
 * three times; after a generic failure FSYNC_clientFinis() is called before
 * the next attempt. SYNC_DENIED and SYNC_BAD_COMMAND are logged, with Abort
 * calls visible on those paths, and if the final code is not SYNC_OK the
 * salvage is aborted.
 */
4313 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
4318 memset(&res, 0, sizeof(res));
4320 for (i = 0; i < 3; i++) {
4321 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4322 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4324 if (code == SYNC_OK) {
4326 } else if (code == SYNC_DENIED) {
4328 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4330 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4331 Abort("Salvage aborted\n");
4332 } else if (code == SYNC_BAD_COMMAND) {
4333 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
4336 #ifdef AFS_DEMAND_ATTACH_FS
4337 Log("AskOffline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4339 Log("AskOffline: fileserver is DAFS but we are not.\n");
4342 #ifdef AFS_DEMAND_ATTACH_FS
4343 Log("AskOffline: fileserver is not DAFS but we are.\n");
4345 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4348 Abort("Salvage aborted\n");
4351 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4352 FSYNC_clientFinis();
4356 if (code != SYNC_OK) {
4357 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4358 Abort("Salvage aborted\n");
/*
 * isDAFS plus the query routine that goes with it (the log text below names
 * the routine AskDAFS). The routine sends FSYNC_VOL_QUERY_VOP for volume
 * id 1 up to three times and branches on the reply code; after a generic
 * failure FSYNC_clientFinis() is called before retrying.
 * NOTE(review): presumably -1 in isDAFS means "not asked yet" and the answer
 * is cached there -- the assignments are not visible here, confirm.
 */
4362 /* don't want to pass around state; remember it here */
4363 static int isDAFS = -1;
4367 afs_int32 code, i, ret = 0;
4370 /* we don't care if we race. the answer shouldn't change */
4374 memset(&res, 0, sizeof(res));
4376 for (i = 0; i < 3; i++) {
4377 code = FSYNC_VolOp(1, NULL,
4378 FSYNC_VOL_QUERY_VOP, FSYNC_SALVAGE, &res);
4380 if (code == SYNC_OK) {
4383 } else if (code == SYNC_DENIED) {
4386 } else if (code == SYNC_BAD_COMMAND) {
4389 } else if (code == SYNC_FAILED) {
4390 if (res.hdr.reason == FSYNC_UNKNOWN_VOLID)
4396 Log("AskDAFS: request to query fileserver failed; trying again...\n");
4397 FSYNC_clientFinis();
/*
 * AskOnline: ask the fileserver to bring volumeId back online
 * (FSYNC_VOL_ON), issuing the request up to three times. Denials and
 * protocol mismatches are logged; after a generic failure
 * FSYNC_clientFinis() is called before the next attempt.
 */
4407 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4411 for (i = 0; i < 3; i++) {
4412 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4413 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4415 if (code == SYNC_OK) {
4417 } else if (code == SYNC_DENIED) {
4418 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4419 } else if (code == SYNC_BAD_COMMAND) {
4420 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4422 Log("AskOnline: please make sure file server binaries are same version.\n");
4426 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
4427 FSYNC_clientFinis();
/*
 * AskDelete: send FSYNC_VOL_DONE for volumeId to the fileserver, issuing the
 * request up to three times with a freshly zeroed response each time.
 * SYNC_FAILED with reason FSYNC_UNKNOWN_VOLID or FSYNC_WRONG_PART is treated
 * as "already deleted" (see the comment on that branch).
 *
 * NOTE(review): every log message in this function is prefixed "AskOnline:",
 * which looks like a copy-and-paste leftover.
 */
4434 AskDelete(struct SalvInfo *salvinfo, VolumeId volumeId)
4439 for (i = 0; i < 3; i++) {
4440 memset(&res, 0, sizeof(res));
4441 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4442 FSYNC_VOL_DONE, FSYNC_SALVAGE, &res);
4444 if (code == SYNC_OK) {
4446 } else if (code == SYNC_DENIED) {
4447 Log("AskOnline: file server denied DONE request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4448 } else if (code == SYNC_BAD_COMMAND) {
4449 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4452 #ifdef AFS_DEMAND_ATTACH_FS
4453 Log("AskOnline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4455 Log("AskOnline: fileserver is DAFS but we are not.\n");
4458 #ifdef AFS_DEMAND_ATTACH_FS
4459 Log("AskOnline: fileserver is not DAFS but we are.\n");
4461 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4465 } else if (code == SYNC_FAILED &&
4466 (res.hdr.reason == FSYNC_UNKNOWN_VOLID ||
4467 res.hdr.reason == FSYNC_WRONG_PART)) {
4468 /* volume is already effectively 'deleted' */
4472 Log("AskOnline: request for fileserver to delete volume failed; trying again...\n");
4473 FSYNC_clientFinis();
/*
 * CopyInode: copy the contents of inode1 into inode2 on the given device.
 * Data is read with FDH_PREAD into buf and written with FDH_PWRITE, both
 * using size as the file offset, until the read returns 0. Failure to open
 * the source, a short write, or a read that ends with a non-zero result
 * trips osi_Assert; both descriptors are closed with FDH_REALLYCLOSE.
 *
 * NOTE(review): destFdP is not asserted non-NULL the way srcFdP is.
 */
4480 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4482 /* Volume parameter is passed in case iopen is upgraded in future to
4483 * require a volume Id to be passed
4486 IHandle_t *srcH, *destH;
4487 FdHandle_t *srcFdP, *destFdP;
4489 afs_foff_t size = 0;
4491 IH_INIT(srcH, device, rwvolume, inode1);
4492 srcFdP = IH_OPEN(srcH);
4493 osi_Assert(srcFdP != NULL);
4494 IH_INIT(destH, device, rwvolume, inode2);
4495 destFdP = IH_OPEN(destH);
4496 while ((nBytes = FDH_PREAD(srcFdP, buf, sizeof(buf), size)) > 0) {
4497 osi_Assert(FDH_PWRITE(destFdP, buf, nBytes, size) == nBytes);
4500 osi_Assert(nBytes == 0);
4501 FDH_REALLYCLOSE(srcFdP);
4502 FDH_REALLYCLOSE(destFdP);
/*
 * PrintInodeList: debugging aid. Reads the whole inode file
 * (salvinfo->inodeFd, sized with OS_SIZE) into a malloc'd buffer and logs
 * one line per struct ViceInodeInfo record: inode number, link count, byte
 * count and the four u.param values. A negative size, a failed allocation
 * or a short read trips osi_Assert.
 */
4509 PrintInodeList(struct SalvInfo *salvinfo)
4511 struct ViceInodeInfo *ip;
4512 struct ViceInodeInfo *buf;
4515 afs_sfsize_t st_size;
4517 st_size = OS_SIZE(salvinfo->inodeFd);
4518 osi_Assert(st_size >= 0);
4519 buf = (struct ViceInodeInfo *)malloc(st_size);
4520 osi_Assert(buf != NULL);
4521 nInodes = st_size / sizeof(struct ViceInodeInfo);
4522 osi_Assert(OS_READ(salvinfo->inodeFd, buf, st_size) == st_size);
4523 for (ip = buf; nInodes--; ip++) {
4524 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
4525 PrintInode(stmp, ip->inodeNumber), ip->linkCount,
4526 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
4527 ip->u.param[2], ip->u.param[3]);
/*
 * PrintInodeSummary: debugging aid; logs one line for each of the
 * salvinfo->nVolumesInInodeFile entries of salvinfo->inodeSummary.
 */
4533 PrintInodeSummary(struct SalvInfo *salvinfo)
4536 struct InodeSummary *isp;
4538 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4539 isp = &salvinfo->inodeSummary[i];
4540 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/*
 * PrintVolumeSummary: debugging aid; logs the fileName of each of the
 * salvinfo->nVolumes entries starting at salvinfo->volumeSummaryp.
 */
4545 PrintVolumeSummary(struct SalvInfo *salvinfo)
4548 struct VolumeSummary *vsp;
4550 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; vsp++, i++) {
4551 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
/*
 * Body of the fork wrapper (named Fork by the comment below). It asserts on
 * the NT code path. Under AFS_DEMAND_ATTACH_FS, a salvageserver child
 * (f == 0) calls VChildProcReconnectFS_r() when built with the FSSYNC
 * client.
 */
4561 osi_Assert(0); /* Fork is never executed in the NT code path */
4565 #ifdef AFS_DEMAND_ATTACH_FS
4566 if ((f == 0) && (programType == salvageServer)) {
4567 /* we are a salvageserver child */
4568 #ifdef FSSYNC_BUILD_CLIENT
4569 VChildProcReconnectFS_r();
4571 #ifdef SALVSYNC_BUILD_CLIENT
4575 #endif /* AFS_DEMAND_ATTACH_FS */
4576 #endif /* !AFS_NT40_ENV */
4586 #ifdef AFS_DEMAND_ATTACH_FS
4587 if (programType == salvageServer) {
4588 #ifdef SALVSYNC_BUILD_CLIENT
4591 #ifdef FSSYNC_BUILD_CLIENT
4595 #endif /* AFS_DEMAND_ATTACH_FS */
4598 if (main_thread != pthread_self())
4599 pthread_exit((void *)code);
4612 pid = wait(&status);
4613 osi_Assert(pid != -1);
4614 if (WCOREDUMP(status))
4615 Log("\"%s\" core dumped!\n", prog);
4616 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp: format clock as local time into a static 20-byte buffer, either
 * with seconds ("%m/%d/%Y %H:%M:%S") or without ("%m/%d/%Y %H:%M").
 * NOTE(review): presumably precision selects between the two formats and the
 * static buffer is what gets returned (so the text would only be valid until
 * the next call) -- neither the test nor the return is visible here.
 * NOTE(review): the localtime() result is passed on without a NULL check.
 */
4622 TimeStamp(time_t clock, int precision)
4625 static char timestamp[20];
4626 lt = localtime(&clock);
4628 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M:%S", lt);
4630 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M", lt);
/*
 * CheckLogFile: rename log_path to log_path + ".old" and reopen log_path in
 * append mode as logFile; the comment below indicates stdout is used when
 * the open fails.
 * NOTE(review): oldSlvgLog is AFSDIR_PATH_MAX bytes and is filled with
 * strcpy() and strcat() without a visible length check on log_path.
 */
4635 CheckLogFile(char * log_path)
4637 char oldSlvgLog[AFSDIR_PATH_MAX];
4639 #ifndef AFS_NT40_ENV
4646 strcpy(oldSlvgLog, log_path);
4647 strcat(oldSlvgLog, ".old");
4649 renamefile(log_path, oldSlvgLog);
4650 logFile = afs_fopen(log_path, "a");
4652 if (!logFile) { /* still nothing, use stdout */
4656 #ifndef AFS_NAMEI_ENV
4657 AFS_DEBUG_IOPS_LOG(logFile);
4662 #ifndef AFS_NT40_ENV
/*
 * TimeStampLogFile - hard-link the log file to a timestamped name of the
 * form "<log_path>.YYYY-MM-DD.HH:MM:SS" (local time).
 *
 * log_path: path of the existing log file.
 */
4664 TimeStampLogFile(char * log_path)
4666 char stampSlvgLog[AFSDIR_PATH_MAX];
/* NOTE(review): the localtime() result is dereferenced below without a NULL check */
4671 lt = localtime(&now);
/* tm_year counts from 1900 and tm_mon from 0, hence the adjustments */
4672 (void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
4673 "%s.%04d-%02d-%02d.%02d:%02d:%02d",
4674 log_path, lt->tm_year + 1900,
4675 lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
4678 /* try to link the logfile to a timestamped filename */
4679 /* if it fails, oh well, nothing we can do */
4680 link(log_path, stampSlvgLog);
/*
 * NOTE(review): the header of this routine is not visible in this excerpt;
 * presumably the routine that displays the salvager log -- confirm.
 */
4689 #ifndef AFS_NT40_ENV
/* message for the case where the log went to syslog; the guarding condition is not shown here */
4691 printf("Can't show log since using syslog.\n");
/* reopen the salvager log read-only so its contents can be read back */
4702 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
4705 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
/* read the log line by line, at most sizeof(line) - 1 characters at a time */
4708 while (fgets(line, sizeof(line), logFile))
/*
 * Log - printf-style logging routine.
 *
 * format: printf-style format string, followed by its arguments.
 *
 * The message is formatted into the local buffer tmp (bounded by
 * sizeof tmp) and then emitted either through syslog at LOG_INFO or to
 * logFile prefixed with a second-resolution timestamp.
 * NOTE(review): the condition choosing between syslog and logFile is not
 * visible in this excerpt.
 */
4715 Log(const char *format, ...)
4721 va_start(args, format);
4722 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4724 #ifndef AFS_NT40_ENV
/* "%s" keeps the already-formatted text from being re-interpreted as a format */
4726 syslog(LOG_INFO, "%s", tmp);
4730 gettimeofday(&now, 0);
/* no newline is appended: the caller's message supplies its own */
4731 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort - printf-style reporting routine.
 *
 * format: printf-style format string, followed by its arguments.
 *
 * The message is formatted into the local buffer tmp (bounded by
 * sizeof tmp) and emitted through syslog at LOG_INFO or written to logFile
 * without a timestamp.
 * NOTE(review): what happens after the message is emitted is not visible
 * in this excerpt; presumably the process terminates -- confirm.
 */
4737 Abort(const char *format, ...)
4742 va_start(args, format);
4743 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4745 #ifndef AFS_NT40_ENV
/* "%s" keeps the already-formatted text from being re-interpreted as a format */
4747 syslog(LOG_INFO, "%s", tmp);
4751 fprintf(logFile, "%s", tmp);
/*
 * ToString - allocate heap storage sized for a copy of the string s.
 *
 * s: NUL-terminated source string; must not be NULL, since strlen() is
 *    applied to it directly.
 *
 * Allocation failure is treated as fatal via osi_Assert.
 * NOTE(review): the copy and the return statement are not visible in this
 * excerpt; presumably the routine returns a malloc'd duplicate that the
 * caller must free -- confirm.
 */
4763 ToString(const char *s)
/* +1 leaves room for the terminating NUL */
4766 p = (char *)malloc(strlen(s) + 1);
4767 osi_Assert(p != NULL);
4772 /* Remove the FORCESALVAGE file */
/*
 * path: directory in which the FORCESALVAGE file is looked for.
 *
 * The file "<path>/FORCESALVAGE" is unlinked only when Testing is off,
 * ForceSalvage is set, and afs_stat() finds the file.
 */
4774 RemoveTheForce(char *path)
4777 struct afs_stat_st force; /* so we can use afs_stat to find it */
/*
 * NOTE(review): strcpy/strcat are unbounded; the declaration of target is
 * not visible in this excerpt -- confirm it can hold path plus
 * "/FORCESALVAGE".
 */
4778 strcpy(target,path);
4779 strcat(target,"/FORCESALVAGE");
4780 if (!Testing && ForceSalvage) {
/* the unlink() result is ignored */
4781 if (afs_stat(target,&force) == 0) unlink(target);
4785 #ifndef AFS_AIX32_ENV
4787 * UseTheForceLuke - see if we can use the force
4790 UseTheForceLuke(char *path)
/*
 * path: directory in which the FORCESALVAGE file is looked for.
 * Returns non-zero when afs_stat() succeeds on "<path>/FORCESALVAGE",
 * zero otherwise.
 */
4792 struct afs_stat_st force;
/*
 * NOTE(review): strcpy/strcat are unbounded; the declaration of target is
 * not visible in this excerpt -- confirm it can hold path plus
 * "/FORCESALVAGE".
 */
4794 strcpy(target,path);
4795 strcat(target,"/FORCESALVAGE");
4797 return (afs_stat(target, &force) == 0);
4801 * UseTheForceLuke - see if we can use the force
4804 * The VRMIX fsck will not muck with the filesystem it is supposedly
4805 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4806 * muck directly with the root inode, which is within the normal
4808 * ListViceInodes() has a side effect of setting ForceSalvage if
4809 * it detects a need, based on root inode examination.
4812 UseTheForceLuke(char *path)
/*
 * Alternate variant of UseTheForceLuke (presumably the AFS_AIX32_ENV
 * build; the selecting #else is not visible in this excerpt).  It always
 * reports that no force-salvage marker was found.
 */
4815 return 0; /* sorry OB1 */
4820 /* NT support routines */
/* Path of the running executable; filled in the first time it is needed. */
4822 static char execpathname[MAX_PATH];
/*
 * nt_SalvagePartition - spawn a child process to salvage one partition.
 *
 * partName: partition name, copied into the job descriptor.
 * jobn:     job number recorded in the job descriptor.
 *
 * The descriptor (magic, job number, partition name) is handed to the
 * child through spawnprocveb() together with save_args.
 */
4824 nt_SalvagePartition(char *partName, int jobn)
/* resolve the executable path only once; an empty string means "not yet looked up" */
4829 if (!*execpathname) {
4830 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
/*
 * NOTE(review): the buffer length passed above is MAX_PATH - 1, but the
 * check below compares against 1023 -- confirm the two are meant to match.
 */
4831 if (!n || n == 1023)
4834 job.cj_magic = SALVAGER_MAGIC;
4835 job.cj_number = jobn;
/* NOTE(review): unbounded copy; confirm cj_part can hold any partName passed in */
4836 (void)strcpy(job.cj_part, partName);
4837 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage - validate a job descriptor and open a log file
 * whose name is AFSDIR_SERVER_SLVGLOG_FILEPATH with a ".<number>" suffix.
 *
 * datap: pointer to the data block, interpreted as a childJob_t.
 * len:   size of that block in bytes.
 */
4842 nt_SetupPartitionSalvage(void *datap, int len)
4844 childJob_t *jobp = (childJob_t *) datap;
4845 char logname[AFSDIR_PATH_MAX];
/* reject a block whose size does not match the job structure */
4847 if (len != sizeof(childJob_t))
/* reject a block that does not carry the salvager's magic value */
4849 if (jobp->cj_magic != SALVAGER_MAGIC)
/* NOTE(review): sprintf is unbounded; confirm the result always fits in AFSDIR_PATH_MAX */
4854 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
/* "w" truncates any existing log of the same name */
4856 logFile = afs_fopen(logname, "w");
4864 #endif /* AFS_NT40_ENV */