2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
89 #include <afs/procmgmt.h>
93 #include <sys/param.h>
97 #endif /* ITIMER_REAL */
103 #include <sys/stat.h>
108 #include <WINNT/afsevent.h>
111 #define WCOREDUMP(x) ((x) & 0200)
114 #include <afs/afsint.h>
115 #include <afs/afs_assert.h>
116 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
117 #if defined(AFS_VFSINCL_ENV)
118 #include <sys/vnode.h>
120 #include <sys/fs/ufs_inode.h>
122 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
123 #include <ufs/ufs/dinode.h>
124 #include <ufs/ffs/fs.h>
126 #include <ufs/inode.h>
129 #else /* AFS_VFSINCL_ENV */
131 #include <ufs/inode.h>
132 #else /* AFS_OSF_ENV */
133 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
134 #include <sys/inode.h>
137 #endif /* AFS_VFSINCL_ENV */
138 #endif /* AFS_SGI_ENV */
141 #include <sys/lockf.h>
145 #include <checklist.h>
147 #if defined(AFS_SGI_ENV)
152 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
155 #include <sys/mnttab.h>
156 #include <sys/mntent.h>
161 #endif /* AFS_SGI_ENV */
162 #endif /* AFS_HPUX_ENV */
167 #include <afs/osi_inode.h>
171 #include <afs/afsutil.h>
172 #include <afs/fileutil.h>
173 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
181 #include <afs/afssyscalls.h>
185 #include "partition.h"
186 #include "daemon_com.h"
188 #include "volume_inline.h"
189 #include "salvsync.h"
190 #include "viceinode.h"
192 #include "volinodes.h" /* header magic number, etc. stuff */
193 #include "vol-salvage.h"
195 #include "vol_internal.h"
197 #include <afs/prs_fs.h>
199 #ifdef FSSYNC_BUILD_CLIENT
200 #include "vg_cache.h"
207 /*@+fcnmacros +macrofcndecl@*/
210 extern off64_t afs_lseek(int FD, off64_t O, int F);
211 #endif /*S_SPLINT_S */
212 #define afs_lseek(FD, O, F) lseek64(FD, (off64_t) (O), F)
213 #define afs_stat stat64
214 #define afs_fstat fstat64
215 #define afs_open open64
216 #define afs_fopen fopen64
217 #else /* !O_LARGEFILE */
219 extern off_t afs_lseek(int FD, off_t O, int F);
220 #endif /*S_SPLINT_S */
221 #define afs_lseek(FD, O, F) lseek(FD, (off_t) (O), F)
222 #define afs_stat stat
223 #define afs_fstat fstat
224 #define afs_open open
225 #define afs_fopen fopen
226 #endif /* !O_LARGEFILE */
227 /*@=fcnmacros =macrofcndecl@*/
230 extern void *calloc();
232 static char *TimeStamp(time_t clock, int precision);
235 int debug; /* -d flag */
236 extern int Testing; /* -n flag */
237 int ListInodeOption; /* -i flag */
238 int ShowRootFiles; /* -r flag */
239 int RebuildDirs; /* -sal flag */
240 int Parallel = 4; /* -para X flag */
241 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
242 int forceR = 0; /* -b flag */
243 int ShowLog = 0; /* -showlog flag */
244 int ShowSuid = 0; /* -showsuid flag */
245 int ShowMounts = 0; /* -showmounts flag */
246 int orphans = ORPH_IGNORE; /* -orphans option */
251 int useSyslog = 0; /* -syslog flag */
252 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
261 #define MAXPARALLEL 32
263 int OKToZap; /* -o flag */
264 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
265 * in the volume header */
267 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
269 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
272 * information that is 'global' to a particular salvage job.
275 Device fileSysDevice; /**< The device number of the current partition
277 char fileSysPath[8]; /**< The path of the mounted partition currently
278 * being salvaged, i.e. the directory containing
279 * the volume headers */
280 char *fileSysPathName; /**< NT needs this to make name pretty log. */
281 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
282 int VGLinkH_cnt; /**< # of references to lnk handle. */
283 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
286 char *fileSysDeviceName; /**< The block device where the file system being
287 * salvaged was mounted */
288 char *filesysfulldev;
290 int VolumeChanged; /**< Set by any routine which would change the
291 * volume in a way which would require callbacks
292 * to be broken if the volume was put back on
293 * on line by an active file server */
295 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
296 * header dealt with */
298 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
299 int inodeFd; /**< File descriptor for inode file */
301 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
302 int nVolumes; /**< Number of volumes (read-write and read-only)
303 * in volume summary */
304 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
307 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
308 * vnodes in the volume that
309 * we are currently looking
311 int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
312 * to contact the fileserver over FSYNC */
319 /* Forward declarations */
320 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
321 static int AskVolumeSummary(struct SalvInfo *salvinfo,
322 VolumeId singleVolumeNumber);
324 #ifdef AFS_DEMAND_ATTACH_FS
325 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
326 #endif /* AFS_DEMAND_ATTACH_FS */
328 /* Uniquifier stored in the Inode */
333 return (u & 0x3fffff);
335 #if defined(AFS_SGI_EXMAG)
336 return (u & SGI_UNIQMASK);
339 #endif /* AFS_SGI_EXMAG */
346 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
348 return 0; /* otherwise may be transient, e.g. EMFILE */
353 char *save_args[MAX_ARGS];
355 extern pthread_t main_thread;
356 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
360 * Get the salvage lock if not already held. Hold until process exits.
362 * @param[in] locktype READ_LOCK or WRITE_LOCK
365 _ObtainSalvageLock(int locktype)
367 struct VLockFile salvageLock;
372 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
374 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
377 "salvager: There appears to be another salvager running! "
382 "salvager: Error %d trying to acquire salvage lock! "
388 ObtainSalvageLock(void)
390 _ObtainSalvageLock(WRITE_LOCK);
393 ObtainSharedSalvageLock(void)
395 _ObtainSalvageLock(READ_LOCK);
399 #ifdef AFS_SGI_XFS_IOPS_ENV
400 /* Check if the given partition is mounted. For XFS, the root inode is not a
401 * constant. So we check the hard way.
404 IsPartitionMounted(char *part)
407 struct mntent *mntent;
409 osi_Assert(mntfp = setmntent(MOUNTED, "r"));
410 while (mntent = getmntent(mntfp)) {
411 if (!strcmp(part, mntent->mnt_dir))
416 return mntent ? 1 : 1;
419 /* Check if the given inode is the root of the filesystem. */
420 #ifndef AFS_SGI_XFS_IOPS_ENV
422 IsRootInode(struct afs_stat *status)
425 * The root inode is not a fixed value in XFS partitions. So we need to
426 * see if the partition is in the list of mounted partitions. This only
427 * affects the SalvageFileSys path, so we check there.
429 return (status->st_ino == ROOTINODE);
434 #ifndef AFS_NAMEI_ENV
435 /* We don't want to salvage big files filesystems, since we can't put volumes on
439 CheckIfBigFilesFS(char *mountPoint, char *devName)
441 struct superblock fs;
444 if (strncmp(devName, "/dev/", 5)) {
445 (void)sprintf(name, "/dev/%s", devName);
447 (void)strcpy(name, devName);
450 if (ReadSuper(&fs, name) < 0) {
451 Log("Unable to read superblock. Not salvaging partition %s.\n",
455 if (IsBigFilesFileSystem(&fs)) {
456 Log("Partition %s is a big files filesystem, not salvaging.\n",
466 #define HDSTR "\\Device\\Harddisk"
467 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
469 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
474 static int dowarn = 1;
476 if (!QueryDosDevice(p1->devName, res, RES_LEN - 1))
478 if (strncmp(res, HDSTR, HDLEN)) {
481 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
482 res, HDSTR, p1->devName);
486 d1 = atoi(&res[HDLEN]);
488 if (!QueryDosDevice(p2->devName, res, RES_LEN - 1))
490 if (strncmp(res, HDSTR, HDLEN)) {
493 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
494 res, HDSTR, p2->devName);
498 d2 = atoi(&res[HDLEN]);
503 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
506 /* This assumes that two partitions with the same device number divided by
507 * PartsPerDisk are on the same disk.
510 SalvageFileSysParallel(struct DiskPartition64 *partP)
513 struct DiskPartition64 *partP;
514 int pid; /* Pid for this job */
515 int jobnumb; /* Log file job number */
516 struct job *nextjob; /* Next partition on disk to salvage */
518 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
519 struct job *thisjob = 0;
520 static int numjobs = 0;
521 static int jobcount = 0;
527 char logFileName[256];
531 /* We have a partition to salvage. Copy it into thisjob */
532 thisjob = (struct job *)malloc(sizeof(struct job));
534 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
537 memset(thisjob, 0, sizeof(struct job));
538 thisjob->partP = partP;
539 thisjob->jobnumb = jobcount;
541 } else if (jobcount == 0) {
542 /* We are asking to wait for all jobs (partp == 0), yet we never
545 Log("No file system partitions named %s* found; not salvaged\n",
546 VICE_PARTITION_PREFIX);
550 if (debug || Parallel == 1) {
552 SalvageFileSys(thisjob->partP, 0);
559 /* Check to see if thisjob is for a disk that we are already
560 * salvaging. If it is, link it in as the next job to do. The
561 * jobs array has 1 entry per disk being salvaged. numjobs is
562 * the total number of disks currently being salvaged. In
563 * order to keep the jobs array compact, when a disk is
564 * completed, the highest element in the jobs array is moved
565 * down to the now open slot.
567 for (j = 0; j < numjobs; j++) {
568 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
569 /* On same disk, add it to this list and return */
570 thisjob->nextjob = jobs[j]->nextjob;
571 jobs[j]->nextjob = thisjob;
578 /* Loop until we start thisjob or until all existing jobs are finished */
579 while (thisjob || (!partP && (numjobs > 0))) {
580 startjob = -1; /* No new job to start */
582 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
583 /* Either the max jobs are running or we have to wait for all
584 * the jobs to finish. In either case, we wait for at least one
585 * job to finish. When it's done, clean up after it.
587 pid = wait(&wstatus);
588 osi_Assert(pid != -1);
589 for (j = 0; j < numjobs; j++) { /* Find which job it is */
590 if (pid == jobs[j]->pid)
593 osi_Assert(j < numjobs);
594 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
595 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
598 numjobs--; /* job no longer running */
599 oldjob = jobs[j]; /* remember */
600 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
601 free(oldjob); /* free the old job */
603 /* If there is another partition on the disk to salvage, then
604 * say we will start it (startjob). If not, then put thisjob there
605 * and say we will start it.
607 if (jobs[j]) { /* Another partitions to salvage */
608 startjob = j; /* Will start it */
609 } else { /* There is not another partition to salvage */
611 jobs[j] = thisjob; /* Add thisjob */
613 startjob = j; /* Will start it */
615 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
616 startjob = -1; /* Don't start it - already running */
620 /* We don't have to wait for a job to complete */
622 jobs[numjobs] = thisjob; /* Add this job */
624 startjob = numjobs; /* Will start it */
628 /* Start up a new salvage job on a partition in job slot "startjob" */
629 if (startjob != -1) {
631 Log("Starting salvage of file system partition %s\n",
632 jobs[startjob]->partP->name);
634 /* For NT, we not only fork, but re-exec the salvager. Pass in the
635 * commands and pass the child job number via the data path.
638 nt_SalvagePartition(jobs[startjob]->partP->name,
639 jobs[startjob]->jobnumb);
640 jobs[startjob]->pid = pid;
645 jobs[startjob]->pid = pid;
651 for (fd = 0; fd < 16; fd++)
658 openlog("salvager", LOG_PID, useSyslogFacility);
662 (void)afs_snprintf(logFileName, sizeof logFileName,
664 AFSDIR_SERVER_SLVGLOG_FILEPATH,
665 jobs[startjob]->jobnumb);
666 logFile = afs_fopen(logFileName, "w");
671 SalvageFileSys1(jobs[startjob]->partP, 0);
676 } /* while ( thisjob || (!partP && numjobs > 0) ) */
678 /* If waited for all jobs to complete, now collect log files and return */
680 if (!useSyslog) /* if syslogging - no need to collect */
683 for (i = 0; i < jobcount; i++) {
684 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
685 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
686 if ((passLog = afs_fopen(logFileName, "r"))) {
687 while (fgets(buf, sizeof(buf), passLog)) {
692 (void)unlink(logFileName);
701 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
703 if (!canfork || debug || Fork() == 0) {
704 SalvageFileSys1(partP, singleVolumeNumber);
705 if (canfork && !debug) {
710 Wait("SalvageFileSys");
714 get_DevName(char *pbuffer, char *wpath)
716 char pbuf[128], *ptr;
717 strcpy(pbuf, pbuffer);
718 ptr = (char *)strrchr(pbuf, OS_DIRSEPC);
724 ptr = (char *)strrchr(pbuffer, OS_DIRSEPC);
726 strcpy(pbuffer, ptr + 1);
733 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
736 char inodeListPath[256];
737 FILE *inodeFile = NULL;
738 static char tmpDevName[100];
739 static char wpath[100];
740 struct VolumeSummary *vsp, *esp;
744 struct SalvInfo l_salvinfo;
745 struct SalvInfo *salvinfo = &l_salvinfo;
748 memset(salvinfo, 0, sizeof(*salvinfo));
755 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
756 Abort("Raced too many times with fileserver restarts while trying to "
757 "checkout/lock volumes; Aborted\n");
759 #ifdef AFS_DEMAND_ATTACH_FS
761 /* unlock all previous volume locks, since we're about to lock them
763 VLockFileReinit(&partP->volLockFile);
765 #endif /* AFS_DEMAND_ATTACH_FS */
767 salvinfo->fileSysPartition = partP;
768 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
769 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
772 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
773 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
774 name = partP->devName;
776 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
777 strcpy(tmpDevName, partP->devName);
778 name = get_DevName(tmpDevName, wpath);
779 salvinfo->fileSysDeviceName = name;
780 salvinfo->filesysfulldev = wpath;
783 if (singleVolumeNumber) {
784 #ifndef AFS_DEMAND_ATTACH_FS
785 /* only non-DAFS locks the partition when salvaging a single volume;
786 * DAFS will lock the individual volumes in the VG */
787 VLockPartition(partP->name);
788 #endif /* !AFS_DEMAND_ATTACH_FS */
792 /* salvageserver already setup fssync conn for us */
793 if ((programType != salvageServer) && !VConnectFS()) {
794 Abort("Couldn't connect to file server\n");
797 salvinfo->useFSYNC = 1;
798 AskOffline(salvinfo, singleVolumeNumber);
799 #ifdef AFS_DEMAND_ATTACH_FS
800 if (LockVolume(salvinfo, singleVolumeNumber)) {
803 #endif /* AFS_DEMAND_ATTACH_FS */
806 salvinfo->useFSYNC = 0;
807 VLockPartition(partP->name);
811 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
814 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
815 partP->name, name, (Testing ? "(READONLY mode)" : ""));
817 Log("***Forced salvage of all volumes on this partition***\n");
822 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
829 osi_Assert((dirp = opendir(salvinfo->fileSysPath)) != NULL);
830 while ((dp = readdir(dirp))) {
831 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
832 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
834 Log("Removing old salvager temp files %s\n", dp->d_name);
835 strcpy(npath, salvinfo->fileSysPath);
836 strcat(npath, OS_DIRSEP);
837 strcat(npath, dp->d_name);
843 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
845 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
846 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
848 snprintf(inodeListPath, 255, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
852 inodeFile = fopen(inodeListPath, "w+b");
854 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
857 /* Using nt_unlink here since we're really using the delete on close
858 * semantics of unlink. In most places in the salvager, we really do
859 * mean to unlink the file at that point. Those places have been
860 * modified to actually do that so that the NT crt can be used there.
862 code = nt_unlink(inodeListPath);
864 code = unlink(inodeListPath);
867 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
870 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
874 salvinfo->inodeFd = fileno(inodeFile);
875 if (salvinfo->inodeFd == -1)
876 Abort("Temporary file %s is missing...\n", inodeListPath);
877 afs_lseek(salvinfo->inodeFd, 0L, SEEK_SET);
878 if (ListInodeOption) {
879 PrintInodeList(salvinfo);
882 /* enumerate volumes in the partition.
883 * figure out sets of read-only + rw volumes.
884 * salvage each set, read-only volumes first, then read-write.
885 * Fix up inodes on last volume in set (whether it is read-write
888 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
892 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
893 i < salvinfo->nVolumesInInodeFile; i = j) {
894 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
896 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
898 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
899 struct VolumeSummary *tsp;
900 /* Scan volume list (from partition root directory) looking for the
901 * current rw volume number in the volume list from the inode scan.
902 * If there is one here that is not in the inode volume list,
904 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
906 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
908 /* Now match up the volume summary info from the root directory with the
909 * entry in the volume list obtained from scanning inodes */
910 salvinfo->inodeSummary[j].volSummary = NULL;
911 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
912 if (tsp->header.id == vid) {
913 salvinfo->inodeSummary[j].volSummary = tsp;
919 /* Salvage the group of volumes (several read-only + 1 read/write)
920 * starting with the current read-only volume we're looking at.
922 SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
925 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
926 for (; vsp < esp; vsp++) {
928 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
931 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
932 RemoveTheForce(salvinfo->fileSysPath);
934 if (!Testing && singleVolumeNumber) {
935 #ifdef AFS_DEMAND_ATTACH_FS
936 /* unlock vol headers so the fs can attach them when we AskOnline */
937 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
938 #endif /* AFS_DEMAND_ATTACH_FS */
940 AskOnline(salvinfo, singleVolumeNumber);
942 /* Step through the volumeSummary list and set all volumes on-line.
943 * The volumes were taken off-line in GetVolumeSummary.
945 for (j = 0; j < salvinfo->nVolumes; j++) {
946 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
950 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
951 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
954 fclose(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
958 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
961 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, vsp->fileName);
964 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
967 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
969 Log("Error %ld destroying volume disk header for volume %lu\n",
970 afs_printable_int32_ld(code),
971 afs_printable_uint32_lu(vsp->header.id));
974 /* make sure we actually delete the fileName file; ENOENT
975 * is fine, since VDestroyVolumeDiskHeader probably already
977 if (unlink(path) && errno != ENOENT) {
978 Log("Unable to unlink %s (errno = %d)\n", path, errno);
985 CompareInodes(const void *_p1, const void *_p2)
987 const struct ViceInodeInfo *p1 = _p1;
988 const struct ViceInodeInfo *p2 = _p2;
989 if (p1->u.vnode.vnodeNumber == INODESPECIAL
990 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
991 VolumeId p1rwid, p2rwid;
993 (p1->u.vnode.vnodeNumber ==
994 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
996 (p2->u.vnode.vnodeNumber ==
997 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1000 if (p1rwid > p2rwid)
1002 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1003 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
1004 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1005 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1006 if (p1->u.vnode.volumeId == p1rwid)
1008 if (p2->u.vnode.volumeId == p2rwid)
1010 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1012 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1013 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1014 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1016 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1018 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1020 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1022 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1024 /* The following tests are reversed, so that the most desirable
1025 * of several similar inodes comes first */
1026 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1027 #ifdef AFS_3DISPARES
1028 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1029 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1032 #ifdef AFS_SGI_EXMAG
1033 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1034 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1039 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1040 #ifdef AFS_3DISPARES
1041 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1042 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1045 #ifdef AFS_SGI_EXMAG
1046 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1047 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1052 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1053 #ifdef AFS_3DISPARES
1054 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1055 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1058 #ifdef AFS_SGI_EXMAG
1059 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1060 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1065 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1066 #ifdef AFS_3DISPARES
1067 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1068 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1071 #ifdef AFS_SGI_EXMAG
1072 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1073 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1082 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1083 struct InodeSummary *summary)
1085 VolumeId volume = ip->u.vnode.volumeId;
1086 VolumeId rwvolume = volume;
1091 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1093 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1095 rwvolume = ip->u.special.parentId;
1096 /* This isn't quite right, as there could (in error) be different
1097 * parent inodes in different special vnodes */
1099 if (maxunique < ip->u.vnode.vnodeUniquifier)
1100 maxunique = ip->u.vnode.vnodeUniquifier;
1104 summary->volumeId = volume;
1105 summary->RWvolumeId = rwvolume;
1106 summary->nInodes = n;
1107 summary->nSpecialInodes = nSpecial;
1108 summary->maxUniquifier = maxunique;
1112 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1114 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1115 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1116 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1121 * Collect list of inodes in file named by path. If a truly fatal error,
1122 * unlink the file and abort. For lesser errors, return -1. The file will
1123 * be unlinked by the caller.
1126 GetInodeSummary(struct SalvInfo *salvinfo, FILE *inodeFile, VolumeId singleVolumeNumber)
1128 struct afs_stat status;
1131 struct ViceInodeInfo *ip, *ip_save;
1132 struct InodeSummary summary;
1133 char summaryFileName[50];
1136 char *dev = salvinfo->fileSysPath;
1137 char *wpath = salvinfo->fileSysPath;
1139 char *dev = salvinfo->fileSysDeviceName;
1140 char *wpath = salvinfo->filesysfulldev;
1142 char *part = salvinfo->fileSysPath;
1146 /* This file used to come from vfsck; cobble it up ourselves now... */
1148 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1149 singleVolumeNumber ? OnlyOneVolume : 0,
1150 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1152 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1155 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1157 if (forceSal && !ForceSalvage) {
1158 Log("***Forced salvage of all volumes on this partition***\n");
1161 fseek(inodeFile, 0L, SEEK_SET);
1162 salvinfo->inodeFd = fileno(inodeFile);
1163 if (salvinfo->inodeFd == -1 || afs_fstat(salvinfo->inodeFd, &status) == -1) {
1164 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1166 tdir = (tmpdir ? tmpdir : part);
1168 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1169 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp"));
1171 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1172 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1174 summaryFile = afs_fopen(summaryFileName, "a+");
1175 if (summaryFile == NULL) {
1176 Abort("Unable to create inode summary file\n");
1180 /* Using nt_unlink here since we're really using the delete on close
1181 * semantics of unlink. In most places in the salvager, we really do
1182 * mean to unlink the file at that point. Those places have been
1183 * modified to actually do that so that the NT crt can be used there.
1185 code = nt_unlink(summaryFileName);
1187 code = unlink(summaryFileName);
1190 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1193 if (!canfork || debug || Fork() == 0) {
1195 unsigned long st_size=(unsigned long) status.st_size;
1196 nInodes = st_size / sizeof(struct ViceInodeInfo);
1198 fclose(summaryFile);
1199 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1200 RemoveTheForce(salvinfo->fileSysPath);
1202 struct VolumeSummary *vsp;
1205 GetVolumeSummary(salvinfo, singleVolumeNumber);
1207 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1209 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1212 Log("%s vice inodes on %s; not salvaged\n",
1213 singleVolumeNumber ? "No applicable" : "No", dev);
1216 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1218 fclose(summaryFile);
1220 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1223 if (read(salvinfo->inodeFd, ip, st_size) != st_size) {
1224 fclose(summaryFile);
1225 Abort("Unable to read inode table; %s not salvaged\n", dev);
1227 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1228 if (afs_lseek(salvinfo->inodeFd, 0, SEEK_SET) == -1
1229 || write(salvinfo->inodeFd, ip, st_size) != st_size) {
1230 fclose(summaryFile);
1231 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1236 CountVolumeInodes(ip, nInodes, &summary);
1237 if (fwrite(&summary, sizeof(summary), 1, summaryFile) != 1) {
1238 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1239 fclose(summaryFile);
1242 summary.index += (summary.nInodes);
1243 nInodes -= summary.nInodes;
1244 ip += summary.nInodes;
1247 ip = ip_save = NULL;
1248 /* Following fflush is not fclose, because if it were, debug mode would not work */
1249 if (fflush(summaryFile) == EOF || fsync(fileno(summaryFile)) == -1) {
1250 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1251 fclose(summaryFile);
1254 if (canfork && !debug) {
1259 if (Wait("Inode summary") == -1) {
1260 fclose(summaryFile);
1261 Exit(1); /* salvage of this partition aborted */
1264 osi_Assert(afs_fstat(fileno(summaryFile), &status) != -1);
1265 if (status.st_size != 0) {
1267 unsigned long st_status=(unsigned long)status.st_size;
1268 salvinfo->inodeSummary = (struct InodeSummary *)malloc(st_status);
1269 osi_Assert(salvinfo->inodeSummary != NULL);
1270 /* For GNU we need to do lseek to get the file pointer moved. */
1271 osi_Assert(afs_lseek(fileno(summaryFile), 0, SEEK_SET) == 0);
1272 ret = read(fileno(summaryFile), salvinfo->inodeSummary, st_status);
1273 osi_Assert(ret == st_status);
1275 salvinfo->nVolumesInInodeFile =(unsigned long)(status.st_size) / sizeof(struct InodeSummary);
1276 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1277 salvinfo->inodeSummary[i].volSummary = NULL;
1279 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)(status.st_size));
1280 fclose(summaryFile);
1284 /* Comparison routine for volume sort.
1285 This is setup so that a read-write volume comes immediately before
1286 any read-only clones of that volume */
1288 CompareVolumes(const void *_p1, const void *_p2)
1290 const struct VolumeSummary *p1 = _p1;
1291 const struct VolumeSummary *p2 = _p2;
1292 if (p1->header.parent != p2->header.parent)
1293 return p1->header.parent < p2->header.parent ? -1 : 1;
1294 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1296 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1298 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
/**
 * Gleans volumeSummary information by asking the fileserver
 *
 * @param[in] singleVolumeNumber  the volume we're salvaging. 0 if we're
 *                                salvaging a whole partition
 *
 * @return whether we obtained the volume summary information or not
 *  @retval 0  success; we obtained the volume summary information
 *  @retval -1 we raced with a fileserver restart; volume locks and checkout
 *             must be retried
 *  @retval 1  we did not get the volume summary information; either the
 *             fileserver responded with an error, or we are not supposed to
 *             ask the fileserver for the information (e.g. we are salvaging
 *             the entire partition or we are not the salvageserver)
 *
 * @note for non-DAFS, always returns 1
 */
/*
 * Implementation notes.  All of the work below is compiled only when both
 * FSSYNC_BUILD_CLIENT and AFS_DEMAND_ATTACH_FS are defined, and runs only
 * when programType is salvageServer and a single volume was requested.
 *
 *  - FSYNC_VGCQuery is re-issued for as long as it fails with reason
 *    FSYNC_PART_SCANNING, sleeping between attempts; the delay ramps from
 *    1 second up to a cap of 10 seconds.
 *  - A failure with reason FSYNC_UNKNOWN_VOLID is handled as an empty
 *    response: q_res is zeroed and q_res.rw is set to singleVolumeNumber.
 *  - If the response names a different RW id than the one requested, the
 *    salvage is handed to SALVSYNC_LinkVolume (when SALVSYNC_BUILD_CLIENT
 *    is defined) and the process exits with SALSRV_EXIT_VOLGROUP_LINK.
 *  - Otherwise room for VOL_VG_MAX_VOLS VolumeSummary entries is
 *    allocated.  Each non-zero child id other than singleVolumeNumber is
 *    passed to AskOffline and LockVolume; for every child the disk header
 *    is read with VReadVolumeDiskHeader, converted with DiskToVolumeHeader
 *    and given its external file name.  The array is then sorted with
 *    qsort.
 */
1319 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1322 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1323 if (programType == salvageServer) {
1324 if (singleVolumeNumber) {
1325 FSSYNC_VGQry_response_t q_res;
1327 struct VolumeSummary *vsp;
1329 struct VolumeDiskHeader diskHdr;
1331 memset(&res, 0, sizeof(res));
1333 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1336 * We must wait for the partition to finish scanning before
1337 * can continue, since we will not know if we got the entire
1338 * VG membership unless the partition is fully scanned.
1339 * We could, in theory, just scan the partition ourselves if
1340 * the VG cache is not ready, but we would be doing the exact
1341 * same scan the fileserver is doing; it will almost always
1342 * be faster to wait for the fileserver. The only exceptions
1343 * are if the partition does not take very long to scan, and
1344 * in that case it's fast either way, so who cares?
1346 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1347 Log("waiting for fileserver to finish scanning partition %s...\n",
1348 salvinfo->fileSysPartition->name);
1350 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1351 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1352 * just so small partitions don't need to wait over 10
1353 * seconds every time, and large partitions are generally
1354 * polled only once every ten seconds. */
1355 sleep((i > 10) ? (i = 10) : i);
1357 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1361 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1362 /* This can happen if there's no header for the volume
1363 * we're salvaging, or no headers exist for the VG (if
1364 * we're salvaging an RW). Act as if we got a response
1365 * with no VG members. The headers may be created during
1366 * salvaging, if there are inodes in this VG. */
1368 memset(&q_res, 0, sizeof(q_res));
1369 q_res.rw = singleVolumeNumber;
1373 Log("fileserver refused VGCQuery request for volume %lu on "
1374 "partition %s, code %ld reason %ld\n",
1375 afs_printable_uint32_lu(singleVolumeNumber),
1376 salvinfo->fileSysPartition->name,
1377 afs_printable_int32_ld(code),
1378 afs_printable_int32_ld(res.hdr.reason));
1382 if (q_res.rw != singleVolumeNumber) {
1383 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1384 afs_printable_uint32_lu(singleVolumeNumber),
1385 afs_printable_uint32_lu(q_res.rw));
1386 #ifdef SALVSYNC_BUILD_CLIENT
1387 if (SALVSYNC_LinkVolume(q_res.rw,
1389 salvinfo->fileSysPartition->name,
1391 Log("schedule request failed\n");
1393 #endif /* SALVSYNC_BUILD_CLIENT */
1394 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1397 salvinfo->volumeSummaryp = malloc(VOL_VG_MAX_VOLS * sizeof(struct VolumeSummary));
1398 osi_Assert(salvinfo->volumeSummaryp != NULL);
1400 salvinfo->nVolumes = 0;
1401 vsp = salvinfo->volumeSummaryp;
1403 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1404 char name[VMAXPATHLEN];
1406 if (!q_res.children[i]) {
1410 /* AskOffline for singleVolumeNumber was called much earlier */
1411 if (q_res.children[i] != singleVolumeNumber) {
1412 AskOffline(salvinfo, q_res.children[i]);
1413 if (LockVolume(salvinfo, q_res.children[i])) {
1419 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1421 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1422 afs_printable_uint32_lu(q_res.children[i]));
1427 DiskToVolumeHeader(&vsp->header, &diskHdr);
1428 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1429 vsp->fileName = ToString(name);
1430 salvinfo->nVolumes++;
1434 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1439 Log("Cannot get volume summary from fileserver; falling back to scanning "
1440 "entire partition\n");
1443 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
1448 * count how many volume headers are found by VWalkVolumeHeaders.
1450 * @param[in] dp the disk partition (unused)
1451 * @param[in] name full path to the .vol header (unused)
1452 * @param[in] hdr the header data (unused)
1453 * @param[in] last whether this is the last try or not (unused)
1454 * @param[in] rock actually an afs_int32*; the running count of how many
1455 * volumes we have found
1460 CountHeader(struct DiskPartition64 *dp, const char *name,
1461 struct VolumeDiskHeader *hdr, int last, void *rock)
1463 afs_int32 *nvols = (afs_int32 *)rock;
1469 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
1472 struct SalvageScanParams {
1473 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1474 * vol id of the VG we're salvaging */
1475 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1476 * we're filling in */
1477 afs_int32 nVolumes; /**< # of vols we've encountered */
1478 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1479 * # of vols we've alloc'd memory for) */
1480 int retry; /**< do we need to retry vol lock/checkout? */
1481 struct SalvInfo *salvinfo; /**< salvage job info */
/**
 * records volume summary info found from VWalkVolumeHeaders.
 *
 * Found volumes are also taken offline if they are in the specific volume
 * group we are looking for.
 *
 * @param[in] dp   the disk partition
 * @param[in] name full path to the .vol header
 * @param[in] hdr  the header data
 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
 * @param[in] rock actually a struct SalvageScanParams*, containing the
 *                 information needed to record the volume summary data
 *
 * @return operation status
 *  @retval -1 volume locking raced with fileserver restart; checking out
 *             and locking volumes needs to be retried
 *  @retval 1  volume header is mis-named and should be deleted
 */
/*
 * Implementation notes:
 *  - The on-disk header is first converted with DiskToVolumeHeader into a
 *    local VolumeSummary; all checks are made against that copy.
 *  - If the requested id is itself a clone (header.id equals
 *    singleVolumeNumber but header.parent does not), then with programType
 *    salvageServer and SALVSYNC_BUILD_CLIENT defined the salvage of the
 *    parent is scheduled through SALVSYNC_LinkVolume and the process exits
 *    with SALSRV_EXIT_VOLGROUP_LINK.  A separate message reports that a
 *    read-only volume is not salvaged.
 *  - For headers that take part in the salvage (partition salvage, or id
 *    or parent equal to singleVolumeNumber) the base file name is compared
 *    with the VFORMAT name built from header.id.
 *  - AskOffline is issued for volumes other than singleVolumeNumber, and
 *    only when the name is good or this is the last attempt.  Under
 *    AFS_DEMAND_ATTACH_FS the volume is additionally locked with
 *    LockVolume.
 *  - Counting more headers than params->totalVolumes is fatal (Abort);
 *    otherwise the summary is copied into the slot at params->vsp.
 */
1504 RecordHeader(struct DiskPartition64 *dp, const char *name,
1505 struct VolumeDiskHeader *hdr, int last, void *rock)
1507 char nameShouldBe[64];
1508 struct SalvageScanParams *params;
1509 struct VolumeSummary summary;
1510 VolumeId singleVolumeNumber;
1511 struct SalvInfo *salvinfo;
1513 params = (struct SalvageScanParams *)rock;
1515 singleVolumeNumber = params->singleVolumeNumber;
1516 salvinfo = params->salvinfo;
1518 DiskToVolumeHeader(&summary.header, hdr);
1520 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1521 && summary.header.parent != singleVolumeNumber) {
1523 if (programType == salvageServer) {
1524 #ifdef SALVSYNC_BUILD_CLIENT
1525 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1526 summary.header.id, summary.header.parent);
1527 if (SALVSYNC_LinkVolume(summary.header.parent,
1531 Log("schedule request failed\n");
1534 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1537 Log("%u is a read-only volume; not salvaged\n",
1538 singleVolumeNumber);
1543 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1544 || summary.header.parent == singleVolumeNumber) {
1546 /* check if the header file is incorrectly named */
1548 const char *base = strrchr(name, OS_DIRSEPC);
1555 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1556 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1559 if (strcmp(nameShouldBe, base)) {
1560 /* .vol file has wrong name; retry/delete */
1564 if (!badname || last) {
1565 /* only offline the volume if the header is good, or if this is
1566 * the last try looking at it; avoid AskOffline'ing the same vol
1569 if (singleVolumeNumber
1570 && summary.header.id != singleVolumeNumber) {
1571 /* don't offline singleVolumeNumber; we already did that
1574 AskOffline(salvinfo, summary.header.id);
1576 #ifdef AFS_DEMAND_ATTACH_FS
1578 /* don't lock the volume if the header is bad, since we're
1579 * about to delete it anyway. */
1580 if (LockVolume(salvinfo, summary.header.id)) {
1585 #endif /* AFS_DEMAND_ATTACH_FS */
1589 if (last && !Showmode) {
1590 Log("Volume header file %s is incorrectly named (should be %s "
1591 "not %s); %sdeleted (it will be recreated later, if "
1592 "necessary)\n", name, nameShouldBe, base,
1593 (Testing ? "it would have been " : ""));
1598 summary.fileName = ToString(base);
1601 if (params->nVolumes > params->totalVolumes) {
1602 /* We found more volumes than we found on the first partition walk;
1603 * apparently something created a volume while we were
1604 * partition-salvaging, or we found more than 20 vols when salvaging a
1605 * particular volume. Abort if we detect this, since other programs
1606 * supposed to not touch the partition while it is partition-salvaging,
1607 * and we shouldn't find more than 20 vols in a VG.
1609 Abort("Found %ld vol headers, but should have found at most %ld! "
1610 "Make sure the volserver/fileserver are not running at the "
1611 "same time as a partition salvage\n",
1612 afs_printable_int32_ld(params->nVolumes),
1613 afs_printable_int32_ld(params->totalVolumes));
1616 memcpy(params->vsp, &summary, sizeof(summary));
/**
 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
 *
 * If the header could not be read in at all, the header is always unlinked.
 * If instead RecordHeader said the header was bad (that is, the header file
 * is mis-named), we only unlink if we are doing a partition salvage, as
 * opposed to salvaging a specific volume group.
 *
 * @param[in] dp   the disk partition
 * @param[in] name full path to the .vol header
 * @param[in] hdr  header data, or NULL if the header could not be read
 * @param[in] rock actually a struct SalvageScanParams*, with some information
 *                 about the scan (its singleVolumeNumber field is consulted)
 */
/*
 * Implementation notes: the decision to remove the file is collected in
 * dounlink, and unlink() is called at most once, at the end.  A failing
 * unlink() is only logged together with errno; it is not treated as fatal.
 */
1638 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1639 struct VolumeDiskHeader *hdr, void *rock)
1641 struct SalvageScanParams *params;
1644 params = (struct SalvageScanParams *)rock;
1647 /* no header; header is too bogus to read in at all */
1649 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1655 } else if (!params->singleVolumeNumber) {
1656 /* We were able to read in a header, but RecordHeader said something
1657 * was wrong with it. We only unlink those if we are doing a partition
1664 if (dounlink && unlink(name)) {
1665 Log("Error %d while trying to unlink %s\n", errno, name);
1670 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1671 * the fileserver for VG information, or by scanning the /vicepX partition.
1673 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1674 * are salvaging, or 0 if this is a partition
1677 * @return operation status
1679 * @retval -1 we raced with a fileserver restart; checking out and locking
1680 * volumes must be retried
1683 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1685 afs_int32 nvols = 0;
1686 struct SalvageScanParams params;
1689 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1691 /* we successfully got the vol information from the fileserver; no
1692 * need to scan the partition */
1696 /* we need to retry volume checkout */
1700 if (!singleVolumeNumber) {
1701 /* Count how many volumes we have in /vicepX */
1702 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1705 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1710 nvols = VOL_VG_MAX_VOLS;
1713 salvinfo->volumeSummaryp = malloc(nvols * sizeof(struct VolumeSummary));
1714 osi_Assert(salvinfo->volumeSummaryp != NULL);
1716 params.singleVolumeNumber = singleVolumeNumber;
1717 params.vsp = salvinfo->volumeSummaryp;
1718 params.nVolumes = 0;
1719 params.totalVolumes = nvols;
1721 params.salvinfo = salvinfo;
1723 /* walk the partition directory of volume headers and record the info
1724 * about them; unlinking invalid headers */
1725 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1726 UnlinkHeader, ¶ms);
1728 /* we apparently need to retry checking-out/locking volumes */
1732 Abort("Failed to get volume header summary\n");
1734 salvinfo->nVolumes = params.nVolumes;
1736 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1742 /* Find the link table. This should be associated with the RW volume or, if
1743 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
1746 FindLinkHandle(struct InodeSummary *isp, int nVols,
1747 struct ViceInodeInfo *allInodes)
1750 struct ViceInodeInfo *ip;
1752 for (i = 0; i < nVols; i++) {
1753 ip = allInodes + isp[i].index;
1754 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1755 if (ip[j].u.special.type == VI_LINKTABLE)
1756 return ip[j].inodeNumber;
1763 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1765 struct versionStamp version;
1768 if (!VALID_INO(ino))
1770 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
1771 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1772 if (!VALID_INO(ino))
1774 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1775 isp->RWvolumeId, errno);
1776 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1777 fdP = IH_OPEN(salvinfo->VGLinkH);
1779 Abort("Can't open link table for volume %u (error = %d)\n",
1780 isp->RWvolumeId, errno);
1782 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1783 Abort("Can't truncate link table for volume %u (error = %d)\n",
1784 isp->RWvolumeId, errno);
1786 version.magic = LINKTABLEMAGIC;
1787 version.version = LINKTABLEVERSION;
1789 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1791 Abort("Can't truncate link table for volume %u (error = %d)\n",
1792 isp->RWvolumeId, errno);
1794 FDH_REALLYCLOSE(fdP);
1796 /* If the volume summary exits (i.e., the V*.vol header file exists),
1797 * then set this inode there as well.
1799 if (isp->volSummary)
1800 isp->volSummary->header.linkTable = ino;
#ifdef AFS_NT40_ENV
/*
 * Thread start routine used by SalvageVolumeGroup: unpacks the SVGParms_t
 * passed as arg and runs DoSalvageVolumeGroup with its contents.
 */
void *
nt_SVG(void *arg)
{
    SVGParms_t *parms = (SVGParms_t *) arg;

    DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
    return NULL;
}

/*
 * Salvage one volume group by running DoSalvageVolumeGroup on a separate,
 * joinable thread and waiting for that thread to finish.
 *
 * The per-volume-group fields of salvinfo are reset first.  If the thread
 * cannot be set up or started, the failure is logged and the volume group
 * is left unsalvaged.  The thread attribute object is destroyed on every
 * path once it has been initialised.
 */
void
SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
{
    pthread_t tid;
    pthread_attr_t tattr;
    int code;
    SVGParms_t parms;

    /* Initialize per volume global variables, even if later code does so */
    salvinfo->VolumeChanged = 0;
    salvinfo->VGLinkH = NULL;
    salvinfo->VGLinkH_cnt = 0;
    memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));

    parms.svgp_inodeSummaryp = isp;
    parms.svgp_count = nVols;
    parms.svgp_salvinfo = salvinfo;
    code = pthread_attr_init(&tattr);
    if (code) {
        Log("Failed to salvage volume group %u: pthread_attr_init()\n",
            isp->RWvolumeId);
        return;
    }
    code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
    if (code) {
        Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
        (void)pthread_attr_destroy(&tattr);
        return;
    }
    code = pthread_create(&tid, &tattr, nt_SVG, &parms);
    /* the attribute object is no longer needed once creation was attempted */
    (void)pthread_attr_destroy(&tattr);
    if (code) {
        Log("Failed to create thread to salvage volume group %u\n",
            isp->RWvolumeId);
        return;
    }
    /* parms lives on this stack frame, so wait for the worker to finish */
    (void)pthread_join(tid, NULL);
}
#endif /* AFS_NT40_ENV */
/*
 * Salvage one volume group.  isp points at nVols consecutive InodeSummary
 * entries; entry 0 is treated as the read-write volume when its volumeId
 * equals its RWvolumeId and it has at least one special inode.
 *
 * Outline, as far as the code below shows:
 *  - Unless ForceSalvage is set, QuickCheck is consulted first.  This
 *    happens always when ShowMounts is off, and with ShowMounts on only
 *    when there is no read-write volume.
 *  - When canfork is set and debug is not, Fork() is called and the parent
 *    blocks in Wait() while the child does the work.
 *  - The inode records of the whole group are read from salvinfo->inodeFd
 *    into one buffer.  allInodes is that buffer rebased by isp->index, so
 *    the per-volume index values can be applied to it directly.
 *  - Under AFS_NAMEI_ENV the link table inode is looked up with
 *    FindLinkHandle and opened.  If it is missing or cannot be opened it is
 *    recreated through CreateLinkTable and every non-special inode gets a
 *    link count of 1 via namei_SetLinkCount.  The log text indicates that
 *    in Testing mode the recreation is only reported.
 *  - Volumes are handled from the last entry down to salvageTo, each one
 *    in two passes (check = 1, then check = 0) through
 *    SalvageVolumeHeaderFile and SalvageVnodes; MaybeZapVolume is called
 *    when one of them fails.
 *  - The remaining linkCount of each inode record is then worked off with
 *    IH_DEC (positive counts) or IH_INC (negative counts).  Under
 *    AFS_NAMEI_ENV the link table inode itself is adjusted last.
 *  - SalvageVolume performs the directory consistency checks on the rw
 *    volume, after which the link table handle is released.
 */
1853 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1855 struct ViceInodeInfo *inodes, *allInodes, *ip;
1856 int i, totalInodes, size, salvageTo;
1860 int dec_VGLinkH = 0;
1862 FdHandle_t *fdP = NULL;
1864 salvinfo->VGLinkH_cnt = 0;
1865 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1866 && isp->nSpecialInodes > 0);
1867 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1868 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1871 if (ShowMounts && !haveRWvolume)
1873 if (canfork && !debug && Fork() != 0) {
1874 (void)Wait("Salvage volume group");
1877 for (i = 0, totalInodes = 0; i < nVols; i++)
1878 totalInodes += isp[i].nInodes;
1879 size = totalInodes * sizeof(struct ViceInodeInfo);
1880 inodes = (struct ViceInodeInfo *)malloc(size);
1881 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1882 * for the partition, if all the inodes
1883 * had been read into memory */
1884 osi_Assert(afs_lseek
1885 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1887 osi_Assert(read(salvinfo->inodeFd, inodes, size) == size);
1889 /* Don't try to salvage a read write volume if there isn't one on this
1891 salvageTo = haveRWvolume ? 0 : 1;
1893 #ifdef AFS_NAMEI_ENV
1894 ino = FindLinkHandle(isp, nVols, allInodes);
1895 if (VALID_INO(ino)) {
1896 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1897 fdP = IH_OPEN(salvinfo->VGLinkH);
1899 if (!VALID_INO(ino) || fdP == NULL) {
1900 Log("%s link table for volume %u.\n",
1901 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1903 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1906 struct ViceInodeInfo *ip;
1907 CreateLinkTable(salvinfo, isp, ino);
1908 fdP = IH_OPEN(salvinfo->VGLinkH);
1909 /* Sync fake 1 link counts to the link table, now that it exists */
1911 for (i = 0; i < nVols; i++) {
1912 ip = allInodes + isp[i].index;
1913 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1914 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1921 FDH_REALLYCLOSE(fdP);
1923 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1926 /* Salvage in reverse order--read/write volume last; this way any
1927 * Inodes not referenced by the time we salvage the read/write volume
1928 * can be picked up by the read/write volume */
1929 /* ACTUALLY, that's not done right now--the inodes just vanish */
1930 for (i = nVols - 1; i >= salvageTo; i--) {
1932 struct InodeSummary *lisp = &isp[i];
1933 #ifdef AFS_NAMEI_ENV
1934 /* If only the RO is present on this partition, the link table
1935 * shows up as a RW volume special file. Need to make sure the
1936 * salvager doesn't try to salvage the non-existent RW.
1938 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1939 /* If this only special inode is the link table, continue */
1940 if (inodes->u.special.type == VI_LINKTABLE) {
1947 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1948 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1949 /* Check inodes twice. The second time do things seriously. This
1950 * way the whole RO volume can be deleted, below, if anything goes wrong */
1951 for (check = 1; check >= 0; check--) {
1953 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
1955 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
1956 if (rw && deleteMe) {
1957 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1958 * volume won't be called */
1964 if (rw && check == 1)
1966 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
1967 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
1973 /* Fix actual inode counts */
1976 Log("totalInodes %d\n",totalInodes);
1977 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1978 static int TraceBadLinkCounts = 0;
1979 #ifdef AFS_NAMEI_ENV
1980 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
1981 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
1982 VGLinkH_p1 = ip->u.param[0];
1983 continue; /* Deal with this last. */
1986 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1987 TraceBadLinkCounts--; /* Limit reports, per volume */
1988 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
1990 while (ip->linkCount > 0) {
1991 /* below used to assert, not break */
1993 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1994 Log("idec failed. inode %s errno %d\n",
1995 PrintInode(stmp, ip->inodeNumber), errno);
2001 while (ip->linkCount < 0) {
2002 /* these used to be asserts */
2004 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2005 Log("iinc failed. inode %s errno %d\n",
2006 PrintInode(stmp, ip->inodeNumber), errno);
2013 #ifdef AFS_NAMEI_ENV
2014 while (dec_VGLinkH > 0) {
2015 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2016 Log("idec failed on link table, errno = %d\n", errno);
2020 while (dec_VGLinkH < 0) {
2021 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2022 Log("iinc failed on link table, errno = %d\n", errno);
2029 /* Directory consistency checks on the rw volume */
2031 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2032 IH_RELEASE(salvinfo->VGLinkH);
2034 if (canfork && !debug) {
/*
 * Cheap pre-check of a volume group, run before the full salvage.
 *
 * For each of the nVols summaries, the volume info special inode named in
 * the volume header is read.  The header passes only if the read returns a
 * complete VolumeDiskData whose stamp magic is VOLUMEINFOMAGIC, whose
 * dontSalvage field equals DONT_SALVAGE, and whose needsSalvaged and
 * destroyMe fields are both zero.  A header that passes but is still marked
 * inUse is rewritten with inUse cleared and inService set.
 *
 * NOTE(review): the caller skips the full salvage when this returns
 * non-zero and ForceSalvage is off; the return statements themselves are
 * not visible here, so confirm the exact return values.
 */
2041 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2043 /* Check headers BEFORE forking */
2047 for (i = 0; i < nVols; i++) {
2048 struct VolumeSummary *vs = isp[i].volSummary;
2049 VolumeDiskData volHeader;
2051 /* Don't salvage just because phantom rw volume is there... */
2052 /* (If a read-only volume exists, read/write inodes must also exist) */
2053 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2057 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2058 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2059 == sizeof(volHeader)
2060 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2061 && volHeader.dontSalvage == DONT_SALVAGE
2062 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2063 if (volHeader.inUse != 0) {
2064 volHeader.inUse = 0;
2065 volHeader.inService = 1;
2067 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2068 != sizeof(volHeader)) {
/* SalvageVolumeHeaderFile
 *
 * Salvage the top level V*.vol header file. Make sure the special files
 * exist and that there are no duplicates.
 *
 * Calls SalvageHeader for each possible type of volume special file.
 */
/*
 * Parameters, as they are used below:
 *   salvinfo - salvage job state (link table handle, partition paths)
 *   isp      - inode summary of the volume being handled
 *   inodes   - inode array; the special inodes of this volume start at
 *              inodes[isp->index]
 *   RW       - non-zero when this is the read-write volume
 *   check    - per the caller, 1 on the first (checking) pass and 0 on the
 *              second pass; link counts and the RO in-use bit are touched
 *              only when it is 0
 *   deleteMe - handed through to SalvageHeader
 *
 * Steps visible in the body:
 *  1. When a volume summary exists, special inodes that the existing
 *     volume header already references are remembered in goodspecial[].
 *  2. tempHeader is rebuilt from scratch (magic, version, id, parent).
 *  3. Neighbouring special inodes of the same type are reported as
 *     duplicates.  If the header referenced one of them it is preferred;
 *     otherwise the salvage of the volume is reported as aborted.
 *  4. Each special inode is then classified: out-of-range types are logged
 *     as rubbish, skipped duplicates are logged as orphans, and the
 *     remaining non-obsolete ones are recorded in tempHeader through the
 *     stuff[] table.
 *  5. SalvageHeader is called once per special file type; the link table
 *     slot is taken from salvinfo->VGLinkH when that inode is valid.
 *  6. If no V*.vol header was recorded, one is created
 *     (VCreateVolumeDiskHeader); otherwise the stored header is apparently
 *     compared with tempHeader and rewritten (VWriteVolumeDiskHeader) when
 *     it differs.  Finally the volume info handle is initialised.
 *
 * NOTE(review): the VolumeSummary obtained from malloc for a missing header
 * is dereferenced on the next visible line without a NULL check, and the
 * strcpy into the 64-byte headerName has no length bound -- confirm both
 * are safe for all callers.
 */
2093 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2094 struct ViceInodeInfo *inodes, int RW,
2095 int check, int *deleteMe)
2098 struct ViceInodeInfo *ip;
2099 int allinodesobsolete = 1;
2100 struct VolumeDiskHeader diskHeader;
2101 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2103 struct VolumeHeader tempHeader;
2104 struct afs_inode_info stuff[MAXINODETYPE];
2106 /* keeps track of special inodes that are probably 'good'; they are
2107 * referenced in the vol header, and are included in the given inodes
2112 } goodspecial[MAXINODETYPE];
2117 memset(goodspecial, 0, sizeof(goodspecial));
2119 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2121 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2123 Log("cannot allocate memory for inode skip array when salvaging "
2124 "volume %lu; not performing duplicate special inode recovery\n",
2125 afs_printable_uint32_lu(isp->volumeId));
2126 /* still try to perform the salvage; the skip array only does anything
2127 * if we detect duplicate special inodes */
2130 init_inode_info(&tempHeader, stuff);
2133 * First, look at the special inodes and see if any are referenced by
2134 * the existing volume header. If we find duplicate special inodes, we
2135 * can use this information to use the referenced inode (it's more
2136 * likely to be the 'good' one), and throw away the duplicates.
2138 if (isp->volSummary && skip) {
2139 /* use tempHeader, so we can use the stuff[] array to easily index
2140 * into the isp->volSummary special inodes */
2141 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2143 for (i = 0; i < isp->nSpecialInodes; i++) {
2144 ip = &inodes[isp->index + i];
2145 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2146 /* will get taken care of in a later loop */
2149 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2150 goodspecial[ip->u.special.type-1].valid = 1;
2151 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2156 memset(&tempHeader, 0, sizeof(tempHeader));
2157 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2158 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2159 tempHeader.id = isp->volumeId;
2160 tempHeader.parent = isp->RWvolumeId;
2162 /* Check for duplicates (inodes are sorted by type field) */
2163 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2164 ip = &inodes[isp->index + i];
2165 if (ip->u.special.type == (ip + 1)->u.special.type) {
2166 afs_ino_str_t stmp1, stmp2;
2168 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2169 /* Will be caught in the loop below */
2173 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2174 ip->u.special.type, isp->volumeId,
2175 PrintInode(stmp1, ip->inodeNumber),
2176 PrintInode(stmp2, (ip+1)->inodeNumber));
2178 if (skip && goodspecial[ip->u.special.type-1].valid) {
2179 Inode gi = goodspecial[ip->u.special.type-1].inode;
2182 Log("using special inode referenced by vol header (%s)\n",
2183 PrintInode(stmp1, gi));
2186 /* the volume header references some special inode of
2187 * this type in the inodes array; are we it? */
2188 if (ip->inodeNumber != gi) {
2190 } else if ((ip+1)->inodeNumber != gi) {
2191 /* in case this is the last iteration; we need to
2192 * make sure we check ip+1, too */
2197 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2205 for (i = 0; i < isp->nSpecialInodes; i++) {
2207 ip = &inodes[isp->index + i];
2208 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2210 Log("Rubbish header inode %s of type %d\n",
2211 PrintInode(stmp, ip->inodeNumber),
2212 ip->u.special.type);
2218 Log("Rubbish header inode %s of type %d; deleted\n",
2219 PrintInode(stmp, ip->inodeNumber),
2220 ip->u.special.type);
2221 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2222 if (skip && skip[i]) {
2223 if (orphans == ORPH_REMOVE) {
2224 Log("Removing orphan special inode %s of type %d\n",
2225 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2228 Log("Ignoring orphan special inode %s of type %d\n",
2229 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2230 /* fall through to the ip->linkCount--; line below */
2233 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2234 allinodesobsolete = 0;
2236 if (!check && ip->u.special.type != VI_LINKTABLE)
2237 ip->linkCount--; /* Keep the inode around */
2245 if (allinodesobsolete) {
2252 salvinfo->VGLinkH_cnt++; /* one for every header. */
2254 if (!RW && !check && isp->volSummary) {
2255 ClearROInUseBit(isp->volSummary);
2259 for (i = 0; i < MAXINODETYPE; i++) {
2260 if (stuff[i].inodeType == VI_LINKTABLE) {
2261 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2262 * And we may have recreated the link table earlier, so set the
2263 * RW header as well.
2265 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2266 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2270 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2274 if (isp->volSummary == NULL) {
2276 char headerName[64];
2277 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2278 (void)afs_snprintf(path, sizeof path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, headerName);
2280 Log("No header file for volume %u\n", isp->volumeId);
2284 Log("No header file for volume %u; %screating %s\n",
2285 isp->volumeId, (Testing ? "it would have been " : ""),
2287 isp->volSummary = (struct VolumeSummary *)
2288 malloc(sizeof(struct VolumeSummary));
2289 isp->volSummary->fileName = ToString(headerName);
2291 writefunc = VCreateVolumeDiskHeader;
2294 char headerName[64];
2295 /* hack: these two fields are obsolete... */
2296 isp->volSummary->header.volumeAcl = 0;
2297 isp->volSummary->header.volumeMountTable = 0;
2300 (&isp->volSummary->header, &tempHeader,
2301 sizeof(struct VolumeHeader))) {
2302 /* We often remove the name before calling us, so we make a fake one up */
2303 if (isp->volSummary->fileName) {
2304 strcpy(headerName, isp->volSummary->fileName);
2306 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2307 isp->volSummary->fileName = ToString(headerName);
2309 (void)afs_snprintf(path, sizeof path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, headerName);
2311 Log("Header file %s is damaged or no longer valid%s\n", path,
2312 (check ? "" : "; repairing"));
2316 writefunc = VWriteVolumeDiskHeader;
2320 memcpy(&isp->volSummary->header, &tempHeader,
2321 sizeof(struct VolumeHeader));
2324 Log("It would have written a new header file for volume %u\n",
2328 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2329 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2331 Log("Error %ld writing volume header file for volume %lu\n",
2332 afs_printable_int32_ld(code),
2333 afs_printable_uint32_lu(diskHeader.id));
2338 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2339 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader: validate, and where allowed recreate, one special inode
 * referenced by the volume header.
 *
 * salvinfo  salvage job state; fileSysDevice and fileSysPath are read here
 *           and salvinfo->VolInfo receives the volume info at the end.
 * sp        describes the special inode: sp->inode points at its inode
 *           number; sp->inodeType, sp->size, sp->stamp and sp->description
 *           drive validation and the log messages.
 * isp       inode summary for the volume (volumeId, RWvolumeId and
 *           maxUniquifier are used).
 * check     check-mode flag; the "Internal error: recreating volume header
 *           (%s) in check mode" message shows that recreating is not
 *           expected while it is set.
 * deleteMe  not referenced on the lines visible here; presumably set when
 *           the volume should be destroyed -- TODO confirm.
 *
 * NOTE(review): this listing omits some lines of the function (braces,
 * short statements, returns); the comments describe only what is visible.
 */
2344 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2345 struct InodeSummary *isp, int check, int *deleteMe)
2348 VolumeDiskData volumeInfo;
2349 struct versionStamp fileHeader;
2358 #ifndef AFS_NAMEI_ENV
2359 if (sp->inodeType == VI_LINKTABLE)
/* A zero inode number means the header has no inode for this special file. */
2362 if (*(sp->inode) == 0) {
2364 Log("Missing inode in volume header (%s)\n", sp->description);
2368 Log("Missing inode in volume header (%s); %s\n", sp->description,
2369 (Testing ? "it would have recreated it" : "recreating"));
/* Create a replacement special inode of the same type. */
2372 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2373 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2374 if (!VALID_INO(*(sp->inode)))
2376 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2377 sp->description, errno);
/* Open the special inode so that its contents can be validated. */
2382 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2383 fdP = IH_OPEN(specH);
2384 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2385 /* bail out early and destroy the volume */
2387 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2394 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2395 sp->description, errno);
/* The data must be readable in full and carry the expected magic number. */
2398 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2399 || header.fileHeader.magic != sp->stamp.magic)) {
2401 Log("Part of the header (%s) is corrupted\n", sp->description);
2402 FDH_REALLYCLOSE(fdP);
2406 Log("Part of the header (%s) is corrupted; recreating\n",
2409 /* header can be garbage; make sure we don't read garbage data from
2411 memset(&header, 0, sizeof(header));
2413 if (sp->inodeType == VI_VOLINFO
2414 && header.volumeInfo.destroyMe == DESTROY_ME) {
2417 FDH_REALLYCLOSE(fdP);
2421 if (recreate && !Testing) {
2424 ("Internal error: recreating volume header (%s) in check mode\n",
2426 nBytes = FDH_TRUNC(fdP, 0);
2428 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2429 sp->description, errno);
2431 /* The following code should be moved into vutil.c */
2432 if (sp->inodeType == VI_VOLINFO) {
/* Rebuild the volume info from scratch under a placeholder "bogus.<id>" name. */
2434 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2435 header.volumeInfo.stamp = sp->stamp;
2436 header.volumeInfo.id = isp->volumeId;
2437 header.volumeInfo.parentId = isp->RWvolumeId;
2438 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2439 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2440 isp->volumeId, isp->volumeId);
2441 header.volumeInfo.inService = 0;
2442 header.volumeInfo.blessed = 0;
2443 /* The + 1000 is a hack in case there are any files out in venus caches */
2444 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2445 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2446 header.volumeInfo.needsCallback = 0;
2447 gettimeofday(&tp, 0);
2448 header.volumeInfo.creationDate = tp.tv_sec;
2450 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2451 sizeof(header.volumeInfo), 0);
2452 if (nBytes != sizeof(header.volumeInfo)) {
2455 ("Unable to write volume header file (%s) (errno = %d)\n",
2456 sp->description, errno);
2457 Abort("Unable to write entire volume header file (%s)\n",
/* Write only the version stamp at offset 0. */
2461 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2462 if (nBytes != sizeof(sp->stamp)) {
2465 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2466 sp->description, errno);
2468 ("Unable to write entire version stamp in volume header file (%s)\n",
2473 FDH_REALLYCLOSE(fdP);
/* Keep a copy of the volume info and report when the volume was updated or created. */
2475 if (sp->inodeType == VI_VOLINFO) {
2476 salvinfo->VolInfo = header.volumeInfo;
2480 if (salvinfo->VolInfo.updateDate) {
2481 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2483 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2484 salvinfo->VolInfo.id,
2485 (Testing ? "it would have been " : ""), update);
2487 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2489 Log("%s (%u) not updated (created %s)\n",
2490 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
/*
 * SalvageVnodes: run SalvageIndex over the small and the large vnode index
 * of one volume.
 *
 * rwIsp    inode summary of the read/write volume; its non-special inodes
 *          (from index + nSpecialInodes onwards) are the candidates.
 * thisIsp  inode summary of the volume being salvaged; RW is true when it
 *          is the very same summary as rwIsp.
 * inodes   inode table that the summaries index into.
 * check    passed through unchanged to SalvageIndex.
 *
 * Returns 0 only when both ismall and ilarge are 0, otherwise -1.  The
 * assignments to ismall/ilarge are on lines missing from this listing;
 * presumably they hold the two SalvageIndex results -- TODO confirm.
 */
2500 SalvageVnodes(struct SalvInfo *salvinfo,
2501 struct InodeSummary *rwIsp,
2502 struct InodeSummary *thisIsp,
2503 struct ViceInodeInfo *inodes, int check)
2505 int ilarge, ismall, ioffset, RW, nInodes;
2506 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2509 RW = (rwIsp == thisIsp);
/* Only the inodes after the special ones take part in vnode matching. */
2510 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2512 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2513 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
/* Check mode with a failed small-vnode pass; the action taken is not shown here. */
2514 if (check && ismall == -1)
2517 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2518 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2519 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex: walk one vnode index file and reconcile every allocated
 * vnode with the list of inodes that belong to the volume.
 *
 * ino         inode of the vnode index file.
 * class       vnode class; selects the VnodeClassInfo entry (diskSize,
 *             magic).
 * RW          flag supplied by the caller (SalvageVnodes passes
 *             rwIsp == thisIsp).
 * ip/nInodes  candidate inodes, walked in step with the vnode numbers.
 * volSummary  supplies header.parent for the index file handle.
 * check       when set, no vnode may end up changed (asserted at the end
 *             of the per-vnode work).
 *
 * Visible corrections: uniquifier, data version, inode number and length
 * are taken from the matching inode; vnodes are zeroed for a zero inode
 * number, a wrong magic number, or a missing inode.  Some of the guarding
 * conditions are on lines missing from this listing.  A changed vnode is
 * written back unless Testing is set, and salvinfo->VolumeChanged is set.
 */
2523 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2524 struct ViceInodeInfo *ip, int nInodes,
2525 struct VolumeSummary *volSummary, int check)
2527 char buf[SIZEOF_LARGEDISKVNODE];
2528 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2530 StreamHandle_t *file;
2531 struct VnodeClassInfo *vcp;
2533 afs_sfsize_t nVnodes;
2534 afs_fsize_t vnodeLength;
2536 afs_ino_str_t stmp1, stmp2;
2540 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2541 fdP = IH_OPEN(handle);
2542 osi_Assert(fdP != NULL);
2543 file = FDH_FDOPEN(fdP, "r+");
2544 osi_Assert(file != NULL);
2545 vcp = &VnodeClassInfo[class];
2546 size = OS_SIZE(fdP->fd_fd);
2547 osi_Assert(size != -1);
/* The first diskSize-sized slot is not counted; reading starts after it (see the seek below). */
2548 nVnodes = (size / vcp->diskSize) - 1;
2550 osi_Assert((nVnodes + 1) * vcp->diskSize == size);
2551 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
/* Visit each vnode slot in file order; vnodeIndex is the slot's bit number. */
2555 for (vnodeIndex = 0;
2556 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2557 nVnodes--, vnodeIndex++) {
2558 if (vnode->type != vNull) {
2559 int vnodeChanged = 0;
2560 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2561 if (VNDISK_GET_INO(vnode) == 0) {
2563 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2564 memset(vnode, 0, vcp->diskSize);
2568 if (vcp->magic != vnode->vnodeMagic) {
2569 /* bad magic #, probably partially created vnode */
2570 Log("Partially allocated vnode %d deleted.\n",
2572 memset(vnode, 0, vcp->diskSize);
2576 /* ****** Should do a bit more salvage here: e.g. make sure
2577 * vnode type matches what it should be given the index */
/* Inodes tagged with a lower vnode number are dealt with first (loop body not shown). */
2578 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2579 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2580 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2581 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2588 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2589 /* The following doesn't work, because the version number
2590 * is not maintained correctly by the file server */
2591 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2592 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2594 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2600 /* For RW volume, look for vnode with matching inode number;
2601 * if no such match, take the first determined by our sort
2603 struct ViceInodeInfo *lip = ip;
2604 int lnInodes = nInodes;
2606 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2607 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2616 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2617 /* "Matching" inode */
2621 vu = vnode->uniquifier;
2622 iu = ip->u.vnode.vnodeUniquifier;
2623 vd = vnode->dataVersion;
2624 id = ip->u.vnode.inodeDataVersion;
2626 * Because of the possibility of the uniquifier overflows (> 4M)
2627 * we compare them modulo the low 22-bits; we shouldn't worry
2628 * about mismatching since they shouldn't to many old
2629 * uniquifiers of the same vnode...
2631 if (IUnique(vu) != IUnique(iu)) {
2633 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2636 vnode->uniquifier = iu;
2637 #ifdef AFS_3DISPARES
2638 vnode->dataVersion = (id >= vd ?
2641 1887437 ? vd : id) :
2644 1887437 ? id : vd));
2646 #if defined(AFS_SGI_EXMAG)
2647 vnode->dataVersion = (id >= vd ?
2650 15099494 ? vd : id) :
2653 15099494 ? id : vd));
2655 vnode->dataVersion = (id > vd ? id : vd);
2656 #endif /* AFS_SGI_EXMAG */
2657 #endif /* AFS_3DISPARES */
2660 /* don't bother checking for vd > id any more, since
2661 * partial file transfers always result in this state,
2662 * and you can't do much else anyway (you've already
2663 * found the best data you can) */
2664 #ifdef AFS_3DISPARES
2665 if (!vnodeIsDirectory(vnodeNumber)
2666 && ((vd < id && (id - vd) < 1887437)
2667 || ((vd > id && (vd - id) > 1887437)))) {
2669 #if defined(AFS_SGI_EXMAG)
2670 if (!vnodeIsDirectory(vnodeNumber)
2671 && ((vd < id && (id - vd) < 15099494)
2672 || ((vd > id && (vd - id) > 15099494)))) {
2674 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2675 #endif /* AFS_SGI_EXMAG */
2678 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2679 vnode->dataVersion = id;
2684 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2687 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2689 VNDISK_SET_INO(vnode, ip->inodeNumber);
2694 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2696 VNDISK_SET_INO(vnode, ip->inodeNumber);
2699 VNDISK_GET_LEN(vnodeLength, vnode);
2700 if (ip->byteCount != vnodeLength) {
2703 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2708 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2709 VNDISK_SET_LEN(vnode, ip->byteCount);
2713 ip->linkCount--; /* Keep the inode around */
2716 } else { /* no matching inode */
2718 if (VNDISK_GET_INO(vnode) != 0
2719 || vnode->type == vDirectory) {
2720 /* No matching inode--get rid of the vnode */
2722 if (VNDISK_GET_INO(vnode)) {
2724 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2728 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2733 if (VNDISK_GET_INO(vnode)) {
2735 time_t serverModifyTime = vnode->serverModifyTime;
2736 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2740 time_t serverModifyTime = vnode->serverModifyTime;
2741 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2744 memset(vnode, 0, vcp->diskSize);
2747 /* Should not reach here becuase we checked for
2748 * (inodeNumber == 0) above. And where we zero the vnode,
2749 * we also goto vnodeDone.
2753 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2757 } /* VNDISK_GET_INO(vnode) != 0 */
2759 osi_Assert(!(vnodeChanged && check));
2760 if (vnodeChanged && !Testing) {
2761 osi_Assert(IH_IWRITE
2762 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2763 (char *)vnode, vcp->diskSize)
2765 salvinfo->VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber: map a vnode number to its VnodeEssence record.
 *
 * The vnode class and the bit-number offset are derived from vnodeNumber.
 * Returns NULL when the offset is not below the nVnodes recorded for that
 * class, otherwise a pointer into salvinfo->vnodeInfo[class].vnodes.
 */
2776 struct VnodeEssence *
2777 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2780 struct VnodeInfo *vip;
2783 class = vnodeIdToClass(vnodeNumber);
2784 vip = &salvinfo->vnodeInfo[class];
2785 offset = vnodeIdToBitNumber(vnodeNumber);
/* Offsets at or beyond nVnodes have no essence record. */
2786 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite: give a directory its own copy of its data before the
 * salvager modifies it.
 *
 * salvinfo  salvage job state (device, path, large vnode index handle,
 *           VolumeChanged flag).
 * dir       directory summary; dir->copied and Testing are tested first
 *           (the early exit itself is on a line missing from this listing).
 *
 * Visible steps: the directory's vnode is read from the large vnode index,
 * a new inode is created and asserted valid, CopyInode copies the old inode
 * into it, the vnode is rewritten to reference the new inode, and
 * dir->dirHandle is pointed at the new inode.  The data version is bumped
 * as part of the IH_CREATE arguments (the increment is on a missing line).
 * Every step is guarded by osi_Assert.
 */
2790 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2792 /* Copy the directory unconditionally if we are going to change it:
2793 * not just if was cloned.
2795 struct VnodeDiskObject vnode;
2796 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2797 Inode oldinode, newinode;
2800 if (dir->copied || Testing)
2802 DFlush(); /* Well justified paranoia... */
2805 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2806 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2808 osi_Assert(code == sizeof(vnode));
2809 oldinode = VNDISK_GET_INO(&vnode);
2810 /* Increment the version number by a whole lot to avoid problems with
2811 * clients that were promised new version numbers--but the file server
2812 * crashed before the versions were written to disk.
2815 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2816 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2818 osi_Assert(VALID_INO(newinode));
2819 osi_Assert(CopyInode(salvinfo->fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2821 VNDISK_SET_INO(&vnode, newinode);
2823 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2824 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2826 osi_Assert(code == sizeof(vnode));
2828 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2829 salvinfo->fileSysDevice, newinode,
2830 &salvinfo->VolumeChanged);
2831 /* Don't delete the original inode right away, because the directory is
2832 * still being scanned.
2838 * This function should either successfully create a new dir, or give up
2839 * and leave things the way they were. In particular, if it fails to write
2840 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage: rebuild a damaged directory into a freshly created inode
 * and switch the directory over to it.
 *
 * salvinfo  salvage job state (device, path, large vnode index handle).
 * dir       summary of the directory being rebuilt; dir->dirHandle is
 *           replaced by the new handle on the last visible line.
 *
 * Visible steps: read the directory vnode, create a new inode, run
 * DirSalvage from the old handle into the new one, verify the result with
 * DirOK, store the new inode number and length in the vnode, write the
 * vnode back, sync, and decrement the old inode.  When DirSalvage or DirOK
 * fails, the new inode is decremented instead; the code that follows those
 * failures is on lines missing from this listing.
 */
2844 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2846 struct VnodeDiskObject vnode;
2847 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2848 Inode oldinode, newinode;
/* Stays 1 unless the parent's essence record supplies a non-zero uniquifier. */
2853 afs_int32 parentUnique = 1;
2854 struct VnodeEssence *vnodeEssence;
2859 Log("Salvaging directory %u...\n", dir->vnodeNumber);
/* Read the directory's vnode from the large vnode index. */
2861 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2862 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2864 osi_Assert(lcode == sizeof(vnode));
2865 oldinode = VNDISK_GET_INO(&vnode);
2866 /* Increment the version number by a whole lot to avoid problems with
2867 * clients that were promised new version numbers--but the file server
2868 * crashed before the versions were written to disk.
2871 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2872 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2874 osi_Assert(VALID_INO(newinode));
2875 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2876 &salvinfo->VolumeChanged);
2878 /* Assign . and .. vnode numbers from dir and vnode.parent.
2879 * The uniquifier for . is in the vnode.
2880 * The uniquifier for .. might be set to a bogus value of 1 and
2881 * the salvager will later clean it up.
2883 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2884 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2887 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2889 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2894 /* didn't really build the new directory properly, let's just give up. */
2895 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2896 Log("Directory salvage returned code %d, continuing.\n", code);
2898 Log("also failed to decrement link count on new inode");
2902 Log("Checking the results of the directory salvage...\n");
2903 if (!DirOK(&newdir)) {
2904 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2905 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2906 osi_Assert(code == 0);
/* Point the vnode at the rebuilt directory and record its new length. */
2910 VNDISK_SET_INO(&vnode, newinode);
2911 length = Length(&newdir);
2912 VNDISK_SET_LEN(&vnode, length);
2914 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2915 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2917 osi_Assert(lcode == sizeof(vnode));
2920 nt_sync(salvinfo->fileSysDevice);
2922 sync(); /* this is slow, but hopefully rarely called. We don't have
2923 * an open FD on the file itself to fsync.
2927 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
2929 /* make sure old directory file is really closed */
2930 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2931 FDH_REALLYCLOSE(fdP);
/* Decrement the old directory inode, then adopt the new handle. */
2933 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2934 osi_Assert(code == 0);
2935 dir->dirHandle = newdir;
2939 * arguments for JudgeEntry.
/* Context bundle that JudgeEntry receives through its opaque arock argument. */
2941 struct judgeEntry_params {
2942 struct DirSummary *dir; /**< directory we're examining entries in */
2943 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/*
 * JudgeEntry: directory enumeration callback that validates one directory
 * entry and repairs or removes it when it is inconsistent.
 *
 * arock        struct judgeEntry_params * carrying the directory summary
 *              and the SalvInfo of the current job.
 * name         entry name.
 * vnodeNumber  vnode number stored in the entry.
 * (unique)     uniquifier stored in the entry; it is used throughout the
 *              body but its declaration is on a line missing from this
 *              listing.
 *
 * Cases visible below:
 *   - no essence record for the vnode number: entry deleted;
 *   - (non-namei builds) essence has inode number 0: entry deleted;
 *   - even vnode number whose essence has no count, unique or mode bits
 *     (and Showmode off): reported as invalid;
 *   - uniquifier mismatch: entry deleted, and re-created with the vnode's
 *     uniquifier unless that is zero or the directory is orphaned;
 *   - "." and ".." that do not name the expected vnode are replaced;
 *   - names starting with ".__afs" are deleted and the vnode is marked
 *     todelete;
 *   - otherwise: optional suid/root-file/mount-point reporting, mount point
 *     validation, parent/claim bookkeeping, and a link count decrement.
 * All directory changes go through CopyOnWrite first.
 */
2947 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
2950 struct judgeEntry_params *params = arock;
2951 struct DirSummary *dir = params->dir;
2952 struct SalvInfo *salvinfo = params->salvinfo;
2953 struct VnodeEssence *vnodeEssence;
2954 afs_int32 dirOrphaned, todelete;
/* Remember whether the directory being examined is itself orphaned. */
2956 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
2958 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
/* NULL here means CheckVnodeNumber found no essence record for this vnode number. */
2959 if (vnodeEssence == NULL) {
2961 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2964 CopyOnWrite(salvinfo, dir);
2965 osi_Assert(Delete(&dir->dirHandle, name) == 0);
2970 #ifndef AFS_NAMEI_ENV
2971 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2972 * mount inode for the partition. If this inode were deleted, it would crash
2975 if (vnodeEssence->InodeNumber == 0) {
2976 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2978 CopyOnWrite(salvinfo, dir);
2979 osi_Assert(Delete(&dir->dirHandle, name) == 0);
2986 if (!(vnodeNumber & 1) && !Showmode
2987 && !(vnodeEssence->count || vnodeEssence->unique
2988 || vnodeEssence->modeBits)) {
2989 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
2990 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
2991 vnodeNumber, unique,
2992 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
2996 CopyOnWrite(salvinfo, dir);
2997 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3003 /* Check if the Uniquifiers match. If not, change the directory entry
3004 * so its unique matches the vnode unique. Delete if the unique is zero
3005 * or if the directory is orphaned.
3007 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3008 if (!vnodeEssence->unique
3009 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3010 /* This is an orphaned directory. Don't delete the . or ..
3011 * entry. Otherwise, it will get created in the next
3012 * salvage and deleted again here. So Just skip it.
3017 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3020 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3024 fid.Vnode = vnodeNumber;
3025 fid.Unique = vnodeEssence->unique;
3026 CopyOnWrite(salvinfo, dir);
3027 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3029 osi_Assert(Create(&dir->dirHandle, name, &fid) == 0);
3032 return 0; /* no need to continue */
/* "." must refer to this directory's own vnode number and uniquifier. */
3035 if (strcmp(name, ".") == 0) {
3036 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3039 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3041 CopyOnWrite(salvinfo, dir);
3042 osi_Assert(Delete(&dir->dirHandle, ".") == 0);
3043 fid.Vnode = dir->vnodeNumber;
3044 fid.Unique = dir->unique;
3045 osi_Assert(Create(&dir->dirHandle, ".", &fid) == 0);
3048 vnodeNumber = fid.Vnode; /* Get the new Essence */
3049 unique = fid.Unique;
3050 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
/* ".." must match pa, which is built from dir->parent or from the directory itself. */
3053 } else if (strcmp(name, "..") == 0) {
3056 struct VnodeEssence *dotdot;
3057 pa.Vnode = dir->parent;
3058 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3059 osi_Assert(dotdot != NULL); /* XXX Should not be assert */
3060 pa.Unique = dotdot->unique;
3062 pa.Vnode = dir->vnodeNumber;
3063 pa.Unique = dir->unique;
3065 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3067 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3069 CopyOnWrite(salvinfo, dir);
3070 osi_Assert(Delete(&dir->dirHandle, "..") == 0);
3071 osi_Assert(Create(&dir->dirHandle, "..", &pa) == 0);
3074 vnodeNumber = pa.Vnode; /* Get the new Essence */
3076 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3078 dir->haveDotDot = 1;
/* Names with the ".__afs" prefix are removed and their vnodes queued for deletion. */
3079 } else if (strncmp(name, ".__afs", 6) == 0) {
3081 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3084 CopyOnWrite(salvinfo, dir);
3085 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3087 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3088 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3091 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3092 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/* A symlink with no execute bits is handled as a mount point; its target text is checked. */
3093 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3094 && !(vnodeEssence->modeBits & 0111)) {
3101 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3102 vnodeEssence->InodeNumber);
3105 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3109 size = FDH_SIZE(fdP);
3111 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3112 FDH_REALLYCLOSE(fdP);
3119 nBytes = FDH_PREAD(fdP, buf, size, 0);
3120 if (nBytes == size) {
/* Valid targets start with '#' or '%' and end with '.'; anything else gets execute bits set. */
3122 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3123 Log("Volume %u (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3124 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3125 Testing ? "would convert" : "converted");
3126 vnodeEssence->modeBits |= 0111;
3127 vnodeEssence->changed = 1;
3128 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3129 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3130 dir->name ? dir->name : "??", name, buf);
3132 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3133 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3135 FDH_REALLYCLOSE(fdP);
3138 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3139 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/* Record the entry name for large-class vnodes that have none yet. */
3140 if (vnodeIdToClass(vnodeNumber) == vLarge
3141 && vnodeEssence->name == NULL) {
3143 if ((n = (char *)malloc(strlen(name) + 1)))
3145 vnodeEssence->name = n;
3148 /* The directory entry points to the vnode. Check to see if the
3149 * vnode points back to the directory. If not, then let the
3150 * directory claim it (else it might end up orphaned). Vnodes
3151 * already claimed by another directory are deleted from this
3152 * directory: hardlinks to the same vnode are not allowed
3153 * from different directories.
3155 if (vnodeEssence->parent != dir->vnodeNumber) {
3156 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3157 /* Vnode does not point back to this directory.
3158 * Orphaned dirs cannot claim a file (it may belong to
3159 * another non-orphaned dir).
3162 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3164 vnodeEssence->parent = dir->vnodeNumber;
3165 vnodeEssence->changed = 1;
3167 /* Vnode was claimed by another directory */
3170 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3171 } else if (vnodeNumber == 1) {
3172 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3174 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3178 CopyOnWrite(salvinfo, dir);
3179 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3184 /* This directory claims the vnode */
3185 vnodeEssence->claimed = 1;
/* Count this directory entry against the vnode's remaining link count. */
3187 vnodeEssence->count--;
/*
 * DistilVnodeEssence: read one vnode index file and build the in-memory
 * VnodeEssence table for that vnode class.
 *
 * salvinfo  salvage job state; salvinfo->vnodeInfo[class] is filled in.
 * rwVId     volume id used for the index file handle.
 * class     vnode class; selects the VnodeClassInfo entry (diskSize).
 * ino       inode of the vnode index file.
 * maxu      in/out: raised to the largest uniquifier seen.
 *
 * For every allocated vnode the link count, block count, parent,
 * uniquifier, mode bits, inode number, type, author, owner and group are
 * copied into its essence record.  For the large class the inode numbers
 * of directory vnodes are also stored in vip->inodes.  A directory vnode
 * found in any other class is zeroed on disk and VolumeChanged is set.
 *
 * NOTE(review): this listing omits some lines of the function (braces,
 * short declarations); only the visible lines are reproduced.
 */
3192 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3193 VnodeClass class, Inode ino, Unique * maxu)
3195 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3196 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3197 char buf[SIZEOF_LARGEDISKVNODE];
3198 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3200 StreamHandle_t *file;
3205 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3206 fdP = IH_OPEN(vip->handle);
3207 osi_Assert(fdP != NULL);
3208 file = FDH_FDOPEN(fdP, "r+");
3209 osi_Assert(file != NULL);
3210 size = OS_SIZE(fdP->fd_fd);
3211 osi_Assert(size != -1);
/* The first diskSize-sized slot is not counted; reading starts after it. */
3212 vip->nVnodes = (size / vcp->diskSize) - 1;
3213 if (vip->nVnodes > 0) {
3214 osi_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3215 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
3216 osi_Assert((vip->vnodes = (struct VnodeEssence *)
3217 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3218 if (class == vLarge) {
3219 osi_Assert((vip->inodes = (Inode *)
3220 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3229 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3230 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3231 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3232 nVnodes--, vnodeIndex++) {
3233 if (vnode->type != vNull) {
3234 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3235 afs_fsize_t vnodeLength;
3236 vip->nAllocatedVnodes++;
3237 vep->count = vnode->linkCount;
3238 VNDISK_GET_LEN(vnodeLength, vnode);
3239 vep->blockCount = nBlocks(vnodeLength);
3240 vip->volumeBlockCount += vep->blockCount;
3241 vep->parent = vnode->parent;
3242 vep->unique = vnode->uniquifier;
3243 if (*maxu < vnode->uniquifier)
3244 *maxu = vnode->uniquifier;
3245 vep->modeBits = vnode->modeBits;
3246 vep->InodeNumber = VNDISK_GET_INO(vnode);
3247 vep->type = vnode->type;
3248 vep->author = vnode->author;
3249 vep->owner = vnode->owner;
3250 vep->group = vnode->group;
3251 if (vnode->type == vDirectory) {
3252 if (class != vLarge) {
3253 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3254 vip->nAllocatedVnodes--;
/* vnode is a pointer into buf, so sizeof(vnode) would be the pointer size;
 * clear and write the whole on-disk vnode, vcp->diskSize bytes, instead. */
3255 memset(vnode, 0, vcp->diskSize);
3256 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3257 vnodeIndexOffset(vcp, vnodeNumber),
3258 (char *)vnode, vcp->diskSize);
3259 salvinfo->VolumeChanged = 1;
3261 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName: build the path of a directory vnode by recursing up through
 * its parents.
 *
 * vnode  vnode number of the directory (not used on the visible lines).
 * vp     essence record of that vnode; vp->parent and vp->name are read.
 * path   output buffer; it is used below but declared on a line missing
 *        from this listing.
 *
 * When the vnode has a parent and a name, the parent has an essence record
 * and the recursive call for the parent succeeds, OS_DIRSEP and vp->name
 * are appended to path.  The remaining cases and the return values are on
 * lines missing from this listing.
 *
 * NOTE(review): both strcat calls append without a visible length check --
 * confirm that the caller's buffer cannot overflow.
 */
3270 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3273 struct VnodeEssence *parentvp;
3279 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3280 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3281 strcat(path, OS_DIRSEP);
3282 strcat(path, vp->name);
3288 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3289 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
/*
 * IsVnodeOrphaned: returns 1 when the vnode is orphaned and 0 when the
 * walk up the parent chain reaches a vnode that is always claimed.
 *
 * The two early returns cover vnode zero (1) and the root directory (0);
 * their conditions are on lines missing from this listing.  A vnode
 * without an essence record, or one that is not claimed, is orphaned;
 * otherwise the answer is that of its parent (recursive call).
 */
3292 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3294 struct VnodeEssence *vep;
3297 return (1); /* Vnode zero does not exist */
3299 return (0); /* The root dir vnode is always claimed */
3300 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3301 if (!vep || !vep->claimed)
3302 return (1); /* Vnode is not claimed - it is orphaned */
/* Claimed itself: orphaned only if an ancestor is. */
3304 return (IsVnodeOrphaned(salvinfo, vep->parent));
/*
 * SalvageDir: salvage the directory held in slot i of the large vnode
 * table, after first salvaging its parent directory.
 *
 * salvinfo       salvage job state.
 * name           passed on unchanged to the recursive call.
 * rwVid          read/write volume id, used when decrementing the old inode.
 * dirVnodeInfo   large-vnode table (vnodes[] and inodes[]).
 * alinkH         link handle stored in dir.ds_linkH.
 * i              bit number of the directory vnode.
 * rootdir        receives a copy of the DirSummary when this is vnode 1
 *                with unique 1.
 * rootdirfound   forwarded to the recursive call; not otherwise referenced
 *                on the visible lines.
 *
 * Visible steps: skip slots already salvaged or without an inode; reset a
 * non-zero parent on vnode 1; recurse into an unsalvaged parent; check the
 * directory with DirOK (forced bad when RebuildDirs is set and Testing is
 * not) and rebuild it with CopyAndSalvage; enumerate all entries through
 * JudgeEntry; finally decrement the old inode if the directory was copied.
 *
 * NOTE(review): dir, dirHandle and path are static, so the function is not
 * reentrant; the recursive call is made before dir is filled in.
 */
3308 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3309 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3310 struct DirSummary *rootdir, int *rootdirfound)
3312 static struct DirSummary dir;
3313 static struct DirHandle dirHandle;
3314 struct VnodeEssence *parent;
3315 static char path[MAXPATHLEN];
3318 if (dirVnodeInfo->vnodes[i].salvaged)
3319 return; /* already salvaged */
/* Mark the slot first so the recursive parent walk below cannot revisit it. */
3322 dirVnodeInfo->vnodes[i].salvaged = 1;
3324 if (dirVnodeInfo->inodes[i] == 0)
3325 return; /* Not allocated to a directory */
/* Vnode 1 must not have a parent; a non-zero parent is cleared. */
3327 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3328 if (dirVnodeInfo->vnodes[i].parent) {
3329 Log("Bad parent, vnode 1; %s...\n",
3330 (Testing ? "skipping" : "salvaging"));
3331 dirVnodeInfo->vnodes[i].parent = 0;
3332 dirVnodeInfo->vnodes[i].changed = 1;
/* Salvage the parent directory before this one. */
3335 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3336 if (parent && parent->salvaged == 0)
3337 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3338 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3339 rootdir, rootdirfound);
3342 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3343 dir.unique = dirVnodeInfo->vnodes[i].unique;
3346 dir.parent = dirVnodeInfo->vnodes[i].parent;
3347 dir.haveDot = dir.haveDotDot = 0;
3348 dir.ds_linkH = alinkH;
3349 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3350 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
/* With RebuildDirs set (and Testing off) the directory is treated as bad without calling DirOK. */
3352 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3355 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3356 (Testing ? "skipping" : "salvaging"));
3359 CopyAndSalvage(salvinfo, &dir);
3361 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
/* Copy the handle: dir.dirHandle may be re-targeted by CopyOnWrite during enumeration. */
3364 dirHandle = dir.dirHandle;
3367 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3368 &dirVnodeInfo->vnodes[i], path);
3371 /* If enumeration failed for random reasons, we will probably delete
3372 * too much stuff, so we guard against this instead.
3374 struct judgeEntry_params judge_params;
3375 judge_params.salvinfo = salvinfo;
3376 judge_params.dir = &dir;
3378 osi_Assert(EnumerateDir(&dirHandle, JudgeEntry, &judge_params) == 0);
3381 /* Delete the old directory if it was copied in order to salvage.
3382 * CopyOnWrite has written the new inode # to the disk, but we still
3383 * have the old one in our local structure here. Thus, we idec the
3387 if (dir.copied && !Testing) {
3388 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3389 osi_Assert(code == 0);
3390 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3393 /* Remember rootdir DirSummary _after_ it has been judged */
3394 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3395 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3403 * Get a new FID that can be used to create a new file.
3405 * @param[in] volHeader vol header for the volume
3406 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3407 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3408 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3409 * updated to the new max unique if we create a new
/*
 * GetNewFID: choose a vnode slot and a uniquifier for a file that is about
 * to be created.
 *
 * The essence table of the given class is scanned for a slot whose type is
 * vNull (the loop exit is on a line missing from this listing).  If the
 * scan reaches nVnodes, the table is grown by one entry and the new entry
 * is marked vNull.  afid->Vnode is derived from the slot's bit number;
 * afid->Unique comes either from *maxunique or from the volume header's
 * uniquifier, depending on whether the header value looks valid.
 */
3413 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3414 VnodeClass class, AFSFid *afid, Unique *maxunique)
3417 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3418 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
3422 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3423 /* no free vnodes; make a new one */
3424 salvinfo->vnodeInfo[class].nVnodes++;
/* NOTE(review): the realloc result overwrites the only pointer to the table
 * and is dereferenced below with no NULL check visible here; only the type
 * field of the new entry is initialised on the visible lines -- confirm. */
3425 salvinfo->vnodeInfo[class].vnodes =
3426 realloc(salvinfo->vnodeInfo[class].vnodes,
3427 sizeof(struct VnodeEssence) * (i+1));
3429 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
3432 afid->Vnode = bitNumberToVnodeNumber(i, class);
/* A header uniquifier that is not above *maxunique is treated as bad. */
3434 if (volHeader->uniquifier < (*maxunique + 1)) {
3435 /* header uniq is bad; it will get bumped by 2000 later */
3436 afid->Unique = *maxunique + 1 + 2000;
3439 /* header uniq seems okay; just use that */
3440 afid->Unique = *maxunique = volHeader->uniquifier++;
3445 * Create a vnode for a README file explaining not to use a recreated-root vol.
3447 * @param[in] volHeader vol header for the volume
3448 * @param[in] alinkH ihandle for i/o for the volume
3449 * @param[in] vid volume id
3450 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3451 * updated to the new max unique if we create a new
3453 * @param[out] afid FID for the new readme vnode
3454 * @param[out] ainode the inode for the new readme file
3456 * @return operation status
/* CreateReadme: create a small-vnode README file for a volume whose root
 * directory had to be recreated.  The visible steps are: obtain a FID via
 * GetNewFID, create and fill the data inode, write the small disk vnode into
 * the small vnode index, update the in-memory VnodeEssence, and report the
 * inode through *ainode.  Parts of the function (declarations, returns and
 * error jumps) are not visible in this listing. */
3461 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3462 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3466 struct VnodeDiskObject *rvnode = NULL;
3468 IHandle_t *readmeH = NULL;
3469 struct VnodeEssence *vep;
3471 time_t now = time(NULL);
3473 /* Try to make the note brief, but informative. Only administrators should
3474 * be able to read this file at first, so we can hopefully assume they
3475 * know what AFS is, what a volume is, etc. */
3477 "This volume has been salvaged, but has lost its original root directory.\n"
3478 "The root directory that exists now has been recreated from orphan files\n"
3479 "from the rest of the volume. This recreated root directory may interfere\n"
3480 "with old cached data on clients, and there is no way the salvager can\n"
3481 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3482 "use this volume, but only copy the salvaged data to a new volume.\n"
3483 "Continuing to use this volume as it exists now may cause some clients to\n"
3484 "behave oddly when accessing this volume.\n"
3485 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3486 /* ^ the person reading this probably just lost some data, so they could
3487 * use some cheering up. */
3489 /* -1 for the trailing NUL */
3490 length = sizeof(readme) - 1;
/* Reserve a small-vnode FID; GetNewFID may grow the small vnode table, so vep
 * is looked up only after it returns. */
3492 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
3494 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3496 /* create the inode and write the contents */
3497 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3498 salvinfo->fileSysPath, 0, vid,
3499 afid->Vnode, afid->Unique, 1);
3500 if (!VALID_INO(readmeinode)) {
3501 Log("CreateReadme: readme IH_CREATE failed\n");
3505 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3506 bytes = IH_IWRITE(readmeH, 0, readme, length);
3507 IH_RELEASE(readmeH);
/* The write is checked against length, so length (not sizeof(readme), which
 * also counts the trailing NUL) is the expected count reported in the log. */
3509 if (bytes != length) {
3510 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3511 (int)length);
3515 /* create the vnode and write it out */
3516 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
/* This message is emitted from CreateReadme, so it carries that prefix like
 * the other messages in this function. */
3518 Log("CreateReadme: error alloc'ing memory\n");
3522 rvnode->type = vFile;
3524 rvnode->modeBits = 0777;
3525 rvnode->linkCount = 1;
3526 VNDISK_SET_LEN(rvnode, length);
3527 rvnode->uniquifier = afid->Unique;
3528 rvnode->dataVersion = 1;
3529 VNDISK_SET_INO(rvnode, readmeinode);
3530 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3535 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
/* Store the vnode at its slot in the small vnode index file. */
3537 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3538 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3539 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3541 if (bytes != SIZEOF_SMALLDISKVNODE) {
3542 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3543 (int)SIZEOF_SMALLDISKVNODE);
3547 /* update VnodeEssence for new readme vnode */
3548 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
3550 vep->blockCount = nBlocks(length);
3551 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3552 vep->parent = rvnode->parent;
3553 vep->unique = rvnode->uniquifier;
3554 vep->modeBits = rvnode->modeBits;
3555 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3556 vep->type = rvnode->type;
3557 vep->author = rvnode->author;
3558 vep->owner = rvnode->owner;
3559 vep->group = rvnode->group;
3569 *ainode = readmeinode;
/* Recovery path: drop the reference on the inode created above.  The
 * condition that guards this path is not shown. */
3574 if (IH_DEC(alinkH, readmeinode, vid)) {
3575 Log("CreateReadme (recovery): IH_DEC failed\n");
3587 * create a root dir for a volume that lacks one.
3589 * @param[in] volHeader vol header for the volume
3590 * @param[in] alinkH ihandle for disk access for this volume group
3591 * @param[in] vid volume id we're dealing with
3592 * @param[out] rootdir populated with info about the new root dir
3593 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3594 * updated to the new max unique if we create a new
3597 * @return operation status
/* CreateRootDir: build a replacement root directory (vnode 1, uniquifier 1)
 * for a volume that has none.  The visible steps are: make sure a large vnode
 * table exists, create the README via CreateReadme, create the directory
 * inode, populate it with MakeDir and a README.ROOTDIR entry, write the large
 * disk vnode, then update the in-memory VnodeEssence and the caller-supplied
 * DirSummary.  Returns, error jumps and some declarations are not visible in
 * this listing. */
3602 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3603 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
3607 int decroot = 0, decreadme = 0;
3608 AFSFid did, readmeid;
3611 struct VnodeDiskObject *rootvnode = NULL;
3612 struct acl_accessList *ACL;
3615 struct VnodeEssence *vep;
3617 time_t now = time(NULL);
/* With neither vnode table present there is nothing to attach to a new root,
 * so the attempt is abandoned. */
3619 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3620 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3624 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3625 /* We don't have any large vnodes in the volume; allocate room
3626 * for one so we can recreate the root dir */
3627 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3628 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3629 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3631 osi_Assert(salvinfo->vnodeInfo[vLarge].vnodes);
3632 osi_Assert(salvinfo->vnodeInfo[vLarge].inodes);
/* Locate the table entries for vnode number 1 and refuse to overwrite an
 * entry that is already in use. */
3635 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3636 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3637 if (vep->type != vNull) {
3638 Log("Not creating new root dir; existing vnode 1 is non-null\n");
/* The README is created first so that its FID can be entered into the new
 * directory below. */
3642 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3643 &readmeinode) != 0) {
3648 /* set the DV to a very high number, so it is unlikely that we collide
3649 * with a cached DV */
3652 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3654 if (!VALID_INO(rootinode)) {
3655 Log("CreateRootDir: IH_CREATE failed\n");
3660 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3661 rootinode, &salvinfo->VolumeChanged);
/* did is passed as both FID arguments of MakeDir; its fields are set on lines
 * not shown. */
3665 if (MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3666 Log("CreateRootDir: MakeDir failed\n");
3669 if (Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3670 Log("CreateRootDir: Create failed\n");
/* Capture the directory length before DZap is called on the handle; the
 * length is needed for the vnode and block accounting below. */
3674 length = Length(&rootdir->dirHandle);
3675 DZap((void *)&rootdir->dirHandle);
3677 /* create the new root dir vnode */
3678 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
3680 Log("CreateRootDir: malloc failed\n");
3684 /* only give 'rl' permissions to 'system:administrators'. We do this to
3685 * try to catch the attention of an administrator, that they should not
3686 * be writing to this directory or continue to use it. */
3687 ACL = VVnodeDiskACL(rootvnode);
3688 ACL->size = sizeof(struct acl_accessList);
3689 ACL->version = ACL_ACLVERSION;
3693 ACL->entries[0].id = -204; /* system:administrators */
3694 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
3696 rootvnode->type = vDirectory;
3697 rootvnode->cloned = 0;
3698 rootvnode->modeBits = 0777;
3699 rootvnode->linkCount = 2;
3700 VNDISK_SET_LEN(rootvnode, length);
3701 rootvnode->uniquifier = 1;
3702 rootvnode->dataVersion = dv;
3703 VNDISK_SET_INO(rootvnode, rootinode);
3704 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3705 rootvnode->author = 0;
3706 rootvnode->owner = 0;
3707 rootvnode->parent = 0;
3708 rootvnode->group = 0;
3709 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3711 /* write it out to disk */
3712 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3713 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3714 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3716 if (bytes != SIZEOF_LARGEDISKVNODE) {
3717 /* just cast to int and don't worry about printing real 64-bit ints;
3718 * a large disk vnode isn't anywhere near the 32-bit limit */
3719 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3720 (int)SIZEOF_LARGEDISKVNODE);
3724 /* update VnodeEssence for the new root vnode */
3725 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3727 vep->blockCount = nBlocks(length);
3728 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3729 vep->parent = rootvnode->parent;
3730 vep->unique = rootvnode->uniquifier;
3731 vep->modeBits = rootvnode->modeBits;
3732 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3733 vep->type = rootvnode->type;
3734 vep->author = rootvnode->author;
3735 vep->owner = rootvnode->owner;
3736 vep->group = rootvnode->group;
3746 /* update DirSummary for the new root vnode */
3747 rootdir->vnodeNumber = 1;
3748 rootdir->unique = 1;
3749 rootdir->haveDot = 1;
3750 rootdir->haveDotDot = 1;
3751 rootdir->rwVid = vid;
3752 rootdir->copied = 0;
3753 rootdir->parent = 0;
/* NOTE(review): the strdup result is not checked on the visible lines, and
 * vname aliases the name buffer inside the caller-owned volHeader. */
3754 rootdir->name = strdup(".");
3755 rootdir->vname = volHeader->name;
3756 rootdir->ds_linkH = alinkH;
/* Recovery path: decroot and decreadme select which of the inodes created
 * above get their reference dropped.  Both start at 0; the statements that
 * set them are not shown. */
3763 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3764 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3766 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3767 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3777 * salvage a volume group.
3779 * @param[in] salvinfo information for the current salvage job
3780 * @param[in] rwIsp inode summary for rw volume
3781 * @param[in] alinkH link table inode handle
3783 * @return operation status
/* SalvageVolume: salvage one read-write volume.  The visible phases are:
 *   1. read and sanity-check the volume header;
 *   2. distil both vnode indexes into memory and salvage every directory;
 *   3. optionally recreate a missing root directory;
 *   4. find orphaned vnodes and, when requested, attach them to the root;
 *   5. write back every vnode that changed or whose link count is off;
 *   6. fix the header statistics and uniquifier, and write the header.
 * Several declarations, branches and the return statement are not visible in
 * this listing. */
3787 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3789 /* This routine, for now, will only be called for read-write volumes */
3791 int BlocksInVolume = 0, FilesInVolume = 0;
3793 struct DirSummary rootdir, oldrootdir;
3794 struct VnodeInfo *dirVnodeInfo;
3795 struct VnodeDiskObject vnode;
3796 VolumeDiskData volHeader;
3798 int orphaned, rootdirfound = 0;
3799 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3800 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3801 struct VnodeEssence *vep;
3804 afs_sfsize_t nBytes;
3806 VnodeId LFVnode, ThisVnode;
3807 Unique LFUnique, ThisUnique;
/* Phase 1: load the volume header.  A short read, a bad magic number or a
 * header still marked DESTROY_ME trips an assertion. */
3811 vid = rwIsp->volSummary->header.id;
3812 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3813 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3814 osi_Assert(nBytes == sizeof(volHeader));
3815 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3816 osi_Assert(volHeader.destroyMe != DESTROY_ME);
3817 /* (should not have gotten this far with DESTROY_ME flag still set!) */
/* Phase 2: distil each vnode index; the same maxunique is handed to both
 * calls by address.  Then run SalvageDir over every large-vnode slot. */
3819 DistilVnodeEssence(salvinfo, vid, vLarge,
3820 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
3821 DistilVnodeEssence(salvinfo, vid, vSmall,
3822 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
3824 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
3825 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3826 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
3827 &rootdir, &rootdirfound);
3830 nt_sync(salvinfo->fileSysDevice);
3832 sync(); /* This used to be done lower level, for every dir */
/* Phase 3: a replacement root is attempted only when orphans are to be
 * attached and this is not a Testing run. */
3839 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
3841 Log("Cannot find root directory for volume %lu; attempting to create "
3842 "a new one\n", afs_printable_uint32_lu(vid));
3844 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
3849 salvinfo->VolumeChanged = 1;
/* Phase 4 starts here.  oldrootdir keeps a copy of rootdir so that a later
 * change of rootdir.copied can be detected once the loops are done. */
3853 /* Parse each vnode looking for orphaned vnodes and
3854 * connect them to the tree as orphaned (if requested).
3856 oldrootdir = rootdir;
3857 for (class = 0; class < nVNODECLASSES; class++) {
3858 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
3859 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
3860 ThisVnode = bitNumberToVnodeNumber(v, class);
3861 ThisUnique = vep->unique;
3863 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3864 continue; /* Ignore unused, claimed, and root vnodes */
3866 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3867 * entry in this vnode had incremented the parent link count (In
3868 * JudgeEntry()). We need to go to the parent and decrement that
3869 * link count. But if the parent's unique is zero, then the parent
3870 * link count was not incremented in JudgeEntry().
3872 if (class == vLarge) { /* directory vnode */
/* count is a pending correction that is subtracted from the on-disk linkCount
 * when vnodes are written out, so count++ lowers the final link count.
 * NOTE(review): no range check of pv against the large vnode table size is
 * visible here. */
3873 pv = vnodeIdToBitNumber(vep->parent);
3874 if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
3875 if (vep->parent == 1 && newrootdir) {
3876 /* this vnode's parent was the volume root, and
3877 * we just created the volume root. So, the parent
3878 * dir didn't exist during JudgeEntry, so the link
3879 * count was not inc'd there, so don't dec it here.
3885 salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
3891 continue; /* If no rootdir, can't attach orphaned files */
3893 /* Here we attach orphaned files and directories into the
3894 * root directory, LFVnode, making sure link counts stay correct.
3896 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3897 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3898 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3900 /* Update this orphaned vnode's info. Its parent info and
3901 * link count (do for orphaned directories and files).
3903 vep->parent = LFVnode; /* Parent is the root dir */
/* NOTE(review): this overwrites the in-memory uniquifier of the orphan with
 * that of the root directory.  The directory entry created further down uses
 * ThisUnique, which was captured before this assignment. */
3904 vep->unique = LFUnique;
/* Decrementing count raises the final link count, because count is
 * subtracted from linkCount when the vnode is written out. */
3907 vep->count--; /* Inc link count (root dir will pt to it) */
3909 /* If this orphaned vnode is a directory, change '..'.
3910 * The name of the orphaned dir/file is unknown, so we
3911 * build a unique name. No need to CopyOnWrite the directory
3912 * since it is not connected to tree in BK or RO volume and
3913 * won't be visible there.
3915 if (class == vLarge) {
3919 /* Remove and recreate the ".." entry in this orphaned directory */
/* dh and pa are declared on lines not shown.  Only pa.Unique is assigned on
 * the visible lines; pa.Vnode is presumably set next to it - TODO confirm.
 * NOTE(review): Delete and Create are called inside osi_Assert, which relies
 * on the assertion macro always evaluating its argument - TODO confirm. */
3920 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
3921 salvinfo->vnodeInfo[class].inodes[v],
3922 &salvinfo->VolumeChanged);
3924 pa.Unique = LFUnique;
3925 osi_Assert(Delete(&dh, "..") == 0);
3926 osi_Assert(Create(&dh, "..", &pa) == 0);
3928 /* The original parent's link count was decremented above.
3929 * Here we increment the new parent's link count.
3931 pv = vnodeIdToBitNumber(LFVnode);
3932 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
3936 /* Go to the root dir and add this entry. The link count of the
3937 * root dir was incremented when ".." was created. Try 10 times.
3939 for (j = 0; j < 10; j++) {
3940 pa.Vnode = ThisVnode;
3941 pa.Unique = ThisUnique;
3943 (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
3945 vLarge) ? "__ORPHANDIR__" :
3946 "__ORPHANFILE__"), ThisVnode,
3949 CopyOnWrite(salvinfo, &rootdir);
3950 code = Create(&rootdir.dirHandle, npath, &pa);
3954 ThisUnique += 50; /* Try creating a different file */
/* code holds the result of the most recent Create attempt; the salvage
 * asserts if the entry could not be created. */
3956 osi_Assert(code == 0);
3957 Log("Attaching orphaned %s to volume's root dir as %s\n",
3958 ((class == vLarge) ? "directory" : "file"), npath);
3960 } /* for each vnode in the class */
3961 } /* for each class of vnode */
3963 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
3965 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
3967 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
3969 osi_Assert(code == 0);
3970 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
3973 DFlush(); /* Flush the changes */
/* NOTE(review): orphans is not declared among the visible locals, so this
 * assignment appears to change a setting that outlives this call - TODO
 * confirm.  Switching to ORPH_IGNORE also keeps the ORPH_ATTACH assertion in
 * the write-back loop below from firing. */
3974 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
3975 Log("Cannot attach orphaned files and directories: Root directory not found\n");
3976 orphans = ORPH_IGNORE;
3979 /* Write out all changed vnodes. Orphaned files and directories
3980 * will get removed here also (if requested).
3982 for (class = 0; class < nVNODECLASSES; class++) {
3983 int nVnodes = salvinfo->vnodeInfo[class].nVnodes;
3984 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3985 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
3986 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
3987 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
3988 for (i = 0; i < nVnodes; i++) {
3989 struct VnodeEssence *vnp = &vnodes[i];
3990 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
3992 /* If the vnode is good but is unclaimed (not listed in
3993 * any directory entries), then it is orphaned.
3996 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
3997 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
/* A vnode is rewritten when it was flagged changed or carries a non-zero
 * link-count correction.  The disk copy is read, patched and written back in
 * place. */
4001 if (vnp->changed || vnp->count) {
4004 IH_IREAD(salvinfo->vnodeInfo[class].handle,
4005 vnodeIndexOffset(vcp, vnodeNumber),
4006 (char *)&vnode, sizeof(vnode));
4007 osi_Assert(nBytes == sizeof(vnode));
4009 vnode.parent = vnp->parent;
4010 oldCount = vnode.linkCount;
4011 vnode.linkCount = vnode.linkCount - vnp->count;
4014 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4016 if (!vnp->todelete) {
4017 /* Orphans should have already been attached (if requested) */
4018 osi_Assert(orphans != ORPH_ATTACH);
4019 oblocks += vnp->blockCount;
4022 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4024 BlocksInVolume -= vnp->blockCount;
4026 if (VNDISK_GET_INO(&vnode)) {
4028 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4029 osi_Assert(code == 0);
4031 memset(&vnode, 0, sizeof(vnode));
4033 } else if (vnp->count) {
4035 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4038 vnode.modeBits = vnp->modeBits;
4041 vnode.dataVersion++;
4044 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4045 vnodeIndexOffset(vcp, vnodeNumber),
4046 (char *)&vnode, sizeof(vnode));
4047 osi_Assert(nBytes == sizeof(vnode));
4049 salvinfo->VolumeChanged = 1;
/* Orphan summary; printed only when not in Showmode and at least one orphan
 * was counted. */
4053 if (!Showmode && ofiles) {
4054 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4056 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
/* Release the per-vnode name strings held in both tables. */
4060 for (class = 0; class < nVNODECLASSES; class++) {
4061 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4062 for (i = 0; i < vip->nVnodes; i++)
4063 if (vip->vnodes[i].name)
4064 free(vip->vnodes[i].name);
4071 /* Set correct resource utilization statistics */
4072 volHeader.filecount = FilesInVolume;
4073 volHeader.diskused = BlocksInVolume;
4075 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
4076 if (volHeader.uniquifier < (maxunique + 1)) {
4078 Log("Volume uniquifier is too low; fixed\n");
4079 /* Plus 2,000 in case there are workstations out there with
4080 * cached vnodes that have since been deleted
4082 volHeader.uniquifier = (maxunique + 1 + 2000);
4086 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4087 "Only use this salvaged volume to copy data to another volume; "
4088 "do not continue to use this volume (%lu) as-is.\n",
4089 afs_printable_uint32_lu(vid));
4092 #ifdef FSSYNC_BUILD_CLIENT
4093 if (!Testing && salvinfo->VolumeChanged && salvinfo->useFSYNC) {
4094 afs_int32 fsync_code;
4096 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4098 Log("Error trying to tell the fileserver to break callbacks for "
4099 "changed volume %lu; error code %ld\n",
4100 afs_printable_uint32_lu(vid),
4101 afs_printable_int32_ld(fsync_code));
4103 salvinfo->VolumeChanged = 0;
4106 #endif /* FSSYNC_BUILD_CLIENT */
4108 /* Turn off the inUse bit; the volume's been salvaged! */
4109 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4110 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4111 volHeader.inService = 1; /* allow service again */
/* needsCallback is taken from VolumeChanged before that flag is reset below.
 * In FSSYNC_BUILD_CLIENT builds the flag may already have been zeroed above;
 * the condition for that is not shown. */
4112 volHeader.needsCallback = (salvinfo->VolumeChanged != 0);
4113 volHeader.dontSalvage = DONT_SALVAGE;
4114 salvinfo->VolumeChanged = 0;
4116 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4117 osi_Assert(nBytes == sizeof(volHeader));
4120 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
4121 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
4122 FilesInVolume, BlocksInVolume);
/* Drop the handles on both vnode index files. */
4125 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4126 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
/* ClearROInUseBit: read the volume header through
 * summary->volumeInfoHandle, mark the volume as not in use, not needing
 * salvage, in service and DONT_SALVAGE, and write the header back.  A short
 * read or write, or a bad header magic, trips an assertion. */
4132 ClearROInUseBit(struct VolumeSummary *summary)
4134 IHandle_t *h = summary->volumeInfoHandle;
4135 afs_sfsize_t nBytes;
4137 VolumeDiskData volHeader;
4139 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4140 osi_Assert(nBytes == sizeof(volHeader));
4141 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
/* These are the same header flag values SalvageVolume writes, except that
 * needsCallback is left as read from disk. */
4142 volHeader.inUse = 0;
4143 volHeader.needsSalvaged = 0;
4144 volHeader.inService = 1;
4145 volHeader.dontSalvage = DONT_SALVAGE;
4147 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4148 osi_Assert(nBytes == sizeof(volHeader));
4153 * Possibly delete the volume.
4155 * deleteMe - Always do so, only a partial volume.
/* MaybeZapVolume: decide what to do with a volume that could not be salvaged.
 * Read-only volumes, and any volume when deleteMe is set, have their disk
 * header destroyed and their header file unlinked.  Otherwise, when check is
 * zero, the failure is logged and Abort is called.  Several branches and
 * declarations are not visible in this listing. */
4158 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4159 char *message, int deleteMe, int check)
/* Deletion is attempted only when a volume summary with a header file name is
 * available. */
4161 if (readOnly(isp) || deleteMe) {
4162 if (isp->volSummary && isp->volSummary->fileName) {
4165 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
4167 Log("It will be deleted on this server (you may find it elsewhere)\n");
4170 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
4172 Log("it will be deleted instead. It should be recloned.\n");
/* NOTE(review): sprintf writes into path without a length limit; the size of
 * path is declared on a line not shown. */
4177 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, isp->volSummary->fileName);
4179 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4181 Log("Error %ld destroying volume disk header for volume %lu\n",
4182 afs_printable_int32_ld(code),
4183 afs_printable_uint32_lu(isp->volumeId));
4186 /* make sure we actually delete the fileName file; ENOENT
4187 * is fine, since VDestroyVolumeDiskHeader probably already
4189 if (unlink(path) && errno != ENOENT) {
4190 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4194 } else if (!check) {
4195 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
4197 Abort("Salvage of volume %u aborted\n", isp->volumeId);
4201 #ifdef AFS_DEMAND_ATTACH_FS
4203 * Locks a volume on disk for salvaging.
4205 * @param[in] volumeId volume ID to lock
4207 * @return operation status
4209 * @retval -1 volume lock raced with a fileserver restart; all volumes must
4210 * be checked out and locked again
/* LockVolume: take the on-disk lock for volumeId, confirm with the fileserver
 * that the volume is still checked out for salvage, then stamp
 * inUse = programType into the volume header.  Lock or verification failures
 * other than SYNC_DENIED end in Abort.  Return statements are not visible in
 * this listing. */
4215 LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
4220 /* should always be WRITE_LOCK, but keep the lock-type logic all
4221 * in one place, in VVolLockType. Params will be ignored, but
4222 * try to provide what we're logically doing. */
4223 locktype = VVolLockType(V_VOLUPD, 1);
/* EBUSY from the lock call is reported as another user of the volume; any
 * other failure is reported with its error code. */
4225 code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
4227 if (code == EBUSY) {
4228 Abort("Someone else appears to be using volume %lu; Aborted\n",
4229 afs_printable_uint32_lu(volumeId));
4231 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4232 afs_printable_int32_ld(code),
4233 afs_printable_uint32_lu(volumeId));
/* SYNC_DENIED is the one non-fatal outcome: the caller has to check the
 * volumes out again. */
4236 code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPathName, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4237 if (code == SYNC_DENIED) {
4238 /* need to retry checking out volumes */
4241 if (code != SYNC_OK) {
4242 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4243 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4246 /* set inUse = programType in the volume header to ensure that nobody
4247 * tries to use this volume again without salvaging, if we somehow crash
4248 * or otherwise exit before finishing the salvage.
4252 struct VolumeHeader header;
4253 struct VolumeDiskHeader diskHeader;
4254 struct VolumeDiskData volHeader;
4256 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
4261 DiskToVolumeHeader(&header, &diskHeader);
4263 IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
4264 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4265 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4271 volHeader.inUse = programType;
4273 /* If we can't re-write the header, bail out and error. We don't
4274 * assert when reading the header, since it's possible the
4275 * header isn't really there (when there's no data associated
4276 * with the volume; we just delete the vol header file in that
4277 * case). But if it's there enough that we can read it, but
4278 * somehow we cannot write to it to signify we're salvaging it,
4279 * we've got a big problem and we cannot continue. */
4280 osi_Assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
4287 #endif /* AFS_DEMAND_ATTACH_FS */
/* AskOffline: ask the fileserver, through FSYNC_VolOp with FSYNC_VOL_OFF, to
 * take volumeId offline for salvage.  Up to three attempts are made.  A
 * denial or a protocol mismatch aborts at once, and so does a result other
 * than SYNC_OK after the loop. */
4290 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
4295 memset(&res, 0, sizeof(res));
/* res is cleared above and handed to FSYNC_VolOp as the response argument. */
4297 for (i = 0; i < 3; i++) {
4298 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4299 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4301 if (code == SYNC_OK) {
4303 } else if (code == SYNC_DENIED) {
4304 #ifdef DEMAND_ATTACH_ENABLE
4305 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4307 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4309 Abort("Salvage aborted\n");
4310 } else if (code == SYNC_BAD_COMMAND) {
4311 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
4313 #ifdef DEMAND_ATTACH_ENABLE
4314 Log("AskOffline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
4316 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4318 Abort("Salvage aborted\n");
/* Any other failure: log it and shut the FSYNC client connection down before
 * the next attempt.  Re-initialisation, if any, is on lines not shown. */
4321 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4322 FSYNC_clientFinis();
/* code still holds the result of the last attempt here. */
4326 if (code != SYNC_OK) {
4327 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4328 Abort("Salvage aborted\n");
/* AskOnline: ask the fileserver, through FSYNC_VolOp with FSYNC_VOL_ON, to
 * bring volumeId back online.  Up to three attempts are made.  Unlike
 * AskOffline, none of the visible branches calls Abort.  Parts of the
 * function are not visible in this listing. */
4333 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4337 for (i = 0; i < 3; i++) {
4338 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4339 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4341 if (code == SYNC_OK) {
4343 } else if (code == SYNC_DENIED) {
4344 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4345 } else if (code == SYNC_BAD_COMMAND) {
4346 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4348 #ifdef DEMAND_ATTACH_ENABLE
4349 Log("AskOnline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
4351 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
/* Any other failure: log it and shut the FSYNC client connection down before
 * the next attempt.  The message describes the online request that this
 * function actually sends. */
4356 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
4357 FSYNC_clientFinis();
/* CopyInode: copy the contents of inode1 into inode2 on the given device,
 * one buffer at a time, using positional reads and writes at offset size.
 * Every failure is fatal: the opens, each write and the final read status are
 * all checked with osi_Assert.  Some statements are not visible in this
 * listing. */
4364 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4366 /* Volume parameter is passed in case iopen is upgraded in future to
4367 * require a volume Id to be passed
4370 IHandle_t *srcH, *destH;
4371 FdHandle_t *srcFdP, *destFdP;
4373 afs_foff_t size = 0;
4375 IH_INIT(srcH, device, rwvolume, inode1);
4376 srcFdP = IH_OPEN(srcH);
4377 osi_Assert(srcFdP != NULL);
4378 IH_INIT(destH, device, rwvolume, inode2);
4379 destFdP = IH_OPEN(destH);
/* Check the destination open the same way as the source open, so that a
 * failed open is reported as an assertion instead of a NULL handle being
 * passed to FDH_PWRITE. */
osi_Assert(destFdP != NULL);
4380 while ((nBytes = FDH_PREAD(srcFdP, buf, sizeof(buf), size)) > 0) {
4381 osi_Assert(FDH_PWRITE(destFdP, buf, nBytes, size) == nBytes);
/* The loop ends on 0 (end of data) or on a negative value (read error); only
 * the former is accepted. */
4384 osi_Assert(nBytes == 0);
4385 FDH_REALLYCLOSE(srcFdP);
4386 FDH_REALLYCLOSE(destFdP);
/* PrintInodeList: debugging aid that loads the whole inode file behind
 * salvinfo->inodeFd into memory and logs one line per ViceInodeInfo record. */
4393 PrintInodeList(struct SalvInfo *salvinfo)
4395 struct ViceInodeInfo *ip;
4396 struct ViceInodeInfo *buf;
4397 struct afs_stat status;
/* The file size from fstat determines both the buffer size and the record
 * count.  The read is issued from the current file position; no seek is
 * visible here.
 * NOTE(review): no free of buf is visible in this listing - TODO confirm. */
4401 osi_Assert(afs_fstat(salvinfo->inodeFd, &status) == 0);
4402 buf = (struct ViceInodeInfo *)malloc(status.st_size);
4403 osi_Assert(buf != NULL);
4404 nInodes = status.st_size / sizeof(struct ViceInodeInfo);
4405 osi_Assert(read(salvinfo->inodeFd, buf, status.st_size) == status.st_size);
/* nInodes is consumed as a countdown, so it is zero or below after the loop. */
4406 for (ip = buf; nInodes--; ip++) {
4407 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
4408 PrintInode(stmp, ip->inodeNumber), ip->linkCount,
4409 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
4410 ip->u.param[2], ip->u.param[3]);
/* PrintInodeSummary: debugging aid that logs one line for each of the
 * salvinfo->nVolumesInInodeFile entries of salvinfo->inodeSummary. */
4416 PrintInodeSummary(struct SalvInfo *salvinfo)
4419 struct InodeSummary *isp;
4421 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4422 isp = &salvinfo->inodeSummary[i];
/* The trailing volSummary label in the format has no matching argument; it is
 * printed as literal text. */
4423 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/* PrintVolumeSummary: debugging aid that logs the header file name of each of
 * the salvinfo->nVolumes entries starting at salvinfo->volumeSummaryp. */
4428 PrintVolumeSummary(struct SalvInfo *salvinfo)
4431 struct VolumeSummary *vsp;
4433 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; vsp++, i++) {
/* Only fileName is printed; the header and wouldNeedCallback labels in the
 * format are literal text. */
4434 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
/* Body fragment of the fork wrapper; its signature and the fork call itself
 * are on lines not shown.  On the NT path the function must never run. */
4444 osi_Assert(0); /* Fork is never executed in the NT code path */
4448 #ifdef AFS_DEMAND_ATTACH_FS
/* f equal to 0 is taken to mean the child side of the fork.  A salvageserver
 * child re-establishes its FSSYNC connection here; the SALVSYNC call is on a
 * line not shown. */
4449 if ((f == 0) && (programType == salvageServer)) {
4450 /* we are a salvageserver child */
4451 #ifdef FSSYNC_BUILD_CLIENT
4452 VChildProcReconnectFS_r();
4454 #ifdef SALVSYNC_BUILD_CLIENT
4458 #endif /* AFS_DEMAND_ATTACH_FS */
4459 #endif /* !AFS_NT40_ENV */
/* Body fragment of the process-exit helper; its signature is on a line not
 * shown.  For a salvageserver the SALVSYNC and FSSYNC client sections below
 * presumably shut those connections down; the calls are not visible. */
4469 #ifdef AFS_DEMAND_ATTACH_FS
4470 if (programType == salvageServer) {
4471 #ifdef SALVSYNC_BUILD_CLIENT
4474 #ifdef FSSYNC_BUILD_CLIENT
4478 #endif /* AFS_DEMAND_ATTACH_FS */
/* A thread other than main_thread ends only itself, passing code as its
 * thread exit value. */
4481 if (main_thread != pthread_self())
4482 pthread_exit((void *)code);
/* Body fragment of the child-wait helper; its signature and return statements
 * are on lines not shown.  It reaps one child, logs when that child dumped
 * core, and treats death by signal or a non-zero exit status as failure. */
4495 pid = wait(&status);
4496 osi_Assert(pid != -1);
4497 if (WCOREDUMP(status))
4498 Log("\"%s\" core dumped!\n", prog);
4499 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/* TimeStamp: format clock as local time into a function-static 20-byte
 * buffer, either with or without seconds.  The test on precision that picks
 * the format, and the return statement, are on lines not shown.
 * NOTE(review): the buffer is static, so each call overwrites the previous
 * result; the localtime result is used without a visible NULL check. */
4505 TimeStamp(time_t clock, int precision)
4508 static char timestamp[20];
4509 lt = localtime(&clock);
/* The longer format yields 19 characters, which with the terminating NUL
 * exactly fills the 20-byte buffer. */
4511 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M:%S", lt);
4513 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M", lt);
/* CheckLogFile: set up the salvager log.  On the visible lines the current
 * log at log_path is renamed to log_path with a .old suffix and a fresh log is
 * opened in append mode.  Several branches are not visible in this listing. */
4518 CheckLogFile(char * log_path)
4520 char oldSlvgLog[AFSDIR_PATH_MAX];
4522 #ifndef AFS_NT40_ENV
/* NOTE(review): strcpy and strcat do not limit the write to the
 * AFSDIR_PATH_MAX bytes of oldSlvgLog; a log_path within 4 characters of that
 * size would overflow it. */
4529 strcpy(oldSlvgLog, log_path);
4530 strcat(oldSlvgLog, ".old");
4532 renamefile(log_path, oldSlvgLog);
4533 logFile = afs_fopen(log_path, "a");
4535 if (!logFile) { /* still nothing, use stdout */
4539 #ifndef AFS_NAMEI_ENV
4540 AFS_DEBUG_IOPS_LOG(logFile);
4545 #ifndef AFS_NT40_ENV
/* TimeStampLogFile: give the current log a second, timestamped name by hard
 * linking log_path to log_path.YYYY-MM-DD.HH:MM:SS (local time). */
4547 TimeStampLogFile(char * log_path)
4549 char stampSlvgLog[AFSDIR_PATH_MAX];
/* now is assigned on a line not shown.  NOTE(review): the localtime result is
 * dereferenced without a visible NULL check. */
4554 lt = localtime(&now);
4555 (void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
4556 "%s.%04d-%02d-%02d.%02d:%02d:%02d",
4557 log_path, lt->tm_year + 1900,
4558 lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
4561 /* try to link the logfile to a timestamped filename */
4562 /* if it fails, oh well, nothing we can do */
4563 link(log_path, stampSlvgLog);
/* Body fragment of the routine that echoes the salvager log; its signature is
 * on a line not shown.  It refuses when syslog is in use, otherwise opens
 * AFSDIR_SERVER_SLVGLOG_FILEPATH for reading and consumes it line by line.
 * The statement executed for each line is not visible. */
4572 #ifndef AFS_NT40_ENV
4574 printf("Can't show log since using syslog.\n");
4585 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
4588 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
4591 while (fgets(line, sizeof(line), logFile))
/* Log: printf-style logging.  The message is formatted into the local buffer
 * tmp (truncated to its size by afs_vsnprintf) and then sent either to syslog
 * at LOG_INFO or to logFile with a TimeStamp prefix.  The test that chooses
 * between the two is on a line not shown. */
4598 Log(const char *format, ...)
4604 va_start(args, format);
4605 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4607 #ifndef AFS_NT40_ENV
4609 syslog(LOG_INFO, "%s", tmp);
/* File logging: prefix the message with the current time; the second
 * TimeStamp argument of 1 selects a non-zero precision. */
4613 gettimeofday(&now, 0);
4614 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/* Abort: printf-style fatal message.  The text is formatted into tmp and
 * written to syslog or to logFile, here without a timestamp prefix.  The
 * statements that end the program are on lines not shown. */
4620 Abort(const char *format, ...)
4625 va_start(args, format);
4626 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4628 #ifndef AFS_NT40_ENV
4630 syslog(LOG_INFO, "%s", tmp);
4634 fprintf(logFile, "%s", tmp);
/* ToString: allocate strlen(s) + 1 bytes for a heap copy of s and assert that
 * the allocation succeeded.  The copy and the return are on lines not shown;
 * the caller presumably owns the result. */
4646 ToString(const char *s)
4649 p = (char *)malloc(strlen(s) + 1);
4650 osi_Assert(p != NULL);
4655 /* Remove the FORCESALVAGE file found directly under path.  Nothing is
 * removed in Testing mode or when ForceSalvage is not set. */
4657 RemoveTheForce(char *path)
4660 struct afs_stat force; /* so we can use afs_stat to find it */
/* NOTE(review): target is built with unbounded strcpy and strcat; its size is
 * declared on a line not shown. */
4661 strcpy(target,path);
4662 strcat(target,"/FORCESALVAGE");
4663 if (!Testing && ForceSalvage) {
/* The unlink result is ignored on this line. */
4664 if (afs_stat(target,&force) == 0) unlink(target);
4668 #ifndef AFS_AIX32_ENV
4670 * UseTheForceLuke - see if we can use the force
/* UseTheForceLuke (variant inside the AFS_AIX32_ENV conditional): returns
 * non-zero when a FORCESALVAGE file can be stat-ed directly under path. */
4673 UseTheForceLuke(char *path)
4675 struct afs_stat force;
/* NOTE(review): target is built with unbounded strcpy and strcat; its size is
 * declared on a line not shown. */
4677 strcpy(target,path);
4678 strcat(target,"/FORCESALVAGE");
4680 return (afs_stat(target, &force) == 0);
4684 * UseTheForceLuke - see if we can use the force
4687 * The VRMIX fsck will not muck with the filesystem it is supposedly
4688 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4689 * muck directly with the root inode, which is within the normal
4691 * ListViceInodes() has a side effect of setting ForceSalvage if
4692 * it detects a need, based on root inode examination.
/* UseTheForceLuke (alternate variant): the visible return always reports 0,
 * so no forced salvage is requested through a FORCESALVAGE file here. */
4695 UseTheForceLuke(char *path)
4698 return 0; /* sorry OB1 */
4703 /* NT support routines */
/* Path of the running executable; filled in on first use by
 * nt_SalvagePartition and reused by later calls. */
4705 static char execpathname[MAX_PATH];
/* nt_SalvagePartition: NT only.  Start a child salvager for partName by
 * re-running this executable through spawnprocveb, passing a childJob record
 * (magic, job number, partition name) as the data block.  Declarations and
 * return statements are not visible in this listing. */
4707 nt_SalvagePartition(char *partName, int jobn)
4712 if (!*execpathname) {
4713 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
/* GetModuleFileName returns 0 on failure and the size it was given when the
 * path had to be truncated.  The size passed above is MAX_PATH - 1, so that
 * is the value that signals truncation. */
4714 if (!n || n == MAX_PATH - 1)
4717 job.cj_magic = SALVAGER_MAGIC;
4718 job.cj_number = jobn;
/* NOTE(review): strcpy does not limit the write to the size of job.cj_part;
 * that size is declared outside this listing. */
4719 (void)strcpy(job.cj_part, partName);
4720 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/* nt_SetupPartitionSalvage: NT only.  Validate the childJob record handed to
 * a spawned salvager (exact size and SALVAGER_MAGIC) and open that job's own
 * log file.  Return statements are not visible in this listing. */
4725 nt_SetupPartitionSalvage(void *datap, int len)
4727 childJob_t *jobp = (childJob_t *) datap;
4728 char logname[AFSDIR_PATH_MAX];
4730 if (len != sizeof(childJob_t))
4732 if (jobp->cj_magic != SALVAGER_MAGIC)
/* The log name is the salvage log path followed by a dot and a number; the
 * numeric argument is on a line not shown.  The file is opened in write
 * mode. */
4737 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
4739 logFile = afs_fopen(logname, "w");
4747 #endif /* AFS_NT40_ENV */