2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
91 #include <sys/param.h>
95 #endif /* ITIMER_REAL */
101 #include <sys/stat.h>
106 #include <WINNT/afsevent.h>
109 #define WCOREDUMP(x) ((x) & 0200)
112 #include <afs/afsint.h>
113 #include <afs/assert.h>
114 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
115 #if defined(AFS_VFSINCL_ENV)
116 #include <sys/vnode.h>
118 #include <sys/fs/ufs_inode.h>
120 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
121 #include <ufs/ufs/dinode.h>
122 #include <ufs/ffs/fs.h>
124 #include <ufs/inode.h>
127 #else /* AFS_VFSINCL_ENV */
129 #include <ufs/inode.h>
130 #else /* AFS_OSF_ENV */
131 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
132 #include <sys/inode.h>
135 #endif /* AFS_VFSINCL_ENV */
136 #endif /* AFS_SGI_ENV */
139 #include <sys/lockf.h>
143 #include <checklist.h>
145 #if defined(AFS_SGI_ENV)
150 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
153 #include <sys/mnttab.h>
154 #include <sys/mntent.h>
159 #endif /* AFS_SGI_ENV */
160 #endif /* AFS_HPUX_ENV */
165 #include <afs/osi_inode.h>
169 #include <afs/afsutil.h>
170 #include <afs/fileutil.h>
171 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
179 #include <afs/afssyscalls.h>
183 #include "partition.h"
184 #include "daemon_com.h"
186 #include "volume_inline.h"
187 #include "salvsync.h"
188 #include "viceinode.h"
190 #include "volinodes.h" /* header magic number, etc. stuff */
191 #include "vol-salvage.h"
193 #include "vol_internal.h"
195 #include <afs/prs_fs.h>
197 #ifdef FSSYNC_BUILD_CLIENT
198 #include "vg_cache.h"
205 /*@+fcnmacros +macrofcndecl@*/
208 extern off64_t afs_lseek(int FD, off64_t O, int F);
209 #endif /*S_SPLINT_S */
210 #define afs_lseek(FD, O, F) lseek64(FD, (off64_t) (O), F)
211 #define afs_stat stat64
212 #define afs_fstat fstat64
213 #define afs_open open64
214 #define afs_fopen fopen64
215 #else /* !O_LARGEFILE */
217 extern off_t afs_lseek(int FD, off_t O, int F);
218 #endif /*S_SPLINT_S */
219 #define afs_lseek(FD, O, F) lseek(FD, (off_t) (O), F)
220 #define afs_stat stat
221 #define afs_fstat fstat
222 #define afs_open open
223 #define afs_fopen fopen
224 #endif /* !O_LARGEFILE */
225 /*@=fcnmacros =macrofcndecl@*/
228 extern void *calloc();
230 static char *TimeStamp(time_t clock, int precision);
233 int debug; /* -d flag */
234 extern int Testing; /* -n flag */
235 int ListInodeOption; /* -i flag */
236 int ShowRootFiles; /* -r flag */
237 int RebuildDirs; /* -sal flag */
238 int Parallel = 4; /* -para X flag */
239 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
240 int forceR = 0; /* -b flag */
241 int ShowLog = 0; /* -showlog flag */
242 int ShowSuid = 0; /* -showsuid flag */
243 int ShowMounts = 0; /* -showmounts flag */
244 int orphans = ORPH_IGNORE; /* -orphans option */
249 int useSyslog = 0; /* -syslog flag */
250 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
259 #define MAXPARALLEL 32
261 int OKToZap; /* -o flag */
262 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
263 * in the volume header */
265 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
267 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
270 * information that is 'global' to a particular salvage job.
273 Device fileSysDevice; /**< The device number of the current partition
275 char fileSysPath[8]; /**< The path of the mounted partition currently
276 * being salvaged, i.e. the directory containing
277 * the volume headers */
278 char *fileSysPathName; /**< NT needs this to make name pretty log. */
279 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
280 int VGLinkH_cnt; /**< # of references to lnk handle. */
281 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
284 char *fileSysDeviceName; /**< The block device where the file system being
285 * salvaged was mounted */
286 char *filesysfulldev;
288 int VolumeChanged; /**< Set by any routine which would change the
289 * volume in a way which would require callbacks
290 * to be broken if the volume was put back on
291 * on line by an active file server */
293 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
294 * header dealt with */
296 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
297 int inodeFd; /**< File descriptor for inode file */
299 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
300 int nVolumes; /**< Number of volumes (read-write and read-only)
301 * in volume summary */
302 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
305 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
306 * vnodes in the volume that
307 * we are currently looking
315 /* Forward declarations */
316 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
317 static int AskVolumeSummary(struct SalvInfo *salvinfo,
318 VolumeId singleVolumeNumber);
320 #ifdef AFS_DEMAND_ATTACH_FS
321 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
322 #endif /* AFS_DEMAND_ATTACH_FS */
324 /* Uniquifier stored in the Inode */
329 return (u & 0x3fffff);
331 #if defined(AFS_SGI_EXMAG)
332 return (u & SGI_UNIQMASK);
335 #endif /* AFS_SGI_EXMAG */
342 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
344 return 0; /* otherwise may be transient, e.g. EMFILE */
349 char *save_args[MAX_ARGS];
351 extern pthread_t main_thread;
352 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
356 * Get the salvage lock if not already held. Hold until process exits.
358 * @param[in] locktype READ_LOCK or WRITE_LOCK
/*
 * Take the salvager lock file AFSDIR_SERVER_SLVGLOCK_FILEPATH with the
 * requested lock type: the VLockFile is initialised with VLockFileInit()
 * and then locked with VLockFileLock().  The two message fragments below
 * belong to the failure reports (another salvager apparently running, or
 * some other error code).
 * NOTE(review): this listing omits the lines that declare code, offset and
 * nonblock and that test the result, so the exact failure handling cannot
 * be confirmed from here.
 */
361 _ObtainSalvageLock(int locktype)
363 struct VLockFile salvageLock;
368 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
370 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
373 "salvager: There appears to be another salvager running! "
378 "salvager: Error %d trying to acquire salvage lock! "
/* Obtain the salvage lock by calling _ObtainSalvageLock() with WRITE_LOCK. */
384 ObtainSalvageLock(void)
386 _ObtainSalvageLock(WRITE_LOCK);
/* Obtain the salvage lock by calling _ObtainSalvageLock() with READ_LOCK. */
389 ObtainSharedSalvageLock(void)
391 _ObtainSalvageLock(READ_LOCK);
395 #ifdef AFS_SGI_XFS_IOPS_ENV
396 /* Check if the given partition is mounted. For XFS, the root inode is not a
397 * constant. So we check the hard way.
/*
 * Look for `part` among the mount points of the MOUNTED table: the table is
 * opened with setmntent() and walked with getmntent(), comparing each
 * entry's mnt_dir with `part`.  The statement executed on a match is not
 * visible in this listing (presumably the loop stops there).
 */
400 IsPartitionMounted(char *part)
403 struct mntent *mntent;
/* NOTE(review): the setmntent() call is an argument of assert(); if the
 * assert in use can be compiled out, the table is never opened and mntfp
 * is never set.  The definition of assert is not visible here - confirm. */
405 assert(mntfp = setmntent(MOUNTED, "r"));
406 while (mntent = getmntent(mntfp)) {
407 if (!strcmp(part, mntent->mnt_dir))
/* NOTE(review): both arms of the conditional below are 1, so the function
 * reports "mounted" even when no entry matched.  Presumably
 * `mntent ? 1 : 0` was intended - confirm before relying on the result. */
412 return mntent ? 1 : 1;
415 /* Check if the given inode is the root of the filesystem. */
416 #ifndef AFS_SGI_XFS_IOPS_ENV
/* Report whether the stat result describes the root inode of a file system,
 * i.e. whether its st_ino equals ROOTINODE. */
418 IsRootInode(struct afs_stat *status)
421 * The root inode is not a fixed value in XFS partitions. So we need to
422 * see if the partition is in the list of mounted partitions. This only
423 * affects the SalvageFileSys path, so we check there.
425 return (status->st_ino == ROOTINODE);
430 #ifndef AFS_NAMEI_ENV
431 /* We don't want to salvage big files filesystems, since we can't put volumes on
/*
 * Decide whether the file system on devName is a "big files" file system.
 * The device path is built in `name` (with "/dev/" put in front when
 * devName does not already start with it), its superblock is read with
 * ReadSuper() and tested with IsBigFilesFileSystem(); both the read failure
 * and the big-files case are logged as reasons for not salvaging.
 * NOTE(review): the return statements and the declaration of `name` are
 * not visible in this listing.
 */
435 CheckIfBigFilesFS(char *mountPoint, char *devName)
437 struct superblock fs;
440 if (strncmp(devName, "/dev/", 5)) {
/* NOTE(review): sprintf() and strcpy() below write into `name` without a
 * length limit; verify that `name` is large enough for any devName. */
441 (void)sprintf(name, "/dev/%s", devName);
443 (void)strcpy(name, devName);
446 if (ReadSuper(&fs, name) < 0) {
447 Log("Unable to read superblock. Not salvaging partition %s.\n",
451 if (IsBigFilesFileSystem(&fs)) {
452 Log("Partition %s is a big files filesystem, not salvaging.\n",
462 #define HDSTR "\\Device\\Harddisk"
463 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
/*
 * Variant of SameDisk() built on QueryDosDevice().  For each of the two
 * partitions the device name is resolved into `res`; a result that does not
 * start with HDSTR ("\Device\Harddisk") is reported with a warning, and
 * otherwise the number following that prefix is parsed with atoi() into d1
 * and d2 respectively.
 * NOTE(review): the handling of QueryDosDevice() failures, the use of the
 * static dowarn flag and the final comparison of d1 and d2 are not visible
 * in this listing.
 */
465 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
470 static int dowarn = 1;
472 if (!QueryDosDevice(p1->devName, res, RES_LEN - 1))
474 if (strncmp(res, HDSTR, HDLEN)) {
477 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
478 res, HDSTR, p1->devName);
482 d1 = atoi(&res[HDLEN]);
484 if (!QueryDosDevice(p2->devName, res, RES_LEN - 1))
486 if (strncmp(res, HDSTR, HDLEN)) {
489 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
490 res, HDSTR, p2->devName);
494 d2 = atoi(&res[HDLEN]);
499 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
502 /* This assumes that two partitions with the same device number divided by
503 * PartsPerDisk are on the same disk.
/*
 * Queue and run partition salvages, at most `Parallel` at a time.
 *
 * With a partition in partP a job record is allocated for it; with partP
 * NULL the call waits for the outstanding jobs.  When debug is set or
 * Parallel == 1 the partition is handed straight to SalvageFileSys().
 * Otherwise jobs are kept in the static jobs[] array, one slot per disk as
 * decided by SameDisk(); further partitions on a disk that is already being
 * salvaged are chained through nextjob.  Finished children are reaped with
 * wait(), a core dump is logged, and the next job for a slot is run through
 * SalvageFileSys1() with its own log file whose name is built from
 * AFSDIR_SERVER_SLVGLOG_FILEPATH and the job number.  When syslog is not in
 * use the per-job logs are read back with fgets() and then unlinked.
 *
 * NOTE(review): this listing omits many lines of the function (braces,
 * several declarations, the fork itself), so the description above covers
 * only what the visible lines show.
 */
506 SalvageFileSysParallel(struct DiskPartition64 *partP)
509 struct DiskPartition64 *partP;
510 int pid; /* Pid for this job */
511 int jobnumb; /* Log file job number */
512 struct job *nextjob; /* Next partition on disk to salvage */
514 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
515 struct job *thisjob = 0;
516 static int numjobs = 0;
517 static int jobcount = 0;
523 char logFileName[256];
527 /* We have a partition to salvage. Copy it into thisjob */
528 thisjob = (struct job *)malloc(sizeof(struct job));
530 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
533 memset(thisjob, 0, sizeof(struct job));
534 thisjob->partP = partP;
535 thisjob->jobnumb = jobcount;
537 } else if (jobcount == 0) {
538 /* We are asking to wait for all jobs (partp == 0), yet we never
541 Log("No file system partitions named %s* found; not salvaged\n",
542 VICE_PARTITION_PREFIX);
546 if (debug || Parallel == 1) {
548 SalvageFileSys(thisjob->partP, 0);
555 /* Check to see if thisjob is for a disk that we are already
556 * salvaging. If it is, link it in as the next job to do. The
557 * jobs array has 1 entry per disk being salvages. numjobs is
558 * the total number of disks currently being salvaged. In
559 * order to keep thejobs array compact, when a disk is
560 * completed, the hightest element in the jobs array is moved
561 * down to now open slot.
563 for (j = 0; j < numjobs; j++) {
564 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
565 /* On same disk, add it to this list and return */
566 thisjob->nextjob = jobs[j]->nextjob;
567 jobs[j]->nextjob = thisjob;
574 /* Loop until we start thisjob or until all existing jobs are finished */
575 while (thisjob || (!partP && (numjobs > 0))) {
576 startjob = -1; /* No new job to start */
578 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
579 /* Either the max jobs are running or we have to wait for all
580 * the jobs to finish. In either case, we wait for at least one
581 * job to finish. When it's done, clean up after it.
583 pid = wait(&wstatus);
585 for (j = 0; j < numjobs; j++) { /* Find which job it is */
586 if (pid == jobs[j]->pid)
590 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
591 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
594 numjobs--; /* job no longer running */
595 oldjob = jobs[j]; /* remember */
596 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
597 free(oldjob); /* free the old job */
599 /* If there is another partition on the disk to salvage, then
600 * say we will start it (startjob). If not, then put thisjob there
601 * and say we will start it.
603 if (jobs[j]) { /* Another partitions to salvage */
604 startjob = j; /* Will start it */
605 } else { /* There is not another partition to salvage */
607 jobs[j] = thisjob; /* Add thisjob */
609 startjob = j; /* Will start it */
611 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
612 startjob = -1; /* Don't start it - already running */
616 /* We don't have to wait for a job to complete */
618 jobs[numjobs] = thisjob; /* Add this job */
620 startjob = numjobs; /* Will start it */
624 /* Start up a new salvage job on a partition in job slot "startjob" */
625 if (startjob != -1) {
627 Log("Starting salvage of file system partition %s\n",
628 jobs[startjob]->partP->name);
630 /* For NT, we not only fork, but re-exec the salvager. Pass in the
631 * commands and pass the child job number via the data path.
634 nt_SalvagePartition(jobs[startjob]->partP->name,
635 jobs[startjob]->jobnumb);
636 jobs[startjob]->pid = pid;
641 jobs[startjob]->pid = pid;
647 for (fd = 0; fd < 16; fd++)
654 openlog("salvager", LOG_PID, useSyslogFacility);
658 (void)afs_snprintf(logFileName, sizeof logFileName,
660 AFSDIR_SERVER_SLVGLOG_FILEPATH,
661 jobs[startjob]->jobnumb);
662 logFile = afs_fopen(logFileName, "w");
667 SalvageFileSys1(jobs[startjob]->partP, 0);
672 } /* while ( thisjob || (!partP && numjobs > 0) ) */
674 /* If waited for all jobs to complete, now collect log files and return */
676 if (!useSyslog) /* if syslogging - no need to collect */
679 for (i = 0; i < jobcount; i++) {
680 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
681 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
682 if ((passLog = afs_fopen(logFileName, "r"))) {
683 while (fgets(buf, sizeof(buf), passLog)) {
688 (void)unlink(logFileName);
/*
 * Salvage partP (or only singleVolumeNumber when it is non-zero) through
 * SalvageFileSys1().  The call is made directly when forking is disabled
 * (canfork is zero) or debug is set, and otherwise in the process for which
 * Fork() returns 0; Wait("SalvageFileSys") is used to wait for that child.
 * NOTE(review): the statement guarded by `canfork && !debug` and the
 * else-structure around Wait() are not visible in this listing.
 */
697 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
699 if (!canfork || debug || Fork() == 0) {
700 SalvageFileSys1(partP, singleVolumeNumber);
701 if (canfork && !debug) {
706 Wait("SalvageFileSys");
/*
 * Work on the device path held in pbuffer.  A copy is made in the local
 * pbuf and the last '/' is located first in the copy and then in the
 * original; the final strcpy() moves the text after that '/' to the front
 * of pbuffer.
 * NOTE(review): wpath is not referenced on any visible line and the return
 * statement is not visible; SalvageFileSys1() uses the returned pointer as
 * the device name.
 */
710 get_DevName(char *pbuffer, char *wpath)
712 char pbuf[128], *ptr;
/* NOTE(review): unbounded copy into the 128-byte pbuf; safe only while
 * callers pass shorter strings (the visible caller uses a 100-byte buffer). */
713 strcpy(pbuf, pbuffer);
714 ptr = (char *)strrchr(pbuf, '/');
720 ptr = (char *)strrchr(pbuffer, '/');
/* NOTE(review): ptr points into pbuffer here, so the source and destination
 * of this strcpy() overlap, which the C standard leaves undefined.
 * memmove(pbuffer, ptr + 1, strlen(ptr + 1) + 1) would be well defined. */
722 strcpy(pbuffer, ptr + 1);
/*
 * Salvage one partition, or the volume group of singleVolumeNumber when
 * that is non-zero.
 *
 * Visible steps: fill in a local SalvInfo for partP; for a single volume,
 * connect to the fileserver unless running as the salvageserver, take the
 * volume offline with AskOffline() and (DAFS) lock it with LockVolume(),
 * while the other visible path locks the partition with VLockPartition();
 * remove leftover files whose names start with "salvage.inodes." or
 * "salvage.temp."; create and immediately unlink a temporary inode list
 * file; build the inode summary (GetInodeSummary) and the volume summary
 * (GetVolumeSummary); walk both lists in step, deleting header files that
 * have no inodes (DeleteExtraVolumeHeaderFile) and salvaging each volume
 * group (SalvageVolumeGroup); finally bring the volumes back online with
 * AskOnline().
 *
 * NOTE(review): many lines of this function are missing from this listing
 * (braces, the retry logic around `tries`, several declarations), so the
 * branch structure is described only as far as the visible lines allow.
 */
729 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
732 char inodeListPath[256];
733 FILE *inodeFile = NULL;
734 static char tmpDevName[100];
735 static char wpath[100];
736 struct VolumeSummary *vsp, *esp;
740 struct SalvInfo l_salvinfo;
741 struct SalvInfo *salvinfo = &l_salvinfo;
744 memset(salvinfo, 0, sizeof(*salvinfo));
751 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
752 Abort("Raced too many times with fileserver restarts while trying to "
753 "checkout/lock volumes; Aborted\n");
755 #ifdef AFS_DEMAND_ATTACH_FS
757 /* unlock all previous volume locks, since we're about to lock them
759 VLockFileReinit(&partP->volLockFile);
761 #endif /* AFS_DEMAND_ATTACH_FS */
763 salvinfo->fileSysPartition = partP;
764 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
765 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
768 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
/* NOTE(review): fileSysPath is declared as char[8] earlier in this file and
 * this sprintf() has no length limit; verify the longest path name plus the
 * trailing backslash fits. */
769 (void)sprintf(salvinfo->fileSysPath, "%s\\", salvinfo->fileSysPathName);
770 name = partP->devName;
/* NOTE(review): with the 8-byte fileSysPath, strlcpy() keeps at most 7
 * characters, so a path name of 8 or more characters is silently truncated
 * here - confirm that no partition path can be that long. */
772 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
/* NOTE(review): unbounded copy into the 100-byte static tmpDevName. */
773 strcpy(tmpDevName, partP->devName);
774 name = get_DevName(tmpDevName, wpath);
775 salvinfo->fileSysDeviceName = name;
776 salvinfo->filesysfulldev = wpath;
779 if (singleVolumeNumber) {
780 #ifndef AFS_DEMAND_ATTACH_FS
781 /* only non-DAFS locks the partition when salvaging a single volume;
782 * DAFS will lock the individual volumes in the VG */
783 VLockPartition(partP->name);
784 #endif /* !AFS_DEMAND_ATTACH_FS */
788 /* salvageserver already setup fssync conn for us */
789 if ((programType != salvageServer) && !VConnectFS()) {
790 Abort("Couldn't connect to file server\n");
793 AskOffline(salvinfo, singleVolumeNumber);
794 #ifdef AFS_DEMAND_ATTACH_FS
795 if (LockVolume(salvinfo, singleVolumeNumber)) {
798 #endif /* AFS_DEMAND_ATTACH_FS */
801 VLockPartition(partP->name);
805 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
808 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
809 partP->name, name, (Testing ? "(READONLY mode)" : ""));
811 Log("***Forced salvage of all volumes on this partition***\n");
816 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
/* NOTE(review): the opendir() call is an argument of assert(); if the
 * assert in use can be compiled out, dirp is never set.  The definition of
 * assert is not visible in this listing - confirm. */
823 assert((dirp = opendir(salvinfo->fileSysPath)) != NULL);
824 while ((dp = readdir(dirp))) {
825 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
826 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
828 Log("Removing old salvager temp files %s\n", dp->d_name);
829 strcpy(npath, salvinfo->fileSysPath);
831 strcat(npath, dp->d_name);
837 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
839 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
/* NOTE(review): strncpy() leaves inodeListPath without a terminating NUL
 * when the source has 255 or more characters, and the _tempnam() result is
 * used without a NULL check. */
840 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
842 snprintf(inodeListPath, 255, "%s/salvage.inodes.%s.%d", tdir, name,
846 inodeFile = fopen(inodeListPath, "w+b");
848 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
851 /* Using nt_unlink here since we're really using the delete on close
852 * semantics of unlink. In most places in the salvager, we really do
853 * mean to unlink the file at that point. Those places have been
854 * modified to actually do that so that the NT crt can be used there.
856 code = nt_unlink(inodeListPath);
858 code = unlink(inodeListPath);
861 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
864 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
868 salvinfo->inodeFd = fileno(inodeFile);
869 if (salvinfo->inodeFd == -1)
870 Abort("Temporary file %s is missing...\n", inodeListPath);
871 afs_lseek(salvinfo->inodeFd, 0L, SEEK_SET);
872 if (ListInodeOption) {
873 PrintInodeList(salvinfo);
876 /* enumerate volumes in the partition.
877 * figure out sets of read-only + rw volumes.
878 * salvage each set, read-only volumes first, then read-write.
879 * Fix up inodes on last volume in set (whether it is read-write
882 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
886 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
887 i < salvinfo->nVolumesInInodeFile; i = j) {
888 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
890 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
892 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
893 struct VolumeSummary *tsp;
894 /* Scan volume list (from partition root directory) looking for the
895 * current rw volume number in the volume list from the inode scan.
896 * If there is one here that is not in the inode volume list,
898 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
900 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
902 /* Now match up the volume summary info from the root directory with the
903 * entry in the volume list obtained from scanning inodes */
904 salvinfo->inodeSummary[j].volSummary = NULL;
905 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
906 if (tsp->header.id == vid) {
907 salvinfo->inodeSummary[j].volSummary = tsp;
913 /* Salvage the group of volumes (several read-only + 1 read/write)
914 * starting with the current read-only volume we're looking at.
916 SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
919 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
920 for (; vsp < esp; vsp++) {
922 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
925 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
926 RemoveTheForce(salvinfo->fileSysPath);
928 if (!Testing && singleVolumeNumber) {
929 #ifdef AFS_DEMAND_ATTACH_FS
930 /* unlock vol headers so the fs can attach them when we AskOnline */
931 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
932 #endif /* AFS_DEMAND_ATTACH_FS */
934 AskOnline(salvinfo, singleVolumeNumber);
936 /* Step through the volumeSummary list and set all volumes on-line.
937 * The volumes were taken off-line in GetVolumeSummary.
939 for (j = 0; j < salvinfo->nVolumes; j++) {
940 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
944 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
945 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
948 fclose(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
/*
 * Handle a volume header file (vsp->fileName under salvinfo->fileSysPath)
 * that has no matching inode data: log the fact, destroy the volume disk
 * header with VDestroyVolumeDiskHeader(), log any error it returns, and
 * then unlink the file itself, treating ENOENT as success.
 * NOTE(review): the log text changes with Testing, but the lines that
 * decide whether the destructive calls run at all are not visible in this
 * listing.
 */
952 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
/* NOTE(review): sprintf() into `path` has no length limit and the
 * declaration of `path` is not visible - verify its capacity. */
955 sprintf(path, "%s/%s", salvinfo->fileSysPath, vsp->fileName);
958 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
961 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
963 Log("Error %ld destroying volume disk header for volume %lu\n",
964 afs_printable_int32_ld(code),
965 afs_printable_uint32_lu(vsp->header.id));
968 /* make sure we actually delete the fileName file; ENOENT
969 * is fine, since VDestroyVolumeDiskHeader probably already
971 if (unlink(path) && errno != ENOENT) {
972 Log("Unable to unlink %s (errno = %d)\n", path, errno);
/*
 * qsort() comparator for struct ViceInodeInfo records (GetInodeSummary
 * passes it to qsort()).
 *
 * When either record is a special inode (vnodeNumber == INODESPECIAL) the
 * ordering is decided from the read-write volume id of each record
 * (parentId for a special inode, volumeId otherwise) and, for two special
 * inodes of the same volume, from the special type.  Otherwise records are
 * compared by volumeId, then vnodeNumber, then vnodeUniquifier and
 * inodeDataVersion; the last two tests are reversed so that the most
 * desirable of several similar inodes comes first.  The AFS_3DISPARES and
 * AFS_SGI_EXMAG sections special-case a value above 90% of the counter
 * range against one below 10%.
 *
 * NOTE(review): most return statements are missing from this listing, so
 * the sign returned in each branch cannot be confirmed from here.
 */
979 CompareInodes(const void *_p1, const void *_p2)
981 const struct ViceInodeInfo *p1 = _p1;
982 const struct ViceInodeInfo *p2 = _p2;
983 if (p1->u.vnode.vnodeNumber == INODESPECIAL
984 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
985 VolumeId p1rwid, p2rwid;
987 (p1->u.vnode.vnodeNumber ==
988 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
990 (p2->u.vnode.vnodeNumber ==
991 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
996 if (p1->u.vnode.vnodeNumber == INODESPECIAL
997 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
998 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
999 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1000 if (p1->u.vnode.volumeId == p1rwid)
1002 if (p2->u.vnode.volumeId == p2rwid)
1004 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1006 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1007 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1008 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1010 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1012 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1014 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1016 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1018 /* The following tests are reversed, so that the most desirable
1019 * of several similar inodes comes first */
1020 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1021 #ifdef AFS_3DISPARES
1022 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1023 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1026 #ifdef AFS_SGI_EXMAG
1027 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1028 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1033 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1034 #ifdef AFS_3DISPARES
1035 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1036 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1039 #ifdef AFS_SGI_EXMAG
1040 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1041 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1046 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1047 #ifdef AFS_3DISPARES
1048 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1049 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1052 #ifdef AFS_SGI_EXMAG
1053 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1054 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1059 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1060 #ifdef AFS_3DISPARES
1061 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1062 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1065 #ifdef AFS_SGI_EXMAG
1066 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1067 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/*
 * Summarise the run of inode records at `ip` that share the volume id of
 * the first record, looking at no more than maxInodes entries.  A special
 * inode (vnodeNumber == INODESPECIAL) supplies the read-write volume id
 * through its parentId; for other records the largest vnodeUniquifier is
 * tracked.  The results are stored in *summary (volumeId, RWvolumeId,
 * nInodes, nSpecialInodes, maxUniquifier).
 * NOTE(review): the lines that declare and advance n, nSpecial, maxunique
 * and ip, and the return statement, are not visible in this listing.
 */
1076 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1077 struct InodeSummary *summary)
1079 VolumeId volume = ip->u.vnode.volumeId;
1080 VolumeId rwvolume = volume;
1085 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1087 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1089 rwvolume = ip->u.special.parentId;
1090 /* This isn't quite right, as there could (in error) be different
1091 * parent inodes in different special vnodes */
1093 if (maxunique < ip->u.vnode.vnodeUniquifier)
1094 maxunique = ip->u.vnode.vnodeUniquifier;
1098 summary->volumeId = volume;
1099 summary->RWvolumeId = rwvolume;
1100 summary->nInodes = n;
1101 summary->nSpecialInodes = nSpecial;
1102 summary->maxUniquifier = maxunique;
/*
 * Inode filter that GetInodeSummary() hands to ListViceInodes() when a
 * single volume is salvaged: returns non-zero when the inode belongs to
 * singleVolumeNumber.  A special inode is matched on its parentId, any
 * other inode on its volumeId.  `rock` is not referenced on the visible
 * lines.
 */
1106 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1108 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1109 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1110 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1115 * Collect list of inodes in file named by path. If a truly fatal error,
1116 * unlink the file and abort. For lesser errors, return -1. The file will
1117 * be unlinked by the caller.
/*
 * Build the sorted inode list and the per-volume inode summary for the
 * partition described by salvinfo.
 *
 * ListViceInodes() writes the raw inode records to inodeFile; when
 * singleVolumeNumber is non-zero only inodes accepted by OnlyOneVolume are
 * listed.  The work guarded by `!canfork || debug || Fork() == 0` reads the
 * whole table into memory, sorts it with CompareInodes, writes it back, and
 * writes one InodeSummary per volume (filled in by CountVolumeInodes) to a
 * temporary summary file that is unlinked right after it is opened.  After
 * Wait("Inode summary") the summary file is read into
 * salvinfo->inodeSummary and nVolumesInInodeFile is derived from its size.
 *
 * NOTE(review): many lines of this function (braces, declarations, return
 * statements) are missing from this listing; SalvageFileSys1() treats a
 * negative return value as failure.
 */
1120 GetInodeSummary(struct SalvInfo *salvinfo, FILE *inodeFile, VolumeId singleVolumeNumber)
1122 struct afs_stat status;
1125 struct ViceInodeInfo *ip, *ip_save;
1126 struct InodeSummary summary;
1127 char summaryFileName[50];
1130 char *dev = salvinfo->fileSysPath;
1131 char *wpath = salvinfo->fileSysPath;
1133 char *dev = salvinfo->fileSysDeviceName;
1134 char *wpath = salvinfo->filesysfulldev;
1136 char *part = salvinfo->fileSysPath;
1140 /* This file used to come from vfsck; cobble it up ourselves now... */
1142 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1143 singleVolumeNumber ? OnlyOneVolume : 0,
1144 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1146 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1149 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1151 if (forceSal && !ForceSalvage) {
1152 Log("***Forced salvage of all volumes on this partition***\n");
1155 fseek(inodeFile, 0L, SEEK_SET);
1156 salvinfo->inodeFd = fileno(inodeFile);
1157 if (salvinfo->inodeFd == -1 || afs_fstat(salvinfo->inodeFd, &status) == -1) {
1158 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1160 tdir = (tmpdir ? tmpdir : part);
1162 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
/* NOTE(review): unbounded strcpy() of the _tempnam() result into the
 * 50-byte summaryFileName, with no NULL check on that result. */
1163 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp"));
1165 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1166 "%s/salvage.temp.%d", tdir, getpid());
1168 summaryFile = afs_fopen(summaryFileName, "a+");
1169 if (summaryFile == NULL) {
1170 Abort("Unable to create inode summary file\n");
1174 /* Using nt_unlink here since we're really using the delete on close
1175 * semantics of unlink. In most places in the salvager, we really do
1176 * mean to unlink the file at that point. Those places have been
1177 * modified to actually do that so that the NT crt can be used there.
1179 code = nt_unlink(summaryFileName);
1181 code = unlink(summaryFileName);
1184 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1187 if (!canfork || debug || Fork() == 0) {
1189 unsigned long st_size=(unsigned long) status.st_size;
1190 nInodes = st_size / sizeof(struct ViceInodeInfo);
1192 fclose(summaryFile);
1193 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1194 RemoveTheForce(salvinfo->fileSysPath);
1196 struct VolumeSummary *vsp;
1199 GetVolumeSummary(salvinfo, singleVolumeNumber);
1201 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1203 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1206 Log("%s vice inodes on %s; not salvaged\n",
1207 singleVolumeNumber ? "No applicable" : "No", dev);
1210 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1212 fclose(summaryFile);
1214 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1217 if (read(salvinfo->inodeFd, ip, st_size) != st_size) {
1218 fclose(summaryFile);
1219 Abort("Unable to read inode table; %s not salvaged\n", dev);
1221 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1222 if (afs_lseek(salvinfo->inodeFd, 0, SEEK_SET) == -1
1223 || write(salvinfo->inodeFd, ip, st_size) != st_size) {
1224 fclose(summaryFile);
1225 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1230 CountVolumeInodes(ip, nInodes, &summary);
1231 if (fwrite(&summary, sizeof(summary), 1, summaryFile) != 1) {
1232 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1233 fclose(summaryFile);
1236 summary.index += (summary.nInodes);
1237 nInodes -= summary.nInodes;
1238 ip += summary.nInodes;
1241 ip = ip_save = NULL;
1242 /* Following fflush is not fclose, because if it was debug mode would not work */
1243 if (fflush(summaryFile) == EOF || fsync(fileno(summaryFile)) == -1) {
1244 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1245 fclose(summaryFile);
1248 if (canfork && !debug) {
1253 if (Wait("Inode summary") == -1) {
1254 fclose(summaryFile);
1255 Exit(1); /* salvage of this partition aborted */
/* NOTE(review): the afs_fstat() and afs_lseek() calls below are arguments
 * of assert(); if the assert in use can be compiled out, those calls are
 * dropped and the malloc()/read() results go unchecked.  The definition of
 * assert is not visible in this listing - confirm. */
1258 assert(afs_fstat(fileno(summaryFile), &status) != -1);
1259 if (status.st_size != 0) {
1261 unsigned long st_status=(unsigned long)status.st_size;
1262 salvinfo->inodeSummary = (struct InodeSummary *)malloc(st_status);
1263 assert(salvinfo->inodeSummary != NULL);
1264 /* For GNU we need to do lseek to get the file pointer moved. */
1265 assert(afs_lseek(fileno(summaryFile), 0, SEEK_SET) == 0);
1266 ret = read(fileno(summaryFile), salvinfo->inodeSummary, st_status);
1267 assert(ret == st_status);
1269 salvinfo->nVolumesInInodeFile =(unsigned long)(status.st_size) / sizeof(struct InodeSummary);
1270 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1271 salvinfo->inodeSummary[i].volSummary = NULL;
1273 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)(status.st_size));
1274 fclose(summaryFile);
/*
 * CompareVolumes: comparator over struct VolumeSummary entries with the
 * (const void *, const void *) signature expected by qsort.
 *
 * Entries are ordered first by ascending header.parent. Within one parent,
 * the two visible tests single out the entry whose header.id equals its
 * header.parent (the read-write volume); entries that reach the final
 * return are ordered by ascending header.id.
 *
 * NOTE(review): the statements executed when either "is rw volume" test
 * succeeds are not visible in this listing. Going by the comment below they
 * presumably return -1 and 1 respectively so that the read-write volume
 * sorts ahead of its clones -- confirm against the complete source.
 */
1278 /* Comparison routine for volume sort.
1279 This is setup so that a read-write volume comes immediately before
1280 any read-only clones of that volume */
1282 CompareVolumes(const void *_p1, const void *_p2)
1284 const struct VolumeSummary *p1 = _p1;
1285 const struct VolumeSummary *p2 = _p2;
1286 if (p1->header.parent != p2->header.parent)
1287 return p1->header.parent < p2->header.parent ? -1 : 1;
1288 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1290 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1292 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
/*
 * AskVolumeSummary: try to build salvinfo->volumeSummaryp for one volume
 * group from the fileserver reply to FSYNC_VGCQuery instead of scanning the
 * partition.
 *
 * Visible flow (compiled only when FSSYNC_BUILD_CLIENT and
 * AFS_DEMAND_ATTACH_FS are both defined, and entered only when programType
 * is salvageServer and singleVolumeNumber is non-zero):
 *  - FSYNC_VGCQuery is issued for singleVolumeNumber on the partition.
 *  - While the reply is SYNC_FAILED with reason FSYNC_PART_SCANNING the
 *    code sleeps and re-queries. The sleep length starts at one second,
 *    grows by one per pass, and is clamped to 10 by the assignment inside
 *    the conditional expression passed to sleep().
 *  - SYNC_FAILED with reason FSYNC_UNKNOWN_VOLID is handled by zeroing
 *    q_res and setting q_res.rw to singleVolumeNumber, i.e. acting as if
 *    the reply listed no volume group members.
 *  - A refused request is logged together with its code and reason.
 *  - If q_res.rw differs from singleVolumeNumber, a salvage of q_res.rw is
 *    requested through SALVSYNC_LinkVolume (when SALVSYNC_BUILD_CLIENT is
 *    defined) and the process leaves via Exit(SALSRV_EXIT_VOLGROUP_LINK).
 *  - Otherwise room for VOL_VG_MAX_VOLS summaries is allocated and
 *    q_res.children[] is walked: members other than singleVolumeNumber are
 *    passed to AskOffline and LockVolume, each member header is read with
 *    VReadVolumeDiskHeader and converted into vsp->header, its external
 *    name is stored in vsp->fileName, and salvinfo->nVolumes is
 *    incremented. The collected summaries are then sorted with qsort.
 *
 * NOTE(review): a number of statements are missing from this listing
 * (return statements, loop exits, the step that advances vsp, the qsort
 * comparator argument, several declarations). The return codes are the ones
 * described by the retval lines below; confirm the exact control flow
 * against the complete source before relying on it.
 */
1296 * Gleans volumeSummary information by asking the fileserver
1298 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1299 * salvaging a whole partition
1301 * @return whether we obtained the volume summary information or not
1302 * @retval 0 success; we obtained the volume summary information
1303 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1305 * @retval 1 we did not get the volume summary information; either the
1306 * fileserver responded with an error, or we are not supposed to
1307 * ask the fileserver for the information (e.g. we are salvaging
1308 * the entire partition or we are not the salvageserver)
1310 * @note for non-DAFS, always returns 1
1313 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1316 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1317 if (programType == salvageServer) {
1318 if (singleVolumeNumber) {
1319 FSSYNC_VGQry_response_t q_res;
1321 struct VolumeSummary *vsp;
1323 struct VolumeDiskHeader diskHdr;
1325 memset(&res, 0, sizeof(res));
1327 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1330 * We must wait for the partition to finish scanning before
1331 * can continue, since we will not know if we got the entire
1332 * VG membership unless the partition is fully scanned.
1333 * We could, in theory, just scan the partition ourselves if
1334 * the VG cache is not ready, but we would be doing the exact
1335 * same scan the fileserver is doing; it will almost always
1336 * be faster to wait for the fileserver. The only exceptions
1337 * are if the partition does not take very long to scan, and
1338 * in that case it's fast either way, so who cares?
1340 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1341 Log("waiting for fileserver to finish scanning partition %s...\n",
1342 salvinfo->fileSysPartition->name);
1344 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1345 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1346 * just so small partitions don't need to wait over 10
1347 * seconds every time, and large partitions are generally
1348 * polled only once every ten seconds. */
1349 sleep((i > 10) ? (i = 10) : i);
1351 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1355 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1356 /* This can happen if there's no header for the volume
1357 * we're salvaging, or no headers exist for the VG (if
1358 * we're salvaging an RW). Act as if we got a response
1359 * with no VG members. The headers may be created during
1360 * salvaging, if there are inodes in this VG. */
1362 memset(&q_res, 0, sizeof(q_res));
1363 q_res.rw = singleVolumeNumber;
1367 Log("fileserver refused VGCQuery request for volume %lu on "
1368 "partition %s, code %ld reason %ld\n",
1369 afs_printable_uint32_lu(singleVolumeNumber),
1370 salvinfo->fileSysPartition->name,
1371 afs_printable_int32_ld(code),
1372 afs_printable_int32_ld(res.hdr.reason));
1376 if (q_res.rw != singleVolumeNumber) {
1377 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1378 afs_printable_uint32_lu(singleVolumeNumber),
1379 afs_printable_uint32_lu(q_res.rw));
1380 #ifdef SALVSYNC_BUILD_CLIENT
1381 if (SALVSYNC_LinkVolume(q_res.rw,
1383 salvinfo->fileSysPartition->name,
1385 Log("schedule request failed\n");
1387 #endif /* SALVSYNC_BUILD_CLIENT */
1388 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1391 salvinfo->volumeSummaryp = malloc(VOL_VG_MAX_VOLS * sizeof(struct VolumeSummary));
1392 assert(salvinfo->volumeSummaryp != NULL);
1394 salvinfo->nVolumes = 0;
1395 vsp = salvinfo->volumeSummaryp;
1397 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1398 char name[VMAXPATHLEN];
1400 if (!q_res.children[i]) {
1404 /* AskOffline for singleVolumeNumber was called much earlier */
1405 if (q_res.children[i] != singleVolumeNumber) {
1406 AskOffline(salvinfo, q_res.children[i]);
1407 if (LockVolume(salvinfo, q_res.children[i])) {
1413 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1415 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1416 afs_printable_uint32_lu(q_res.children[i]));
1421 DiskToVolumeHeader(&vsp->header, &diskHdr);
1422 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1423 vsp->fileName = ToString(name);
1424 salvinfo->nVolumes++;
1428 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1433 Log("Cannot get volume summary from fileserver; falling back to scanning "
1434 "entire partition\n");
1437 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
/*
 * CountHeader: callback handed to VWalkVolumeHeaders by GetVolumeSummary in
 * order to count the volume headers on a partition. Only the rock argument
 * is used; it is interpreted as a pointer to an afs_int32 counter.
 *
 * NOTE(review): the statement that updates the counter and the return
 * statement are not visible in this listing -- presumably the counter is
 * incremented once per header and 0 is returned; confirm.
 */
1442 * count how many volume headers are found by VWalkVolumeHeaders.
1444 * @param[in] dp the disk partition (unused)
1445 * @param[in] name full path to the .vol header (unused)
1446 * @param[in] hdr the header data (unused)
1447 * @param[in] last whether this is the last try or not (unused)
1448 * @param[in] rock actually an afs_int32*; the running count of how many
1449 * volumes we have found
1454 CountHeader(struct DiskPartition64 *dp, const char *name,
1455 struct VolumeDiskHeader *hdr, int last, void *rock)
1457 afs_int32 *nvols = (afs_int32 *)rock;
/*
 * struct SalvageScanParams: state handed to the VWalkVolumeHeaders callbacks
 * through their "rock" argument. GetVolumeSummary fills in the fields before
 * walking the partition; RecordHeader and UnlinkHeader cast the rock back to
 * this type. RecordHeader compares nVolumes against totalVolumes and aborts
 * when more headers are found than there is room for.
 */
1463 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
1466 struct SalvageScanParams {
1467 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1468 * vol id of the VG we're salvaging */
1469 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1470 * we're filling in */
1471 afs_int32 nVolumes; /**< # of vols we've encountered */
1472 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1473 * # of vols we've alloc'd memory for) */
1474 int retry; /**< do we need to retry vol lock/checkout? */
1475 struct SalvInfo *salvinfo; /**< salvage job info */
/*
 * RecordHeader: VWalkVolumeHeaders callback that records one volume header
 * into the summary array described by the SalvageScanParams rock.
 *
 * Visible behavior:
 *  - The on-disk header is converted with DiskToVolumeHeader.
 *  - If singleVolumeNumber names this header but the header parent is a
 *    different id (a clone was requested): when running as salvageServer a
 *    salvage of the parent is requested through SALVSYNC_LinkVolume and the
 *    process leaves via Exit(SALSRV_EXIT_VOLGROUP_LINK); a separate message
 *    reports that a read-only volume is not salvaged.
 *  - For a partition salvage, or when the header id or parent matches
 *    singleVolumeNumber, the base name of the header file is compared with
 *    the VFORMAT name built from the header id.
 *  - Only when the name is good, or this is the last try, is a volume other
 *    than singleVolumeNumber taken offline with AskOffline; under
 *    AFS_DEMAND_ATTACH_FS the volume is also locked with LockVolume.
 *  - A mis-named header is reported on the last try unless Showmode is set.
 *  - Abort is called when params->nVolumes exceeds params->totalVolumes;
 *    otherwise the summary is copied to params->vsp.
 *
 * NOTE(review): the return statements, the badname bookkeeping, and the
 * statements that advance params->nVolumes and params->vsp are not visible
 * in this listing; the return codes are the ones described by the retval
 * lines below. Confirm against the complete source.
 */
1479 * records volume summary info found from VWalkVolumeHeaders.
1481 * Found volumes are also taken offline if they are in the specific volume
1482 * group we are looking for.
1484 * @param[in] dp the disk partition
1485 * @param[in] name full path to the .vol header
1486 * @param[in] hdr the header data
1487 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1488 * @param[in] rock actually a struct SalvageScanParams*, containing the
1489 * information needed to record the volume summary data
1491 * @return operation status
1493 * @retval -1 volume locking raced with fileserver restart; checking out
1494 * and locking volumes needs to be retried
1495 * @retval 1 volume header is mis-named and should be deleted
1498 RecordHeader(struct DiskPartition64 *dp, const char *name,
1499 struct VolumeDiskHeader *hdr, int last, void *rock)
1501 char nameShouldBe[64];
1502 struct SalvageScanParams *params;
1503 struct VolumeSummary summary;
1504 VolumeId singleVolumeNumber;
1505 struct SalvInfo *salvinfo;
1507 params = (struct SalvageScanParams *)rock;
1509 singleVolumeNumber = params->singleVolumeNumber;
1510 salvinfo = params->salvinfo;
1512 DiskToVolumeHeader(&summary.header, hdr);
1514 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1515 && summary.header.parent != singleVolumeNumber) {
1517 if (programType == salvageServer) {
1518 #ifdef SALVSYNC_BUILD_CLIENT
1519 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1520 summary.header.id, summary.header.parent);
1521 if (SALVSYNC_LinkVolume(summary.header.parent,
1525 Log("schedule request failed\n");
1528 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1531 Log("%u is a read-only volume; not salvaged\n",
1532 singleVolumeNumber);
1537 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1538 || summary.header.parent == singleVolumeNumber) {
1540 /* check if the header file is incorrectly named */
1542 const char *base = strrchr(name, '/');
1549 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1550 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1553 if (strcmp(nameShouldBe, base)) {
1554 /* .vol file has wrong name; retry/delete */
1558 if (!badname || last) {
1559 /* only offline the volume if the header is good, or if this is
1560 * the last try looking at it; avoid AskOffline'ing the same vol
1563 if (singleVolumeNumber
1564 && summary.header.id != singleVolumeNumber) {
1565 /* don't offline singleVolumeNumber; we already did that
1568 AskOffline(salvinfo, summary.header.id);
1570 #ifdef AFS_DEMAND_ATTACH_FS
1572 /* don't lock the volume if the header is bad, since we're
1573 * about to delete it anyway. */
1574 if (LockVolume(salvinfo, summary.header.id)) {
1579 #endif /* AFS_DEMAND_ATTACH_FS */
1583 if (last && !Showmode) {
1584 Log("Volume header file %s is incorrectly named (should be %s "
1585 "not %s); %sdeleted (it will be recreated later, if "
1586 "necessary)\n", name, nameShouldBe, base,
1587 (Testing ? "it would have been " : ""));
1592 summary.fileName = ToString(base);
1595 if (params->nVolumes > params->totalVolumes) {
1596 /* We found more volumes than we found on the first partition walk;
1597 * apparently something created a volume while we were
1598 * partition-salvaging, or we found more than 20 vols when salvaging a
1599 * particular volume. Abort if we detect this, since other programs
1600 * supposed to not touch the partition while it is partition-salvaging,
1601 * and we shouldn't find more than 20 vols in a VG.
1603 Abort("Found %ld vol headers, but should have found at most %ld! "
1604 "Make sure the volserver/fileserver are not running at the "
1605 "same time as a partition salvage\n",
1606 afs_printable_int32_ld(params->nVolumes),
1607 afs_printable_int32_ld(params->totalVolumes));
1610 memcpy(params->vsp, &summary, sizeof(summary));
/*
 * UnlinkHeader: VWalkVolumeHeaders callback that decides whether a bad
 * volume header file should be removed, and removes it with unlink().
 *
 * Visible behavior:
 *  - A header that could not be read at all is reported as "not a
 *    legitimate volume header file".
 *  - A header that was read but rejected is only considered for removal
 *    when params->singleVolumeNumber is 0 (partition salvage).
 *  - unlink(name) runs only when dounlink is set; a failure is logged with
 *    errno.
 *
 * NOTE(review): the statements that set dounlink (and whatever role the
 * Testing flag plays in that decision) are not visible in this listing --
 * the log text suggests nothing is deleted in Testing mode; confirm.
 */
1618 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1620 * If the header could not be read in at all, the header is always unlinked.
1621 * If instead RecordHeader said the header was bad (that is, the header file
1622 * is mis-named), we only unlink if we are doing a partition salvage, as
1623 * opposed to salvaging a specific volume group.
1625 * @param[in] dp the disk partition
1626 * @param[in] name full path to the .vol header
1627 * @param[in] hdr header data, or NULL if the header could not be read
1628 * @param[in] rock actually a struct SalvageScanParams*, with some information
1632 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1633 struct VolumeDiskHeader *hdr, void *rock)
1635 struct SalvageScanParams *params;
1638 params = (struct SalvageScanParams *)rock;
1641 /* no header; header is too bogus to read in at all */
1643 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1649 } else if (!params->singleVolumeNumber) {
1650 /* We were able to read in a header, but RecordHeader said something
1651 * was wrong with it. We only unlink those if we are doing a partition
1658 if (dounlink && unlink(name)) {
1659 Log("Error %d while trying to unlink %s\n", errno, name);
/*
 * GetVolumeSummary: fill salvinfo->volumeSummaryp and salvinfo->nVolumes.
 *
 * AskVolumeSummary is tried first. When it does not supply the data the
 * partition is scanned instead:
 *  - for a partition salvage (singleVolumeNumber == 0) the headers are
 *    first counted with CountHeader so the array can be sized; otherwise
 *    room for VOL_VG_MAX_VOLS entries is used;
 *  - VWalkVolumeHeaders is then run with RecordHeader and UnlinkHeader,
 *    passing the address of a local SalvageScanParams as the rock;
 *  - the recorded summaries are sorted with qsort.
 *
 * The rock argument of the second VWalkVolumeHeaders call had been
 * mis-encoded in this listing; it is the address of the local params
 * structure that RecordHeader and UnlinkHeader cast back to
 * struct SalvageScanParams *.
 *
 * NOTE(review): the return statements, the tests on code, the assignment of
 * params.retry and the qsort comparator are not visible in this listing;
 * the return codes are the ones described by the retval lines below.
 */
1664 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1665 * the fileserver for VG information, or by scanning the /vicepX partition.
1667 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1668 * are salvaging, or 0 if this is a partition
1671 * @return operation status
1673 * @retval -1 we raced with a fileserver restart; checking out and locking
1674 * volumes must be retried
1677 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1679 afs_int32 nvols = 0;
1680 struct SalvageScanParams params;
1683 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1685 /* we successfully got the vol information from the fileserver; no
1686 * need to scan the partition */
1690 /* we need to retry volume checkout */
1694 if (!singleVolumeNumber) {
1695 /* Count how many volumes we have in /vicepX */
1696 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1699 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1704 nvols = VOL_VG_MAX_VOLS;
1707 salvinfo->volumeSummaryp = malloc(nvols * sizeof(struct VolumeSummary));
1708 assert(salvinfo->volumeSummaryp != NULL);
1710 params.singleVolumeNumber = singleVolumeNumber;
1711 params.vsp = salvinfo->volumeSummaryp;
1712 params.nVolumes = 0;
1713 params.totalVolumes = nvols;
1715 params.salvinfo = salvinfo;
1717 /* walk the partition directory of volume headers and record the info
1718 * about them; unlinking invalid headers */
1719 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1720 UnlinkHeader, &params);
1722 /* we apparently need to retry checking-out/locking volumes */
1726 Abort("Failed to get volume header summary\n");
1728 salvinfo->nVolumes = params.nVolumes;
1730 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
/*
 * FindLinkHandle: look for the link table inode of a volume group.
 *
 * For each of the nVols entries of isp[], the first nSpecialInodes inodes
 * starting at allInodes + isp[i].index are examined; the inode number of
 * the first one whose u.special.type is VI_LINKTABLE is returned.
 *
 * NOTE(review): the value returned when no link table inode is found is not
 * visible in this listing; callers test the result with VALID_INO, so it is
 * presumably an invalid inode number -- confirm.
 */
1736 /* Find the link table. This should be associated with the RW volume or, if
1737 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
1740 FindLinkHandle(struct InodeSummary *isp, int nVols,
1741 struct ViceInodeInfo *allInodes)
1744 struct ViceInodeInfo *ip;
1746 for (i = 0; i < nVols; i++) {
1747 ip = allInodes + isp[i].index;
1748 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1749 if (ip[j].u.special.type == VI_LINKTABLE)
1750 return ip[j].inodeNumber;
/*
 * CreateLinkTable: make sure the volume group has a link table inode and
 * (re)initialise its contents.
 *
 * Visible flow:
 *  - When ino is not valid, a new special inode of type VI_LINKTABLE is
 *    created with IH_CREATE; failure to obtain a valid inode is fatal.
 *  - salvinfo->VGLinkH is initialised for that inode and opened; failure to
 *    open is fatal.
 *  - The file is truncated to sizeof(version) + sizeof(short) and a version
 *    stamp (LINKTABLEMAGIC / LINKTABLEVERSION) is written at the start.
 *    Each step aborts with its own message on failure, so that a failed
 *    write is no longer reported as a failed truncate.
 *  - If a volume summary exists for isp, its header.linkTable is set to the
 *    inode as well.
 *
 * NOTE(review): several statements (the declaration of fdP, the assignment
 * of the IH_CREATE result, the comparison completing the FDH_WRITE test,
 * the return statement) are not visible in this listing.
 */
1757 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1759 struct versionStamp version;
1762 if (!VALID_INO(ino))
1764 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
1765 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1766 if (!VALID_INO(ino))
1768 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1769 isp->RWvolumeId, errno);
1770 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1771 fdP = IH_OPEN(salvinfo->VGLinkH);
1773 Abort("Can't open link table for volume %u (error = %d)\n",
1774 isp->RWvolumeId, errno);
1776 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1777 Abort("Can't truncate link table for volume %u (error = %d)\n",
1778 isp->RWvolumeId, errno);
1780 version.magic = LINKTABLEMAGIC;
1781 version.version = LINKTABLEVERSION;
1783 if (FDH_WRITE(fdP, (char *)&version, sizeof(version))
1785 Abort("Can't write link table for volume %u (error = %d)\n",
1786 isp->RWvolumeId, errno);
1788 FDH_REALLYCLOSE(fdP);
1790 /* If the volume summary exists (i.e., the V*.vol header file exists),
1791 * then set this inode there as well.
1793 if (isp->volSummary)
1794 isp->volSummary->header.linkTable = ino;
/*
 * Thread-entry fragment: unpacks the SVGParms_t argument and runs
 * DoSalvageVolumeGroup with the salvinfo, inode summary pointer and count it
 * carries.
 *
 * NOTE(review): the function header and return statement are not visible in
 * this listing. This is presumably the body of nt_SVG, the routine that
 * SalvageVolumeGroup passes to pthread_create together with a pointer to
 * its SVGParms_t -- confirm.
 */
1803 SVGParms_t *parms = (SVGParms_t *) arg;
1804 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * SalvageVolumeGroup: reset the per-volume-group state in salvinfo and
 * salvage the group of nVols volumes starting at isp.
 *
 * Visible flow:
 *  - VolumeChanged, VGLinkH, VGLinkH_cnt and VolInfo are reset.
 *  - The arguments are packed into an SVGParms_t, a thread attribute object
 *    is initialised and set to PTHREAD_CREATE_JOINABLE, a thread running
 *    nt_SVG is created with it, and the caller blocks in pthread_join until
 *    that thread finishes. Each pthread setup failure is logged.
 *
 * NOTE(review): the conditional-compilation lines around the thread code
 * are only partly visible (just the closing endif for AFS_NT40_ENV), as are
 * the error-handling statements after each Log call and any non-threaded
 * path -- confirm against the complete source.
 */
1809 SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1812 pthread_attr_t tattr;
1816 /* Initialize per volume global variables, even if later code does so */
1817 salvinfo->VolumeChanged = 0;
1818 salvinfo->VGLinkH = NULL;
1819 salvinfo->VGLinkH_cnt = 0;
1820 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1822 parms.svgp_inodeSummaryp = isp;
1823 parms.svgp_count = nVols;
1824 parms.svgp_salvinfo = salvinfo;
1825 code = pthread_attr_init(&tattr);
1827 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1831 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1833 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1836 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1838 Log("Failed to create thread to salvage volume group %u\n",
1842 (void)pthread_join(tid, NULL);
1844 #endif /* AFS_NT40_ENV */
/*
 * DoSalvageVolumeGroup: salvage the nVols volumes of one volume group,
 * described by isp[0..nVols-1].
 *
 * Visible flow:
 *  - haveRWvolume is set when the first entry is its own read-write volume
 *    (volumeId == RWvolumeId) and has at least one special inode.
 *  - Unless ForceSalvage is set, QuickCheck is consulted first (skipped when
 *    ShowMounts is set and a read-write volume is present).
 *  - When canfork is set and debug is not, the work is done in a forked
 *    child while the parent waits in Wait("Salvage volume group").
 *  - The inode records of the whole group are read from salvinfo->inodeFd
 *    into one buffer.
 *  - Under AFS_NAMEI_ENV the link table inode is located with
 *    FindLinkHandle and opened; when it is missing or cannot be opened it
 *    is recreated with CreateLinkTable and every non-special inode of the
 *    group is given a link count of 1 in it.
 *  - Volumes are processed from the last entry down to salvageTo (0 when a
 *    read-write volume is present, 1 otherwise). Each volume gets two
 *    passes, check == 1 and then check == 0, through
 *    SalvageVolumeHeaderFile and SalvageVnodes; failures are passed to
 *    MaybeZapVolume.
 *  - Afterwards the remaining linkCount of every inode record is worked
 *    off with IH_DEC (positive) or IH_INC (negative); the link table inode
 *    itself is adjusted last, by dec_VGLinkH.
 *  - SalvageVolume then runs the directory consistency checks on the
 *    read-write volume and the link table handle is released.
 *
 * NOTE(review): many statements are missing from this listing (returns,
 * loop exits, several declarations, the decrement/increment of linkCount
 * inside the IH_DEC/IH_INC loops, the conditions guarding some calls), so
 * the summary above covers only what the visible lines establish.
 */
1847 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1849 struct ViceInodeInfo *inodes, *allInodes, *ip;
1850 int i, totalInodes, size, salvageTo;
1854 int dec_VGLinkH = 0;
1856 FdHandle_t *fdP = NULL;
1858 salvinfo->VGLinkH_cnt = 0;
1859 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1860 && isp->nSpecialInodes > 0);
1861 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1862 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1865 if (ShowMounts && !haveRWvolume)
1867 if (canfork && !debug && Fork() != 0) {
1868 (void)Wait("Salvage volume group");
1871 for (i = 0, totalInodes = 0; i < nVols; i++)
1872 totalInodes += isp[i].nInodes;
1873 size = totalInodes * sizeof(struct ViceInodeInfo);
/* NOTE(review): the malloc result is used on the next visible line without
 * a NULL check, and size is an int product that could overflow for a very
 * large group -- confirm whether a check exists in the complete source. */
1874 inodes = (struct ViceInodeInfo *)malloc(size);
1875 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1876 * for the partition, if all the inodes
1877 * had been read into memory */
1879 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
/* NOTE(review): the read() below happens inside assert(); if assert can be
 * compiled out in this build (NDEBUG), the inode records would never be
 * loaded -- confirm how assert is defined for this file. */
1881 assert(read(salvinfo->inodeFd, inodes, size) == size);
1883 /* Don't try to salvage a read write volume if there isn't one on this
1885 salvageTo = haveRWvolume ? 0 : 1;
1887 #ifdef AFS_NAMEI_ENV
1888 ino = FindLinkHandle(isp, nVols, allInodes);
1889 if (VALID_INO(ino)) {
1890 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1891 fdP = IH_OPEN(salvinfo->VGLinkH);
1893 if (!VALID_INO(ino) || fdP == NULL) {
1894 Log("%s link table for volume %u.\n",
1895 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1897 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1900 struct ViceInodeInfo *ip;
1901 CreateLinkTable(salvinfo, isp, ino);
1902 fdP = IH_OPEN(salvinfo->VGLinkH);
1903 /* Sync fake 1 link counts to the link table, now that it exists */
1905 for (i = 0; i < nVols; i++) {
1906 ip = allInodes + isp[i].index;
1907 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1909 nt_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1911 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1919 FDH_REALLYCLOSE(fdP);
1921 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1924 /* Salvage in reverse order--read/write volume last; this way any
1925 * Inodes not referenced by the time we salvage the read/write volume
1926 * can be picked up by the read/write volume */
1927 /* ACTUALLY, that's not done right now--the inodes just vanish */
1928 for (i = nVols - 1; i >= salvageTo; i--) {
1930 struct InodeSummary *lisp = &isp[i];
1931 #ifdef AFS_NAMEI_ENV
1932 /* If only the RO is present on this partition, the link table
1933 * shows up as a RW volume special file. Need to make sure the
1934 * salvager doesn't try to salvage the non-existent RW.
1936 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1937 /* If this only special inode is the link table, continue */
1938 if (inodes->u.special.type == VI_LINKTABLE) {
1945 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1946 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1947 /* Check inodes twice. The second time do things seriously. This
1948 * way the whole RO volume can be deleted, below, if anything goes wrong */
1949 for (check = 1; check >= 0; check--) {
1951 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
1953 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
1954 if (rw && deleteMe) {
1955 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1956 * volume won't be called */
1962 if (rw && check == 1)
1964 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
1965 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
1971 /* Fix actual inode counts */
1974 Log("totalInodes %d\n",totalInodes);
1975 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1976 static int TraceBadLinkCounts = 0;
1977 #ifdef AFS_NAMEI_ENV
1978 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
1979 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
1980 VGLinkH_p1 = ip->u.param[0];
1981 continue; /* Deal with this last. */
1984 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1985 TraceBadLinkCounts--; /* Limit reports, per volume */
1986 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
1988 while (ip->linkCount > 0) {
1989 /* below used to assert, not break */
1991 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1992 Log("idec failed. inode %s errno %d\n",
1993 PrintInode(stmp, ip->inodeNumber), errno);
1999 while (ip->linkCount < 0) {
2000 /* these used to be asserts */
2002 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2003 Log("iinc failed. inode %s errno %d\n",
2004 PrintInode(stmp, ip->inodeNumber), errno);
2011 #ifdef AFS_NAMEI_ENV
2012 while (dec_VGLinkH > 0) {
2013 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2014 Log("idec failed on link table, errno = %d\n", errno);
2018 while (dec_VGLinkH < 0) {
2019 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2020 Log("iinc failed on link table, errno = %d\n", errno);
2027 /* Directory consistency checks on the rw volume */
2029 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2030 IH_RELEASE(salvinfo->VGLinkH);
2032 if (canfork && !debug) {
/*
 * QuickCheck: inspect the volume info headers of a volume group before any
 * forking, to see whether the group looks cleanly shut down.
 *
 * Visible behavior, for each of the nVols entries of isp[]:
 *  - A first entry with no special inodes is treated as a phantom
 *    read-write volume when the group has more than one volume.
 *  - The VolumeDiskData is read through an inode handle built from the
 *    volume summary header (parent and volumeInfo). It is accepted only
 *    when the full structure was read, stamp.magic is VOLUMEINFOMAGIC,
 *    dontSalvage equals DONT_SALVAGE, and needsSalvaged and destroyMe are
 *    both 0.
 *  - For an accepted header that is still marked inUse, inUse is cleared,
 *    inService is set to 1, and the header is written back with IH_IWRITE.
 *
 * NOTE(review): the return statements and the handling of a NULL volume
 * summary are not visible in this listing. The caller skips the salvage
 * when the result is non-zero, so presumably non-zero means every header
 * passed the checks -- confirm.
 */
2039 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2041 /* Check headers BEFORE forking */
2045 for (i = 0; i < nVols; i++) {
2046 struct VolumeSummary *vs = isp[i].volSummary;
2047 VolumeDiskData volHeader;
2049 /* Don't salvage just because phantom rw volume is there... */
2050 /* (If a read-only volume exists, read/write inodes must also exist) */
2051 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2055 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2056 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2057 == sizeof(volHeader)
2058 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2059 && volHeader.dontSalvage == DONT_SALVAGE
2060 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2061 if (volHeader.inUse != 0) {
2062 volHeader.inUse = 0;
2063 volHeader.inService = 1;
2065 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2066 != sizeof(volHeader)) {
/*
 * SalvageVolumeHeaderFile: check, and when check is 0 repair, the V*.vol
 * header of the volume described by isp against its special inodes.
 *
 * Visible flow:
 *  - A skip array with one slot per special inode is allocated. If that
 *    fails, a message is logged and the salvage continues without duplicate
 *    special inode recovery.
 *  - When a volume summary exists, each special inode whose number matches
 *    the one already recorded in the header for its type is remembered in
 *    goodspecial[].
 *  - tempHeader is rebuilt from scratch: magic, version, id and parent.
 *  - Adjacent special inodes of the same type are reported as duplicates.
 *    When the existing header references one of them, that one is used;
 *    otherwise the salvage of the volume is reported as aborted.
 *  - Each special inode is then examined. Types outside 1..MAXINODETYPE are
 *    reported as rubbish; entries marked in skip[] are reported as orphans
 *    (removed or ignored depending on the orphans setting); remaining
 *    non-obsolete ones are stored into tempHeader through the stuff[]
 *    table. When not in check mode, linkCount is decremented for inodes
 *    other than the link table so that they are kept.
 *  - salvinfo->VGLinkH_cnt is incremented once per header, and the in-use
 *    bit is cleared with ClearROInUseBit for a non-RW volume when not in
 *    check mode.
 *  - SalvageHeader is called for every entry of stuff[]; the link table
 *    entry is first pointed at salvinfo->VGLinkH when that inode is valid.
 *  - If there was no header file, a summary is allocated and
 *    VCreateVolumeDiskHeader is selected; if an existing header is found
 *    damaged or no longer valid, VWriteVolumeDiskHeader is selected. The
 *    selected function writes the disk form of tempHeader.
 *  - Finally the volumeInfoHandle of the summary is initialised from the
 *    header volumeInfo inode.
 *
 * NOTE(review): many statements are missing from this listing (returns,
 * several conditions and declarations, the comparison call applied to the
 * two headers), so the summary above covers only what the visible lines
 * establish. Two things worth confirming in the complete source: the
 * malloc of the new VolumeSummary is dereferenced on the next visible line
 * without a NULL check, and the strcpy of volSummary->fileName into the
 * 64-byte headerName buffer relies on that name being short.
 */
2082 /* SalvageVolumeHeaderFile
2084 * Salvage the top level V*.vol header file. Make sure the special files
2085 * exist and that there are no duplicates.
2087 * Calls SalvageHeader for each possible type of volume special file.
2091 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2092 struct ViceInodeInfo *inodes, int RW,
2093 int check, int *deleteMe)
2096 struct ViceInodeInfo *ip;
2097 int allinodesobsolete = 1;
2098 struct VolumeDiskHeader diskHeader;
2099 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2101 struct VolumeHeader tempHeader;
2102 struct afs_inode_info stuff[MAXINODETYPE];
2104 /* keeps track of special inodes that are probably 'good'; they are
2105 * referenced in the vol header, and are included in the given inodes
2110 } goodspecial[MAXINODETYPE];
2115 memset(goodspecial, 0, sizeof(goodspecial));
2117 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2119 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2121 Log("cannot allocate memory for inode skip array when salvaging "
2122 "volume %lu; not performing duplicate special inode recovery\n",
2123 afs_printable_uint32_lu(isp->volumeId));
2124 /* still try to perform the salvage; the skip array only does anything
2125 * if we detect duplicate special inodes */
2128 init_inode_info(&tempHeader, stuff);
2131 * First, look at the special inodes and see if any are referenced by
2132 * the existing volume header. If we find duplicate special inodes, we
2133 * can use this information to use the referenced inode (it's more
2134 * likely to be the 'good' one), and throw away the duplicates.
2136 if (isp->volSummary && skip) {
2137 /* use tempHeader, so we can use the stuff[] array to easily index
2138 * into the isp->volSummary special inodes */
2139 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2141 for (i = 0; i < isp->nSpecialInodes; i++) {
2142 ip = &inodes[isp->index + i];
2143 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2144 /* will get taken care of in a later loop */
2147 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2148 goodspecial[ip->u.special.type-1].valid = 1;
2149 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2154 memset(&tempHeader, 0, sizeof(tempHeader));
2155 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2156 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2157 tempHeader.id = isp->volumeId;
2158 tempHeader.parent = isp->RWvolumeId;
2160 /* Check for duplicates (inodes are sorted by type field) */
2161 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2162 ip = &inodes[isp->index + i];
2163 if (ip->u.special.type == (ip + 1)->u.special.type) {
2164 afs_ino_str_t stmp1, stmp2;
2166 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2167 /* Will be caught in the loop below */
2171 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2172 ip->u.special.type, isp->volumeId,
2173 PrintInode(stmp1, ip->inodeNumber),
2174 PrintInode(stmp2, (ip+1)->inodeNumber));
2176 if (skip && goodspecial[ip->u.special.type-1].valid) {
2177 Inode gi = goodspecial[ip->u.special.type-1].inode;
2180 Log("using special inode referenced by vol header (%s)\n",
2181 PrintInode(stmp1, gi));
2184 /* the volume header references some special inode of
2185 * this type in the inodes array; are we it? */
2186 if (ip->inodeNumber != gi) {
2188 } else if ((ip+1)->inodeNumber != gi) {
2189 /* in case this is the last iteration; we need to
2190 * make sure we check ip+1, too */
2195 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2203 for (i = 0; i < isp->nSpecialInodes; i++) {
2205 ip = &inodes[isp->index + i];
2206 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2208 Log("Rubbish header inode %s of type %d\n",
2209 PrintInode(stmp, ip->inodeNumber),
2210 ip->u.special.type);
2216 Log("Rubbish header inode %s of type %d; deleted\n",
2217 PrintInode(stmp, ip->inodeNumber),
2218 ip->u.special.type);
2219 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2220 if (skip && skip[i]) {
2221 if (orphans == ORPH_REMOVE) {
2222 Log("Removing orphan special inode %s of type %d\n",
2223 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2226 Log("Ignoring orphan special inode %s of type %d\n",
2227 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2228 /* fall through to the ip->linkCount--; line below */
2231 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2232 allinodesobsolete = 0;
2234 if (!check && ip->u.special.type != VI_LINKTABLE)
2235 ip->linkCount--; /* Keep the inode around */
2243 if (allinodesobsolete) {
2250 salvinfo->VGLinkH_cnt++; /* one for every header. */
2252 if (!RW && !check && isp->volSummary) {
2253 ClearROInUseBit(isp->volSummary);
2257 for (i = 0; i < MAXINODETYPE; i++) {
2258 if (stuff[i].inodeType == VI_LINKTABLE) {
2259 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2260 * And we may have recreated the link table earlier, so set the
2261 * RW header as well.
2263 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2264 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2268 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2272 if (isp->volSummary == NULL) {
2274 char headerName[64];
2275 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2276 (void)afs_snprintf(path, sizeof path, "%s/%s", salvinfo->fileSysPath, headerName);
2278 Log("No header file for volume %u\n", isp->volumeId);
2282 Log("No header file for volume %u; %screating %s\n",
2283 isp->volumeId, (Testing ? "it would have been " : ""),
2285 isp->volSummary = (struct VolumeSummary *)
2286 malloc(sizeof(struct VolumeSummary));
2287 isp->volSummary->fileName = ToString(headerName);
2289 writefunc = VCreateVolumeDiskHeader;
2292 char headerName[64];
2293 /* hack: these two fields are obsolete... */
2294 isp->volSummary->header.volumeAcl = 0;
2295 isp->volSummary->header.volumeMountTable = 0;
2298 (&isp->volSummary->header, &tempHeader,
2299 sizeof(struct VolumeHeader))) {
2300 /* We often remove the name before calling us, so we make a fake one up */
2301 if (isp->volSummary->fileName) {
2302 strcpy(headerName, isp->volSummary->fileName);
2304 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2305 isp->volSummary->fileName = ToString(headerName);
2307 (void)afs_snprintf(path, sizeof path, "%s/%s", salvinfo->fileSysPath, headerName);
2309 Log("Header file %s is damaged or no longer valid%s\n", path,
2310 (check ? "" : "; repairing"));
2314 writefunc = VWriteVolumeDiskHeader;
2318 memcpy(&isp->volSummary->header, &tempHeader,
2319 sizeof(struct VolumeHeader));
2322 Log("It would have written a new header file for volume %u\n",
2326 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2327 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2329 Log("Error %ld writing volume header file for volume %lu\n",
2330 afs_printable_int32_ld(code),
2331 afs_printable_uint32_lu(diskHeader.id));
2336 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2337 isp->volSummary->header.volumeInfo);
// SalvageHeader: examine one special volume-header inode (described by sp) of
// the volume summarized by isp, and rewrite its contents when needed.
// NOTE(review): some short lines of this function (braces, returns, a few
// conditions) are absent from this listing; comments cover visible lines only.
//   salvinfo - salvage job state: supplies fileSysDevice and fileSysPath, and
//              receives VolInfo when sp is the VI_VOLINFO inode
//   sp       - the special inode to check: inodeType, size, stamp, description
//              and a pointer to the inode number kept in the volume header
//   isp      - volumeId, RWvolumeId and maxUniquifier of the volume
//   check    - presumably a verify-only mode: a visible message treats
//              recreating a header in check mode as an internal error
//   deleteMe - out parameter; its assignment is not visible here (TODO confirm)
2342 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2343 struct InodeSummary *isp, int check, int *deleteMe)
2346 VolumeDiskData volumeInfo;
2347 struct versionStamp fileHeader;
// Without AFS_NAMEI_ENV the VI_LINKTABLE inode type is special-cased
// (the statement guarded by this test is not visible).
2356 #ifndef AFS_NAMEI_ENV
2357 if (sp->inodeType == VI_LINKTABLE)
// An inode number of 0 means the volume header lists no inode of this type.
2360 if (*(sp->inode) == 0) {
2362 Log("Missing inode in volume header (%s)\n", sp->description);
2366 Log("Missing inode in volume header (%s); %s\n", sp->description,
2367 (Testing ? "it would have recreated it" : "recreating"));
// Allocate a replacement special inode for this volume.
2370 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2371 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2372 if (!VALID_INO(*(sp->inode)))
2374 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2375 sp->description, errno);
// Open the special inode through a fresh inode handle.
2380 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2381 fdP = IH_OPEN(specH);
2382 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2383 /* bail out early and destroy the volume */
2385 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2392 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2393 sp->description, errno);
// The header is treated as corrupt when fewer than sp->size bytes can be
// read or when its magic number differs from the expected stamp.
2396 && (FDH_READ(fdP, (char *)&header, sp->size) != sp->size
2397 || header.fileHeader.magic != sp->stamp.magic)) {
2399 Log("Part of the header (%s) is corrupted\n", sp->description);
2400 FDH_REALLYCLOSE(fdP);
2404 Log("Part of the header (%s) is corrupted; recreating\n",
2407 /* header can be garbage; make sure we don't read garbage data from
2409 memset(&header, 0, sizeof(header));
// A volume-info header whose destroyMe field equals DESTROY_ME is handled
// separately; the file is closed (remaining handling is not visible).
2411 if (sp->inodeType == VI_VOLINFO
2412 && header.volumeInfo.destroyMe == DESTROY_ME) {
2415 FDH_REALLYCLOSE(fdP);
// Rewrite path: taken only when recreate is set and Testing is off.
2419 if (recreate && !Testing) {
2422 ("Internal error: recreating volume header (%s) in check mode\n",
2424 nBytes = FDH_TRUNC(fdP, 0);
2426 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2427 sp->description, errno);
2429 /* The following code should be moved into vutil.c */
// VI_VOLINFO: build a fresh VolumeDiskData with a placeholder bogus.<id>
// name, not in service and not blessed, then write it at offset 0.
2430 if (sp->inodeType == VI_VOLINFO) {
2432 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2433 header.volumeInfo.stamp = sp->stamp;
2434 header.volumeInfo.id = isp->volumeId;
2435 header.volumeInfo.parentId = isp->RWvolumeId;
2436 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2437 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2438 isp->volumeId, isp->volumeId);
2439 header.volumeInfo.inService = 0;
2440 header.volumeInfo.blessed = 0;
2441 /* The + 1000 is a hack in case there are any files out in venus caches */
2442 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
// The volume is typed read-write when its id equals the RW volume id,
// read-only otherwise.
2443 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2444 header.volumeInfo.needsCallback = 0;
2445 gettimeofday(&tp, 0);
2446 header.volumeInfo.creationDate = tp.tv_sec;
2447 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
2449 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
2450 sp->description, errno);
2453 FDH_WRITE(fdP, (char *)&header.volumeInfo,
2454 sizeof(header.volumeInfo));
// A short write of the volume info is fatal.
2455 if (nBytes != sizeof(header.volumeInfo)) {
2458 ("Unable to write volume header file (%s) (errno = %d)\n",
2459 sp->description, errno);
2460 Abort("Unable to write entire volume header file (%s)\n",
// Presumably the branch for the other inode types: only the version stamp
// sp->stamp is written at offset 0.
2464 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
2466 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
2467 sp->description, errno);
2469 nBytes = FDH_WRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp));
2470 if (nBytes != sizeof(sp->stamp)) {
2473 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2474 sp->description, errno);
2476 ("Unable to write entire version stamp in volume header file (%s)\n",
2481 FDH_REALLYCLOSE(fdP);
// For the volume-info inode, publish the header to salvinfo->VolInfo and log
// the update time, or the creation time when updateDate is zero.
2483 if (sp->inodeType == VI_VOLINFO) {
2484 salvinfo->VolInfo = header.volumeInfo;
2488 if (salvinfo->VolInfo.updateDate) {
2489 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2491 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2492 salvinfo->VolInfo.id,
2493 (Testing ? "it would have been " : ""), update);
2495 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2497 Log("%s (%u) not updated (created %s)\n",
2498 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
// SalvageVnodes: salvage the small and then the large vnode index of thisIsp
// against the non-special inodes recorded for the read-write volume rwIsp.
// NOTE(review): the lines assigning ismall and ilarge are absent from this
// listing; they presumably receive the two SalvageIndex results.
//   salvinfo - salvage job state, passed through to SalvageIndex
//   rwIsp    - inode summary of the read-write volume (source of the inodes)
//   thisIsp  - inode summary of the volume whose indexes are salvaged
//   inodes   - inode array; the entries used start at rwIsp->index
//   check    - passed through to SalvageIndex
// Returns 0 when ilarge and ismall are both 0, otherwise -1.
2508 SalvageVnodes(struct SalvInfo *salvinfo,
2509 struct InodeSummary *rwIsp,
2510 struct InodeSummary *thisIsp,
2511 struct ViceInodeInfo *inodes, int check)
2513 int ilarge, ismall, ioffset, RW, nInodes;
2514 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
// RW is true when the volume being salvaged is the read-write volume itself.
2517 RW = (rwIsp == thisIsp);
// Number of ordinary (non-special) inodes belonging to the RW volume.
2518 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2520 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2521 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
// In check mode a small-index result of -1 is tested before the large index
// is processed (the guarded statement is not visible).
2522 if (check && ismall == -1)
2525 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2526 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2527 return (ilarge == 0 && ismall == 0 ? 0 : -1);
// SalvageIndex: walk every vnode record in one vnode index file (inode ino,
// vnode class class) and reconcile each in-use vnode with the inode list
// ip[0..nInodes).
// NOTE(review): many short lines (braces, break/goto/return statements, some
// conditions) are absent from this listing; comments cover visible lines only
// and the return value is not visible.
//   salvinfo   - salvage job state; VolumeChanged is set when a vnode is
//                rewritten
//   ino        - inode number of the index file to open
//   class      - selects the VnodeClassInfo entry (record size, magic)
//   RW         - nonzero when salvaging the read-write volume
//   ip/nInodes - inode records to match; ip only ever advances, so it is
//                presumably ordered by vnode number (a comment below
//                mentions a sort)
//   volSummary - supplies the volume id and parent id
//   check      - when set, no vnode may be changed (asserted below)
2531 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2532 struct ViceInodeInfo *ip, int nInodes,
2533 struct VolumeSummary *volSummary, int check)
2535 VolumeId volumeNumber;
// buf is sized for the large disk vnode and is accessed through vnode.
2536 char buf[SIZEOF_LARGEDISKVNODE];
2537 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2539 StreamHandle_t *file;
2540 struct VnodeClassInfo *vcp;
2542 afs_sfsize_t nVnodes;
2543 afs_fsize_t vnodeLength;
2545 afs_ino_str_t stmp1, stmp2;
// Open the index file; failure to open is treated as a programming error.
2549 volumeNumber = volSummary->header.id;
2550 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2551 fdP = IH_OPEN(handle);
2552 assert(fdP != NULL);
2553 file = FDH_FDOPEN(fdP, "r+");
2554 assert(file != NULL);
2555 vcp = &VnodeClassInfo[class];
2556 size = OS_SIZE(fdP->fd_fd);
// The file must hold exactly nVnodes + 1 records of vcp->diskSize bytes; the
// first record is skipped by seeking to offset vcp->diskSize.
2558 nVnodes = (size / vcp->diskSize) - 1;
2560 assert((nVnodes + 1) * vcp->diskSize == size);
2561 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
// Read one record per iteration until nVnodes are consumed or a read fails.
2565 for (vnodeIndex = 0;
2566 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2567 nVnodes--, vnodeIndex++) {
// Only records whose type is not vNull are examined.
2568 if (vnode->type != vNull) {
2569 int vnodeChanged = 0;
2570 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2571 /* Log programs that belong to root (potentially suid root);
2572 * don't bother for read-only or backup volumes */
2573 #ifdef notdef /* This is done elsewhere */
2574 if (ShowRootFiles && RW && vnode->owner == 0 && vnodeNumber != 1)
2575 Log("OWNER IS ROOT %s %u dir %u vnode %u author %u owner %u mode %o\n", salvinfo->VolInfo.name, volumeNumber, vnode->parent, vnodeNumber, vnode->author, vnode->owner, vnode->modeBits);
// A vnode whose inode number is 0 is zeroed (an additional enclosing
// condition may be among the lines not visible here).
2577 if (VNDISK_GET_INO(vnode) == 0) {
2579 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2580 memset(vnode, 0, vcp->diskSize);
// A vnode with the wrong magic number for its class is logged and zeroed.
2584 if (vcp->magic != vnode->vnodeMagic) {
2585 /* bad magic #, probably partially created vnode */
2586 Log("Partially allocated vnode %d deleted.\n",
2588 memset(vnode, 0, vcp->diskSize);
2592 /* ****** Should do a bit more salvage here: e.g. make sure
2593 * vnode type matches what it should be given the index */
// Loop over inode records that belong to lower vnode numbers.
2594 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2595 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2596 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2597 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
// Among the inode records for this vnode number, test for the one whose
// inode number equals the one stored in the vnode.
2604 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2605 /* The following doesn't work, because the version number
2606 * is not maintained correctly by the file server */
2607 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2608 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2610 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2616 /* For RW volume, look for vnode with matching inode number;
2617 * if no such match, take the first determined by our sort
// lip and lnInodes are scratch copies used for a look-ahead scan.
2619 struct ViceInodeInfo *lip = ip;
2620 int lnInodes = nInodes;
2622 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2623 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
// An inode record for this vnode number exists: reconcile the vnode with it.
2632 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2633 /* "Matching" inode */
// vu/vd come from the vnode, iu/id from the inode record.
2637 vu = vnode->uniquifier;
2638 iu = ip->u.vnode.vnodeUniquifier;
2639 vd = vnode->dataVersion;
2640 id = ip->u.vnode.inodeDataVersion;
2642 * Because of the possibility of the uniquifier overflows (> 4M)
2643 * we compare them modulo the low 22-bits; we shouldn't worry
2644 * about mismatching since they shouldn't to many old
2645 * uniquifiers of the same vnode...
// Uniquifier mismatch: the vnode takes the inode uniquifier and a data
// version chosen from id and vd; the exact rule depends on the platform
// macros AFS_3DISPARES and AFS_SGI_EXMAG.
2647 if (IUnique(vu) != IUnique(iu)) {
2649 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2652 vnode->uniquifier = iu;
2653 #ifdef AFS_3DISPARES
2654 vnode->dataVersion = (id >= vd ?
2657 1887437 ? vd : id) :
2660 1887437 ? id : vd));
2662 #if defined(AFS_SGI_EXMAG)
2663 vnode->dataVersion = (id >= vd ?
2666 15099494 ? vd : id) :
2669 15099494 ? id : vd));
2671 vnode->dataVersion = (id > vd ? id : vd);
2672 #endif /* AFS_SGI_EXMAG */
2673 #endif /* AFS_3DISPARES */
2676 /* don't bother checking for vd > id any more, since
2677 * partial file transfers always result in this state,
2678 * and you can't do much else anyway (you've already
2679 * found the best data you can) */
// Non-directory vnode whose data version is behind the inode: adopt id.
2680 #ifdef AFS_3DISPARES
2681 if (!vnodeIsDirectory(vnodeNumber)
2682 && ((vd < id && (id - vd) < 1887437)
2683 || ((vd > id && (vd - id) > 1887437)))) {
2685 #if defined(AFS_SGI_EXMAG)
2686 if (!vnodeIsDirectory(vnodeNumber)
2687 && ((vd < id && (id - vd) < 15099494)
2688 || ((vd > id && (vd - id) > 15099494)))) {
2690 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2691 #endif /* AFS_SGI_EXMAG */
2694 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2695 vnode->dataVersion = id;
// Inode number stored in the vnode differs from the chosen inode: log and
// store the inode number of the record.
2700 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2703 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2705 VNDISK_SET_INO(vnode, ip->inodeNumber);
2710 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2712 VNDISK_SET_INO(vnode, ip->inodeNumber);
// Length stored in the vnode differs from the inode byte count: log and
// store the inode byte count.
2715 VNDISK_GET_LEN(vnodeLength, vnode);
2716 if (ip->byteCount != vnodeLength) {
2719 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2724 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2725 VNDISK_SET_LEN(vnode, ip->byteCount);
2729 ip->linkCount--; /* Keep the inode around */
// No inode record exists for this vnode number.
2732 } else { /* no matching inode */
2734 if (VNDISK_GET_INO(vnode) != 0
2735 || vnode->type == vDirectory) {
2736 /* No matching inode--get rid of the vnode */
// Two pairs of messages follow: one pair without and one pair with the
// words -- vnode deleted --; the conditions selecting between them are not
// visible here.
2738 if (VNDISK_GET_INO(vnode)) {
2740 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2744 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2749 if (VNDISK_GET_INO(vnode)) {
2751 time_t serverModifyTime = vnode->serverModifyTime;
2752 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2756 time_t serverModifyTime = vnode->serverModifyTime;
2757 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2760 memset(vnode, 0, vcp->diskSize);
2763 /* Should not reach here because we checked for
2764 * (inodeNumber == 0) above. And where we zero the vnode,
2765 * we also goto vnodeDone.
// Loop over any remaining inode records with this vnode number.
2769 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2773 } /* VNDISK_GET_INO(vnode) != 0 */
// A change while in check mode is a programming error.
2775 assert(!(vnodeChanged && check));
// Write the modified record back at its index offset unless Testing.
2776 if (vnodeChanged && !Testing) {
2778 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2779 (char *)vnode, vcp->diskSize)
2781 salvinfo->VolumeChanged = 1; /* For break call back */
// CheckVnodeNumber: look up the in-memory VnodeEssence for vnodeNumber.
// The vnode class and bit number are derived from vnodeNumber; the result is
// NULL when the bit number is not below the nVnodes recorded for that class,
// otherwise a pointer into that class's vnodes array.
2792 struct VnodeEssence *
2793 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2796 struct VnodeInfo *vip;
2799 class = vnodeIdToClass(vnodeNumber);
2800 vip = &salvinfo->vnodeInfo[class];
2801 offset = vnodeIdToBitNumber(vnodeNumber);
2802 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
// CopyOnWrite: give dir a fresh copy of its directory inode before it is
// modified.  The large-vnode record of dir is read, a new inode is created,
// the old inode contents are copied into it, the vnode record is rewritten to
// point at the new inode and the directory handle of dir is switched over.
// NOTE(review): some lines (braces, the early return, assignment targets such
// as code = and newinode =) are absent from this listing.
//   salvinfo - salvage job state (device, path, large vnode index handle)
//   dir      - directory being salvaged; tested for dir->copied on entry
2806 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2808 /* Copy the directory unconditionally if we are going to change it:
2809 * not just if was cloned.
2811 struct VnodeDiskObject vnode;
2812 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2813 Inode oldinode, newinode;
// Tested on entry: dir already copied, or Testing mode (the guarded
// statement is not visible).
2816 if (dir->copied || Testing)
2818 DFlush(); /* Well justified paranoia... */
// Read the directory vnode record; a short read is a programming error.
2821 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2822 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2824 assert(code == sizeof(vnode));
2825 oldinode = VNDISK_GET_INO(&vnode);
2826 /* Increment the version number by a whole lot to avoid problems with
2827 * clients that were promised new version numbers--but the file server
2828 * crashed before the versions were written to disk.
// Create the new inode; vnode.dataVersion is bumped in the same expression.
2831 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2832 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2834 assert(VALID_INO(newinode));
// NOTE(review): CopyInode is called inside assert, so the copy would not run
// if assert were compiled out -- confirm how assert is defined in this tree.
2835 assert(CopyInode(salvinfo->fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2837 VNDISK_SET_INO(&vnode, newinode);
// Write the updated vnode record back to the large vnode index.
2839 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2840 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2842 assert(code == sizeof(vnode));
// Point the directory handle at the new inode.
2844 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2845 salvinfo->fileSysDevice, newinode,
2846 &salvinfo->VolumeChanged);
2847 /* Don't delete the original inode right away, because the directory is
2848 * still being scanned.
2854 * This function should either successfully create a new dir, or give up
2855 * and leave things the way they were. In particular, if it fails to write
2856 * the new dir properly, it should return w/o changing the reference to the
// CopyAndSalvage: rebuild the directory dir into a newly created inode using
// DirSalvage, verify the result with DirOK, and on success make the vnode
// record and dir->dirHandle refer to the new inode and release the old one.
// On failure the new inode is released with IH_DEC.
// NOTE(review): some lines (braces, returns, assignment targets, a few
// conditions) are absent from this listing.
//   salvinfo - salvage job state (device, path, large vnode index handle)
//   dir      - directory to rebuild; its dirHandle is replaced on success
2860 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2862 struct VnodeDiskObject vnode;
2863 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2864 Inode oldinode, newinode;
// Default uniquifier for the .. entry when the parent cannot be looked up.
2869 afs_int32 parentUnique = 1;
2870 struct VnodeEssence *vnodeEssence;
2875 Log("Salvaging directory %u...\n", dir->vnodeNumber);
// Read the directory vnode record; a short read is a programming error.
2877 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2878 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2880 assert(lcode == sizeof(vnode));
2881 oldinode = VNDISK_GET_INO(&vnode);
2882 /* Increment the version number by a whole lot to avoid problems with
2883 * clients that were promised new version numbers--but the file server
2884 * crashed before the versions were written to disk.
// Create the inode that will hold the rebuilt directory.
2887 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2888 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2890 assert(VALID_INO(newinode));
2891 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2892 &salvinfo->VolumeChanged);
2894 /* Assign . and .. vnode numbers from dir and vnode.parent.
2895 * The uniquifier for . is in the vnode.
2896 * The uniquifier for .. might be set to a bogus value of 1 and
2897 * the salvager will later clean it up.
// Use the parent's recorded uniquifier when it is known and nonzero.
2899 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2900 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
// Rebuild from the old handle into newdir; a directory with no parent uses
// its own vnode number for the .. entry.
2903 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2905 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2910 /* didn't really build the new directory properly, let's just give up. */
2911 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2912 Log("Directory salvage returned code %d, continuing.\n", code);
2914 Log("also failed to decrement link count on new inode");
// Verify the rebuilt directory before committing to it.
2918 Log("Checking the results of the directory salvage...\n");
2919 if (!DirOK(&newdir)) {
2920 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2921 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
// Commit: record the new inode number and length in the vnode and write it.
2926 VNDISK_SET_INO(&vnode, newinode);
2927 length = Length(&newdir);
2928 VNDISK_SET_LEN(&vnode, length);
2930 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2931 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2933 assert(lcode == sizeof(vnode));
// Flush to disk; nt_sync and sync are presumably selected by a platform
// conditional whose lines are not visible.
2936 nt_sync(salvinfo->fileSysDevice);
2938 sync(); /* this is slow, but hopefully rarely called. We don't have
2939 * an open FD on the file itself to fsync.
2943 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
2945 /* make sure old directory file is really closed */
2946 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2947 FDH_REALLYCLOSE(fdP);
// Release the old inode and switch dir over to the rebuilt directory.
2949 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2951 dir->dirHandle = newdir;
2955 * arguments for JudgeEntry.
// Context bundle handed to JudgeEntry through its opaque first argument;
// SalvageDir fills one in before calling EnumerateDir.
2957 struct judgeEntry_params {
2958 struct DirSummary *dir; /**< directory we're examining entries in */
2959 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
// JudgeEntry: per-entry callback passed to EnumerateDir by SalvageDir.  It
// validates one directory entry (name -> vnodeNumber, unique) against the
// in-memory VnodeEssence table, deleting or rewriting the entry when it is
// inconsistent, and records that the directory claims the vnode.
// NOTE(review): many short lines (braces, returns, if (!Testing) style guards,
// the final parameter line) are absent from this listing; comments cover
// visible lines only.
//   arock       - opaque pointer to a struct judgeEntry_params
//   name        - entry name within the directory
//   vnodeNumber - vnode number stored in the entry
//   unique      - uniquifier stored in the entry (declared on a line not
//                 visible here, but used throughout)
2963 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
2966 struct judgeEntry_params *params = arock;
2967 struct DirSummary *dir = params->dir;
2968 struct SalvInfo *salvinfo = params->salvinfo;
2969 struct VnodeEssence *vnodeEssence;
2970 afs_int32 dirOrphaned, todelete;
2972 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
// Entry refers to a vnode number outside the index: remove the entry.
2974 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
2975 if (vnodeEssence == NULL) {
2977 Log("dir vnode %u: invalid entry deleted: %s/%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2980 CopyOnWrite(salvinfo, dir);
// NOTE(review): Delete and Create are invoked inside assert throughout this
// function, so they would not run if assert were compiled out -- confirm how
// assert is defined in this tree.
2981 assert(Delete(&dir->dirHandle, name) == 0);
2986 #ifndef AFS_NAMEI_ENV
2987 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2988 * mount inode for the partition. If this inode were deleted, it would crash
2991 if (vnodeEssence->InodeNumber == 0) {
2992 Log("dir vnode %d: invalid entry: %s/%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2994 CopyOnWrite(salvinfo, dir);
2995 assert(Delete(&dir->dirHandle, name) == 0);
// Even vnode number whose essence has no count, unique or mode bits: the
// entry is reported as invalid.
3002 if (!(vnodeNumber & 1) && !Showmode
3003 && !(vnodeEssence->count || vnodeEssence->unique
3004 || vnodeEssence->modeBits)) {
3005 Log("dir vnode %u: invalid entry: %s/%s (vnode %u, unique %u)%s\n",
3006 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3007 vnodeNumber, unique,
3008 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3012 CopyOnWrite(salvinfo, dir);
3013 assert(Delete(&dir->dirHandle, name) == 0);
3019 /* Check if the Uniquifiers match. If not, change the directory entry
3020 * so its unique matches the vnode unique. Delete if the unique is zero
3021 * or if the directory is orphaned.
3023 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3024 if (!vnodeEssence->unique
3025 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3026 /* This is an orphaned directory. Don't delete the . or ..
3027 * entry. Otherwise, it will get created in the next
3028 * salvage and deleted again here. So Just skip it.
// todelete is 1 when the vnode has no uniquifier or this directory is
// orphaned; otherwise the entry is recreated with the vnode uniquifier.
3033 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3036 Log("dir vnode %u: %s/%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3040 fid.Vnode = vnodeNumber;
3041 fid.Unique = vnodeEssence->unique;
3042 CopyOnWrite(salvinfo, dir);
3043 assert(Delete(&dir->dirHandle, name) == 0);
3045 assert(Create(&dir->dirHandle, name, &fid) == 0);
3048 return 0; /* no need to continue */
// The . entry must refer to this directory itself (vnode number and unique).
3051 if (strcmp(name, ".") == 0) {
3052 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3055 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3057 CopyOnWrite(salvinfo, dir);
3058 assert(Delete(&dir->dirHandle, ".") == 0);
3059 fid.Vnode = dir->vnodeNumber;
3060 fid.Unique = dir->unique;
3061 assert(Create(&dir->dirHandle, ".", &fid) == 0);
3064 vnodeNumber = fid.Vnode; /* Get the new Essence */
3065 unique = fid.Unique;
3066 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
// The .. entry must refer to the parent; two candidate values for pa are
// visible (the parent vnode, or this directory itself) but the condition
// choosing between them is not.
3069 } else if (strcmp(name, "..") == 0) {
3072 struct VnodeEssence *dotdot;
3073 pa.Vnode = dir->parent;
3074 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3075 assert(dotdot != NULL); /* XXX Should not be assert */
3076 pa.Unique = dotdot->unique;
3078 pa.Vnode = dir->vnodeNumber;
3079 pa.Unique = dir->unique;
3081 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3083 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3085 CopyOnWrite(salvinfo, dir);
3086 assert(Delete(&dir->dirHandle, "..") == 0);
3087 assert(Create(&dir->dirHandle, "..", &pa) == 0);
3090 vnodeNumber = pa.Vnode; /* Get the new Essence */
3092 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3094 dir->haveDotDot = 1;
// Names beginning with .__afs are removed and their vnode is marked for
// later deletion.
3095 } else if (strncmp(name, ".__afs", 6) == 0) {
3097 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3100 CopyOnWrite(salvinfo, dir);
3101 assert(Delete(&dir->dirHandle, name) == 0);
3103 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3104 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
// Ordinary entries: optional reporting of suid/sgid files (mode bits 06000).
3107 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3108 Log("FOUND suid/sgid file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
// A symlink with no execute bits (0111 clear) is treated as a mount point
// and its contents are read for validation.
3109 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3110 && !(vnodeEssence->modeBits & 0111)) {
3117 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3118 vnodeEssence->InodeNumber);
3121 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3125 size = FDH_SIZE(fdP);
3127 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3128 FDH_REALLYCLOSE(fdP);
3135 nBytes = FDH_READ(fdP, buf, size);
3136 if (nBytes == size) {
// Contents must start with # or % and end with a dot; otherwise execute
// bits are set on the essence so it is no longer treated as a mount point.
// NOTE(review): strlen(buf) relies on buf being NUL-terminated and
// non-empty; the lines establishing that are not visible -- confirm.
3138 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3139 Log("Volume %u (%s) mount point %s/%s to '%s' invalid, %s to symbolic link\n",
3140 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3141 Testing ? "would convert" : "converted");
3142 vnodeEssence->modeBits |= 0111;
3143 vnodeEssence->changed = 1;
3144 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s/%s to '%s'\n",
3145 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3146 dir->name ? dir->name : "??", name, buf);
3148 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3149 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3151 FDH_REALLYCLOSE(fdP);
// Optional reporting of files owned by id 0, excluding vnode 1.
3154 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3155 Log("FOUND root file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
// Remember the entry name for large (directory-class) vnodes that have no
// name recorded yet; allocation failure leaves the name unset.
3156 if (vnodeIdToClass(vnodeNumber) == vLarge
3157 && vnodeEssence->name == NULL) {
3159 if ((n = (char *)malloc(strlen(name) + 1)))
3161 vnodeEssence->name = n;
3164 /* The directory entry points to the vnode. Check to see if the
3165 * vnode points back to the directory. If not, then let the
3166 * directory claim it (else it might end up orphaned). Vnodes
3167 * already claimed by another directory are deleted from this
3168 * directory: hardlinks to the same vnode are not allowed
3169 * from different directories.
3171 if (vnodeEssence->parent != dir->vnodeNumber) {
3172 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3173 /* Vnode does not point back to this directory.
3174 * Orphaned dirs cannot claim a file (it may belong to
3175 * another non-orphaned dir).
3178 Log("dir vnode %u: %s/%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3180 vnodeEssence->parent = dir->vnodeNumber;
3181 vnodeEssence->changed = 1;
3183 /* Vnode was claimed by another directory */
3186 Log("dir vnode %u: %s/%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3187 } else if (vnodeNumber == 1) {
3188 Log("dir vnode %d: %s/%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3190 Log("dir vnode %u: %s/%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3194 CopyOnWrite(salvinfo, dir);
3195 assert(Delete(&dir->dirHandle, name) == 0);
3200 /* This directory claims the vnode */
3201 vnodeEssence->claimed = 1;
// One expected link has been accounted for by this entry.
3203 vnodeEssence->count--;
// DistilVnodeEssence: read one vnode index file (inode ino, vnode class class)
// and build the in-memory summary salvinfo->vnodeInfo[class]: one
// VnodeEssence per vnode record, the allocated-vnode count and the total
// block count.  For the large class an array of directory inode numbers is
// built as well.
// NOTE(review): some lines (braces, else branches, declarations) are absent
// from this listing.
//   salvinfo - salvage job state; vnodeInfo[class] is filled in
//   rwVId    - volume id used to initialise the index inode handle
//   class    - vnode class being read
//   ino      - inode number of the index file
//   maxu     - in/out: raised to the largest uniquifier seen
3208 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3209 VnodeClass class, Inode ino, Unique * maxu)
3211 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3212 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
// buf is sized for the large disk vnode and is accessed through vnode, which
// is a pointer into buf.
3213 char buf[SIZEOF_LARGEDISKVNODE];
3214 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3216 StreamHandle_t *file;
// Open the index file; failure to open is treated as a programming error.
3221 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3222 fdP = IH_OPEN(vip->handle);
3223 assert(fdP != NULL);
3224 file = FDH_FDOPEN(fdP, "r+");
3225 assert(file != NULL);
3226 size = OS_SIZE(fdP->fd_fd);
// The file holds nVnodes + 1 records; the first record is skipped.
3228 vip->nVnodes = (size / vcp->diskSize) - 1;
3229 if (vip->nVnodes > 0) {
3230 assert((vip->nVnodes + 1) * vcp->diskSize == size);
3231 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
// NOTE(review): these allocations are performed inside assert, so they would
// not run if assert were compiled out -- confirm how assert is defined here.
3232 assert((vip->vnodes = (struct VnodeEssence *)
3233 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3234 if (class == vLarge) {
3235 assert((vip->inodes = (Inode *)
3236 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3245 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
// Read one record per iteration until nVnodes are consumed or a read fails.
3246 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3247 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3248 nVnodes--, vnodeIndex++) {
// Copy the fields of each in-use vnode into its VnodeEssence slot.
3249 if (vnode->type != vNull) {
3250 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3251 afs_fsize_t vnodeLength;
3252 vip->nAllocatedVnodes++;
3253 vep->count = vnode->linkCount;
3254 VNDISK_GET_LEN(vnodeLength, vnode);
3255 vep->blockCount = nBlocks(vnodeLength);
3256 vip->volumeBlockCount += vep->blockCount;
3257 vep->parent = vnode->parent;
3258 vep->unique = vnode->uniquifier;
3259 if (*maxu < vnode->uniquifier)
3260 *maxu = vnode->uniquifier;
3261 vep->modeBits = vnode->modeBits;
3262 vep->InodeNumber = VNDISK_GET_INO(vnode);
3263 vep->type = vnode->type;
3264 vep->author = vnode->author;
3265 vep->owner = vnode->owner;
3266 vep->group = vnode->group;
3267 if (vnode->type == vDirectory) {
// A directory vnode found in a non-large index is cleared and written out.
3268 if (class != vLarge) {
3269 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3270 vip->nAllocatedVnodes--;
// NOTE(review): vnode is a pointer (see its declaration above), so
// sizeof(vnode) is the size of a pointer rather than of the disk vnode, and
// (char *)&vnode is the address of the pointer variable rather than of the
// record buffer.  As written, only pointer-size bytes of the record are
// cleared and the bytes of the pointer itself are written to the index file.
// This looks like it was meant to clear and write vcp->diskSize bytes of the
// buffer that vnode points to -- confirm and fix.
3271 memset(vnode, 0, sizeof(vnode));
3272 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3273 vnodeIndexOffset(vcp, vnodeNumber),
3274 (char *)&vnode, sizeof(vnode));
3275 salvinfo->VolumeChanged = 1;
// Otherwise (presumably the large class; the else line is not visible) the
// directory inode number is remembered per vnode index.
3277 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
// GetDirName: build the path name of directory vnode vp into path by first
// recursing on its parent and then appending vp->name with strcat.
// NOTE(review): most of this function (final parameter, base cases, return
// statements, any separator handling) is absent from this listing.  The
// result is used as a truth value in the recursive call below; path is
// presumably a caller-supplied buffer, and no bound on its length is visible
// -- confirm.
3286 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3289 struct VnodeEssence *parentvp;
// Recurse only when the vnode has a parent and a recorded name, the parent
// exists in the table, and the parent path could be built.
3295 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3296 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3298 strcat(path, vp->name);
3304 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3305 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
// IsVnodeOrphaned: returns 1 when vnode is orphaned, 0 when it is not.
// The conditions guarding the first two returns are on lines absent from this
// listing; per their comments they cover vnode zero and the root directory.
// Otherwise the vnode must exist and be claimed, and the same test is then
// applied recursively to its parent.
3308 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3310 struct VnodeEssence *vep;
3313 return (1); /* Vnode zero does not exist */
3315 return (0); /* The root dir vnode is always claimed */
3316 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3317 if (!vep || !vep->claimed)
3318 return (1); /* Vnode is not claimed - it is orphaned */
// Claimed here; the answer now depends on the chain of parents.
3320 return (IsVnodeOrphaned(salvinfo, vep->parent));
// SalvageDir: salvage the directory at bit index i of the large vnode index.
// The parent directory is salvaged first (recursively) when it has not been
// yet; the directory is then checked with DirOK, rebuilt with CopyAndSalvage
// when bad, and every entry is passed to JudgeEntry through EnumerateDir.
// NOTE(review): some lines (braces, returns, else branches, a few
// assignments) are absent from this listing.
//   salvinfo     - salvage job state
//   name         - passed through unchanged to the recursive call
//   rwVid        - read-write volume id, used when releasing the old inode
//   dirVnodeInfo - large vnode table (vnodes and inodes arrays)
//   alinkH       - link handle stored in dir.ds_linkH for inode operations
//   i            - bit index of the directory within dirVnodeInfo
//   rootdir      - receives a copy of the DirSummary for vnode 1, unique 1
//   rootdirfound - out flag; its assignment is not visible here
3324 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3325 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3326 struct DirSummary *rootdir, int *rootdirfound)
// NOTE(review): dir, dirHandle and path are static, so they are shared by
// every invocation, including the recursive one below; the visible lines
// fill dir only after the recursive call returns -- confirm nothing relies
// on them across the recursion.
3328 static struct DirSummary dir;
3329 static struct DirHandle dirHandle;
3330 struct VnodeEssence *parent;
3331 static char path[MAXPATHLEN];
3334 if (dirVnodeInfo->vnodes[i].salvaged)
3335 return; /* already salvaged */
// Mark before descending so a later visit of this index returns early.
3338 dirVnodeInfo->vnodes[i].salvaged = 1;
3340 if (dirVnodeInfo->inodes[i] == 0)
3341 return; /* Not allocated to a directory */
// Vnode number 1 must have no parent; a nonzero parent is cleared.
3343 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3344 if (dirVnodeInfo->vnodes[i].parent) {
3345 Log("Bad parent, vnode 1; %s...\n",
3346 (Testing ? "skipping" : "salvaging"));
3347 dirVnodeInfo->vnodes[i].parent = 0;
3348 dirVnodeInfo->vnodes[i].changed = 1;
// Salvage the parent directory first when it exists and is not yet done.
3351 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3352 if (parent && parent->salvaged == 0)
3353 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3354 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3355 rootdir, rootdirfound);
// Fill in the summary for this directory.
3358 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3359 dir.unique = dirVnodeInfo->vnodes[i].unique;
3362 dir.parent = dirVnodeInfo->vnodes[i].parent;
3363 dir.haveDot = dir.haveDotDot = 0;
3364 dir.ds_linkH = alinkH;
3365 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3366 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
// With RebuildDirs set (and not Testing) the directory is treated as bad
// without calling DirOK.
3368 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3371 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3372 (Testing ? "skipping" : "salvaging"));
3375 CopyAndSalvage(salvinfo, &dir);
// Track the inode number now referenced by the directory handle.
3377 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
// Keep a copy of the handle as it is before the entries are judged.
3380 dirHandle = dir.dirHandle;
3383 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3384 &dirVnodeInfo->vnodes[i], path);
3387 /* If enumeration failed for random reasons, we will probably delete
3388 * too much stuff, so we guard against this instead.
3390 struct judgeEntry_params judge_params;
3391 judge_params.salvinfo = salvinfo;
3392 judge_params.dir = &dir;
// Enumerate through the saved handle copy while JudgeEntry works on dir.
3394 assert(EnumerateDir(&dirHandle, JudgeEntry, &judge_params) == 0);
3397 /* Delete the old directory if it was copied in order to salvage.
3398 * CopyOnWrite has written the new inode # to the disk, but we still
3399 * have the old one in our local structure here. Thus, we idec the
3403 if (dir.copied && !Testing) {
3404 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3406 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3409 /* Remember rootdir DirSummary _after_ it has been judged */
3410 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3411 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3419 * Get a new FID that can be used to create a new file.
3421 * @param[in] volHeader vol header for the volume
3422 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3423 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3424 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3425 * updated to the new max unique if we create a new
// GetNewFID: choose a vnode number and uniquifier for a new vnode of the
// given class.  The first slot whose type is vNull is used; when none is
// free the in-memory vnodes array grows by one element.
// NOTE(review): some lines (braces, the loop exit, possibly further updates
// of the uniquifiers) are absent from this listing.
//   salvinfo  - salvage job state; vnodeInfo[class] may be grown
//   volHeader - volume header; its uniquifier is read and may be incremented
//   class     - vnode class of the new vnode
//   afid      - out: Vnode and Unique are set
//   maxunique - in/out: largest uniquifier known for the volume
3429 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3430 VnodeClass class, AFSFid *afid, Unique *maxunique)
// Scan for a free (vNull) slot; i ends equal to nVnodes when none is found.
3433 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3434 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
3438 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3439 /* no free vnodes; make a new one */
3440 salvinfo->vnodeInfo[class].nVnodes++;
// NOTE(review): the realloc result is stored straight over the only pointer
// to the array and no NULL check is visible; on allocation failure the old
// array would be lost and the next statement would dereference NULL.  Only
// the type field of the new element is initialised in the visible lines, so
// its other fields presumably hold indeterminate values -- confirm.
3441 salvinfo->vnodeInfo[class].vnodes =
3442 realloc(salvinfo->vnodeInfo[class].vnodes,
3443 sizeof(struct VnodeEssence) * (i+1));
3445 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
3448 afid->Vnode = bitNumberToVnodeNumber(i, class);
// Header uniquifier not above the known maximum: derive the new uniquifier
// from the maximum plus a margin of 2000.
3450 if (volHeader->uniquifier < (*maxunique + 1)) {
3451 /* header uniq is bad; it will get bumped by 2000 later */
3452 afid->Unique = *maxunique + 1 + 2000;
3455 /* header uniq seems okay; just use that */
// The header value becomes both the new uniquifier and the new maximum, and
// the header uniquifier is then incremented.
3456 afid->Unique = *maxunique = volHeader->uniquifier++;
3461 * Create a vnode for a README file explaining not to use a recreated-root vol.
3463 * @param[in] volHeader vol header for the volume
3464 * @param[in] alinkH ihandle for i/o for the volume
3465 * @param[in] vid volume id
3466 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3467 * updated to the new max unique if we create a new
3469 * @param[out] afid FID for the new readme vnode
3470 * @param[out] ainode the inode for the new readme file
3472 * @return operation status
/*
 * CreateReadme: create the explanatory README file that CreateRootDir links
 * into a recreated root directory as "README.ROOTDIR".
 *
 * Visible steps:
 *  - obtain a small-vnode FID from GetNewFID();
 *  - IH_CREATE a new inode and IH_IWRITE the readme text into it
 *    (sizeof(readme) - 1 bytes, i.e. without the trailing NUL);
 *  - calloc a small disk vnode, fill it in (vFile, mode 0777, link count 1,
 *    data version 1, modify times = now) and write it into the small vnode
 *    index at the offset for afid->Vnode;
 *  - mirror the new vnode into its VnodeEssence entry and bump the small
 *    class's nAllocatedVnodes / volumeBlockCount;
 *  - hand the new inode back through *ainode.
 * The trailing IH_DEC is the recovery path (its log message says so); the
 * jumps into it and the return statements are not visible in this excerpt.
 */
3477 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3478 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3482 struct VnodeDiskObject *rvnode = NULL;
3484 IHandle_t *readmeH = NULL;
3485 struct VnodeEssence *vep;
3487 time_t now = time(NULL);
3489 /* Try to make the note brief, but informative. Only administrators should
3490 * be able to read this file at first, so we can hopefully assume they
3491 * know what AFS is, what a volume is, etc. */
3493 "This volume has been salvaged, but has lost its original root directory.\n"
3494 "The root directory that exists now has been recreated from orphan files\n"
3495 "from the rest of the volume. This recreated root directory may interfere\n"
3496 "with old cached data on clients, and there is no way the salvager can\n"
3497 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3498 "use this volume, but only copy the salvaged data to a new volume.\n"
3499 "Continuing to use this volume as it exists now may cause some clients to\n"
3500 "behave oddly when accessing this volume.\n"
3501 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3502 /* ^ the person reading this probably just lost some data, so they could
3503 * use some cheering up. */
3505 /* -1 for the trailing NUL */
3506 length = sizeof(readme) - 1;
3508 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
3510 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3512 /* create the inode and write the contents */
3513 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3514 salvinfo->fileSysPath, 0, vid,
3515 afid->Vnode, afid->Unique, 1);
3516 if (!VALID_INO(readmeinode)) {
3517 Log("CreateReadme: readme IH_CREATE failed\n");
3521 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3522 bytes = IH_IWRITE(readmeH, 0, readme, length);
3523 IH_RELEASE(readmeH);
3525 if (bytes != length) {
/* NOTE(review): the expected size logged below is sizeof(readme), one more
 * than the length the write was actually compared against. */
3526 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3527 (int)sizeof(readme));
3531 /* create the vnode and write it out */
3532 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
/* NOTE(review): the message below names CreateRootDir although it is
 * emitted from CreateReadme. */
3534 Log("CreateRootDir: error alloc'ing memory\n");
3538 rvnode->type = vFile;
3540 rvnode->modeBits = 0777;
3541 rvnode->linkCount = 1;
3542 VNDISK_SET_LEN(rvnode, length);
3543 rvnode->uniquifier = afid->Unique;
3544 rvnode->dataVersion = 1;
3545 VNDISK_SET_INO(rvnode, readmeinode);
3546 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3551 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
3553 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3554 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3555 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3557 if (bytes != SIZEOF_SMALLDISKVNODE) {
3558 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3559 (int)SIZEOF_SMALLDISKVNODE);
3563 /* update VnodeEssence for new readme vnode */
3564 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
3566 vep->blockCount = nBlocks(length);
3567 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3568 vep->parent = rvnode->parent;
3569 vep->unique = rvnode->uniquifier;
3570 vep->modeBits = rvnode->modeBits;
3571 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3572 vep->type = rvnode->type;
3573 vep->author = rvnode->author;
3574 vep->owner = rvnode->owner;
3575 vep->group = rvnode->group;
3585 *ainode = readmeinode;
3590 if (IH_DEC(alinkH, readmeinode, vid)) {
3591 Log("CreateReadme (recovery): IH_DEC failed\n");
3603 * create a root dir for a volume that lacks one.
3605 * @param[in] volHeader vol header for the volume
3606 * @param[in] alinkH ihandle for disk access for this volume group
3607 * @param[in] vid volume id we're dealing with
3608 * @param[out] rootdir populated with info about the new root dir
3609 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3610 * updated to the new max unique if we create a new
3613 * @return operation status
/*
 * CreateRootDir: build a replacement root directory (vnode 1) for a volume
 * whose root is missing.
 *
 * Visible steps:
 *  - give up (with a log message) when neither vnode class has a vnode table,
 *    or when the large table's entry for vnode 1 is not vNull;
 *  - if only the large table is missing, allocate one-entry vnodes[] and
 *    inodes[] tables for it (allocation failure trips an assert);
 *  - create the README via CreateReadme(), create the directory inode with
 *    IH_CREATE, initialise it with MakeDir() and add "README.ROOTDIR";
 *  - build a large disk vnode whose ACL has a single entry granting
 *    READ|LOOKUP to id -204 (system:administrators), with type vDirectory,
 *    mode 0777, link count 2, uniquifier 1, and write it at the index offset
 *    of vnode 1;
 *  - mirror the vnode into its VnodeEssence entry and fill in *rootdir.
 * The two IH_DEC calls at the end are the recovery path; each is guarded by
 * a flag (decroot / decreadme). The code that sets those flags and the
 * return statements are not visible in this excerpt.
 */
3618 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3619 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
3623 int decroot = 0, decreadme = 0;
3624 AFSFid did, readmeid;
3627 struct VnodeDiskObject *rootvnode = NULL;
3628 struct acl_accessList *ACL;
3631 struct VnodeEssence *vep;
3633 time_t now = time(NULL);
3635 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3636 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3640 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3641 /* We don't have any large vnodes in the volume; allocate room
3642 * for one so we can recreate the root dir */
3643 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3644 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3645 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3647 assert(salvinfo->vnodeInfo[vLarge].vnodes);
3648 assert(salvinfo->vnodeInfo[vLarge].inodes);
/* vep / ip address the table slots for vnode number 1, the volume root. */
3651 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3652 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3653 if (vep->type != vNull) {
3654 Log("Not creating new root dir; existing vnode 1 is non-null\n");
3658 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3659 &readmeinode) != 0) {
3664 /* set the DV to a very high number, so it is unlikely that we collide
3665 * with a cached DV */
3668 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3670 if (!VALID_INO(rootinode)) {
3671 Log("CreateRootDir: IH_CREATE failed\n");
3676 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3677 rootinode, &salvinfo->VolumeChanged);
/* The same FID is passed for both MakeDir arguments. */
3681 if (MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3682 Log("CreateRootDir: MakeDir failed\n");
3685 if (Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3686 Log("CreateRootDir: Create failed\n");
3690 length = Length(&rootdir->dirHandle);
3691 DZap((void *)&rootdir->dirHandle);
3693 /* create the new root dir vnode */
3694 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
3696 Log("CreateRootDir: malloc failed\n");
3700 /* only give 'rl' permissions to 'system:administrators'. We do this to
3701 * try to catch the attention of an administrator, that they should not
3702 * be writing to this directory or continue to use it. */
3703 ACL = VVnodeDiskACL(rootvnode);
3704 ACL->size = sizeof(struct acl_accessList);
3705 ACL->version = ACL_ACLVERSION;
3709 ACL->entries[0].id = -204; /* system:administrators */
3710 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
3712 rootvnode->type = vDirectory;
3713 rootvnode->cloned = 0;
3714 rootvnode->modeBits = 0777;
3715 rootvnode->linkCount = 2;
3716 VNDISK_SET_LEN(rootvnode, length);
3717 rootvnode->uniquifier = 1;
3718 rootvnode->dataVersion = dv;
3719 VNDISK_SET_INO(rootvnode, rootinode);
3720 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3721 rootvnode->author = 0;
3722 rootvnode->owner = 0;
3723 rootvnode->parent = 0;
3724 rootvnode->group = 0;
3725 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3727 /* write it out to disk */
3728 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3729 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3730 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3732 if (bytes != SIZEOF_LARGEDISKVNODE) {
3733 /* just cast to int and don't worry about printing real 64-bit ints;
3734 * a large disk vnode isn't anywhere near the 32-bit limit */
3735 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3736 (int)SIZEOF_LARGEDISKVNODE);
3740 /* update VnodeEssence for the new root vnode */
3741 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3743 vep->blockCount = nBlocks(length);
3744 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3745 vep->parent = rootvnode->parent;
3746 vep->unique = rootvnode->uniquifier;
3747 vep->modeBits = rootvnode->modeBits;
3748 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3749 vep->type = rootvnode->type;
3750 vep->author = rootvnode->author;
3751 vep->owner = rootvnode->owner;
3752 vep->group = rootvnode->group;
3762 /* update DirSummary for the new root vnode */
3763 rootdir->vnodeNumber = 1;
3764 rootdir->unique = 1;
3765 rootdir->haveDot = 1;
3766 rootdir->haveDotDot = 1;
3767 rootdir->rwVid = vid;
3768 rootdir->copied = 0;
3769 rootdir->parent = 0;
3770 rootdir->name = strdup(".");
3771 rootdir->vname = volHeader->name;
3772 rootdir->ds_linkH = alinkH;
/* Recovery: drop whichever inodes were created before the failure. */
3779 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3780 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3782 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3783 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3793 * salvage a volume group.
3795 * @param[in] salvinfo information for the current salvage job
3796 * @param[in] rwIsp inode summary for rw volume
3797 * @param[in] alinkH link table inode handle
3799 * @return operation status
/*
 * SalvageVolume: salvage the read-write volume described by rwIsp.
 *
 * Visible phases:
 *  1. read the volume header and assert that it is intact;
 *  2. distil both vnode indexes into in-memory VnodeEssence tables, collecting
 *     the largest uniquifier seen in maxunique;
 *  3. run SalvageDir over every large (directory) vnode;
 *  4. if no root directory was found and orphans are to be attached (and we
 *     are not in Testing mode), try CreateRootDir();
 *  5. walk every vnode; unclaimed non-root vnodes are orphans, which are
 *     re-parented to the root directory and given generated
 *     __ORPHANDIR__ / __ORPHANFILE__ names (up to 10 naming attempts);
 *  6. rewrite every changed vnode, fixing link counts and removing vnodes
 *     that are to be deleted;
 *  7. fix up filecount, diskused and the uniquifier in the volume header,
 *     ask the fileserver to break callbacks when built with the FSSYNC
 *     client, clear the in-use / needs-salvage flags and write the header.
 *
 * VnodeEssence.count is a link-count correction that step 6 SUBTRACTS from
 * the on-disk linkCount (linkCount - count). That is why the code below
 * decrements count where the adjacent comments speak of incrementing a link
 * count.
 */
3803 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3805 /* This routine, for now, will only be called for read-write volumes */
3807 int BlocksInVolume = 0, FilesInVolume = 0;
3809 struct DirSummary rootdir, oldrootdir;
3810 struct VnodeInfo *dirVnodeInfo;
3811 struct VnodeDiskObject vnode;
3812 VolumeDiskData volHeader;
3814 int orphaned, rootdirfound = 0;
3815 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3816 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3817 struct VnodeEssence *vep;
3820 afs_sfsize_t nBytes;
3822 VnodeId LFVnode, ThisVnode;
3823 Unique LFUnique, ThisUnique;
/* Load the on-disk volume header and sanity-check it. */
3827 vid = rwIsp->volSummary->header.id;
3828 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3829 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3830 assert(nBytes == sizeof(volHeader));
3831 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3832 assert(volHeader.destroyMe != DESTROY_ME);
3833 /* (should not have gotten this far with DESTROY_ME flag still set!) */
/* Build the in-memory vnode tables for both classes. */
3835 DistilVnodeEssence(salvinfo, vid, vLarge,
3836 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
3837 DistilVnodeEssence(salvinfo, vid, vSmall,
3838 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
/* Salvage each directory; SalvageDir reports the root through rootdir. */
3840 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
3841 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3842 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
3843 &rootdir, &rootdirfound);
3846 nt_sync(salvinfo->fileSysDevice);
3848 sync(); /* This used to be done lower level, for every dir */
/* No root directory: try to recreate one so orphans can be attached. */
3855 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
3857 Log("Cannot find root directory for volume %lu; attempting to create "
3858 "a new one\n", afs_printable_uint32_lu(vid));
3860 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
3865 salvinfo->VolumeChanged = 1;
3869 /* Parse each vnode looking for orphaned vnodes and
3870 * connect them to the tree as orphaned (if requested).
3872 oldrootdir = rootdir;
3873 for (class = 0; class < nVNODECLASSES; class++) {
3874 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
3875 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
3876 ThisVnode = bitNumberToVnodeNumber(v, class);
3877 ThisUnique = vep->unique;
3879 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3880 continue; /* Ignore unused, claimed, and root vnodes */
3882 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3883 * entry in this vnode had incremented the parent link count (In
3884 * JudgeEntry()). We need to go to the parent and decrement that
3885 * link count. But if the parent's unique is zero, then the parent
3886 * link count was not incremented in JudgeEntry().
3888 if (class == vLarge) { /* directory vnode */
3889 pv = vnodeIdToBitNumber(vep->parent);
3890 if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
3891 if (vep->parent == 1 && newrootdir) {
3892 /* this vnode's parent was the volume root, and
3893 * we just created the volume root. So, the parent
3894 * dir didn't exist during JudgeEntry, so the link
3895 * count was not inc'd there, so don't dec it here.
3901 salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
3907 continue; /* If no rootdir, can't attach orphaned files */
3909 /* Here we attach orphaned files and directories into the
3910 * root directory, LVVnode, making sure link counts stay correct.
3912 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3913 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3914 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3916 /* Update this orphaned vnode's info. Its parent info and
3917 * link count (do for orphaned directories and files).
3919 vep->parent = LFVnode; /* Parent is the root dir */
3920 vep->unique = LFUnique;
3923 vep->count--; /* Inc link count (root dir will pt to it) */
3925 /* If this orphaned vnode is a directory, change '..'.
3926 * The name of the orphaned dir/file is unknown, so we
3927 * build a unique name. No need to CopyOnWrite the directory
3928 * since it is not connected to tree in BK or RO volume and
3929 * won't be visible there.
3931 if (class == vLarge) {
3935 /* Remove and recreate the ".." entry in this orphaned directory */
3936 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
3937 salvinfo->vnodeInfo[class].inodes[v],
3938 &salvinfo->VolumeChanged);
3940 pa.Unique = LFUnique;
3941 assert(Delete(&dh, "..") == 0);
3942 assert(Create(&dh, "..", &pa) == 0);
3944 /* The original parent's link count was decremented above.
3945 * Here we increment the new parent's link count.
3947 pv = vnodeIdToBitNumber(LFVnode);
3948 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
3952 /* Go to the root dir and add this entry. The link count of the
3953 * root dir was incremented when ".." was created. Try 10 times.
3955 for (j = 0; j < 10; j++) {
3956 pa.Vnode = ThisVnode;
3957 pa.Unique = ThisUnique;
3959 (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
3961 vLarge) ? "__ORPHANDIR__" :
3962 "__ORPHANFILE__"), ThisVnode,
3965 CopyOnWrite(salvinfo, &rootdir);
3966 code = Create(&rootdir.dirHandle, npath, &pa);
3970 ThisUnique += 50; /* Try creating a different file */
3973 Log("Attaching orphaned %s to volume's root dir as %s\n",
3974 ((class == vLarge) ? "directory" : "file"), npath);
3976 } /* for each vnode in the class */
3977 } /* for each class of vnode */
3979 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
3981 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
3983 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
3986 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
3989 DFlush(); /* Flush the changes */
3990 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
3991 Log("Cannot attach orphaned files and directories: Root directory not found\n");
3992 orphans = ORPH_IGNORE;
3995 /* Write out all changed vnodes. Orphaned files and directories
3996 * will get removed here also (if requested).
3998 for (class = 0; class < nVNODECLASSES; class++) {
3999 int nVnodes = salvinfo->vnodeInfo[class].nVnodes;
4000 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
4001 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
4002 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
4003 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
4004 for (i = 0; i < nVnodes; i++) {
4005 struct VnodeEssence *vnp = &vnodes[i];
4006 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
4008 /* If the vnode is good but is unclaimed (not listed in
4009 * any directory entries), then it is orphaned.
4012 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
4013 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
4017 if (vnp->changed || vnp->count) {
4020 IH_IREAD(salvinfo->vnodeInfo[class].handle,
4021 vnodeIndexOffset(vcp, vnodeNumber),
4022 (char *)&vnode, sizeof(vnode));
4023 assert(nBytes == sizeof(vnode));
4025 vnode.parent = vnp->parent;
4026 oldCount = vnode.linkCount;
4027 vnode.linkCount = vnode.linkCount - vnp->count;
4030 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4032 if (!vnp->todelete) {
4033 /* Orphans should have already been attached (if requested) */
4034 assert(orphans != ORPH_ATTACH);
4035 oblocks += vnp->blockCount;
4038 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4040 BlocksInVolume -= vnp->blockCount;
4042 if (VNDISK_GET_INO(&vnode)) {
4044 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4047 memset(&vnode, 0, sizeof(vnode));
4049 } else if (vnp->count) {
4051 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4054 vnode.modeBits = vnp->modeBits;
4057 vnode.dataVersion++;
4060 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4061 vnodeIndexOffset(vcp, vnodeNumber),
4062 (char *)&vnode, sizeof(vnode));
4063 assert(nBytes == sizeof(vnode));
4065 salvinfo->VolumeChanged = 1;
4069 if (!Showmode && ofiles) {
4070 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4072 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
/* Release the per-vnode name strings of both classes. */
4076 for (class = 0; class < nVNODECLASSES; class++) {
4077 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4078 for (i = 0; i < vip->nVnodes; i++)
4079 if (vip->vnodes[i].name)
4080 free(vip->vnodes[i].name);
4087 /* Set correct resource utilization statistics */
4088 volHeader.filecount = FilesInVolume;
4089 volHeader.diskused = BlocksInVolume;
4091 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
4092 if (volHeader.uniquifier < (maxunique + 1)) {
4094 Log("Volume uniquifier is too low; fixed\n");
4095 /* Plus 2,000 in case there are workstations out there with
4096 * cached vnodes that have since been deleted
4098 volHeader.uniquifier = (maxunique + 1 + 2000);
4102 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4103 "Only use this salvaged volume to copy data to another volume; "
4104 "do not continue to use this volume (%lu) as-is.\n",
4105 afs_printable_uint32_lu(vid));
4108 #ifdef FSSYNC_BUILD_CLIENT
4109 if (!Testing && salvinfo->VolumeChanged) {
4110 afs_int32 fsync_code;
4112 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4114 Log("Error trying to tell the fileserver to break callbacks for "
4115 "changed volume %lu; error code %ld\n",
4116 afs_printable_uint32_lu(vid),
4117 afs_printable_int32_ld(fsync_code));
4119 salvinfo->VolumeChanged = 0;
4122 #endif /* FSSYNC_BUILD_CLIENT */
4124 /* Turn off the inUse bit; the volume's been salvaged! */
4125 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4126 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4127 volHeader.inService = 1; /* allow service again */
4128 volHeader.needsCallback = (salvinfo->VolumeChanged != 0);
4129 volHeader.dontSalvage = DONT_SALVAGE;
4130 salvinfo->VolumeChanged = 0;
/* Persist the repaired header; a short write is fatal. */
4132 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4133 assert(nBytes == sizeof(volHeader));
4136 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
4137 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
4138 FilesInVolume, BlocksInVolume);
4141 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4142 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
/*
 * ClearROInUseBit: mark the volume described by summary as salvaged.
 *
 * Reads the volume header through summary->volumeInfoHandle, clears inUse and
 * needsSalvaged, sets inService and dontSalvage (DONT_SALVAGE), and writes the
 * header back. A short read, a short write or a bad magic number trips an
 * assert.
 */
4148 ClearROInUseBit(struct VolumeSummary *summary)
4150 IHandle_t *h = summary->volumeInfoHandle;
4151 afs_sfsize_t nBytes;
4153 VolumeDiskData volHeader;
4155 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4156 assert(nBytes == sizeof(volHeader));
4157 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
4158 volHeader.inUse = 0;
4159 volHeader.needsSalvaged = 0;
4160 volHeader.inService = 1;
4161 volHeader.dontSalvage = DONT_SALVAGE;
4163 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4164 assert(nBytes == sizeof(volHeader));
4169 * Possibly delete the volume.
4171 * deleteMe - Always do so, only a partial volume.
/*
 * MaybeZapVolume: delete a volume that cannot be salvaged, or abort.
 *
 * When the volume is read-only (readOnly(isp)) or deleteMe is set, and the
 * volume summary carries a header file name, the reason is logged, the volume
 * disk header is destroyed with VDestroyVolumeDiskHeader() and the header
 * file itself is unlinked (ENOENT is tolerated). Otherwise, when check is 0,
 * the failure is logged using message and the salvage is aborted.
 */
4174 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4175 char *message, int deleteMe, int check)
4177 if (readOnly(isp) || deleteMe) {
4178 if (isp->volSummary && isp->volSummary->fileName) {
4181 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
4183 Log("It will be deleted on this server (you may find it elsewhere)\n");
4186 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
4188 Log("it will be deleted instead. It should be recloned.\n");
/* NOTE(review): sprintf is unbounded; the declaration of path is not visible
 * here, so confirm it is large enough for fileSysPath + "/" + fileName. */
4193 sprintf(path, "%s/%s", salvinfo->fileSysPath, isp->volSummary->fileName);
4195 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4197 Log("Error %ld destroying volume disk header for volume %lu\n",
4198 afs_printable_int32_ld(code),
4199 afs_printable_uint32_lu(isp->volumeId));
4202 /* make sure we actually delete the fileName file; ENOENT
4203 * is fine, since VDestroyVolumeDiskHeader probably already
4205 if (unlink(path) && errno != ENOENT) {
4206 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4210 } else if (!check) {
4211 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
4213 Abort("Salvage of volume %u aborted\n", isp->volumeId);
4217 #ifdef AFS_DEMAND_ATTACH_FS
4219 * Locks a volume on disk for salvaging.
4221 * @param[in] volumeId volume ID to lock
4223 * @return operation status
4225 * @retval -1 volume lock raced with a fileserver restart; all volumes must
4226 * be checked out and locked again
/*
 * LockVolume: take the on-disk lock for volumeId and mark it as in use by
 * this program.
 *
 * Visible steps:
 *  - obtain the lock type from VVolLockType() and lock the volume without
 *    blocking (VLockVolumeByIdNB); EBUSY and other errors abort the salvage
 *    with distinct messages;
 *  - confirm with FSYNC_VerifyCheckout() that the volume is still checked
 *    out; SYNC_DENIED means the checkout must be retried, any other non-OK
 *    code aborts;
 *  - read the volume header and, if it is readable with a valid magic, set
 *    inUse = programType and write it back. The write is asserted.
 * NOTE(review): that IH_IWRITE sits inside assert(), so it would vanish in a
 * build that defines NDEBUG.
 */
4231 LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
4236 /* should always be WRITE_LOCK, but keep the lock-type logic all
4237 * in one place, in VVolLockType. Params will be ignored, but
4238 * try to provide what we're logically doing. */
4239 locktype = VVolLockType(V_VOLUPD, 1);
4241 code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
4243 if (code == EBUSY) {
4244 Abort("Someone else appears to be using volume %lu; Aborted\n",
4245 afs_printable_uint32_lu(volumeId));
4247 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4248 afs_printable_int32_ld(code),
4249 afs_printable_uint32_lu(volumeId));
4252 code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPathName, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4253 if (code == SYNC_DENIED) {
4254 /* need to retry checking out volumes */
4257 if (code != SYNC_OK) {
4258 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4259 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4262 /* set inUse = programType in the volume header to ensure that nobody
4263 * tries to use this volume again without salvaging, if we somehow crash
4264 * or otherwise exit before finishing the salvage.
4268 struct VolumeHeader header;
4269 struct VolumeDiskHeader diskHeader;
4270 struct VolumeDiskData volHeader;
4272 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
4277 DiskToVolumeHeader(&header, &diskHeader);
4279 IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
4280 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4281 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4287 volHeader.inUse = programType;
4289 /* If we can't re-write the header, bail out and error. We don't
4290 * assert when reading the header, since it's possible the
4291 * header isn't really there (when there's no data associated
4292 * with the volume; we just delete the vol header file in that
4293 * case). But if it's there enough that we can read it, but
4294 * somehow we cannot write to it to signify we're salvaging it,
4295 * we've got a big problem and we cannot continue. */
4296 assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
4303 #endif /* AFS_DEMAND_ATTACH_FS */
/*
 * AskOffline: ask the fileserver to take volumeId offline for salvaging.
 *
 * Issues FSYNC_VolOp(FSYNC_VOL_OFF) up to three times. SYNC_DENIED and
 * SYNC_BAD_COMMAND are logged and abort the salvage; for other failures the
 * attempt is logged and the FSYNC client connection is torn down
 * (FSYNC_clientFinis) before the next try. If the final code is still not
 * SYNC_OK the salvage is aborted.
 */
4306 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
4311 memset(&res, 0, sizeof(res));
4313 for (i = 0; i < 3; i++) {
4314 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4315 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4317 if (code == SYNC_OK) {
4319 } else if (code == SYNC_DENIED) {
4320 #ifdef DEMAND_ATTACH_ENABLE
4321 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4323 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4325 Abort("Salvage aborted\n");
4326 } else if (code == SYNC_BAD_COMMAND) {
4327 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
4329 #ifdef DEMAND_ATTACH_ENABLE
4330 Log("AskOffline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
4332 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4334 Abort("Salvage aborted\n");
4337 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4338 FSYNC_clientFinis();
4342 if (code != SYNC_OK) {
4343 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4344 Abort("Salvage aborted\n");
/*
 * AskOnline: ask the fileserver to bring volumeId back online.
 *
 * Issues FSYNC_VolOp(FSYNC_VOL_ON) up to three times, logging each kind of
 * failure. Unlike AskOffline, no visible branch aborts the salvage.
 */
4349 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4353 for (i = 0; i < 3; i++) {
4354 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4355 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4357 if (code == SYNC_OK) {
4359 } else if (code == SYNC_DENIED) {
4360 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4361 } else if (code == SYNC_BAD_COMMAND) {
4362 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4364 #ifdef DEMAND_ATTACH_ENABLE
4365 Log("AskOnline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
4367 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
/* NOTE(review): the retry message below says "offline" although this is the
 * online request; it looks copied from AskOffline. */
4372 Log("AskOnline: request for fileserver to take volume offline failed; trying again...\n");
4373 FSYNC_clientFinis();
/*
 * CopyInode: copy the contents of inode1 into inode2 on the given device.
 *
 * Both inodes are opened through handles initialised with rwvolume; data is
 * moved in buf-sized chunks with FDH_READ / FDH_WRITE until FDH_READ returns
 * 0, then both descriptors are closed with FDH_REALLYCLOSE. Failures are
 * handled with assert() only.
 * NOTE(review): srcFdP is asserted non-NULL but destFdP is used unchecked,
 * and the FDH_WRITE call sits inside assert(), so a build that defines
 * NDEBUG would drop the write itself.
 */
4380 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4382 /* Volume parameter is passed in case iopen is upgraded in future to
4383 * require a volume Id to be passed
4386 IHandle_t *srcH, *destH;
4387 FdHandle_t *srcFdP, *destFdP;
4390 IH_INIT(srcH, device, rwvolume, inode1);
4391 srcFdP = IH_OPEN(srcH);
4392 assert(srcFdP != NULL);
4393 IH_INIT(destH, device, rwvolume, inode2);
4394 destFdP = IH_OPEN(destH);
4395 while ((nBytes = FDH_READ(srcFdP, buf, sizeof(buf))) > 0)
4396 assert(FDH_WRITE(destFdP, buf, nBytes) == nBytes);
4397 assert(nBytes == 0);
4398 FDH_REALLYCLOSE(srcFdP);
4399 FDH_REALLYCLOSE(destFdP);
/*
 * PrintInodeList: debugging aid that logs every record in the inode file.
 *
 * The size of salvinfo->inodeFd is taken from afs_fstat(), the whole file is
 * read into a malloc'ed buffer and each struct ViceInodeInfo is logged with
 * its inode number, link count, byte count and four parameters. Every step
 * is checked with assert().
 * NOTE(review): the read() is inside assert(), so it would not run in a
 * build that defines NDEBUG.
 */
4406 PrintInodeList(struct SalvInfo *salvinfo)
4408 struct ViceInodeInfo *ip;
4409 struct ViceInodeInfo *buf;
4410 struct afs_stat status;
4414 assert(afs_fstat(salvinfo->inodeFd, &status) == 0);
4415 buf = (struct ViceInodeInfo *)malloc(status.st_size);
4416 assert(buf != NULL);
4417 nInodes = status.st_size / sizeof(struct ViceInodeInfo);
4418 assert(read(salvinfo->inodeFd, buf, status.st_size) == status.st_size);
4419 for (ip = buf; nInodes--; ip++) {
4420 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
4421 PrintInode(stmp, ip->inodeNumber), ip->linkCount,
4422 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
4423 ip->u.param[2], ip->u.param[3]);
/*
 * PrintInodeSummary: debugging aid that logs one line per entry of
 * salvinfo->inodeSummary (nVolumesInInodeFile entries): volume ids, index,
 * inode counts and max uniquifier.
 */
4429 PrintInodeSummary(struct SalvInfo *salvinfo)
4432 struct InodeSummary *isp;
4434 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4435 isp = &salvinfo->inodeSummary[i];
4436 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/*
 * PrintVolumeSummary: debugging aid that logs the header file name of each
 * of the salvinfo->nVolumes entries starting at salvinfo->volumeSummaryp.
 */
4441 PrintVolumeSummary(struct SalvInfo *salvinfo)
4444 struct VolumeSummary *vsp;
4446 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; vsp++, i++) {
4447 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
/*
 * Body of the process-forking wrapper (its signature line is not visible in
 * this excerpt; the comment below calls it Fork).
 *
 * On NT the function must never run. Elsewhere, under demand-attach, a
 * salvageserver child (f == 0) re-establishes its fileserver connection via
 * VChildProcReconnectFS_r() when the FSSYNC client is built in.
 */
4457 assert(0); /* Fork is never executed in the NT code path */
4461 #ifdef AFS_DEMAND_ATTACH_FS
4462 if ((f == 0) && (programType == salvageServer)) {
4463 /* we are a salvageserver child */
4464 #ifdef FSSYNC_BUILD_CLIENT
4465 VChildProcReconnectFS_r();
4467 #ifdef SALVSYNC_BUILD_CLIENT
4471 #endif /* AFS_DEMAND_ATTACH_FS */
4472 #endif /* !AFS_NT40_ENV */
/*
 * Body of the salvager's exit wrapper (signature line not visible here;
 * presumably Exit(code) -- confirm).
 *
 * Under demand-attach, a salvageServer process has SALVSYNC / FSSYNC specific
 * shutdown steps (the calls themselves are not visible). A thread other than
 * main_thread ends with pthread_exit(code) instead of exiting the process.
 */
4482 #ifdef AFS_DEMAND_ATTACH_FS
4483 if (programType == salvageServer) {
4484 #ifdef SALVSYNC_BUILD_CLIENT
4487 #ifdef FSSYNC_BUILD_CLIENT
4491 #endif /* AFS_DEMAND_ATTACH_FS */
4494 if (main_thread != pthread_self())
4495 pthread_exit((void *)code);
/*
 * Body of the child-reaping helper (signature line not visible here; prog is
 * the program name used in the message).
 *
 * Waits for any child with wait(), logs when the child dumped core, and
 * treats death by signal or a non-zero exit status as a failure. The action
 * taken on failure is not visible in this excerpt.
 */
4508 pid = wait(&status);
4510 if (WCOREDUMP(status))
4511 Log("\"%s\" core dumped!\n", prog);
4512 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp: format clock as local time, "MM/DD/YYYY HH:MM:SS" or
 * "MM/DD/YYYY HH:MM".
 *
 * Which format is used presumably depends on precision; the selecting
 * condition is not visible here. The text is written into a 20-byte static
 * buffer, so each call overwrites the previous result and the function is
 * not reentrant. The 19-character long format plus its NUL exactly fills the
 * buffer.
 * NOTE(review): the localtime() result is passed to strftime() without a
 * NULL check.
 */
4518 TimeStamp(time_t clock, int precision)
4521 static char timestamp[20];
4522 lt = localtime(&clock);
4524 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M:%S", lt);
4526 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M", lt);
/*
 * CheckLogFile: open the salvager log at log_path.
 *
 * The existing log is renamed to "<log_path>.old" and a new one is opened in
 * append mode; if that fails the code falls back to stdout (per the comment
 * below).
 * NOTE(review): the ".old" name is built with strcpy/strcat into an
 * AFSDIR_PATH_MAX buffer without a length check.
 */
4531 CheckLogFile(char * log_path)
4533 char oldSlvgLog[AFSDIR_PATH_MAX];
4535 #ifndef AFS_NT40_ENV
4542 strcpy(oldSlvgLog, log_path);
4543 strcat(oldSlvgLog, ".old");
4545 renamefile(log_path, oldSlvgLog);
4546 logFile = afs_fopen(log_path, "a");
4548 if (!logFile) { /* still nothing, use stdout */
4552 #ifndef AFS_NAMEI_ENV
4553 AFS_DEBUG_IOPS_LOG(logFile);
4558 #ifndef AFS_NT40_ENV
/*
 * TimeStampLogFile: give the current log a timestamped alias.
 *
 * Builds "<log_path>.YYYY-MM-DD.HH:MM:SS" from the local time and hard-links
 * log_path to it. Failure of link() is deliberately ignored.
 */
4560 TimeStampLogFile(char * log_path)
4562 char stampSlvgLog[AFSDIR_PATH_MAX];
4567 lt = localtime(&now);
4568 (void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
4569 "%s.%04d-%02d-%02d.%02d:%02d:%02d",
4570 log_path, lt->tm_year + 1900,
4571 lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
4574 /* try to link the logfile to a timestamped filename */
4575 /* if it fails, oh well, nothing we can do */
4576 link(log_path, stampSlvgLog);
/*
 * Body of the log-display helper (signature line not visible here).
 *
 * Outside NT it can print that the log cannot be shown because syslog is in
 * use (the guarding condition is not visible). Otherwise it opens
 * AFSDIR_SERVER_SLVGLOG_FILEPATH for reading, reports when that fails, and
 * reads it line by line with fgets().
 */
4585 #ifndef AFS_NT40_ENV
4587 printf("Can't show log since using syslog.\n");
4598 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
4601 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
4604 while (fgets(line, sizeof(line), logFile))
/*
 * Log: printf-style logging for the salvager.
 *
 * The message is formatted into tmp with afs_vsnprintf (bounded by
 * sizeof tmp). Outside NT it can be sent to syslog at LOG_INFO; otherwise it
 * is written to logFile prefixed with TimeStamp() of the current time. The
 * condition choosing between the two is not visible in this excerpt.
 */
4611 Log(const char *format, ...)
4617 va_start(args, format);
4618 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4620 #ifndef AFS_NT40_ENV
4622 syslog(LOG_INFO, "%s", tmp);
4626 gettimeofday(&now, 0);
4627 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort: printf-style fatal-error reporting.
 *
 * Formats the message like Log() and emits it to syslog (LOG_INFO) or to
 * logFile, here without a timestamp prefix. What happens after the message
 * is written is not visible in this excerpt.
 */
4633 Abort(const char *format, ...)
4638 va_start(args, format);
4639 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4641 #ifndef AFS_NT40_ENV
4643 syslog(LOG_INFO, "%s", tmp);
4647 fprintf(logFile, "%s", tmp);
/*
 * ToString: allocate strlen(s) + 1 bytes for a copy of s.
 *
 * The copy itself and the return statement are not visible in this excerpt;
 * presumably the caller receives the heap copy and owns it -- confirm.
 */
4659 ToString(const char *s)
4662 p = (char *)malloc(strlen(s) + 1);
4668 /* Remove the FORCESALVAGE file */
4670 RemoveTheForce(char *path)
4673 struct afs_stat force; /* so we can use afs_stat to find it */
4674 strcpy(target,path);
4675 strcat(target,"/FORCESALVAGE");
4676 if (!Testing && ForceSalvage) {
4677 if (afs_stat(target,&force) == 0) unlink(target);
4681 #ifndef AFS_AIX32_ENV
4683 * UseTheForceLuke - see if we can use the force
4686 UseTheForceLuke(char *path)
4688 struct afs_stat force;
4690 strcpy(target,path);
4691 strcat(target,"/FORCESALVAGE");
4693 return (afs_stat(target, &force) == 0);
4697 * UseTheForceLuke - see if we can use the force
4700 * The VRMIX fsck will not muck with the filesystem it is supposedly
4701 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4702 * muck directly with the root inode, which is within the normal
4704 * ListViceInodes() has a side effect of setting ForceSalvage if
4705 * it detects a need, based on root inode examination.
4708 UseTheForceLuke(char *path)
4711 return 0; /* sorry OB1 */
4716 /* NT support routines */
4718 static char execpathname[MAX_PATH];
4720 nt_SalvagePartition(char *partName, int jobn)
4725 if (!*execpathname) {
4726 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
4727 if (!n || n == 1023)
4730 job.cj_magic = SALVAGER_MAGIC;
4731 job.cj_number = jobn;
4732 (void)strcpy(job.cj_part, partName);
4733 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage: child-side setup for a per-partition salvage job
 * (NT only).
 *
 * datap / len describe the job record received from the parent. The record
 * is checked for the expected size (sizeof(childJob_t)) and magic
 * (SALVAGER_MAGIC); the action taken on a mismatch is not visible here. A
 * job-specific log file, named AFSDIR_SERVER_SLVGLOG_FILEPATH plus a numeric
 * suffix, is then opened for writing.
 * NOTE(review): logname is built with an unbounded sprintf into an
 * AFSDIR_PATH_MAX buffer.
 */
4738 nt_SetupPartitionSalvage(void *datap, int len)
4740 childJob_t *jobp = (childJob_t *) datap;
4741 char logname[AFSDIR_PATH_MAX];
4743 if (len != sizeof(childJob_t))
4745 if (jobp->cj_magic != SALVAGER_MAGIC)
4750 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
4752 logFile = afs_fopen(logname, "w");
4760 #endif /* AFS_NT40_ENV */