2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
89 #include <afs/procmgmt.h>
93 #include <sys/param.h>
97 #endif /* ITIMER_REAL */
103 #include <sys/stat.h>
108 #include <WINNT/afsevent.h>
111 #define WCOREDUMP(x) ((x) & 0200)
114 #include <afs/afsint.h>
115 #include <afs/afs_assert.h>
116 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
117 #if defined(AFS_VFSINCL_ENV)
118 #include <sys/vnode.h>
120 #include <sys/fs/ufs_inode.h>
122 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
123 #include <ufs/ufs/dinode.h>
124 #include <ufs/ffs/fs.h>
126 #include <ufs/inode.h>
129 #else /* AFS_VFSINCL_ENV */
131 #include <ufs/inode.h>
132 #else /* AFS_OSF_ENV */
133 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_ARM_DARWIN_ENV)
134 #include <sys/inode.h>
137 #endif /* AFS_VFSINCL_ENV */
138 #endif /* AFS_SGI_ENV */
141 #include <sys/lockf.h>
145 #include <checklist.h>
147 #if defined(AFS_SGI_ENV)
152 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
155 #include <sys/mnttab.h>
156 #include <sys/mntent.h>
161 #endif /* AFS_SGI_ENV */
162 #endif /* AFS_HPUX_ENV */
167 #include <afs/osi_inode.h>
171 #include <afs/afsutil.h>
172 #include <afs/fileutil.h>
173 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
181 #include <afs/afssyscalls.h>
185 #include "partition.h"
186 #include "daemon_com.h"
188 #include "volume_inline.h"
189 #include "salvsync.h"
190 #include "viceinode.h"
192 #include "volinodes.h" /* header magic number, etc. stuff */
193 #include "vol-salvage.h"
195 #include "vol_internal.h"
197 #include <afs/prs_fs.h>
199 #ifdef FSSYNC_BUILD_CLIENT
200 #include "vg_cache.h"
207 /*@+fcnmacros +macrofcndecl@*/
210 extern off64_t afs_lseek(int FD, off64_t O, int F);
211 #endif /*S_SPLINT_S */
212 #define afs_lseek(FD, O, F) lseek64(FD, (off64_t) (O), F)
213 #define afs_stat stat64
214 #define afs_fstat fstat64
215 #define afs_open open64
216 #define afs_fopen fopen64
217 #else /* !O_LARGEFILE */
219 extern off_t afs_lseek(int FD, off_t O, int F);
220 #endif /*S_SPLINT_S */
221 #define afs_lseek(FD, O, F) lseek(FD, (off_t) (O), F)
222 #define afs_stat stat
223 #define afs_fstat fstat
224 #define afs_open open
225 #define afs_fopen fopen
226 #endif /* !O_LARGEFILE */
227 /*@=fcnmacros =macrofcndecl@*/
230 extern void *calloc();
232 static char *TimeStamp(time_t clock, int precision);
235 int debug; /* -d flag */
236 extern int Testing; /* -n flag */
237 int ListInodeOption; /* -i flag */
238 int ShowRootFiles; /* -r flag */
239 int RebuildDirs; /* -sal flag */
240 int Parallel = 4; /* -para X flag */
241 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
242 int forceR = 0; /* -b flag */
243 int ShowLog = 0; /* -showlog flag */
244 int ShowSuid = 0; /* -showsuid flag */
245 int ShowMounts = 0; /* -showmounts flag */
246 int orphans = ORPH_IGNORE; /* -orphans option */
251 int useSyslog = 0; /* -syslog flag */
252 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
261 #define MAXPARALLEL 32
263 int OKToZap; /* -o flag */
264 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
265 * in the volume header */
267 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
269 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
272 * information that is 'global' to a particular salvage job.
275 Device fileSysDevice; /**< The device number of the current partition
277 char fileSysPath[8]; /**< The path of the mounted partition currently
278 * being salvaged, i.e. the directory containing
279 * the volume headers */
/* NOTE(review): fileSysPath above is 8 bytes, i.e. a 7-character path plus
 * the terminating NUL. SalvageFileSys1 fills it either with strlcpy (which
 * would silently truncate a longer path) or with sprintf plus a trailing
 * separator (unbounded). Confirm that no partition path can be longer. */
280 char *fileSysPathName; /**< NT needs this to make name pretty log. */
281 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
282 int VGLinkH_cnt; /**< # of references to link handle. */
283 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
286 char *fileSysDeviceName; /**< The block device where the file system being
287 * salvaged was mounted */
288 char *filesysfulldev; /**< Set by SalvageFileSys1 to the wpath buffer it passes to get_DevName() */
290 int VolumeChanged; /**< Set by any routine which would change the
291 * volume in a way which would require callbacks
292 * to be broken if the volume was put back
293 * on line by an active file server */
295 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
296 * header dealt with */
298 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
299 int inodeFd; /**< File descriptor for inode file */
301 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
302 int nVolumes; /**< Number of volumes (read-write and read-only)
303 * in volume summary */
304 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
307 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
308 * vnodes in the volume that
309 * we are currently looking
311 int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
312 * to contact the fileserver over FSYNC */
319 /* Forward declarations */
320 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
321 static int AskVolumeSummary(struct SalvInfo *salvinfo,
322 VolumeId singleVolumeNumber);
324 #ifdef AFS_DEMAND_ATTACH_FS
325 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
326 #endif /* AFS_DEMAND_ATTACH_FS */
328 /* Uniquifier stored in the Inode */
333 return (u & 0x3fffff);
335 #if defined(AFS_SGI_EXMAG)
336 return (u & SGI_UNIQMASK);
339 #endif /* AFS_SGI_EXMAG */
346 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
348 return 0; /* otherwise may be transient, e.g. EMFILE */
353 char *save_args[MAX_ARGS];
355 extern pthread_t main_thread;
356 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
360 * Get the salvage lock if not already held. Hold until process exits.
362 * @param[in] locktype READ_LOCK or WRITE_LOCK
/*
 * Common implementation behind ObtainSalvageLock() and
 * ObtainSharedSalvageLock(): initializes a VLockFile on
 * AFSDIR_SERVER_SLVGLOCK_FILEPATH and requests a lock of the given type
 * through VLockFileLock(). The two string fragments below belong to the
 * diagnostics for "another salvager is running" and for any other failure
 * to acquire the lock.
 */
365 _ObtainSalvageLock(int locktype)
367 struct VLockFile salvageLock;
372 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
374 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
377 "salvager: There appears to be another salvager running! "
382 "salvager: Error %d trying to acquire salvage lock! "
/* Take the salvage lock in WRITE_LOCK mode via _ObtainSalvageLock(). */
388 ObtainSalvageLock(void)
390 _ObtainSalvageLock(WRITE_LOCK);
/* Take the salvage lock in READ_LOCK mode via _ObtainSalvageLock(). */
393 ObtainSharedSalvageLock(void)
395 _ObtainSalvageLock(READ_LOCK);
399 #ifdef AFS_SGI_XFS_IOPS_ENV
400 /* Check if the given partition is mounted. For XFS, the root inode is not a
401 * constant. So we check the hard way.
/*
 * Report whether `part` is the mount directory of an entry in the system's
 * mounted-filesystem table (MOUNTED).
 *
 * @param[in] part  mount point path to look for (compared with strcmp
 *                  against each entry's mnt_dir)
 *
 * @return 1 if a matching entry exists, 0 if the table was exhausted
 *         without a match
 *
 * The function must return 0 in the "not found" case; returning 1
 * unconditionally would make the check meaningless for callers.
 */
IsPartitionMounted(char *part)
{
    FILE *mntfp;
    struct mntent *mntent;

    osi_Assert(mntfp = setmntent(MOUNTED, "r"));
    /* getmntent() returns NULL at end of table, so mntent is non-NULL
     * after the loop only when we broke out on a match. */
    while ((mntent = getmntent(mntfp)) != NULL) {
	if (!strcmp(part, mntent->mnt_dir))
	    break;
    }
    endmntent(mntfp);

    return mntent ? 1 : 0;
}
419 /* Check if the given inode is the root of the filesystem. */
420 #ifndef AFS_SGI_XFS_IOPS_ENV
/*
 * @param[in] status  stat information for the file being examined
 * @return nonzero when status->st_ino equals ROOTINODE
 */
422 IsRootInode(struct afs_stat *status)
425 * The root inode is not a fixed value in XFS partitions. So we need to
426 * see if the partition is in the list of mounted partitions. This only
427 * affects the SalvageFileSys path, so we check there.
429 return (status->st_ino == ROOTINODE);
434 #ifndef AFS_NAMEI_ENV
435 /* We don't want to salvage big files filesystems, since we can't put volumes on
/*
 * Read the superblock of the device backing a partition and log a message
 * when it cannot be read or when IsBigFilesFileSystem() reports a "big
 * files" filesystem; in both cases the message says the partition is not
 * salvaged.
 *
 * @param[in] mountPoint  partition mount point (presumably the %s argument
 *                        of the log messages; the argument lines are not
 *                        visible here)
 * @param[in] devName     device name, with or without a leading "/dev/"
 *
 * NOTE(review): the sprintf/strcpy into `name` below are unbounded; the
 * size of `name` is not visible in this view -- confirm it can hold
 * "/dev/" plus the longest devName.
 */
439 CheckIfBigFilesFS(char *mountPoint, char *devName)
441 struct superblock fs;
/* strncmp() != 0 means devName lacks the "/dev/" prefix, so add it */
444 if (strncmp(devName, "/dev/", 5)) {
445 (void)sprintf(name, "/dev/%s", devName);
447 (void)strcpy(name, devName);
450 if (ReadSuper(&fs, name) < 0) {
451 Log("Unable to read superblock. Not salvaging partition %s.\n",
455 if (IsBigFilesFileSystem(&fs)) {
456 Log("Partition %s is a big files filesystem, not salvaging.\n",
466 #define HDSTR "\\Device\\Harddisk"
467 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
/*
 * Decide whether two partitions live on the same disk by resolving both
 * device names with QueryDosDevice() and comparing the results
 * case-insensitively (up to RES_LEN - 1 characters).
 *
 * A result that does not start with HDSTR ("\Device\Harddisk") triggers a
 * warning log; `dowarn` is static, so the warning is presumably issued only
 * once per process (the lines that test and clear it are not visible here).
 *
 * @return nonzero when both names resolve to the same string
 */
469 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
475 static int dowarn = 1;
477 if (!QueryDosDevice(p1->devName, res1, RES_LEN - 1))
479 if (strncmp(res1, HDSTR, HDLEN)) {
482 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
483 res1, HDSTR, p1->devName);
486 if (!QueryDosDevice(p2->devName, res2, RES_LEN - 1))
488 if (strncmp(res2, HDSTR, HDLEN)) {
491 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
492 res2, HDSTR, p2->devName);
496 return (0 == _strnicmp(res1, res2, RES_LEN - 1));
499 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
502 /* This assumes that two partitions with the same device number divided by
503 * PartsPerDisk are on the same disk.
/*
 * Schedule the salvage of one partition, running several partitions in
 * parallel child processes.
 *
 * Each call with a non-NULL partP wraps it in a `struct job`. Jobs are kept
 * in the static jobs[] array, one slot per disk: a partition on a disk that
 * is already being salvaged (SameDisk) is chained behind the running job
 * through `nextjob` and started when its predecessor finishes. When the
 * number of running jobs reaches `Parallel`, the function wait()s for a
 * child before starting another.
 *
 * Calling with partP == NULL waits for every outstanding job and then,
 * unless syslog is in use, reads each per-job log file
 * (AFSDIR_SERVER_SLVGLOG_FILEPATH.<job number>) and unlinks it.
 *
 * When `debug` is set or Parallel == 1 the partition is handed straight to
 * SalvageFileSys().
 *
 * @param[in] partP  partition to salvage, or NULL to wait for all jobs
 */
506 SalvageFileSysParallel(struct DiskPartition64 *partP)
509 struct DiskPartition64 *partP;
510 int pid; /* Pid for this job */
511 int jobnumb; /* Log file job number */
512 struct job *nextjob; /* Next partition on disk to salvage */
514 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
515 struct job *thisjob = 0;
516 static int numjobs = 0;
517 static int jobcount = 0;
523 char logFileName[256];
527 /* We have a partition to salvage. Copy it into thisjob */
528 thisjob = (struct job *)malloc(sizeof(struct job));
530 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
533 memset(thisjob, 0, sizeof(struct job));
534 thisjob->partP = partP;
535 thisjob->jobnumb = jobcount;
537 } else if (jobcount == 0) {
538 /* We are asking to wait for all jobs (partp == 0), yet we never
541 Log("No file system partitions named %s* found; not salvaged\n",
542 VICE_PARTITION_PREFIX);
546 if (debug || Parallel == 1) {
548 SalvageFileSys(thisjob->partP, 0);
555 /* Check to see if thisjob is for a disk that we are already
556 * salvaging. If it is, link it in as the next job to do. The
557 * jobs array has 1 entry per disk being salvaged. numjobs is
558 * the total number of disks currently being salvaged. In
559 * order to keep the jobs array compact, when a disk is
560 * completed, the highest element in the jobs array is moved
561 * down to now open slot.
563 for (j = 0; j < numjobs; j++) {
564 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
565 /* On same disk, add it to this list and return */
566 thisjob->nextjob = jobs[j]->nextjob;
567 jobs[j]->nextjob = thisjob;
574 /* Loop until we start thisjob or until all existing jobs are finished */
575 while (thisjob || (!partP && (numjobs > 0))) {
576 startjob = -1; /* No new job to start */
578 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
579 /* Either the max jobs are running or we have to wait for all
580 * the jobs to finish. In either case, we wait for at least one
581 * job to finish. When it's done, clean up after it.
583 pid = wait(&wstatus);
584 osi_Assert(pid != -1);
585 for (j = 0; j < numjobs; j++) { /* Find which job it is */
586 if (pid == jobs[j]->pid)
589 osi_Assert(j < numjobs);
590 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
591 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
594 numjobs--; /* job no longer running */
595 oldjob = jobs[j]; /* remember */
596 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
597 free(oldjob); /* free the old job */
599 /* If there is another partition on the disk to salvage, then
600 * say we will start it (startjob). If not, then put thisjob there
601 * and say we will start it.
603 if (jobs[j]) { /* Another partition to salvage */
604 startjob = j; /* Will start it */
605 } else { /* There is not another partition to salvage */
607 jobs[j] = thisjob; /* Add thisjob */
609 startjob = j; /* Will start it */
611 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
612 startjob = -1; /* Don't start it - already running */
616 /* We don't have to wait for a job to complete */
618 jobs[numjobs] = thisjob; /* Add this job */
620 startjob = numjobs; /* Will start it */
624 /* Start up a new salvage job on a partition in job slot "startjob" */
625 if (startjob != -1) {
627 Log("Starting salvage of file system partition %s\n",
628 jobs[startjob]->partP->name);
630 /* For NT, we not only fork, but re-exec the salvager. Pass in the
631 * commands and pass the child job number via the data path.
634 nt_SalvagePartition(jobs[startjob]->partP->name,
635 jobs[startjob]->jobnumb);
636 jobs[startjob]->pid = pid;
641 jobs[startjob]->pid = pid;
647 for (fd = 0; fd < 16; fd++)
654 openlog("salvager", LOG_PID, useSyslogFacility);
658 (void)afs_snprintf(logFileName, sizeof logFileName,
660 AFSDIR_SERVER_SLVGLOG_FILEPATH,
661 jobs[startjob]->jobnumb);
662 logFile = afs_fopen(logFileName, "w");
667 SalvageFileSys1(jobs[startjob]->partP, 0);
672 } /* while ( thisjob || (!partP && numjobs > 0) ) */
674 /* If waited for all jobs to complete, now collect log files and return */
676 if (!useSyslog) /* if syslogging - no need to collect */
679 for (i = 0; i < jobcount; i++) {
680 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
681 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
682 if ((passLog = afs_fopen(logFileName, "r"))) {
683 while (fgets(buf, sizeof(buf), passLog)) {
688 (void)unlink(logFileName);
/*
 * Salvage a partition (or a single volume on it) by calling
 * SalvageFileSys1(). The work runs in a forked child when forking is
 * possible and `debug` is off; otherwise it runs in the calling process.
 * The parent waits for the child through Wait("SalvageFileSys").
 *
 * @param[in] partP               partition to salvage
 * @param[in] singleVolumeNumber  volume to salvage, passed through
 *                                unchanged to SalvageFileSys1()
 */
697 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
699 if (!canfork || debug || Fork() == 0) {
700 SalvageFileSys1(partP, singleVolumeNumber);
701 if (canfork && !debug) {
706 Wait("SalvageFileSys");
/*
 * Split a device path at its last OS_DIRSEPC. The visible code copies
 * pbuffer into a local scratch buffer, locates the last separator in both
 * the copy and the original, and finally overwrites pbuffer in place with
 * the text that follows the separator.
 *
 * @param[in,out] pbuffer  device path; rewritten to hold only its last
 *                         component
 * @param[out]    wpath    presumably receives the directory part taken from
 *                         the scratch copy (the lines that write it are not
 *                         visible here -- confirm)
 */
710 get_DevName(char *pbuffer, char *wpath)
712 char pbuf[128], *ptr;
/* NOTE(review): unbounded copy into the 128-byte pbuf; confirm callers
 * never pass a longer string. */
713 strcpy(pbuf, pbuffer);
714 ptr = (char *)strrchr(pbuf, OS_DIRSEPC);
720 ptr = (char *)strrchr(pbuffer, OS_DIRSEPC);
/* NOTE(review): ptr points into pbuffer, so source and destination of this
 * strcpy overlap, which the C standard leaves undefined; memmove with
 * strlen(ptr + 1) + 1 bytes would be the well-defined equivalent. */
722 strcpy(pbuffer, ptr + 1);
/*
 * Salvage one partition, or only the volume group of singleVolumeNumber
 * when that argument is nonzero. Steps visible in this function:
 *
 *  - set up a local SalvInfo describing the partition (device, path,
 *    device name);
 *  - for a single volume: connect to the fileserver unless running as the
 *    salvageserver, take the volume offline with AskOffline() and, under
 *    AFS_DEMAND_ATTACH_FS, lock it with LockVolume(); for a whole
 *    partition: take the partition lock with VLockPartition();
 *  - remove leftover "salvage.inodes." and "salvage.temp." files from the
 *    partition directory;
 *  - create the inode list file and fill it via GetInodeSummary(), then
 *    read the volume headers via GetVolumeSummary();
 *  - walk the inode summaries grouped by read-write volume id, deleting
 *    volume headers that have no matching inodes
 *    (DeleteExtraVolumeHeaderFile) and calling SalvageVolumeGroup() for
 *    each group;
 *  - bring the volume(s) back online with AskOnline() and close the inode
 *    file.
 *
 * The retry check against VOL_MAX_CHECKOUT_RETRIES aborts after too many
 * races with fileserver restarts.
 *
 * @param[in] partP               partition to salvage
 * @param[in] singleVolumeNumber  volume to salvage, or 0 for the whole
 *                                partition
 */
729 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
732 char inodeListPath[256];
733 FILE *inodeFile = NULL;
734 static char tmpDevName[100];
735 static char wpath[100];
736 struct VolumeSummary *vsp, *esp;
740 struct SalvInfo l_salvinfo;
741 struct SalvInfo *salvinfo = &l_salvinfo;
744 memset(salvinfo, 0, sizeof(*salvinfo));
751 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
752 Abort("Raced too many times with fileserver restarts while trying to "
753 "checkout/lock volumes; Aborted\n");
755 #ifdef AFS_DEMAND_ATTACH_FS
757 /* unlock all previous volume locks, since we're about to lock them
759 VLockFileReinit(&partP->volLockFile);
761 #endif /* AFS_DEMAND_ATTACH_FS */
763 salvinfo->fileSysPartition = partP;
764 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
765 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
768 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
/* NOTE(review): unbounded sprintf into fileSysPath, which this file declares
 * as an 8-byte array; confirm the path plus separator always fits. */
769 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
770 name = partP->devName;
772 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
773 strcpy(tmpDevName, partP->devName);
774 name = get_DevName(tmpDevName, wpath);
775 salvinfo->fileSysDeviceName = name;
776 salvinfo->filesysfulldev = wpath;
779 if (singleVolumeNumber) {
780 #ifndef AFS_DEMAND_ATTACH_FS
781 /* only non-DAFS locks the partition when salvaging a single volume;
782 * DAFS will lock the individual volumes in the VG */
783 VLockPartition(partP->name);
784 #endif /* !AFS_DEMAND_ATTACH_FS */
788 /* salvageserver already setup fssync conn for us */
789 if ((programType != salvageServer) && !VConnectFS()) {
790 Abort("Couldn't connect to file server\n");
793 salvinfo->useFSYNC = 1;
794 AskOffline(salvinfo, singleVolumeNumber);
795 #ifdef AFS_DEMAND_ATTACH_FS
796 if (LockVolume(salvinfo, singleVolumeNumber)) {
799 #endif /* AFS_DEMAND_ATTACH_FS */
802 salvinfo->useFSYNC = 0;
803 VLockPartition(partP->name);
807 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
810 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
811 partP->name, name, (Testing ? "(READONLY mode)" : ""));
813 Log("***Forced salvage of all volumes on this partition***\n");
818 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
825 osi_Assert((dirp = opendir(salvinfo->fileSysPath)) != NULL);
826 while ((dp = readdir(dirp))) {
827 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
828 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
830 Log("Removing old salvager temp files %s\n", dp->d_name);
831 strcpy(npath, salvinfo->fileSysPath);
832 strcat(npath, OS_DIRSEP);
833 strcat(npath, dp->d_name);
839 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
841 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
/* NOTE(review): strncpy with a count of 255 leaves inodeListPath without a
 * terminating NUL when the generated name is 255 characters or longer, and
 * the _tempnam() result is neither checked for NULL nor freed. */
842 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
844 snprintf(inodeListPath, 255, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
848 inodeFile = fopen(inodeListPath, "w+b");
850 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
853 /* Using nt_unlink here since we're really using the delete on close
854 * semantics of unlink. In most places in the salvager, we really do
855 * mean to unlink the file at that point. Those places have been
856 * modified to actually do that so that the NT crt can be used there.
858 * jaltman - On NT delete on close cannot be applied to a file while the
859 * process has an open file handle that does not have DELETE file
860 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
861 * delete privileges. As a result the nt_unlink() call will always
864 code = nt_unlink(inodeListPath);
866 code = unlink(inodeListPath);
869 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
872 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
876 salvinfo->inodeFd = fileno(inodeFile);
877 if (salvinfo->inodeFd == -1)
878 Abort("Temporary file %s is missing...\n", inodeListPath);
879 afs_lseek(salvinfo->inodeFd, 0L, SEEK_SET);
880 if (ListInodeOption) {
881 PrintInodeList(salvinfo);
884 /* enumerate volumes in the partition.
885 * figure out sets of read-only + rw volumes.
886 * salvage each set, read-only volumes first, then read-write.
887 * Fix up inodes on last volume in set (whether it is read-write
890 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
894 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
895 i < salvinfo->nVolumesInInodeFile; i = j) {
896 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
898 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
900 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
901 struct VolumeSummary *tsp;
902 /* Scan volume list (from partition root directory) looking for the
903 * current rw volume number in the volume list from the inode scan.
904 * If there is one here that is not in the inode volume list,
906 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
908 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
910 /* Now match up the volume summary info from the root directory with the
911 * entry in the volume list obtained from scanning inodes */
912 salvinfo->inodeSummary[j].volSummary = NULL;
913 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
914 if (tsp->header.id == vid) {
915 salvinfo->inodeSummary[j].volSummary = tsp;
921 /* Salvage the group of volumes (several read-only + 1 read/write)
922 * starting with the current read-only volume we're looking at.
924 SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
927 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
928 for (; vsp < esp; vsp++) {
930 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
933 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
934 RemoveTheForce(salvinfo->fileSysPath);
936 if (!Testing && singleVolumeNumber) {
937 #ifdef AFS_DEMAND_ATTACH_FS
938 /* unlock vol headers so the fs can attach them when we AskOnline */
939 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
940 #endif /* AFS_DEMAND_ATTACH_FS */
942 AskOnline(salvinfo, singleVolumeNumber);
944 /* Step through the volumeSummary list and set all volumes on-line.
945 * The volumes were taken off-line in GetVolumeSummary.
947 for (j = 0; j < salvinfo->nVolumes; j++) {
948 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
952 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
953 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
956 fclose(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
/*
 * Handle a volume header file that has no inodes behind it: log that fact,
 * destroy the volume disk header through VDestroyVolumeDiskHeader(), and
 * unlink the header file itself (an ENOENT from unlink is not reported).
 *
 * The log text switches to "would have been deleted" when Testing is set;
 * the deletion steps are presumably skipped in that mode (the guarding
 * lines are not visible here -- confirm).
 *
 * @param[in] salvinfo  salvage job state; supplies the partition and path
 * @param[in] vsp       summary of the volume whose header is removed
 *
 * NOTE(review): the sprintf into `path` is unbounded; the size of `path` is
 * not visible in this view.
 */
960 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
963 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, vsp->fileName);
966 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
969 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
971 Log("Error %ld destroying volume disk header for volume %lu\n",
972 afs_printable_int32_ld(code),
973 afs_printable_uint32_lu(vsp->header.id));
976 /* make sure we actually delete the fileName file; ENOENT
977 * is fine, since VDestroyVolumeDiskHeader probably already
979 if (unlink(path) && errno != ENOENT) {
980 Log("Unable to unlink %s (errno = %d)\n", path, errno);
/*
 * qsort() comparator for struct ViceInodeInfo records (used by
 * GetInodeSummary to sort the inode table).
 *
 * When either record is a special inode (vnodeNumber == INODESPECIAL) the
 * records are first compared by read-write volume id -- parentId for a
 * special inode, volumeId otherwise -- and then by volume id and special
 * type. Ordinary vnodes are compared by volumeId, then vnodeNumber, then
 * uniquifier and inode data version; the last two tests are reversed so
 * the most desirable of several similar inodes sorts first.
 *
 * The AFS_3DISPARES and AFS_SGI_EXMAG branches special-case pairs where one
 * value is above 90% and the other below 10% of the field's range --
 * presumably to cope with counter wrap-around (the resulting return values
 * are not visible here).
 */
987 CompareInodes(const void *_p1, const void *_p2)
989 const struct ViceInodeInfo *p1 = _p1;
990 const struct ViceInodeInfo *p2 = _p2;
/* At least one side is a special inode: order by RW volume id first */
991 if (p1->u.vnode.vnodeNumber == INODESPECIAL
992 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
993 VolumeId p1rwid, p2rwid;
995 (p1->u.vnode.vnodeNumber ==
996 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
998 (p2->u.vnode.vnodeNumber ==
999 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1000 if (p1rwid < p2rwid)
1002 if (p1rwid > p2rwid)
1004 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1005 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
/* NOTE(review): for two special inodes of the same volume and the same
 * type the expression below yields 1 in both argument orders, never 0, so
 * the comparator is not symmetric for such duplicates. */
1006 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1007 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1008 if (p1->u.vnode.volumeId == p1rwid)
1010 if (p2->u.vnode.volumeId == p2rwid)
1012 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1014 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1015 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1016 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1018 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1020 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1022 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1024 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1026 /* The following tests are reversed, so that the most desirable
1027 * of several similar inodes comes first */
1028 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1029 #ifdef AFS_3DISPARES
1030 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1031 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1034 #ifdef AFS_SGI_EXMAG
1035 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1036 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1041 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1042 #ifdef AFS_3DISPARES
1043 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1044 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1047 #ifdef AFS_SGI_EXMAG
1048 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1049 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1054 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1055 #ifdef AFS_3DISPARES
1056 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1057 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1060 #ifdef AFS_SGI_EXMAG
1061 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1062 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1067 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1068 #ifdef AFS_3DISPARES
1069 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1070 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1073 #ifdef AFS_SGI_EXMAG
1074 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1075 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/*
 * Summarize the run of inode records at `ip` that belong to one volume.
 *
 * Walks forward while the records keep the volume id of the first one (and
 * at most maxInodes records), tracking the largest vnode uniquifier seen
 * and, from special inodes, the parent (read-write) volume id. The results
 * are stored in *summary: volumeId, RWvolumeId (defaults to the volume's
 * own id), nInodes, nSpecialInodes and maxUniquifier.
 *
 * @param[in]  ip         first inode record of the volume
 * @param[in]  maxInodes  number of records available starting at ip
 * @param[out] summary    receives the per-volume totals
 */
1084 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1085 struct InodeSummary *summary)
1087 VolumeId volume = ip->u.vnode.volumeId;
1088 VolumeId rwvolume = volume;
1093 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1095 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1097 rwvolume = ip->u.special.parentId;
1098 /* This isn't quite right, as there could (in error) be different
1099 * parent inodes in different special vnodes */
1101 if (maxunique < ip->u.vnode.vnodeUniquifier)
1102 maxunique = ip->u.vnode.vnodeUniquifier;
1106 summary->volumeId = volume;
1107 summary->RWvolumeId = rwvolume;
1108 summary->nInodes = n;
1109 summary->nSpecialInodes = nSpecial;
1110 summary->maxUniquifier = maxunique;
/*
 * Inode filter handed to ListViceInodes() when a single volume is being
 * salvaged.
 *
 * @param[in] inodeinfo           inode record under consideration
 * @param[in] singleVolumeNumber  id of the volume being salvaged
 * @param[in] rock                not referenced in the visible lines
 *
 * @return nonzero when the inode belongs to singleVolumeNumber: special
 *         inodes are matched on their parentId, all others on volumeId
 */
1114 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1116 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1117 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1118 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1123 * Collect list of inodes in file named by path. If a truly fatal error,
1124 * unlink the file and abort. For lesser errors, return -1. The file will
1125 * be unlinked by the caller.
/*
 * Build the per-volume inode summaries for the partition in salvinfo.
 *
 * Steps visible in this function:
 *  - ListViceInodes() writes the inode records to inodeFile (filtered by
 *    OnlyOneVolume when singleVolumeNumber is nonzero);
 *  - a temporary summary file is created and immediately unlinked;
 *  - in a forked child (or in this process when forking is disabled or
 *    `debug` is set) the inode table is read into memory, sorted with
 *    CompareInodes, written back, and one InodeSummary per volume
 *    (CountVolumeInodes) is appended to the summary file;
 *  - the parent waits for the child, then loads the summary file into
 *    salvinfo->inodeSummary and sets salvinfo->nVolumesInInodeFile.
 *
 * When no inodes are found, the FORCESALVAGE file is removed (whole
 * partition only), volume headers are passed to
 * DeleteExtraVolumeHeaderFile(), and a "not salvaged" message is logged.
 *
 * @param[in] salvinfo            salvage job state
 * @param[in] inodeFile           open stream that receives the inode list
 * @param[in] singleVolumeNumber  volume to restrict to, or 0 for all
 */
1128 GetInodeSummary(struct SalvInfo *salvinfo, FILE *inodeFile, VolumeId singleVolumeNumber)
1130 struct afs_stat status;
1133 struct ViceInodeInfo *ip, *ip_save;
1134 struct InodeSummary summary;
1135 char summaryFileName[50];
1138 char *dev = salvinfo->fileSysPath;
1139 char *wpath = salvinfo->fileSysPath;
1141 char *dev = salvinfo->fileSysDeviceName;
1142 char *wpath = salvinfo->filesysfulldev;
1144 char *part = salvinfo->fileSysPath;
1148 /* This file used to come from vfsck; cobble it up ourselves now... */
1150 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1151 singleVolumeNumber ? OnlyOneVolume : 0,
1152 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1154 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1157 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1159 if (forceSal && !ForceSalvage) {
1160 Log("***Forced salvage of all volumes on this partition***\n");
1163 fseek(inodeFile, 0L, SEEK_SET);
1164 salvinfo->inodeFd = fileno(inodeFile);
1165 if (salvinfo->inodeFd == -1 || afs_fstat(salvinfo->inodeFd, &status) == -1) {
1166 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1168 tdir = (tmpdir ? tmpdir : part);
1170 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
/* NOTE(review): unbounded strcpy of the _tempnam() result into the 50-byte
 * summaryFileName; the result is also neither checked for NULL nor freed. */
1171 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp."));
1173 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1174 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1176 summaryFile = afs_fopen(summaryFileName, "a+");
1177 if (summaryFile == NULL) {
1178 Abort("Unable to create inode summary file\n");
1182 /* Using nt_unlink here since we're really using the delete on close
1183 * semantics of unlink. In most places in the salvager, we really do
1184 * mean to unlink the file at that point. Those places have been
1185 * modified to actually do that so that the NT crt can be used there.
1187 * jaltman - As commented elsewhere, this cannot work because fopen()
1188 * does not open files with DELETE and FILE_SHARE_DELETE.
1190 code = nt_unlink(summaryFileName);
1192 code = unlink(summaryFileName);
1195 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1198 if (!canfork || debug || Fork() == 0) {
1200 unsigned long st_size=(unsigned long) status.st_size;
1201 nInodes = st_size / sizeof(struct ViceInodeInfo);
1203 fclose(summaryFile);
1204 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1205 RemoveTheForce(salvinfo->fileSysPath);
1207 struct VolumeSummary *vsp;
1210 GetVolumeSummary(salvinfo, singleVolumeNumber);
1212 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1214 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1217 Log("%s vice inodes on %s; not salvaged\n",
1218 singleVolumeNumber ? "No applicable" : "No", dev);
1221 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1223 fclose(summaryFile);
1225 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1228 if (read(salvinfo->inodeFd, ip, st_size) != st_size) {
1229 fclose(summaryFile);
1230 Abort("Unable to read inode table; %s not salvaged\n", dev);
1232 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1233 if (afs_lseek(salvinfo->inodeFd, 0, SEEK_SET) == -1
1234 || write(salvinfo->inodeFd, ip, st_size) != st_size) {
1235 fclose(summaryFile);
1236 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1241 CountVolumeInodes(ip, nInodes, &summary);
1242 if (fwrite(&summary, sizeof(summary), 1, summaryFile) != 1) {
1243 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1244 fclose(summaryFile);
1247 summary.index += (summary.nInodes);
1248 nInodes -= summary.nInodes;
1249 ip += summary.nInodes;
1252 ip = ip_save = NULL;
1253 /* Following fflush is not fclose, because if it was debug mode would not work */
1254 if (fflush(summaryFile) == EOF || fsync(fileno(summaryFile)) == -1) {
1255 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1256 fclose(summaryFile);
1259 if (canfork && !debug) {
1264 if (Wait("Inode summary") == -1) {
1265 fclose(summaryFile);
1266 Exit(1); /* salvage of this partition aborted */
1269 osi_Assert(afs_fstat(fileno(summaryFile), &status) != -1);
1270 if (status.st_size != 0) {
1272 unsigned long st_status=(unsigned long)status.st_size;
1273 salvinfo->inodeSummary = (struct InodeSummary *)malloc(st_status);
1274 osi_Assert(salvinfo->inodeSummary != NULL);
1275 /* For GNU we need to do lseek to get the file pointer moved. */
1276 osi_Assert(afs_lseek(fileno(summaryFile), 0, SEEK_SET) == 0);
1277 ret = read(fileno(summaryFile), salvinfo->inodeSummary, st_status);
1278 osi_Assert(ret == st_status);
1280 salvinfo->nVolumesInInodeFile =(unsigned long)(status.st_size) / sizeof(struct InodeSummary);
1281 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1282 salvinfo->inodeSummary[i].volSummary = NULL;
1284 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)(status.st_size));
1285 fclose(summaryFile);
/*
 * Comparator over two struct VolumeSummary entries, received as untyped
 * (const void *) pointers.
 *
 * Ordering visible in the code:
 *   1. ascending header.parent;
 *   2. within one parent, an entry whose header.id equals header.parent
 *      (the read-write volume) is special-cased;
 *   3. two read-only entries are ordered by ascending header.id.
 *
 * NOTE(review): the return statements of the two "rw volume" branches are
 * not visible here; presumably -1 when p1 is the rw volume and 1 when p2
 * is -- confirm.
 */
1289 /* Comparison routine for volume sort.
1290 This is set up so that a read-write volume comes immediately before
1291 any read-only clones of that volume */
1293 CompareVolumes(const void *_p1, const void *_p2)
1295 const struct VolumeSummary *p1 = _p1;
1296 const struct VolumeSummary *p2 = _p2;
1297 if (p1->header.parent != p2->header.parent)
1298 return p1->header.parent < p2->header.parent ? -1 : 1;
1299 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1301 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1303 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
/*
 * Implementation notes for AskVolumeSummary (all of the logic below is
 * compiled only when FSSYNC_BUILD_CLIENT and AFS_DEMAND_ATTACH_FS are both
 * defined):
 *
 *  - The fileserver is only queried when programType is salvageServer and
 *    singleVolumeNumber is nonzero.
 *  - FSYNC_VGCQuery() is retried, with a sleep between attempts, for as
 *    long as it fails with reason FSYNC_PART_SCANNING.
 *  - A failure with reason FSYNC_UNKNOWN_VOLID is turned into an empty
 *    response whose rw id is singleVolumeNumber.
 *  - If the reported rw id differs from singleVolumeNumber, the request was
 *    for a clone: a salvage of the whole volume group is requested through
 *    SALVSYNC_LinkVolume() (when SALVSYNC_BUILD_CLIENT is defined) and the
 *    process exits with SALSRV_EXIT_VOLGROUP_LINK.
 *  - Otherwise salvinfo->volumeSummaryp is allocated with VOL_VG_MAX_VOLS
 *    zero-filled entries and populated from q_res.children[], taking each
 *    child other than singleVolumeNumber offline and locking it first.
 *    The result is sorted with qsort().
 */
1307 * Gleans volumeSummary information by asking the fileserver
1309 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1310 * salvaging a whole partition
1312 * @return whether we obtained the volume summary information or not
1313 * @retval 0 success; we obtained the volume summary information
1314 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1316 * @retval 1 we did not get the volume summary information; either the
1317 * fileserver responded with an error, or we are not supposed to
1318 * ask the fileserver for the information (e.g. we are salvaging
1319 * the entire partition or we are not the salvageserver)
1321 * @note for non-DAFS, always returns 1
1324 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1327 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1328 if (programType == salvageServer) {
1329 if (singleVolumeNumber) {
1330 FSSYNC_VGQry_response_t q_res;
1332 struct VolumeSummary *vsp;
1334 struct VolumeDiskHeader diskHdr;
1336 memset(&res, 0, sizeof(res));
1338 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1341 * We must wait for the partition to finish scanning before
1342 * can continue, since we will not know if we got the entire
1343 * VG membership unless the partition is fully scanned.
1344 * We could, in theory, just scan the partition ourselves if
1345 * the VG cache is not ready, but we would be doing the exact
1346 * same scan the fileserver is doing; it will almost always
1347 * be faster to wait for the fileserver. The only exceptions
1348 * are if the partition does not take very long to scan, and
1349 * in that case it's fast either way, so who cares?
1351 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1352 Log("waiting for fileserver to finish scanning partition %s...\n",
1353 salvinfo->fileSysPartition->name);
1355 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1356 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1357 * just so small partitions don't need to wait over 10
1358 * seconds every time, and large partitions are generally
1359 * polled only once every ten seconds. */
/* once i exceeds 10 it is reset to 10 here, which caps every later sleep
 * at 10 seconds */
1360 sleep((i > 10) ? (i = 10) : i);
1362 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1366 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1367 /* This can happen if there's no header for the volume
1368 * we're salvaging, or no headers exist for the VG (if
1369 * we're salvaging an RW). Act as if we got a response
1370 * with no VG members. The headers may be created during
1371 * salvaging, if there are inodes in this VG. */
1373 memset(&q_res, 0, sizeof(q_res));
1374 q_res.rw = singleVolumeNumber;
1378 Log("fileserver refused VGCQuery request for volume %lu on "
1379 "partition %s, code %ld reason %ld\n",
1380 afs_printable_uint32_lu(singleVolumeNumber),
1381 salvinfo->fileSysPartition->name,
1382 afs_printable_int32_ld(code),
1383 afs_printable_int32_ld(res.hdr.reason));
1387 if (q_res.rw != singleVolumeNumber) {
1388 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1389 afs_printable_uint32_lu(singleVolumeNumber),
1390 afs_printable_uint32_lu(q_res.rw));
1391 #ifdef SALVSYNC_BUILD_CLIENT
1392 if (SALVSYNC_LinkVolume(q_res.rw,
1394 salvinfo->fileSysPartition->name,
1396 Log("schedule request failed\n");
1398 #endif /* SALVSYNC_BUILD_CLIENT */
1399 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1402 salvinfo->volumeSummaryp = calloc(VOL_VG_MAX_VOLS, sizeof(struct VolumeSummary));
1403 osi_Assert(salvinfo->volumeSummaryp != NULL);
1405 salvinfo->nVolumes = 0;
1406 vsp = salvinfo->volumeSummaryp;
1408 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1409 char name[VMAXPATHLEN];
1411 if (!q_res.children[i]) {
1415 /* AskOffline for singleVolumeNumber was called much earlier */
1416 if (q_res.children[i] != singleVolumeNumber) {
1417 AskOffline(salvinfo, q_res.children[i]);
1418 if (LockVolume(salvinfo, q_res.children[i])) {
1424 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1426 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1427 afs_printable_uint32_lu(q_res.children[i]));
1432 DiskToVolumeHeader(&vsp->header, &diskHdr);
1433 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1434 vsp->fileName = ToString(name);
1435 salvinfo->nVolumes++;
1439 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1444 Log("Cannot get volume summary from fileserver; falling back to scanning "
1445 "entire partition\n");
1448 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
/*
 * VWalkVolumeHeaders callback used by GetVolumeSummary for its counting
 * pass. Only the rock argument is used: it is cast to an afs_int32 *
 * that holds the running count.
 * NOTE(review): the statement that updates the count and the return value
 * are not visible here -- confirm against the full function.
 */
1453 * count how many volume headers are found by VWalkVolumeHeaders.
1455 * @param[in] dp the disk partition (unused)
1456 * @param[in] name full path to the .vol header (unused)
1457 * @param[in] hdr the header data (unused)
1458 * @param[in] last whether this is the last try or not (unused)
1459 * @param[in] rock actually an afs_int32*; the running count of how many
1460 * volumes we have found
1465 CountHeader(struct DiskPartition64 *dp, const char *name,
1466 struct VolumeDiskHeader *hdr, int last, void *rock)
1468 afs_int32 *nvols = (afs_int32 *)rock;
/*
 * Scan state shared between GetVolumeSummary and its VWalkVolumeHeaders
 * callbacks. GetVolumeSummary fills one of these in and passes its address
 * as the walk's rock; RecordHeader and UnlinkHeader cast the rock back to
 * struct SalvageScanParams *.
 */
1474 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
1477 struct SalvageScanParams {
1478 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1479 * vol id of the VG we're salvaging */
1480 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1481 * we're filling in */
1482 afs_int32 nVolumes; /**< # of vols we've encountered */
1483 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1484 * # of vols we've alloc'd memory for) */
1485 int retry; /**< do we need to retry vol lock/checkout? */
1486 struct SalvInfo *salvinfo; /**< salvage job info */
/*
 * Implementation notes for RecordHeader:
 *
 *  - rock is cast to struct SalvageScanParams *; the on-disk header is
 *    converted into summary.header with DiskToVolumeHeader().
 *  - When salvaging a single volume and this header is that volume but its
 *    parent is a different id (i.e. it is a clone), the salvageserver logs
 *    the situation, asks for the parent to be salvaged via
 *    SALVSYNC_LinkVolume() (under SALVSYNC_BUILD_CLIENT) and exits with
 *    SALSRV_EXIT_VOLGROUP_LINK; the other visible path logs that a
 *    read-only volume is not salvaged.
 *  - A header is in scope for a partition salvage, or when its id or parent
 *    equals singleVolumeNumber. For those, the file's base name is compared
 *    with the name built from VFORMAT and the volume id.
 *  - Volumes other than singleVolumeNumber are taken offline with
 *    AskOffline() and, under AFS_DEMAND_ATTACH_FS, locked with LockVolume().
 *  - Abort() is called if more headers are seen than params->totalVolumes
 *    allows; otherwise the summary is copied into *params->vsp.
 *
 * NOTE(review): the statements that set badname, advance params->vsp and
 * params->nVolumes, and the return statements are not visible here.
 */
1490 * records volume summary info found from VWalkVolumeHeaders.
1492 * Found volumes are also taken offline if they are in the specific volume
1493 * group we are looking for.
1495 * @param[in] dp the disk partition
1496 * @param[in] name full path to the .vol header
1497 * @param[in] hdr the header data
1498 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1499 * @param[in] rock actually a struct SalvageScanParams*, containing the
1500 * information needed to record the volume summary data
1502 * @return operation status
1504 * @retval -1 volume locking raced with fileserver restart; checking out
1505 * and locking volumes needs to be retried
1506 * @retval 1 volume header is mis-named and should be deleted
1509 RecordHeader(struct DiskPartition64 *dp, const char *name,
1510 struct VolumeDiskHeader *hdr, int last, void *rock)
1512 char nameShouldBe[64];
1513 struct SalvageScanParams *params;
1514 struct VolumeSummary summary;
1515 VolumeId singleVolumeNumber;
1516 struct SalvInfo *salvinfo;
1518 params = (struct SalvageScanParams *)rock;
1520 singleVolumeNumber = params->singleVolumeNumber;
1521 salvinfo = params->salvinfo;
1523 DiskToVolumeHeader(&summary.header, hdr);
1525 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1526 && summary.header.parent != singleVolumeNumber) {
1528 if (programType == salvageServer) {
1529 #ifdef SALVSYNC_BUILD_CLIENT
1530 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1531 summary.header.id, summary.header.parent);
1532 if (SALVSYNC_LinkVolume(summary.header.parent,
1536 Log("schedule request failed\n");
1539 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1542 Log("%u is a read-only volume; not salvaged\n",
1543 singleVolumeNumber);
1548 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1549 || summary.header.parent == singleVolumeNumber) {
1551 /* check if the header file is incorrectly named */
1553 const char *base = strrchr(name, OS_DIRSEPC);
1560 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1561 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1564 if (strcmp(nameShouldBe, base)) {
1565 /* .vol file has wrong name; retry/delete */
1569 if (!badname || last) {
1570 /* only offline the volume if the header is good, or if this is
1571 * the last try looking at it; avoid AskOffline'ing the same vol
1574 if (singleVolumeNumber
1575 && summary.header.id != singleVolumeNumber) {
1576 /* don't offline singleVolumeNumber; we already did that
1579 AskOffline(salvinfo, summary.header.id);
1581 #ifdef AFS_DEMAND_ATTACH_FS
1583 /* don't lock the volume if the header is bad, since we're
1584 * about to delete it anyway. */
1585 if (LockVolume(salvinfo, summary.header.id)) {
1590 #endif /* AFS_DEMAND_ATTACH_FS */
1594 if (last && !Showmode) {
1595 Log("Volume header file %s is incorrectly named (should be %s "
1596 "not %s); %sdeleted (it will be recreated later, if "
1597 "necessary)\n", name, nameShouldBe, base,
1598 (Testing ? "it would have been " : ""));
1603 summary.fileName = ToString(base);
1606 if (params->nVolumes > params->totalVolumes) {
1607 /* We found more volumes than we found on the first partition walk;
1608 * apparently something created a volume while we were
1609 * partition-salvaging, or we found more than 20 vols when salvaging a
1610 * particular volume. Abort if we detect this, since other programs
1611 * supposed to not touch the partition while it is partition-salvaging,
1612 * and we shouldn't find more than 20 vols in a VG.
1614 Abort("Found %ld vol headers, but should have found at most %ld! "
1615 "Make sure the volserver/fileserver are not running at the "
1616 "same time as a partition salvage\n",
1617 afs_printable_int32_ld(params->nVolumes),
1618 afs_printable_int32_ld(params->totalVolumes));
1621 memcpy(params->vsp, &summary, sizeof(summary));
/*
 * Implementation notes for UnlinkHeader:
 *
 *  - rock is cast to struct SalvageScanParams * so that
 *    params->singleVolumeNumber can be consulted.
 *  - An unreadable header is logged as "not a legitimate volume header
 *    file"; the wording switches to "it would have been deleted" when
 *    Testing is set.
 *  - The file is removed with unlink(name) only when dounlink is set, and a
 *    failing unlink is logged with errno.
 *
 * NOTE(review): the assignments to dounlink are not visible here;
 * presumably they are suppressed when Testing is set -- confirm.
 */
1629 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1631 * If the header could not be read in at all, the header is always unlinked.
1632 * If instead RecordHeader said the header was bad (that is, the header file
1633 * is mis-named), we only unlink if we are doing a partition salvage, as
1634 * opposed to salvaging a specific volume group.
1636 * @param[in] dp the disk partition
1637 * @param[in] name full path to the .vol header
1638 * @param[in] hdr header data, or NULL if the header could not be read
1639 * @param[in] rock actually a struct SalvageScanParams*, with some information
1643 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1644 struct VolumeDiskHeader *hdr, void *rock)
1646 struct SalvageScanParams *params;
1649 params = (struct SalvageScanParams *)rock;
1652 /* no header; header is too bogus to read in at all */
1654 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1660 } else if (!params->singleVolumeNumber) {
1661 /* We were able to read in a header, but RecordHeader said something
1662 * was wrong with it. We only unlink those if we are doing a partition
1669 if (dounlink && unlink(name)) {
1670 Log("Error %d while trying to unlink %s\n", errno, name);
/*
 * Implementation notes for GetVolumeSummary:
 *
 *  - AskVolumeSummary() is tried first; its result decides whether the
 *    partition has to be scanned at all.
 *  - For a partition salvage (singleVolumeNumber == 0) the headers are first
 *    counted with a VWalkVolumeHeaders() pass using CountHeader; for a
 *    single volume group the array is sized to VOL_VG_MAX_VOLS instead.
 *  - salvinfo->volumeSummaryp is allocated zero-filled with calloc() and the
 *    allocation is asserted to have succeeded.
 *  - A second VWalkVolumeHeaders() pass uses RecordHeader to fill the array
 *    and UnlinkHeader to deal with bad headers. Both callbacks receive the
 *    address of the local `params` as their rock.
 *  - The recorded entries are finally sorted with qsort().
 */
1675 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1676 * the fileserver for VG information, or by scanning the /vicepX partition.
1678 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1679 * are salvaging, or 0 if this is a partition
1682 * @return operation status
1684 * @retval -1 we raced with a fileserver restart; checking out and locking
1685 * volumes must be retried
1688 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1690 afs_int32 nvols = 0;
1691 struct SalvageScanParams params;
1694 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1696 /* we successfully got the vol information from the fileserver; no
1697 * need to scan the partition */
1701 /* we need to retry volume checkout */
1705 if (!singleVolumeNumber) {
1706 /* Count how many volumes we have in /vicepX */
1707 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1710 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1715 nvols = VOL_VG_MAX_VOLS;
1718 salvinfo->volumeSummaryp = calloc(nvols, sizeof(struct VolumeSummary));
1719 osi_Assert(salvinfo->volumeSummaryp != NULL);
1721 params.singleVolumeNumber = singleVolumeNumber;
1722 params.vsp = salvinfo->volumeSummaryp;
1723 params.nVolumes = 0;
1724 params.totalVolumes = nvols;
1726 params.salvinfo = salvinfo;
1728 /* walk the partition directory of volume headers and record the info
1729 * about them; unlinking invalid headers */
1730 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1731 UnlinkHeader, &params);
1733 /* we apparently need to retry checking-out/locking volumes */
1737 Abort("Failed to get volume header summary\n");
1739 salvinfo->nVolumes = params.nVolumes;
1741 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
/*
 * FindLinkHandle: searches the special inodes of every volume in the group
 * for the link table.
 *
 * For each of the nVols summaries in isp[], the first nSpecialInodes records
 * starting at allInodes + isp[i].index are examined, and the inodeNumber of
 * the first record whose special type is VI_LINKTABLE is returned.
 *
 * NOTE(review): the value returned when no link table inode is found is not
 * visible here -- confirm (callers test the result with VALID_INO()).
 */
1747 /* Find the link table. This should be associated with the RW volume or, if
1748 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
1751 FindLinkHandle(struct InodeSummary *isp, int nVols,
1752 struct ViceInodeInfo *allInodes)
1755 struct ViceInodeInfo *ip;
1757 for (i = 0; i < nVols; i++) {
1758 ip = allInodes + isp[i].index;
1759 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1760 if (ip[j].u.special.type == VI_LINKTABLE)
1761 return ip[j].inodeNumber;
/*
 * CreateLinkTable: makes sure the volume group has a usable link table and
 * leaves salvinfo->VGLinkH initialised for it.
 *
 *  - If ino is not a valid inode, a new special inode of type VI_LINKTABLE
 *    is requested with IH_CREATE(); failure to obtain one is fatal.
 *  - salvinfo->VGLinkH is initialised for (RWvolumeId, ino) and opened.
 *  - The file is truncated to sizeof(struct versionStamp) + sizeof(short)
 *    and a version stamp (LINKTABLEMAGIC / LINKTABLEVERSION) is written at
 *    offset 0. Each failing step aborts with its own message; the message
 *    for a failed write previously said "truncate" and now says "write".
 *  - When the volume has a summary (isp->volSummary), the inode number is
 *    also recorded in its header.linkTable field.
 */
1768 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1770 struct versionStamp version;
1773 if (!VALID_INO(ino))
1775 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
1776 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1777 if (!VALID_INO(ino))
1779 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1780 isp->RWvolumeId, errno);
1781 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1782 fdP = IH_OPEN(salvinfo->VGLinkH);
1784 Abort("Can't open link table for volume %u (error = %d)\n",
1785 isp->RWvolumeId, errno);
1787 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1788 Abort("Can't truncate link table for volume %u (error = %d)\n",
1789 isp->RWvolumeId, errno);
1791 version.magic = LINKTABLEMAGIC;
1792 version.version = LINKTABLEVERSION;
1794 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1796 Abort("Can't write link table for volume %u (error = %d)\n",
1797 isp->RWvolumeId, errno);
1799 FDH_REALLYCLOSE(fdP);
1801 /* If the volume summary exists (i.e., the V*.vol header file exists),
1802 * then set this inode there as well.
1804 if (isp->volSummary)
1805 isp->volSummary->header.linkTable = ino;
/*
 * Thread entry body: unpacks the SVGParms_t passed through arg and runs
 * DoSalvageVolumeGroup() with its three fields.
 * NOTE(review): the enclosing signature is not visible here; presumably this
 * is nt_SVG, the start routine that SalvageVolumeGroup hands to
 * pthread_create() -- confirm.
 */
1814 SVGParms_t *parms = (SVGParms_t *) arg;
1815 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * SalvageVolumeGroup: entry point for salvaging one volume group.
 *
 * Resets the per-volume-group state kept in salvinfo (VolumeChanged,
 * VGLinkH, VGLinkH_cnt, VolInfo), then packs isp, nVols and salvinfo into
 * parms and runs nt_SVG on a thread created as PTHREAD_CREATE_JOINABLE,
 * joining it before continuing. Failures of pthread_attr_init(),
 * pthread_attr_setdetachstate() and pthread_create() are each logged.
 *
 * NOTE(review): the trailing "#endif" names AFS_NT40_ENV, so the thread path
 * is presumably the Windows build only; the other branch of that
 * conditional is not visible here -- confirm.
 */
1820 SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1823 pthread_attr_t tattr;
1827 /* Initialize per volume global variables, even if later code does so */
1828 salvinfo->VolumeChanged = 0;
1829 salvinfo->VGLinkH = NULL;
1830 salvinfo->VGLinkH_cnt = 0;
1831 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1833 parms.svgp_inodeSummaryp = isp;
1834 parms.svgp_count = nVols;
1835 parms.svgp_salvinfo = salvinfo;
1836 code = pthread_attr_init(&tattr);
1838 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1842 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1844 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1847 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1849 Log("Failed to create thread to salvage volume group %u\n",
1853 (void)pthread_join(tid, NULL);
1855 #endif /* AFS_NT40_ENV */
/*
 * DoSalvageVolumeGroup: salvages the volume group described by
 * isp[0..nVols-1].
 *
 * Steps visible in the code:
 *  - haveRWvolume is true when the first summary is the RW volume
 *    (volumeId == RWvolumeId) and it has at least one special inode.
 *  - Unless ForceSalvage is set, QuickCheck() is consulted first (when
 *    ShowMounts is off, or when it is on and there is no RW volume).
 *  - When canfork && !debug, Fork() is called and the side that sees a
 *    nonzero return waits with Wait("Salvage volume group").
 *  - The group's ViceInodeInfo records are read from salvinfo->inodeFd,
 *    starting at record isp->index, into a freshly allocated array.
 *  - Under AFS_NAMEI_ENV the link table inode is looked up with
 *    FindLinkHandle(); if it is missing or cannot be opened it is recreated
 *    with CreateLinkTable() and every non-special inode gets a link count
 *    of 1 via namei_SetLinkCount().
 *  - Volumes are processed from index nVols - 1 down to salvageTo, each in
 *    two passes (check = 1, then check = 0) of SalvageVolumeHeaderFile()
 *    and SalvageVnodes().
 *  - Afterwards IH_DEC() is called while an inode's linkCount is positive
 *    and IH_INC() while it is negative; the link table inode itself is
 *    adjusted last.
 *  - SalvageVolume() then runs the directory checks on the rw volume and
 *    salvinfo->VGLinkH is released.
 *
 * NOTE(review): the malloc() result for `inodes` is used without a visible
 * NULL check, and `allInodes` is computed by subtracting isp->index from the
 * start of that allocation -- confirm both are intentional.
 */
1858 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1860 struct ViceInodeInfo *inodes, *allInodes, *ip;
1861 int i, totalInodes, size, salvageTo;
1865 int dec_VGLinkH = 0;
1867 FdHandle_t *fdP = NULL;
1869 salvinfo->VGLinkH_cnt = 0;
1870 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1871 && isp->nSpecialInodes > 0);
1872 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1873 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1876 if (ShowMounts && !haveRWvolume)
1878 if (canfork && !debug && Fork() != 0) {
1879 (void)Wait("Salvage volume group");
1882 for (i = 0, totalInodes = 0; i < nVols; i++)
1883 totalInodes += isp[i].nInodes;
1884 size = totalInodes * sizeof(struct ViceInodeInfo);
1885 inodes = (struct ViceInodeInfo *)malloc(size);
1886 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1887 * for the partition, if all the inodes
1888 * had been read into memory */
1889 osi_Assert(afs_lseek
1890 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1892 osi_Assert(read(salvinfo->inodeFd, inodes, size) == size);
1894 /* Don't try to salvage a read write volume if there isn't one on this
1896 salvageTo = haveRWvolume ? 0 : 1;
1898 #ifdef AFS_NAMEI_ENV
1899 ino = FindLinkHandle(isp, nVols, allInodes);
1900 if (VALID_INO(ino)) {
1901 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1902 fdP = IH_OPEN(salvinfo->VGLinkH);
1904 if (!VALID_INO(ino) || fdP == NULL) {
1905 Log("%s link table for volume %u.\n",
1906 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1908 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1911 struct ViceInodeInfo *ip;
1912 CreateLinkTable(salvinfo, isp, ino);
1913 fdP = IH_OPEN(salvinfo->VGLinkH);
1914 /* Sync fake 1 link counts to the link table, now that it exists */
1916 for (i = 0; i < nVols; i++) {
1917 ip = allInodes + isp[i].index;
1918 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1919 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1926 FDH_REALLYCLOSE(fdP);
1928 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1931 /* Salvage in reverse order--read/write volume last; this way any
1932 * Inodes not referenced by the time we salvage the read/write volume
1933 * can be picked up by the read/write volume */
1934 /* ACTUALLY, that's not done right now--the inodes just vanish */
1935 for (i = nVols - 1; i >= salvageTo; i--) {
1937 struct InodeSummary *lisp = &isp[i];
1938 #ifdef AFS_NAMEI_ENV
1939 /* If only the RO is present on this partition, the link table
1940 * shows up as a RW volume special file. Need to make sure the
1941 * salvager doesn't try to salvage the non-existent RW.
1943 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1944 /* If this only special inode is the link table, continue */
1945 if (inodes->u.special.type == VI_LINKTABLE) {
1952 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1953 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1954 /* Check inodes twice. The second time do things seriously. This
1955 * way the whole RO volume can be deleted, below, if anything goes wrong */
1956 for (check = 1; check >= 0; check--) {
1958 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
1960 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
1961 if (rw && deleteMe) {
1962 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1963 * volume won't be called */
1969 if (rw && check == 1)
1971 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
1972 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
1978 /* Fix actual inode counts */
1981 Log("totalInodes %d\n",totalInodes);
1982 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1983 static int TraceBadLinkCounts = 0;
1984 #ifdef AFS_NAMEI_ENV
1985 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
1986 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
1987 VGLinkH_p1 = ip->u.param[0];
1988 continue; /* Deal with this last. */
1991 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1992 TraceBadLinkCounts--; /* Limit reports, per volume */
1993 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
1995 while (ip->linkCount > 0) {
1996 /* below used to assert, not break */
1998 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1999 Log("idec failed. inode %s errno %d\n",
2000 PrintInode(stmp, ip->inodeNumber), errno);
2006 while (ip->linkCount < 0) {
2007 /* these used to be asserts */
2009 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2010 Log("iinc failed. inode %s errno %d\n",
2011 PrintInode(stmp, ip->inodeNumber), errno);
2018 #ifdef AFS_NAMEI_ENV
2019 while (dec_VGLinkH > 0) {
2020 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2021 Log("idec failed on link table, errno = %d\n", errno);
2025 while (dec_VGLinkH < 0) {
2026 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2027 Log("iinc failed on link table, errno = %d\n", errno);
2034 /* Directory consistency checks on the rw volume */
2036 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2037 IH_RELEASE(salvinfo->VGLinkH);
2039 if (canfork && !debug) {
/*
 * QuickCheck: inspects the volume info header of every volume in the group
 * to decide whether a full salvage can be avoided.
 *
 * For each summary isp[i], the VolumeDiskData is read with IH_IREAD() from
 * the inode named by vs->header.volumeInfo. The header passes when the read
 * returns the full size, the magic is VOLUMEINFOMAGIC, dontSalvage equals
 * DONT_SALVAGE, and needsSalvaged and destroyMe are both 0. A passing
 * header that still has inUse set gets inUse cleared and inService set, and
 * is written back with IH_IWRITE().
 *
 * NOTE(review): the return statements are not visible here. The caller
 * skips the salvage on a nonzero result unless ForceSalvage is set, so
 * nonzero presumably means "all headers are fine" -- confirm.
 */
2046 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2048 /* Check headers BEFORE forking */
2052 for (i = 0; i < nVols; i++) {
2053 struct VolumeSummary *vs = isp[i].volSummary;
2054 VolumeDiskData volHeader;
2056 /* Don't salvage just because phantom rw volume is there... */
2057 /* (If a read-only volume exists, read/write inodes must also exist) */
2058 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2062 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2063 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2064 == sizeof(volHeader)
2065 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2066 && volHeader.dontSalvage == DONT_SALVAGE
2067 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2068 if (volHeader.inUse != 0) {
2069 volHeader.inUse = 0;
2070 volHeader.inService = 1;
2072 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2073 != sizeof(volHeader)) {
/*
 * Review notes for SalvageVolumeHeaderFile (the existing description
 * follows directly below):
 *
 *  - tempHeader is the header being rebuilt; stuff[] is set up by
 *    init_inode_info() and holds, per special inode type, a pointer to the
 *    matching inode field of tempHeader.
 *  - goodspecial[] records, per special inode type, an inode number that is
 *    both referenced by the existing volume header and present in inodes[].
 *  - skip[] is optional: if it cannot be allocated a message is logged and
 *    duplicate special inode recovery is not performed.
 *  - Adjacent special inodes of the same type are reported as duplicates.
 *    When a known-good inode exists for that type it is preferred;
 *    otherwise the log reports that the salvage of the volume is aborted.
 *  - Special inodes with a type outside 1..MAXINODETYPE are reported as
 *    rubbish; the remaining non-obsolete ones are recorded in tempHeader
 *    through stuff[].
 *  - SalvageHeader() is then called for every entry of stuff[].
 *  - The on-disk header is written with VCreateVolumeDiskHeader when the
 *    volume had no header file, and with VWriteVolumeDiskHeader when the
 *    existing header is reported as damaged or no longer valid.
 *
 * NOTE(review): the calloc() result stored in isp->volSummary is
 * dereferenced on the next line without a NULL check, and strcpy() into the
 * 64-byte headerName is unbounded -- confirm that fileName can never exceed
 * that size.
 */
2089 /* SalvageVolumeHeaderFile
2091 * Salvage the top level V*.vol header file. Make sure the special files
2092 * exist and that there are no duplicates.
2094 * Calls SalvageHeader for each possible type of volume special file.
2098 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2099 struct ViceInodeInfo *inodes, int RW,
2100 int check, int *deleteMe)
2103 struct ViceInodeInfo *ip;
2104 int allinodesobsolete = 1;
2105 struct VolumeDiskHeader diskHeader;
2106 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2108 struct VolumeHeader tempHeader;
2109 struct afs_inode_info stuff[MAXINODETYPE];
2111 /* keeps track of special inodes that are probably 'good'; they are
2112 * referenced in the vol header, and are included in the given inodes
2117 } goodspecial[MAXINODETYPE];
2122 memset(goodspecial, 0, sizeof(goodspecial));
2124 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2126 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2128 Log("cannot allocate memory for inode skip array when salvaging "
2129 "volume %lu; not performing duplicate special inode recovery\n",
2130 afs_printable_uint32_lu(isp->volumeId));
2131 /* still try to perform the salvage; the skip array only does anything
2132 * if we detect duplicate special inodes */
2135 init_inode_info(&tempHeader, stuff);
2138 * First, look at the special inodes and see if any are referenced by
2139 * the existing volume header. If we find duplicate special inodes, we
2140 * can use this information to use the referenced inode (it's more
2141 * likely to be the 'good' one), and throw away the duplicates.
2143 if (isp->volSummary && skip) {
2144 /* use tempHeader, so we can use the stuff[] array to easily index
2145 * into the isp->volSummary special inodes */
2146 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2148 for (i = 0; i < isp->nSpecialInodes; i++) {
2149 ip = &inodes[isp->index + i];
2150 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2151 /* will get taken care of in a later loop */
2154 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2155 goodspecial[ip->u.special.type-1].valid = 1;
2156 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2161 memset(&tempHeader, 0, sizeof(tempHeader));
2162 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2163 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2164 tempHeader.id = isp->volumeId;
2165 tempHeader.parent = isp->RWvolumeId;
2167 /* Check for duplicates (inodes are sorted by type field) */
2168 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2169 ip = &inodes[isp->index + i];
2170 if (ip->u.special.type == (ip + 1)->u.special.type) {
2171 afs_ino_str_t stmp1, stmp2;
2173 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2174 /* Will be caught in the loop below */
2178 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2179 ip->u.special.type, isp->volumeId,
2180 PrintInode(stmp1, ip->inodeNumber),
2181 PrintInode(stmp2, (ip+1)->inodeNumber));
2183 if (skip && goodspecial[ip->u.special.type-1].valid) {
2184 Inode gi = goodspecial[ip->u.special.type-1].inode;
2187 Log("using special inode referenced by vol header (%s)\n",
2188 PrintInode(stmp1, gi));
2191 /* the volume header references some special inode of
2192 * this type in the inodes array; are we it? */
2193 if (ip->inodeNumber != gi) {
2195 } else if ((ip+1)->inodeNumber != gi) {
2196 /* in case this is the last iteration; we need to
2197 * make sure we check ip+1, too */
2202 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2210 for (i = 0; i < isp->nSpecialInodes; i++) {
2212 ip = &inodes[isp->index + i];
2213 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2215 Log("Rubbish header inode %s of type %d\n",
2216 PrintInode(stmp, ip->inodeNumber),
2217 ip->u.special.type);
2223 Log("Rubbish header inode %s of type %d; deleted\n",
2224 PrintInode(stmp, ip->inodeNumber),
2225 ip->u.special.type);
2226 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2227 if (skip && skip[i]) {
2228 if (orphans == ORPH_REMOVE) {
2229 Log("Removing orphan special inode %s of type %d\n",
2230 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2233 Log("Ignoring orphan special inode %s of type %d\n",
2234 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2235 /* fall through to the ip->linkCount--; line below */
2238 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2239 allinodesobsolete = 0;
2241 if (!check && ip->u.special.type != VI_LINKTABLE)
2242 ip->linkCount--; /* Keep the inode around */
2250 if (allinodesobsolete) {
2257 salvinfo->VGLinkH_cnt++; /* one for every header. */
2259 if (!RW && !check && isp->volSummary) {
2260 ClearROInUseBit(isp->volSummary);
2264 for (i = 0; i < MAXINODETYPE; i++) {
2265 if (stuff[i].inodeType == VI_LINKTABLE) {
2266 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2267 * And we may have recreated the link table earlier, so set the
2268 * RW header as well.
2270 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2271 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2275 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2279 if (isp->volSummary == NULL) {
2281 char headerName[64];
2282 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2283 (void)afs_snprintf(path, sizeof path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, headerName);
2285 Log("No header file for volume %u\n", isp->volumeId);
2289 Log("No header file for volume %u; %screating %s\n",
2290 isp->volumeId, (Testing ? "it would have been " : ""),
2292 isp->volSummary = calloc(1, sizeof(struct VolumeSummary));
2293 isp->volSummary->fileName = ToString(headerName);
2295 writefunc = VCreateVolumeDiskHeader;
2298 char headerName[64];
2299 /* hack: these two fields are obsolete... */
2300 isp->volSummary->header.volumeAcl = 0;
2301 isp->volSummary->header.volumeMountTable = 0;
2304 (&isp->volSummary->header, &tempHeader,
2305 sizeof(struct VolumeHeader))) {
2306 /* We often remove the name before calling us, so we make a fake one up */
2307 if (isp->volSummary->fileName) {
2308 strcpy(headerName, isp->volSummary->fileName);
2310 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2311 isp->volSummary->fileName = ToString(headerName);
2313 (void)afs_snprintf(path, sizeof path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, headerName);
2315 Log("Header file %s is damaged or no longer valid%s\n", path,
2316 (check ? "" : "; repairing"));
2320 writefunc = VWriteVolumeDiskHeader;
2324 memcpy(&isp->volSummary->header, &tempHeader,
2325 sizeof(struct VolumeHeader));
2328 Log("It would have written a new header file for volume %u\n",
2332 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2333 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2335 Log("Error %ld writing volume header file for volume %lu\n",
2336 afs_printable_int32_ld(code),
2337 afs_printable_uint32_lu(diskHeader.id));
2342 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2343 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader -- check one special inode of a volume header and, when it
 * is missing or damaged, rebuild it.
 *
 * salvinfo  state of the current salvage job; this routine reads its device
 *           and partition path and stores the volume info in
 *           salvinfo->VolInfo
 * sp        descriptor of the special inode under test (inode slot, type,
 *           expected stamp, on-disk size, printable description)
 * isp       summary of the volume the header belongs to
 * check     check-only mode.  NOTE(review): the branches that test it are
 *           not in the visible lines; the paired log messages and the
 *           "in check mode" internal-error text suggest nothing is
 *           rewritten while it is set -- confirm
 * deleteMe  NOTE(review): no assignment is visible here; presumably set
 *           when the volume info carries DESTROY_ME -- confirm
 *
 * Visible sequence: a new inode is created when the header slot holds 0,
 * the inode is opened, sp->size bytes are read and the magic is compared
 * with sp->stamp.magic, and the in-memory header is zeroed when that read
 * cannot be trusted.  When recreating outside Testing the file is truncated
 * and either a fresh VolumeDiskData named "bogus.<id>" or only the version
 * stamp is written.  For VI_VOLINFO the result is copied into
 * salvinfo->VolInfo and its update or creation time is logged.
 */
2348 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2349 struct InodeSummary *isp, int check, int *deleteMe)
2352 VolumeDiskData volumeInfo;
2353 struct versionStamp fileHeader;
2362 #ifndef AFS_NAMEI_ENV
2363 if (sp->inodeType == VI_LINKTABLE)
/* A zero inode number means the volume header has no inode recorded for this part. */
2366 if (*(sp->inode) == 0) {
2368 Log("Missing inode in volume header (%s)\n", sp->description);
2372 Log("Missing inode in volume header (%s); %s\n", sp->description,
2373 (Testing ? "it would have recreated it" : "recreating"));
2376 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2377 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2378 if (!VALID_INO(*(sp->inode)))
2380 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2381 sp->description, errno);
/* Open the special inode through its own handle. */
2386 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2387 fdP = IH_OPEN(specH);
2388 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2389 /* bail out early and destroy the volume */
2391 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2398 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2399 sp->description, errno);
2402 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2403 || header.fileHeader.magic != sp->stamp.magic)) {
2405 Log("Part of the header (%s) is corrupted\n", sp->description);
2406 FDH_REALLYCLOSE(fdP);
2410 Log("Part of the header (%s) is corrupted; recreating\n",
2413 /* header can be garbage; make sure we don't read garbage data from
2415 memset(&header, 0, sizeof(header));
2417 if (sp->inodeType == VI_VOLINFO
2418 && header.volumeInfo.destroyMe == DESTROY_ME) {
2421 FDH_REALLYCLOSE(fdP);
2425 if (recreate && !Testing) {
2428 ("Internal error: recreating volume header (%s) in check mode\n",
2430 nBytes = FDH_TRUNC(fdP, 0);
2432 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2433 sp->description, errno);
2435 /* The following code should be moved into vutil.c */
2436 if (sp->inodeType == VI_VOLINFO) {
2438 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2439 header.volumeInfo.stamp = sp->stamp;
2440 header.volumeInfo.id = isp->volumeId;
2441 header.volumeInfo.parentId = isp->RWvolumeId;
2442 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2443 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2444 isp->volumeId, isp->volumeId);
2445 header.volumeInfo.inService = 0;
2446 header.volumeInfo.blessed = 0;
2447 /* The + 1000 is a hack in case there are any files out in venus caches */
2448 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2449 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2450 header.volumeInfo.needsCallback = 0;
2451 gettimeofday(&tp, 0);
2452 header.volumeInfo.creationDate = tp.tv_sec;
2454 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2455 sizeof(header.volumeInfo), 0);
2456 if (nBytes != sizeof(header.volumeInfo)) {
2459 ("Unable to write volume header file (%s) (errno = %d)\n",
2460 sp->description, errno);
2461 Abort("Unable to write entire volume header file (%s)\n",
/* NOTE(review): presumably the branch for header types other than VI_VOLINFO; only the version stamp is written. */
2465 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2466 if (nBytes != sizeof(sp->stamp)) {
2469 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2470 sp->description, errno);
2472 ("Unable to write entire version stamp in volume header file (%s)\n",
2477 FDH_REALLYCLOSE(fdP);
/* Keep a copy of the volume info in salvinfo and log when it was last updated, or created if never updated. */
2479 if (sp->inodeType == VI_VOLINFO) {
2480 salvinfo->VolInfo = header.volumeInfo;
2484 if (salvinfo->VolInfo.updateDate) {
2485 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2487 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2488 salvinfo->VolInfo.id,
2489 (Testing ? "it would have been " : ""), update);
2491 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2493 Log("%s (%u) not updated (created %s)\n",
2494 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
/*
 * SalvageVnodes -- salvage the small and then the large vnode index of the
 * volume described by thisIsp, matching its vnodes against the inodes that
 * were collected for the read/write volume rwIsp.
 *
 * The slice of inodes[] handed to SalvageIndex starts right after rwIsp's
 * special inodes and holds rwIsp->nInodes - rwIsp->nSpecialInodes entries.
 * RW is true only when thisIsp is the read/write volume itself.
 *
 * Returns 0 when both ismall and ilarge are 0, otherwise -1.
 * NOTE(review): the assignments of ismall and ilarge are not in the visible
 * lines; presumably they receive the SalvageIndex results -- confirm.
 */
2504 SalvageVnodes(struct SalvInfo *salvinfo,
2505 struct InodeSummary *rwIsp,
2506 struct InodeSummary *thisIsp,
2507 struct ViceInodeInfo *inodes, int check)
2509 int ilarge, ismall, ioffset, RW, nInodes;
2510 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2513 RW = (rwIsp == thisIsp);
2514 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
/* Small vnode index first. */
2516 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2517 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2518 if (check && ismall == -1)
/* Then the large vnode index, against the same inode slice. */
2521 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2522 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2523 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex -- walk one vnode index file and reconcile every allocated
 * vnode with the inode list ip[0..nInodes).
 *
 * ino and class select the index file and its VnodeClassInfo entry; RW says
 * whether the volume being salvaged is the read/write one; check is the
 * check-only mode (the assertion near the end forbids changing a vnode
 * while it is set).
 *
 * Per vnode, the visible code reports or clears vnodes that have inode 0 or
 * a bad magic, steps past inode entries that belong to lower vnode numbers,
 * picks the matching inode, repairs uniquifier, data version, inode number
 * and length from it, and removes vnodes whose inode is missing.  Changed
 * vnodes are written back with IH_IWRITE unless Testing is set, and
 * salvinfo->VolumeChanged is raised.
 *
 * NOTE(review): the inode list is presumably ordered by vnode number (the
 * loops below only ever advance through it) -- confirm against the caller.
 * The return statement is not in the visible lines.
 */
2527 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2528 struct ViceInodeInfo *ip, int nInodes,
2529 struct VolumeSummary *volSummary, int check)
2531 char buf[SIZEOF_LARGEDISKVNODE];
2532 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2534 StreamHandle_t *file;
2535 struct VnodeClassInfo *vcp;
2537 afs_sfsize_t nVnodes;
2538 afs_fsize_t vnodeLength;
2540 afs_ino_str_t stmp1, stmp2;
2544 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2545 fdP = IH_OPEN(handle);
2546 osi_Assert(fdP != NULL);
2547 file = FDH_FDOPEN(fdP, "r+");
2548 osi_Assert(file != NULL);
2549 vcp = &VnodeClassInfo[class];
2550 size = OS_SIZE(fdP->fd_fd);
2551 osi_Assert(size != -1);
/* One slot is subtracted because the seek below skips the first diskSize bytes of the index file. */
2552 nVnodes = (size / vcp->diskSize) - 1;
2554 osi_Assert((nVnodes + 1) * vcp->diskSize == size);
2555 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
2559 for (vnodeIndex = 0;
2560 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2561 nVnodes--, vnodeIndex++) {
2562 if (vnode->type != vNull) {
2563 int vnodeChanged = 0;
2564 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2565 if (VNDISK_GET_INO(vnode) == 0) {
2567 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2568 memset(vnode, 0, vcp->diskSize);
2572 if (vcp->magic != vnode->vnodeMagic) {
2573 /* bad magic #, probably partially created vnode */
2575 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2576 vnodeNumber, afs_printable_uint32_lu(vnode->vnodeMagic),
2577 afs_printable_uint32_lu(vcp->magic));
2578 memset(vnode, 0, vcp->diskSize);
2582 Log("Partially allocated vnode %d deleted.\n",
2584 memset(vnode, 0, vcp->diskSize);
2588 /* ****** Should do a bit more salvage here: e.g. make sure
2589 * vnode type matches what it should be given the index */
2590 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2591 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2592 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2593 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2600 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2601 /* The following doesn't work, because the version number
2602 * is not maintained correctly by the file server */
2603 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2604 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2606 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2612 /* For RW volume, look for vnode with matching inode number;
2613 * if no such match, take the first determined by our sort
2615 struct ViceInodeInfo *lip = ip;
2616 int lnInodes = nInodes;
2618 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2619 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2628 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2629 /* "Matching" inode */
2633 vu = vnode->uniquifier;
2634 iu = ip->u.vnode.vnodeUniquifier;
2635 vd = vnode->dataVersion;
2636 id = ip->u.vnode.inodeDataVersion;
2638 * Because of the possibility of the uniquifier overflows (> 4M)
2639 * we compare them modulo the low 22-bits; we shouldn't worry
2640 * about mismatching since they shouldn't to many old
2641 * uniquifiers of the same vnode...
2643 if (IUnique(vu) != IUnique(iu)) {
2645 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2648 vnode->uniquifier = iu;
2649 #ifdef AFS_3DISPARES
2650 vnode->dataVersion = (id >= vd ?
2653 1887437 ? vd : id) :
2656 1887437 ? id : vd));
2658 #if defined(AFS_SGI_EXMAG)
2659 vnode->dataVersion = (id >= vd ?
2662 15099494 ? vd : id) :
2665 15099494 ? id : vd));
2667 vnode->dataVersion = (id > vd ? id : vd);
2668 #endif /* AFS_SGI_EXMAG */
2669 #endif /* AFS_3DISPARES */
2672 /* don't bother checking for vd > id any more, since
2673 * partial file transfers always result in this state,
2674 * and you can't do much else anyway (you've already
2675 * found the best data you can) */
2676 #ifdef AFS_3DISPARES
2677 if (!vnodeIsDirectory(vnodeNumber)
2678 && ((vd < id && (id - vd) < 1887437)
2679 || ((vd > id && (vd - id) > 1887437)))) {
2681 #if defined(AFS_SGI_EXMAG)
2682 if (!vnodeIsDirectory(vnodeNumber)
2683 && ((vd < id && (id - vd) < 15099494)
2684 || ((vd > id && (vd - id) > 15099494)))) {
2686 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2687 #endif /* AFS_SGI_EXMAG */
2690 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2691 vnode->dataVersion = id;
2696 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2699 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2701 VNDISK_SET_INO(vnode, ip->inodeNumber);
2706 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2708 VNDISK_SET_INO(vnode, ip->inodeNumber);
2711 VNDISK_GET_LEN(vnodeLength, vnode);
2712 if (ip->byteCount != vnodeLength) {
2715 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2720 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2721 VNDISK_SET_LEN(vnode, ip->byteCount);
2725 ip->linkCount--; /* Keep the inode around */
2728 } else { /* no matching inode */
2730 if (VNDISK_GET_INO(vnode) != 0
2731 || vnode->type == vDirectory) {
2732 /* No matching inode--get rid of the vnode */
2734 if (VNDISK_GET_INO(vnode)) {
2736 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2740 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2745 if (VNDISK_GET_INO(vnode)) {
2747 time_t serverModifyTime = vnode->serverModifyTime;
2748 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2752 time_t serverModifyTime = vnode->serverModifyTime;
2753 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2756 memset(vnode, 0, vcp->diskSize);
2759 /* Should not reach here because we checked for
2760 * (inodeNumber == 0) above. And where we zero the vnode,
2761 * we also goto vnodeDone.
2765 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2769 } /* VNDISK_GET_INO(vnode) != 0 */
/* Check mode must never have modified the vnode; a changed vnode is only written back when not Testing. */
2771 osi_Assert(!(vnodeChanged && check));
2772 if (vnodeChanged && !Testing) {
2773 osi_Assert(IH_IWRITE
2774 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2775 (char *)vnode, vcp->diskSize)
2777 salvinfo->VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber -- look up the in-memory VnodeEssence for a vnode number.
 *
 * The vnode number is mapped to its class and to a bit offset inside that
 * class's table in salvinfo->vnodeInfo[].  Returns a pointer to the table
 * entry, or NULL when the offset is not below the table's nVnodes.
 */
2788 struct VnodeEssence *
2789 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2792 struct VnodeInfo *vip;
2795 class = vnodeIdToClass(vnodeNumber);
2796 vip = &salvinfo->vnodeInfo[class];
2797 offset = vnodeIdToBitNumber(vnodeNumber);
/* Out-of-range offsets yield NULL instead of an entry. */
2798 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite -- give the directory a private inode before it is modified.
 *
 * Does its work only when dir->copied is clear and Testing is off.  The
 * directory's vnode is read from the large vnode index, a new inode is
 * created with IH_CREATE (the vnode's dataVersion is bumped as part of that
 * call), the old inode's contents are copied into it with CopyInode, the
 * vnode is rewritten to reference the new inode, and dir->dirHandle is
 * re-pointed at the new inode with SetSalvageDirHandle.  Every step is
 * guarded by osi_Assert.
 *
 * NOTE(review): the lines that return early and that set dir->copied are
 * not visible here -- confirm.
 */
2802 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2804 /* Copy the directory unconditionally if we are going to change it:
2805 * not just if was cloned.
2807 struct VnodeDiskObject vnode;
2808 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2809 Inode oldinode, newinode;
2812 if (dir->copied || Testing)
2814 DFlush(); /* Well justified paranoia... */
2817 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2818 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2820 osi_Assert(code == sizeof(vnode));
2821 oldinode = VNDISK_GET_INO(&vnode);
2822 /* Increment the version number by a whole lot to avoid problems with
2823 * clients that were promised new version numbers--but the file server
2824 * crashed before the versions were written to disk.
2827 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2828 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2830 osi_Assert(VALID_INO(newinode));
2831 osi_Assert(CopyInode(salvinfo->fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2833 VNDISK_SET_INO(&vnode, newinode);
2835 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2836 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2838 osi_Assert(code == sizeof(vnode));
2840 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2841 salvinfo->fileSysDevice, newinode,
2842 &salvinfo->VolumeChanged);
2843 /* Don't delete the original inode right away, because the directory is
2844 * still being scanned.
2850 * This function should either successfully create a new dir, or give up
2851 * and leave things the way they were. In particular, if it fails to write
2852 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage -- rebuild a damaged directory into a fresh inode.
 *
 * The directory's vnode is read from the large vnode index, a new inode is
 * created, and DirSalvage copies the usable entries from dir->dirHandle
 * into it.  "." and ".." are seeded from dir->vnodeNumber and vnode.parent;
 * parentUnique falls back to 1 when the parent's essence is unavailable or
 * has unique 0.
 *
 * If DirSalvage reports an error, or the rebuilt directory fails DirOK, the
 * new inode's link count is dropped with IH_DEC and the old directory stays
 * in use.  Otherwise the vnode is updated with the new inode and length and
 * written back, the data is synced, the old inode is released with IH_DEC,
 * and dir->dirHandle is replaced by the new handle.
 */
2856 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2858 struct VnodeDiskObject vnode;
2859 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2860 Inode oldinode, newinode;
2865 afs_int32 parentUnique = 1;
2866 struct VnodeEssence *vnodeEssence;
2871 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2873 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2874 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2876 osi_Assert(lcode == sizeof(vnode));
2877 oldinode = VNDISK_GET_INO(&vnode);
2878 /* Increment the version number by a whole lot to avoid problems with
2879 * clients that were promised new version numbers--but the file server
2880 * crashed before the versions were written to disk.
2883 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2884 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2886 osi_Assert(VALID_INO(newinode));
2887 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2888 &salvinfo->VolumeChanged);
2890 /* Assign . and .. vnode numbers from dir and vnode.parent.
2891 * The uniquifier for . is in the vnode.
2892 * The uniquifier for .. might be set to a bogus value of 1 and
2893 * the salvager will later clean it up.
2895 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2896 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2899 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2901 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2906 /* didn't really build the new directory properly, let's just give up. */
2907 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2908 Log("Directory salvage returned code %d, continuing.\n", code);
2910 Log("also failed to decrement link count on new inode");
2914 Log("Checking the results of the directory salvage...\n");
2915 if (!DirOK(&newdir)) {
2916 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2917 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2918 osi_Assert(code == 0);
/* The rebuilt directory passed DirOK: point the vnode at it and record its length. */
2922 VNDISK_SET_INO(&vnode, newinode);
2923 length = Length(&newdir);
2924 VNDISK_SET_LEN(&vnode, length);
2926 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2927 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2929 osi_Assert(lcode == sizeof(vnode));
2932 nt_sync(salvinfo->fileSysDevice);
2934 sync(); /* this is slow, but hopefully rarely called. We don't have
2935 * an open FD on the file itself to fsync.
2939 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
2941 /* make sure old directory file is really closed */
2942 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2943 FDH_REALLYCLOSE(fdP);
/* Release the old directory inode and switch the summary over to the new handle. */
2945 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2946 osi_Assert(code == 0);
2947 dir->dirHandle = newdir;
2951 * arguments for JudgeEntry.
/*
 * Argument bundle for JudgeEntry: a pointer to this struct is passed as the
 * opaque first argument (arock), and JudgeEntry reads both members from it.
 */
2953 struct judgeEntry_params {
2954 struct DirSummary *dir; /**< directory we're examining entries in */
2955 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/*
 * JudgeEntry -- examine one directory entry and repair or remove it.
 *
 * arock is a struct judgeEntry_params carrying the directory summary and
 * the salvage state; name and vnodeNumber (and the entry's unique, declared
 * on a line not visible here) describe the entry.
 *
 * Visible checks, in order:
 *   - entries whose vnode number has no VnodeEssence are deleted;
 *   - without AFS_NAMEI_ENV, entries whose vnode has inode number 0 are
 *     deleted;
 *   - entries naming an even vnode number whose essence has no count,
 *     unique or mode bits are reported as invalid;
 *   - a uniquifier mismatch is repaired by recreating the entry, or the
 *     entry is deleted when the vnode's unique is 0 or the directory is
 *     orphaned;
 *   - "." and ".." are rewritten when they point at the wrong FID;
 *   - names starting with ".__afs" are removed and their vnode is marked
 *     for deletion;
 *   - other entries are reported (suid/sgid, mount points, root-owned) and
 *     then either claimed by this directory or deleted when the vnode is
 *     already claimed elsewhere.
 *
 * Every modification goes through CopyOnWrite first.  All visible
 * modifications are skipped in Testing mode according to the log texts;
 * NOTE(review): the guarding conditions themselves are not in the visible
 * lines -- confirm.
 */
2959 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
2962 struct judgeEntry_params *params = arock;
2963 struct DirSummary *dir = params->dir;
2964 struct SalvInfo *salvinfo = params->salvinfo;
2965 struct VnodeEssence *vnodeEssence;
2966 afs_int32 dirOrphaned, todelete;
2968 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
2970 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
2971 if (vnodeEssence == NULL) {
2973 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2976 CopyOnWrite(salvinfo, dir);
2977 osi_Assert(Delete(&dir->dirHandle, name) == 0);
2982 #ifndef AFS_NAMEI_ENV
2983 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2984 * mount inode for the partition. If this inode were deleted, it would crash
2987 if (vnodeEssence->InodeNumber == 0) {
2988 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2990 CopyOnWrite(salvinfo, dir);
2991 osi_Assert(Delete(&dir->dirHandle, name) == 0);
2998 if (!(vnodeNumber & 1) && !Showmode
2999 && !(vnodeEssence->count || vnodeEssence->unique
3000 || vnodeEssence->modeBits)) {
3001 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
3002 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3003 vnodeNumber, unique,
3004 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3008 CopyOnWrite(salvinfo, dir);
3009 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3015 /* Check if the Uniquifiers match. If not, change the directory entry
3016 * so its unique matches the vnode unique. Delete if the unique is zero
3017 * or if the directory is orphaned.
3019 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3020 if (!vnodeEssence->unique
3021 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3022 /* This is an orphaned directory. Don't delete the . or ..
3023 * entry. Otherwise, it will get created in the next
3024 * salvage and deleted again here. So Just skip it.
3029 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3032 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3036 fid.Vnode = vnodeNumber;
3037 fid.Unique = vnodeEssence->unique;
3038 CopyOnWrite(salvinfo, dir);
3039 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3041 osi_Assert(Create(&dir->dirHandle, name, &fid) == 0);
3044 return 0; /* no need to continue */
3047 if (strcmp(name, ".") == 0) {
3048 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3051 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3053 CopyOnWrite(salvinfo, dir);
3054 osi_Assert(Delete(&dir->dirHandle, ".") == 0);
3055 fid.Vnode = dir->vnodeNumber;
3056 fid.Unique = dir->unique;
3057 osi_Assert(Create(&dir->dirHandle, ".", &fid) == 0);
3060 vnodeNumber = fid.Vnode; /* Get the new Essence */
3061 unique = fid.Unique;
3062 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3065 } else if (strcmp(name, "..") == 0) {
3068 struct VnodeEssence *dotdot;
3069 pa.Vnode = dir->parent;
3070 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3071 osi_Assert(dotdot != NULL); /* XXX Should not be assert */
3072 pa.Unique = dotdot->unique;
3074 pa.Vnode = dir->vnodeNumber;
3075 pa.Unique = dir->unique;
3077 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3079 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3081 CopyOnWrite(salvinfo, dir);
3082 osi_Assert(Delete(&dir->dirHandle, "..") == 0);
3083 osi_Assert(Create(&dir->dirHandle, "..", &pa) == 0);
3086 vnodeNumber = pa.Vnode; /* Get the new Essence */
3088 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3090 dir->haveDotDot = 1;
/* Entries named ".__afs..." are old unlink-while-referenced files: the entry is removed and the vnode queued for deletion. */
3091 } else if (strncmp(name, ".__afs", 6) == 0) {
3093 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3096 CopyOnWrite(salvinfo, dir);
3097 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3099 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3100 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3103 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3104 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/* A symlink with none of the 0111 mode bits is handled as a mount point: its target must start with '#' or '%' and end with '.', otherwise the 0111 bits are set so it becomes a plain symbolic link. */
3105 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3106 && !(vnodeEssence->modeBits & 0111)) {
3107 afs_sfsize_t nBytes;
3113 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3114 vnodeEssence->InodeNumber);
3117 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3121 size = FDH_SIZE(fdP);
3123 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3124 FDH_REALLYCLOSE(fdP);
3131 nBytes = FDH_PREAD(fdP, buf, size, 0);
3132 if (nBytes == size) {
3134 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3135 Log("Volume %u (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3136 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3137 Testing ? "would convert" : "converted");
3138 vnodeEssence->modeBits |= 0111;
3139 vnodeEssence->changed = 1;
3140 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3141 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3142 dir->name ? dir->name : "??", name, buf);
3144 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3145 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3147 FDH_REALLYCLOSE(fdP);
3150 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3151 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3152 if (vnodeIdToClass(vnodeNumber) == vLarge
3153 && vnodeEssence->name == NULL) {
3155 if ((n = (char *)malloc(strlen(name) + 1)))
3157 vnodeEssence->name = n;
3160 /* The directory entry points to the vnode. Check to see if the
3161 * vnode points back to the directory. If not, then let the
3162 * directory claim it (else it might end up orphaned). Vnodes
3163 * already claimed by another directory are deleted from this
3164 * directory: hardlinks to the same vnode are not allowed
3165 * from different directories.
3167 if (vnodeEssence->parent != dir->vnodeNumber) {
3168 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3169 /* Vnode does not point back to this directory.
3170 * Orphaned dirs cannot claim a file (it may belong to
3171 * another non-orphaned dir).
3174 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3176 vnodeEssence->parent = dir->vnodeNumber;
3177 vnodeEssence->changed = 1;
3179 /* Vnode was claimed by another directory */
3182 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3183 } else if (vnodeNumber == 1) {
3184 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3186 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3190 CopyOnWrite(salvinfo, dir);
3191 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3196 /* This directory claims the vnode */
3197 vnodeEssence->claimed = 1;
/* count starts as the vnode's link count; each accepted directory entry consumes one. */
3199 vnodeEssence->count--;
/*
 * DistilVnodeEssence -- scan the vnode index file of one class and record a
 * compact VnodeEssence entry for every allocated vnode.
 *
 * salvinfo  salvage job state; the results go to salvinfo->vnodeInfo[class]
 * rwVId     volume id used to open the index inode
 * class     vnode class whose index is scanned
 * ino       inode number of the index file
 * maxu      in/out: raised to the largest uniquifier seen in the index
 *
 * The vnodes table (and, for vLarge, the inodes table) is allocated with one
 * entry per index slot; allocation and I/O set-up failures trip osi_Assert.
 * volumeBlockCount and nAllocatedVnodes are recomputed from scratch.
 *
 * A directory vnode found outside the large index is cleared on disk.  The
 * whole slot is zeroed and written from the vnode buffer using
 * vcp->diskSize: vnode is a pointer, so sizeof(vnode) and &vnode would only
 * describe the pointer variable, not the on-disk vnode.
 */
3204 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3205 VnodeClass class, Inode ino, Unique * maxu)
3207 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3208 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3209 char buf[SIZEOF_LARGEDISKVNODE];
3210 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3212 StreamHandle_t *file;
3217 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3218 fdP = IH_OPEN(vip->handle);
3219 osi_Assert(fdP != NULL);
3220 file = FDH_FDOPEN(fdP, "r+");
3221 osi_Assert(file != NULL);
3222 size = OS_SIZE(fdP->fd_fd);
3223 osi_Assert(size != -1);
/* One slot is subtracted because the seek below skips the first diskSize bytes of the index file. */
3224 vip->nVnodes = (size / vcp->diskSize) - 1;
3225 if (vip->nVnodes > 0) {
3226 osi_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3227 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
3228 osi_Assert((vip->vnodes = (struct VnodeEssence *)
3229 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3230 if (class == vLarge) {
3231 osi_Assert((vip->inodes = (Inode *)
3232 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3241 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3242 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3243 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3244 nVnodes--, vnodeIndex++) {
3245 if (vnode->type != vNull) {
3246 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3247 afs_fsize_t vnodeLength;
3248 vip->nAllocatedVnodes++;
3249 vep->count = vnode->linkCount;
3250 VNDISK_GET_LEN(vnodeLength, vnode);
3251 vep->blockCount = nBlocks(vnodeLength);
3252 vip->volumeBlockCount += vep->blockCount;
3253 vep->parent = vnode->parent;
3254 vep->unique = vnode->uniquifier;
3255 if (*maxu < vnode->uniquifier)
3256 *maxu = vnode->uniquifier;
3257 vep->modeBits = vnode->modeBits;
3258 vep->InodeNumber = VNDISK_GET_INO(vnode);
3259 vep->type = vnode->type;
3260 vep->author = vnode->author;
3261 vep->owner = vnode->owner;
3262 vep->group = vnode->group;
3263 if (vnode->type == vDirectory) {
3264 if (class != vLarge) {
3265 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3266 vip->nAllocatedVnodes--;
/* Clear the full on-disk slot held in the vnode buffer, not just pointer-size bytes. */
3267 memset(vnode, 0, vcp->diskSize);
3268 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3269 vnodeIndexOffset(vcp, vnodeNumber),
3270 (char *)vnode, vcp->diskSize);
3271 salvinfo->VolumeChanged = 1;
3273 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName -- build the path of a directory vnode by recursion.
 *
 * When vp has a parent and a recorded name, and the parent's essence can be
 * found, the parent's path is produced first and then OS_DIRSEP and
 * vp->name are appended to path.
 *
 * NOTE(review): the base case and the return statements are not in the
 * visible lines; the recursive call is used as a truth value.  The strcat
 * calls are unbounded, so path must be large enough for the full path --
 * confirm against the caller.
 */
3282 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3285 struct VnodeEssence *parentvp;
3291 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3292 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3293 strcat(path, OS_DIRSEP);
3294 strcat(path, vp->name);
3300 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3301 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
3304 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3306 struct VnodeEssence *vep;
3309 return (1); /* Vnode zero does not exist */
3311 return (0); /* The root dir vnode is always claimed */
3312 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3313 if (!vep || !vep->claimed)
3314 return (1); /* Vnode is not claimed - it is orphaned */
/* This vnode is claimed, so the answer depends on its chain of parents. */
3316 return (IsVnodeOrphaned(salvinfo, vep->parent));
/*
 * SalvageDir -- salvage the directory stored at slot i of the large vnode
 * table, after first salvaging its parent directory.
 *
 * A slot is handled once (its salvaged flag is set on entry) and skipped
 * when no inode is recorded for it.  Vnode 1 gets its parent forced to 0;
 * for other directories the parent is salvaged recursively when it has not
 * been salvaged yet.  The directory is then checked with DirOK (or treated
 * as bad when RebuildDirs is set and Testing is off), rebuilt with
 * CopyAndSalvage when bad, and every entry is passed to JudgeEntry through
 * EnumerateDir.  If the directory was copied during judging, the old inode
 * is released and the new inode number recorded.  The summary of vnode 1
 * with unique 1 is copied to *rootdir afterwards.
 *
 * dir, dirHandle and path are static and therefore shared by all calls; the
 * recursive call for the parent is made before they are filled in.
 */
3320 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3321 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3322 struct DirSummary *rootdir, int *rootdirfound)
3324 static struct DirSummary dir;
3325 static struct DirHandle dirHandle;
3326 struct VnodeEssence *parent;
3327 static char path[MAXPATHLEN];
3330 if (dirVnodeInfo->vnodes[i].salvaged)
3331 return; /* already salvaged */
3334 dirVnodeInfo->vnodes[i].salvaged = 1;
3336 if (dirVnodeInfo->inodes[i] == 0)
3337 return; /* Not allocated to a directory */
3339 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3340 if (dirVnodeInfo->vnodes[i].parent) {
3341 Log("Bad parent, vnode 1; %s...\n",
3342 (Testing ? "skipping" : "salvaging"));
3343 dirVnodeInfo->vnodes[i].parent = 0;
3344 dirVnodeInfo->vnodes[i].changed = 1;
/* Salvage the parent directory first when it has not been processed yet. */
3347 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3348 if (parent && parent->salvaged == 0)
3349 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3350 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3351 rootdir, rootdirfound);
3354 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3355 dir.unique = dirVnodeInfo->vnodes[i].unique;
3358 dir.parent = dirVnodeInfo->vnodes[i].parent;
3359 dir.haveDot = dir.haveDotDot = 0;
3360 dir.ds_linkH = alinkH;
3361 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3362 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
/* With RebuildDirs set and Testing off the directory is treated as bad without calling DirOK. */
3364 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3367 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3368 (Testing ? "skipping" : "salvaging"));
3371 CopyAndSalvage(salvinfo, &dir);
3373 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3376 dirHandle = dir.dirHandle;
3379 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3380 &dirVnodeInfo->vnodes[i], path);
3383 /* If enumeration failed for random reasons, we will probably delete
3384 * too much stuff, so we guard against this instead.
3386 struct judgeEntry_params judge_params;
3387 judge_params.salvinfo = salvinfo;
3388 judge_params.dir = &dir;
3390 osi_Assert(EnumerateDir(&dirHandle, JudgeEntry, &judge_params) == 0);
3393 /* Delete the old directory if it was copied in order to salvage.
3394 * CopyOnWrite has written the new inode # to the disk, but we still
3395 * have the old one in our local structure here. Thus, we idec the
3399 if (dir.copied && !Testing) {
3400 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3401 osi_Assert(code == 0);
3402 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3405 /* Remember rootdir DirSummary _after_ it has been judged */
3406 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3407 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3415 * Get a new FID that can be used to create a new file.
3417 * @param[in] volHeader vol header for the volume
3418 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3419 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3420 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3421 * updated to the new max unique if we create a new
/*
 * GetNewFID -- pick a vnode number and uniquifier for a file to be created.
 *
 * The vnode table of the given class is searched for a slot whose type is
 * vNull; when the search runs off the end, the table is grown by one entry
 * and the new slot is marked vNull.  afid->Vnode is derived from the slot
 * index.  afid->Unique is *maxunique + 1 + 2000 when the header's
 * uniquifier is not above *maxunique; otherwise the header's uniquifier is
 * used, stored in *maxunique, and then incremented in the header.
 *
 * NOTE(review): the realloc result is stored and used with no NULL check in
 * the visible lines -- confirm.
 */
3425 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3426 VnodeClass class, AFSFid *afid, Unique *maxunique)
/* Look for an unused slot in the existing table first. */
3429 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3430 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
3434 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3435 /* no free vnodes; make a new one */
3436 salvinfo->vnodeInfo[class].nVnodes++;
3437 salvinfo->vnodeInfo[class].vnodes =
3438 realloc(salvinfo->vnodeInfo[class].vnodes,
3439 sizeof(struct VnodeEssence) * (i+1));
3441 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
3444 afid->Vnode = bitNumberToVnodeNumber(i, class);
3446 if (volHeader->uniquifier < (*maxunique + 1)) {
3447 /* header uniq is bad; it will get bumped by 2000 later */
3448 afid->Unique = *maxunique + 1 + 2000;
3451 /* header uniq seems okay; just use that */
3452 afid->Unique = *maxunique = volHeader->uniquifier++;
3457 * Create a vnode for a README file explaining not to use a recreated-root vol.
3459 * @param[in] volHeader vol header for the volume
3460 * @param[in] alinkH ihandle for i/o for the volume
3461 * @param[in] vid volume id
3462 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3463 * updated to the new max unique if we create a new
3465 * @param[out] afid FID for the new readme vnode
3466 * @param[out] ainode the inode for the new readme file
3468 * @return operation status
/*
 * CreateReadme -- create the README file that is placed in a recreated
 * root directory.
 *
 * Steps visible in this listing:
 *   1. GetNewFID() picks a small-vnode FID; `vep` points at the matching
 *      VnodeEssence slot.
 *   2. IH_CREATE() makes the data inode and IH_IWRITE() stores the readme
 *      text, without its trailing NUL.
 *   3. A zeroed SIZEOF_SMALLDISKVNODE buffer is filled in as a vFile vnode
 *      and written to the small vnode index at the FID's offset.
 *   4. The in-memory VnodeEssence and the vSmall counters are updated and
 *      the new inode is returned through *ainode.
 *
 * The IH_DEC() at the end is logged as "(recovery)"; presumably it is the
 * error path that drops the inode created in step 2.  The labels and
 * branches around it are not part of this listing -- confirm.
 */
3473 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3474 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3478 struct VnodeDiskObject *rvnode = NULL;
3480 IHandle_t *readmeH = NULL;
3481 struct VnodeEssence *vep;
3483 time_t now = time(NULL);
3485 /* Try to make the note brief, but informative. Only administrators should
3486 * be able to read this file at first, so we can hopefully assume they
3487 * know what AFS is, what a volume is, etc. */
3489 "This volume has been salvaged, but has lost its original root directory.\n"
3490 "The root directory that exists now has been recreated from orphan files\n"
3491 "from the rest of the volume. This recreated root directory may interfere\n"
3492 "with old cached data on clients, and there is no way the salvager can\n"
3493 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3494 "use this volume, but only copy the salvaged data to a new volume.\n"
3495 "Continuing to use this volume as it exists now may cause some clients to\n"
3496 "behave oddly when accessing this volume.\n"
3497 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3498 /* ^ the person reading this probably just lost some data, so they could
3499 * use some cheering up. */
3501 /* -1 for the trailing NUL */
3502 length = sizeof(readme) - 1;
3504 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
3506 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3508 /* create the inode and write the contents */
3509 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3510 salvinfo->fileSysPath, 0, vid,
3511 afid->Vnode, afid->Unique, 1);
3512 if (!VALID_INO(readmeinode)) {
3513 Log("CreateReadme: readme IH_CREATE failed\n");
3517 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3518 bytes = IH_IWRITE(readmeH, 0, readme, length);
3519 IH_RELEASE(readmeH);
3521 if (bytes != length) {
/* NOTE(review): the expected size printed below is sizeof(readme), which is
 * one more than the `length` that was requested from IH_IWRITE above. */
3522 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3523 (int)sizeof(readme));
3527 /* create the vnode and write it out */
3528 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
/* NOTE(review): the message below is prefixed "CreateRootDir" although it is
 * emitted from CreateReadme. */
3530 Log("CreateRootDir: error alloc'ing memory\n");
3534 rvnode->type = vFile;
3536 rvnode->modeBits = 0777;
3537 rvnode->linkCount = 1;
3538 VNDISK_SET_LEN(rvnode, length);
3539 rvnode->uniquifier = afid->Unique;
3540 rvnode->dataVersion = 1;
3541 VNDISK_SET_INO(rvnode, readmeinode);
3542 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3547 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
3549 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3550 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3551 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3553 if (bytes != SIZEOF_SMALLDISKVNODE) {
3554 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3555 (int)SIZEOF_SMALLDISKVNODE);
3559 /* update VnodeEssence for new readme vnode */
3560 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
3562 vep->blockCount = nBlocks(length);
3563 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3564 vep->parent = rvnode->parent;
3565 vep->unique = rvnode->uniquifier;
3566 vep->modeBits = rvnode->modeBits;
3567 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3568 vep->type = rvnode->type;
3569 vep->author = rvnode->author;
3570 vep->owner = rvnode->owner;
3571 vep->group = rvnode->group;
3581 *ainode = readmeinode;
3586 if (IH_DEC(alinkH, readmeinode, vid)) {
3587 Log("CreateReadme (recovery): IH_DEC failed\n");
3599 * create a root dir for a volume that lacks one.
3601 * @param[in] volHeader vol header for the volume
3602 * @param[in] alinkH ihandle for disk access for this volume group
3603 * @param[in] vid volume id we're dealing with
3604 * @param[out] rootdir populated with info about the new root dir
3605 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3606 * updated to the new max unique if we create a new
3609 * @return operation status
/*
 * CreateRootDir -- build a replacement root directory (vnode 1, unique 1)
 * for a volume that lacks one.
 *
 * Steps visible in this listing:
 *   - give up (with a log message) if neither the large nor the small vnode
 *     array exists, or if the slot for vnode 1 is not vNull;
 *   - if there is no large-vnode array, allocate one-element vnodes[] and
 *     inodes[] arrays so vnode 1 has a slot;
 *   - CreateReadme() makes the README file that the new directory will list;
 *   - IH_CREATE() makes the directory inode, MakeDir() initialises it and
 *     Create() adds the "README.ROOTDIR" entry;
 *   - a zeroed large disk vnode is filled in, with an ACL that grants only
 *     read and lookup to system:administrators, and written to the large
 *     vnode index at the offset of vnode 1;
 *   - the VnodeEssence for vnode 1 and the caller's DirSummary (`rootdir`)
 *     are populated.
 *
 * The two IH_DEC() calls at the end are guarded by decroot/decreadme and
 * logged as "(recovery)"; the code that sets those flags and the jumps that
 * reach this point are not part of this listing.
 */
3614 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3615 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
3619 int decroot = 0, decreadme = 0;
3620 AFSFid did, readmeid;
3623 struct VnodeDiskObject *rootvnode = NULL;
3624 struct acl_accessList *ACL;
3627 struct VnodeEssence *vep;
3629 time_t now = time(NULL);
3631 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3632 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3636 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3637 /* We don't have any large vnodes in the volume; allocate room
3638 * for one so we can recreate the root dir */
3639 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3640 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3641 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3643 osi_Assert(salvinfo->vnodeInfo[vLarge].vnodes);
3644 osi_Assert(salvinfo->vnodeInfo[vLarge].inodes);
/* vep and ip address the in-memory slots that belong to vnode number 1. */
3647 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3648 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3649 if (vep->type != vNull) {
3650 Log("Not creating new root dir; existing vnode 1 is non-null\n");
3654 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3655 &readmeinode) != 0) {
3660 /* set the DV to a very high number, so it is unlikely that we collide
3661 * with a cached DV */
3664 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3666 if (!VALID_INO(rootinode)) {
3667 Log("CreateRootDir: IH_CREATE failed\n");
3672 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3673 rootinode, &salvinfo->VolumeChanged);
/* `did` is passed as both FID arguments of MakeDir(); its initialisation is
 * not part of this listing. */
3677 if (MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3678 Log("CreateRootDir: MakeDir failed\n");
3681 if (Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3682 Log("CreateRootDir: Create failed\n");
3686 length = Length(&rootdir->dirHandle);
3687 DZap((void *)&rootdir->dirHandle);
3689 /* create the new root dir vnode */
3690 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
3692 Log("CreateRootDir: malloc failed\n");
3696 /* only give 'rl' permissions to 'system:administrators'. We do this to
3697 * try to catch the attention of an administrator, that they should not
3698 * be writing to this directory or continue to use it. */
3699 ACL = VVnodeDiskACL(rootvnode);
3700 ACL->size = sizeof(struct acl_accessList);
3701 ACL->version = ACL_ACLVERSION;
3705 ACL->entries[0].id = -204; /* system:administrators */
3706 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
3708 rootvnode->type = vDirectory;
3709 rootvnode->cloned = 0;
3710 rootvnode->modeBits = 0777;
3711 rootvnode->linkCount = 2;
3712 VNDISK_SET_LEN(rootvnode, length);
3713 rootvnode->uniquifier = 1;
3714 rootvnode->dataVersion = dv;
3715 VNDISK_SET_INO(rootvnode, rootinode);
3716 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3717 rootvnode->author = 0;
3718 rootvnode->owner = 0;
3719 rootvnode->parent = 0;
3720 rootvnode->group = 0;
3721 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3723 /* write it out to disk */
3724 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3725 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3726 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3728 if (bytes != SIZEOF_LARGEDISKVNODE) {
3729 /* just cast to int and don't worry about printing real 64-bit ints;
3730 * a large disk vnode isn't anywhere near the 32-bit limit */
3731 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3732 (int)SIZEOF_LARGEDISKVNODE);
3736 /* update VnodeEssence for the new root vnode */
3737 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3739 vep->blockCount = nBlocks(length);
3740 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3741 vep->parent = rootvnode->parent;
3742 vep->unique = rootvnode->uniquifier;
3743 vep->modeBits = rootvnode->modeBits;
3744 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3745 vep->type = rootvnode->type;
3746 vep->author = rootvnode->author;
3747 vep->owner = rootvnode->owner;
3748 vep->group = rootvnode->group;
3758 /* update DirSummary for the new root vnode */
3759 rootdir->vnodeNumber = 1;
3760 rootdir->unique = 1;
3761 rootdir->haveDot = 1;
3762 rootdir->haveDotDot = 1;
3763 rootdir->rwVid = vid;
3764 rootdir->copied = 0;
3765 rootdir->parent = 0;
/* NOTE(review): the strdup() result is stored with no visible NULL check. */
3766 rootdir->name = strdup(".");
/* vname aliases the name buffer inside the caller's volHeader; no copy is
 * made. */
3767 rootdir->vname = volHeader->name;
3768 rootdir->ds_linkH = alinkH;
3775 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3776 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3778 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3779 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3789 * salvage a volume group.
3791 * @param[in] salvinfo information for the current salvage job
3792 * @param[in] rwIsp inode summary for rw volume
3793 * @param[in] alinkH link table inode handle
3795 * @return operation status
/*
 * SalvageVolume -- salvage the read-write volume described by rwIsp.
 *
 * Phases visible in this listing:
 *   - read the VolumeDiskData header and assert that it is sane;
 *   - DistilVnodeEssence() for the large and small vnode indexes, which
 *     also feeds maxunique;
 *   - SalvageDir() for every large-vnode slot;
 *   - if no root directory was found, orphans == ORPH_ATTACH and Testing is
 *     off, try CreateRootDir();
 *   - walk every vnode: in-use, unclaimed, non-root vnodes are orphans and,
 *     when attaching is enabled, are entered in the root directory under an
 *     __ORPHANDIR__ or __ORPHANFILE__ prefixed name;
 *   - rewrite each vnode whose essence is marked changed or has a non-zero
 *     count, removing orphans when requested;
 *   - fix up filecount, diskused and uniquifier in the header, optionally
 *     ask the fileserver to break callbacks, clear the in-use and
 *     needs-salvage flags and write the header back.
 *
 * NOTE(review): this listing omits short source lines (braces, small
 * declarations and statements, some comment terminators), so branch
 * structure is only partly visible.
 */
3799 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3801 /* This routine, for now, will only be called for read-write volumes */
3803 int BlocksInVolume = 0, FilesInVolume = 0;
3805 struct DirSummary rootdir, oldrootdir;
3806 struct VnodeInfo *dirVnodeInfo;
3807 struct VnodeDiskObject vnode;
3808 VolumeDiskData volHeader;
3810 int orphaned, rootdirfound = 0;
3811 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3812 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3813 struct VnodeEssence *vep;
3816 afs_sfsize_t nBytes;
3818 VnodeId LFVnode, ThisVnode;
3819 Unique LFUnique, ThisUnique;
/* Load the volume header; a short read, bad magic or a pending DESTROY_ME
 * flag is treated as fatal via osi_Assert. */
3823 vid = rwIsp->volSummary->header.id;
3824 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3825 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3826 osi_Assert(nBytes == sizeof(volHeader));
3827 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3828 osi_Assert(volHeader.destroyMe != DESTROY_ME);
3829 /* (should not have gotten this far with DESTROY_ME flag still set!) */
3831 DistilVnodeEssence(salvinfo, vid, vLarge,
3832 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
3833 DistilVnodeEssence(salvinfo, vid, vSmall,
3834 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
/* Salvage every directory slot; SalvageDir fills in rootdir and
 * rootdirfound through the pointers passed here. */
3836 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
3837 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3838 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
3839 &rootdir, &rootdirfound);
3842 nt_sync(salvinfo->fileSysDevice);
3844 sync(); /* This used to be done lower level, for every dir */
/* A root directory is only recreated when orphans are to be attached and
 * this is not a dry run. */
3851 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
3853 Log("Cannot find root directory for volume %lu; attempting to create "
3854 "a new one\n", afs_printable_uint32_lu(vid));
3856 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
3861 salvinfo->VolumeChanged = 1;
3865 /* Parse each vnode looking for orphaned vnodes and
3866 * connect them to the tree as orphaned (if requested).
3868 oldrootdir = rootdir;
3869 for (class = 0; class < nVNODECLASSES; class++) {
3870 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
3871 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
3872 ThisVnode = bitNumberToVnodeNumber(v, class);
3873 ThisUnique = vep->unique;
3875 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3876 continue; /* Ignore unused, claimed, and root vnodes */
3878 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3879 * entry in this vnode had incremented the parent link count (In
3880 * JudgeEntry()). We need to go to the parent and decrement that
3881 * link count. But if the parent's unique is zero, then the parent
3882 * link count was not incremented in JudgeEntry().
3884 if (class == vLarge) { /* directory vnode */
3885 pv = vnodeIdToBitNumber(vep->parent);
3886 if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
3887 if (vep->parent == 1 && newrootdir) {
3888 /* this vnode's parent was the volume root, and
3889 * we just created the volume root. So, the parent
3890 * dir didn't exist during JudgeEntry, so the link
3891 * count was not inc'd there, so don't dec it here.
3897 salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
3903 continue; /* If no rootdir, can't attach orphaned files */
3905 /* Here we attach orphaned files and directories into the
3906 * root directory, LVVnode, making sure link counts stay correct.
3908 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3909 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3910 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3912 /* Update this orphaned vnode's info. Its parent info and
3913 * link count (do for orphaned directories and files).
3915 vep->parent = LFVnode; /* Parent is the root dir */
3916 vep->unique = LFUnique;
3919 vep->count--; /* Inc link count (root dir will pt to it) */
3921 /* If this orphaned vnode is a directory, change '..'.
3922 * The name of the orphaned dir/file is unknown, so we
3923 * build a unique name. No need to CopyOnWrite the directory
3924 * since it is not connected to tree in BK or RO volume and
3925 * won't be visible there.
3927 if (class == vLarge) {
3931 /* Remove and recreate the ".." entry in this orphaned directory */
3932 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
3933 salvinfo->vnodeInfo[class].inodes[v],
3934 &salvinfo->VolumeChanged);
3936 pa.Unique = LFUnique;
3937 osi_Assert(Delete(&dh, "..") == 0);
3938 osi_Assert(Create(&dh, "..", &pa) == 0);
3940 /* The original parent's link count was decremented above.
3941 * Here we increment the new parent's link count.
3943 pv = vnodeIdToBitNumber(LFVnode);
3944 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
3948 /* Go to the root dir and add this entry. The link count of the
3949 * root dir was incremented when ".." was created. Try 10 times.
3951 for (j = 0; j < 10; j++) {
3952 pa.Vnode = ThisVnode;
3953 pa.Unique = ThisUnique;
3955 (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
3957 vLarge) ? "__ORPHANDIR__" :
3958 "__ORPHANFILE__"), ThisVnode,
3961 CopyOnWrite(salvinfo, &rootdir);
3962 code = Create(&rootdir.dirHandle, npath, &pa);
3966 ThisUnique += 50; /* Try creating a different file */
3968 osi_Assert(code == 0);
3969 Log("Attaching orphaned %s to volume's root dir as %s\n",
3970 ((class == vLarge) ? "directory" : "file"), npath);
3972 } /* for each vnode in the class */
3973 } /* for each class of vnode */
3975 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
/* oldrootdir is the snapshot taken before the orphan pass, so a change in
 * .copied means the orphan pass itself triggered the copy. */
3977 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
3979 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
3981 osi_Assert(code == 0);
3982 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
3985 DFlush(); /* Flush the changes */
/* Without a root directory nothing could be attached; orphans is switched
 * to ORPH_IGNORE for the rest of this function. */
3986 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
3987 Log("Cannot attach orphaned files and directories: Root directory not found\n");
3988 orphans = ORPH_IGNORE;
3991 /* Write out all changed vnodes. Orphaned files and directories
3992 * will get removed here also (if requested).
3994 for (class = 0; class < nVNODECLASSES; class++) {
3995 afs_sfsize_t nVnodes = salvinfo->vnodeInfo[class].nVnodes;
3996 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3997 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
3998 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
3999 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
4000 for (i = 0; i < nVnodes; i++) {
4001 struct VnodeEssence *vnp = &vnodes[i];
4002 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
4004 /* If the vnode is good but is unclaimed (not listed in
4005 * any directory entries), then it is orphaned.
4008 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
4009 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
4013 if (vnp->changed || vnp->count) {
4016 IH_IREAD(salvinfo->vnodeInfo[class].handle,
4017 vnodeIndexOffset(vcp, vnodeNumber),
4018 (char *)&vnode, sizeof(vnode));
4019 osi_Assert(nBytes == sizeof(vnode));
4021 vnode.parent = vnp->parent;
4022 oldCount = vnode.linkCount;
/* vnp->count is a correction that is subtracted from the on-disk link
 * count, so a negative count raises the stored link count. */
4023 vnode.linkCount = vnode.linkCount - vnp->count;
4026 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4028 if (!vnp->todelete) {
4029 /* Orphans should have already been attached (if requested) */
4030 osi_Assert(orphans != ORPH_ATTACH);
4031 oblocks += vnp->blockCount;
4034 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4036 BlocksInVolume -= vnp->blockCount;
4038 if (VNDISK_GET_INO(&vnode)) {
4040 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4041 osi_Assert(code == 0);
/* A removed vnode is written back as an all-zero disk vnode. */
4043 memset(&vnode, 0, sizeof(vnode));
4045 } else if (vnp->count) {
4047 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4050 vnode.modeBits = vnp->modeBits;
4053 vnode.dataVersion++;
4056 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4057 vnodeIndexOffset(vcp, vnodeNumber),
4058 (char *)&vnode, sizeof(vnode));
4059 osi_Assert(nBytes == sizeof(vnode));
4061 salvinfo->VolumeChanged = 1;
4065 if (!Showmode && ofiles) {
4066 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4068 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
/* Release the per-vnode name strings held in the VnodeEssence arrays. */
4072 for (class = 0; class < nVNODECLASSES; class++) {
4073 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4074 for (i = 0; i < vip->nVnodes; i++)
4075 if (vip->vnodes[i].name)
4076 free(vip->vnodes[i].name);
4083 /* Set correct resource utilization statistics */
4084 volHeader.filecount = FilesInVolume;
4085 volHeader.diskused = BlocksInVolume;
4087 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
4088 if (volHeader.uniquifier < (maxunique + 1)) {
4090 Log("Volume uniquifier is too low; fixed\n");
4091 /* Plus 2,000 in case there are workstations out there with
4092 * cached vnodes that have since been deleted
4094 volHeader.uniquifier = (maxunique + 1 + 2000);
4098 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4099 "Only use this salvaged volume to copy data to another volume; "
4100 "do not continue to use this volume (%lu) as-is.\n",
4101 afs_printable_uint32_lu(vid));
4104 #ifdef FSSYNC_BUILD_CLIENT
4105 if (!Testing && salvinfo->VolumeChanged && salvinfo->useFSYNC) {
4106 afs_int32 fsync_code;
4108 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4110 Log("Error trying to tell the fileserver to break callbacks for "
4111 "changed volume %lu; error code %ld\n",
4112 afs_printable_uint32_lu(vid),
4113 afs_printable_int32_ld(fsync_code));
4115 salvinfo->VolumeChanged = 0;
4118 #endif /* FSSYNC_BUILD_CLIENT */
4120 /* Turn off the inUse bit; the volume's been salvaged! */
4121 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4122 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4123 volHeader.inService = 1; /* allow service again */
/* needsCallback records whether VolumeChanged is still set at this point;
 * the flag is then reset for the next volume. */
4124 volHeader.needsCallback = (salvinfo->VolumeChanged != 0);
4125 volHeader.dontSalvage = DONT_SALVAGE;
4126 salvinfo->VolumeChanged = 0;
4128 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4129 osi_Assert(nBytes == sizeof(volHeader));
4132 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
4133 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
4134 FilesInVolume, BlocksInVolume);
/* Drop the vnode-index handles used during this salvage. */
4137 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4138 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
/*
 * ClearROInUseBit -- rewrite a volume header with the "in use" and
 * "needs salvage" flags cleared.
 *
 * Reads the VolumeDiskData through summary->volumeInfoHandle, asserts the
 * read size and magic, sets inUse = 0, needsSalvaged = 0, inService = 1 and
 * dontSalvage = DONT_SALVAGE, then writes the header back.  Short reads or
 * writes are fatal via osi_Assert.
 */
4144 ClearROInUseBit(struct VolumeSummary *summary)
4146 IHandle_t *h = summary->volumeInfoHandle;
4147 afs_sfsize_t nBytes;
4149 VolumeDiskData volHeader;
4151 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4152 osi_Assert(nBytes == sizeof(volHeader));
4153 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
4154 volHeader.inUse = 0;
4155 volHeader.needsSalvaged = 0;
4156 volHeader.inService = 1;
4157 volHeader.dontSalvage = DONT_SALVAGE;
4159 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4160 osi_Assert(nBytes == sizeof(volHeader));
4165 * Possibly delete the volume.
4167 * deleteMe - Always do so, only a partial volume.
/*
 * MaybeZapVolume -- delete a volume's on-disk header when the volume is
 * read-only or `deleteMe` is set; otherwise report the failed salvage.
 *
 * In the delete case, and only when the volume summary has a fileName, a
 * message explaining the reason is logged, VDestroyVolumeDiskHeader() is
 * called, and the header file is unlinked by path (ENOENT is tolerated).
 * In the other case, when `check` is zero, the failure is logged with
 * `message` and Abort() is called.
 *
 * NOTE(review): the conditions that choose between the log messages, and
 * the declaration of `path`, are not part of this listing.
 */
4170 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4171 char *message, int deleteMe, int check)
4173 if (readOnly(isp) || deleteMe) {
4174 if (isp->volSummary && isp->volSummary->fileName) {
4177 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
4179 Log("It will be deleted on this server (you may find it elsewhere)\n");
4182 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
4184 Log("it will be deleted instead. It should be recloned.\n");
/* NOTE(review): unbounded sprintf() into `path`; confirm the buffer can hold
 * fileSysPath + separator + fileName. */
4189 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, isp->volSummary->fileName);
4191 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4193 Log("Error %ld destroying volume disk header for volume %lu\n",
4194 afs_printable_int32_ld(code),
4195 afs_printable_uint32_lu(isp->volumeId));
4198 /* make sure we actually delete the fileName file; ENOENT
4199 * is fine, since VDestroyVolumeDiskHeader probably already
4201 if (unlink(path) && errno != ENOENT) {
4202 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4206 } else if (!check) {
4207 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
4209 Abort("Salvage of volume %u aborted\n", isp->volumeId);
4213 #ifdef AFS_DEMAND_ATTACH_FS
4215 * Locks a volume on disk for salvaging.
4217 * @param[in] volumeId volume ID to lock
4219 * @return operation status
4221 * @retval -1 volume lock raced with a fileserver restart; all volumes must
4222 * be checked out and locked again
/*
 * LockVolume -- take the on-disk lock for a volume before salvaging it and
 * mark the volume header as in use by this program.
 *
 * Visible steps:
 *   - VLockVolumeByIdNB() with the lock type from VVolLockType(); EBUSY and
 *     other failures end in Abort();
 *   - FSYNC_VerifyCheckout(); SYNC_DENIED is the "retry checking out
 *     volumes" case, any other non-OK code ends in Abort();
 *   - the volume header is read and, if it is readable and has the right
 *     magic, rewritten with inUse = programType.  A failed rewrite is fatal.
 *
 * NOTE(review): the return statements and the branches taken when the
 * header cannot be read are not part of this listing.
 */
4227 LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
4232 /* should always be WRITE_LOCK, but keep the lock-type logic all
4233 * in one place, in VVolLockType. Params will be ignored, but
4234 * try to provide what we're logically doing. */
4235 locktype = VVolLockType(V_VOLUPD, 1);
4237 code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
4239 if (code == EBUSY) {
4240 Abort("Someone else appears to be using volume %lu; Aborted\n",
4241 afs_printable_uint32_lu(volumeId));
4243 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4244 afs_printable_int32_ld(code),
4245 afs_printable_uint32_lu(volumeId));
4248 code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPathName, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4249 if (code == SYNC_DENIED) {
4250 /* need to retry checking out volumes */
4253 if (code != SYNC_OK) {
4254 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4255 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4258 /* set inUse = programType in the volume header to ensure that nobody
4259 * tries to use this volume again without salvaging, if we somehow crash
4260 * or otherwise exit before finishing the salvage.
4264 struct VolumeHeader header;
4265 struct VolumeDiskHeader diskHeader;
4266 struct VolumeDiskData volHeader;
4268 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
4273 DiskToVolumeHeader(&header, &diskHeader);
4275 IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
4276 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4277 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4283 volHeader.inUse = programType;
4285 /* If we can't re-write the header, bail out and error. We don't
4286 * assert when reading the header, since it's possible the
4287 * header isn't really there (when there's no data associated
4288 * with the volume; we just delete the vol header file in that
4289 * case). But if it's there enough that we can read it, but
4290 * somehow we cannot write to it to signify we're salvaging it,
4291 * we've got a big problem and we cannot continue. */
4292 osi_Assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
4299 #endif /* AFS_DEMAND_ATTACH_FS */
/*
 * AskOffline -- ask the fileserver, via FSYNC_VolOp(FSYNC_VOL_OFF), to take
 * a volume offline so it can be salvaged.
 *
 * The request is attempted up to three times.  SYNC_DENIED and
 * SYNC_BAD_COMMAND are logged and end in Abort(); other failures are logged,
 * the FSYNC client is shut down with FSYNC_clientFinis() and the loop
 * continues.  If the code is still not SYNC_OK after the loop, the salvage
 * is aborted.
 *
 * NOTE(review): the message variants are selected with DEMAND_ATTACH_ENABLE,
 * whereas other parts of this file test AFS_DEMAND_ATTACH_FS -- confirm that
 * both macros are meant to exist.
 */
4302 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
4307 memset(&res, 0, sizeof(res));
4309 for (i = 0; i < 3; i++) {
4310 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4311 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4313 if (code == SYNC_OK) {
4315 } else if (code == SYNC_DENIED) {
4316 #ifdef DEMAND_ATTACH_ENABLE
4317 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4319 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4321 Abort("Salvage aborted\n");
4322 } else if (code == SYNC_BAD_COMMAND) {
4323 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
4325 #ifdef DEMAND_ATTACH_ENABLE
4326 Log("AskOffline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
4328 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4330 Abort("Salvage aborted\n");
4333 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4334 FSYNC_clientFinis();
4338 if (code != SYNC_OK) {
4339 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4340 Abort("Salvage aborted\n");
/*
 * AskOnline -- ask the fileserver, via FSYNC_VolOp(FSYNC_VOL_ON), to bring a
 * volume back online.
 *
 * The request is attempted up to three times.  Unlike AskOffline, none of
 * the visible failure branches calls Abort(): each outcome is only logged,
 * and the generic failure branch also calls FSYNC_clientFinis().
 *
 * NOTE(review): loop exits (break statements) and any reconnect calls are
 * not part of this listing.
 */
4345 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4349 for (i = 0; i < 3; i++) {
4350 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4351 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4353 if (code == SYNC_OK) {
4355 } else if (code == SYNC_DENIED) {
4356 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4357 } else if (code == SYNC_BAD_COMMAND) {
4358 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4360 #ifdef DEMAND_ATTACH_ENABLE
4361 Log("AskOnline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
4363 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4368 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
4369 FSYNC_clientFinis();
/*
 * CopyInode -- copy the contents of inode1 into inode2 on `device`.
 *
 * Both inodes are opened through IH_INIT/IH_OPEN using `rwvolume`, and the
 * data is moved in buf-sized chunks with FDH_PREAD/FDH_PWRITE at the running
 * offset `size`.  A short write, or a read that ends with anything other
 * than 0, is fatal via osi_Assert.  Both descriptors are closed with
 * FDH_REALLYCLOSE.
 *
 * NOTE(review): srcFdP is asserted non-NULL, but destFdP is passed to
 * FDH_PWRITE with no corresponding check in the lines between IH_OPEN and
 * the copy loop -- confirm whether a failed open of inode2 can reach here.
 * The advance of `size` and the release of srcH/destH are not part of this
 * listing.
 */
4376 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4378 /* Volume parameter is passed in case iopen is upgraded in future to
4379 * require a volume Id to be passed
4382 IHandle_t *srcH, *destH;
4383 FdHandle_t *srcFdP, *destFdP;
4385 afs_foff_t size = 0;
4387 IH_INIT(srcH, device, rwvolume, inode1);
4388 srcFdP = IH_OPEN(srcH);
4389 osi_Assert(srcFdP != NULL);
4390 IH_INIT(destH, device, rwvolume, inode2);
4391 destFdP = IH_OPEN(destH);
4392 while ((nBytes = FDH_PREAD(srcFdP, buf, sizeof(buf), size)) > 0) {
4393 osi_Assert(FDH_PWRITE(destFdP, buf, nBytes, size) == nBytes);
4396 osi_Assert(nBytes == 0);
4397 FDH_REALLYCLOSE(srcFdP);
4398 FDH_REALLYCLOSE(destFdP);
/*
 * PrintInodeList -- debugging aid that logs every ViceInodeInfo record in
 * the inode file open on salvinfo->inodeFd.
 *
 * The file size from afs_fstat() determines both the buffer size and the
 * record count; the whole file is read in one read() call and each record is
 * logged with its inode number, link count, byte count and four params.
 * fstat, malloc and read failures are fatal via osi_Assert.
 *
 * NOTE(review): no free(buf) is visible in this listing -- confirm that the
 * buffer is released.
 */
4405 PrintInodeList(struct SalvInfo *salvinfo)
4407 struct ViceInodeInfo *ip;
4408 struct ViceInodeInfo *buf;
4409 struct afs_stat status;
4413 osi_Assert(afs_fstat(salvinfo->inodeFd, &status) == 0);
4414 buf = (struct ViceInodeInfo *)malloc(status.st_size);
4415 osi_Assert(buf != NULL);
4416 nInodes = status.st_size / sizeof(struct ViceInodeInfo);
4417 osi_Assert(read(salvinfo->inodeFd, buf, status.st_size) == status.st_size);
4418 for (ip = buf; nInodes--; ip++) {
4419 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
4420 PrintInode(stmp, ip->inodeNumber), ip->linkCount,
4421 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
4422 ip->u.param[2], ip->u.param[3]);
/*
 * PrintInodeSummary -- debugging aid that logs one line per entry of
 * salvinfo->inodeSummary (nVolumesInInodeFile entries): volume id, RW volume
 * id, index, inode counts and maxUniquifier.
 */
4428 PrintInodeSummary(struct SalvInfo *salvinfo)
4431 struct InodeSummary *isp;
4433 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4434 isp = &salvinfo->inodeSummary[i];
4435 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/*
 * PrintVolumeSummary -- debugging aid that walks salvinfo->volumeSummaryp
 * (nVolumes entries) and logs each entry's fileName.  The words "header" and
 * "wouldNeedCallback" in the message are literal text; no values are printed
 * for them.
 */
4440 PrintVolumeSummary(struct SalvInfo *salvinfo)
4443 struct VolumeSummary *vsp;
4445 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; vsp++, i++) {
4446 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
/*
 * Body fragment of the process-fork wrapper (the signature line is not part
 * of this listing; the comment below names it Fork).
 *
 * On the NT code path the function asserts unconditionally.  Elsewhere, when
 * built with AFS_DEMAND_ATTACH_FS, a child (f == 0) of the salvageserver
 * calls VChildProcReconnectFS_r() if FSSYNC_BUILD_CLIENT is defined.  The
 * SALVSYNC_BUILD_CLIENT branch body is not visible here.
 */
4456 osi_Assert(0); /* Fork is never executed in the NT code path */
4460 #ifdef AFS_DEMAND_ATTACH_FS
4461 if ((f == 0) && (programType == salvageServer)) {
4462 /* we are a salvageserver child */
4463 #ifdef FSSYNC_BUILD_CLIENT
4464 VChildProcReconnectFS_r();
4466 #ifdef SALVSYNC_BUILD_CLIENT
4470 #endif /* AFS_DEMAND_ATTACH_FS */
4471 #endif /* !AFS_NT40_ENV */
/*
 * Body fragment of the salvager's exit wrapper (the signature line is not
 * part of this listing; presumably Exit(code) -- confirm).
 *
 * Under AFS_DEMAND_ATTACH_FS there is salvageserver-specific handling guarded
 * by SALVSYNC_BUILD_CLIENT and FSSYNC_BUILD_CLIENT, whose statements are not
 * visible here.  A thread other than main_thread ends with pthread_exit(),
 * passing `code` cast to a pointer, rather than exiting the process.
 */
4481 #ifdef AFS_DEMAND_ATTACH_FS
4482 if (programType == salvageServer) {
4483 #ifdef SALVSYNC_BUILD_CLIENT
4486 #ifdef FSSYNC_BUILD_CLIENT
4490 #endif /* AFS_DEMAND_ATTACH_FS */
4493 if (main_thread != pthread_self())
4494 pthread_exit((void *)code);
/*
 * Body fragment of the child-reaping helper (the signature line is not part
 * of this listing; `prog` is the name used in messages).
 *
 * Blocks in wait(), asserts that it succeeded, logs a core dump if
 * WCOREDUMP reports one, and then tests for death by signal or a non-zero
 * exit status.  The action taken for that last test is not visible here.
 */
4507 pid = wait(&status);
4508 osi_Assert(pid != -1);
4509 if (WCOREDUMP(status))
4510 Log("\"%s\" core dumped!\n", prog);
4511 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp -- format `clock` as local time into a function-static 20-byte
 * buffer, either with seconds ("%m/%d/%Y %H:%M:%S") or without
 * ("%m/%d/%Y %H:%M").  Presumably `precision` selects between the two; the
 * selecting lines are not part of this listing.
 *
 * Because the buffer is static, each call overwrites the previous result.
 * NOTE(review): the localtime() result is passed to strftime() with no
 * visible NULL check.
 */
4517 TimeStamp(time_t clock, int precision)
4520 static char timestamp[20];
4521 lt = localtime(&clock);
4523 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M:%S", lt);
4525 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M", lt);
/*
 * CheckLogFile -- rotate and open the salvager log.
 *
 * The existing log at log_path is renamed to log_path + ".old" and a new
 * file is opened in append mode into the global logFile.  If that fails
 * there is a fallback described by the existing comment as "use stdout";
 * the assignment itself is not part of this listing.
 *
 * NOTE(review): strcpy/strcat into the AFSDIR_PATH_MAX-sized oldSlvgLog has
 * no visible length check on log_path -- confirm that callers bound it.
 */
4530 CheckLogFile(char * log_path)
4532 char oldSlvgLog[AFSDIR_PATH_MAX];
4534 #ifndef AFS_NT40_ENV
4541 strcpy(oldSlvgLog, log_path);
4542 strcat(oldSlvgLog, ".old");
4544 renamefile(log_path, oldSlvgLog);
4545 logFile = afs_fopen(log_path, "a");
4547 if (!logFile) { /* still nothing, use stdout */
4551 #ifndef AFS_NAMEI_ENV
4552 AFS_DEBUG_IOPS_LOG(logFile);
4557 #ifndef AFS_NT40_ENV
/*
 * TimeStampLogFile -- give the current log an additional, timestamped name.
 *
 * Builds "<log_path>.YYYY-MM-DD.HH:MM:SS" from the local time (bounded by
 * afs_snprintf) and hard-links log_path to that name.  The link() result is
 * deliberately ignored, as the existing comment explains.
 */
4559 TimeStampLogFile(char * log_path)
4561 char stampSlvgLog[AFSDIR_PATH_MAX];
4566 lt = localtime(&now);
4567 (void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
4568 "%s.%04d-%02d-%02d.%02d:%02d:%02d",
4569 log_path, lt->tm_year + 1900,
4570 lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
4573 /* try to link the logfile to a timestamped filename */
4574 /* if it fails, oh well, nothing we can do */
4575 link(log_path, stampSlvgLog);
/*
 * Body fragment of the routine that prints the salvage log (the signature
 * line is not part of this listing).
 *
 * On non-NT builds there is a branch that reports that the log cannot be
 * shown because syslog is in use.  Otherwise the file at
 * AFSDIR_SERVER_SLVGLOG_FILEPATH is opened for reading and consumed line by
 * line with fgets(); the per-line output statement is not visible here.
 */
4584 #ifndef AFS_NT40_ENV
4586 printf("Can't show log since using syslog.\n");
4597 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
4600 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
4603 while (fgets(line, sizeof(line), logFile))
/*
 * Log -- printf-style logging for the salvager.
 *
 * The message is formatted into `tmp` with afs_vsnprintf (bounded by
 * sizeof tmp).  It is then either sent to syslog at LOG_INFO (non-NT builds
 * only) or written to logFile prefixed with a TimeStamp() of the current
 * time.  The condition choosing between the two sinks is not part of this
 * listing.
 */
4610 Log(const char *format, ...)
4616 va_start(args, format);
4617 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4619 #ifndef AFS_NT40_ENV
4621 syslog(LOG_INFO, "%s", tmp);
4625 gettimeofday(&now, 0);
4626 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort -- printf-style fatal message.
 *
 * Formats the message like Log() and sends it to syslog (non-NT builds) or
 * to logFile, here without a timestamp prefix.  The statements that follow
 * the output, presumably terminating the program, are not part of this
 * listing.
 */
4632 Abort(const char *format, ...)
4637 va_start(args, format);
4638 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4640 #ifndef AFS_NT40_ENV
4642 syslog(LOG_INFO, "%s", tmp);
4646 fprintf(logFile, "%s", tmp);
/*
 * ToString -- allocate strlen(s) + 1 bytes for a copy of `s`; allocation
 * failure is fatal via osi_Assert.  The copy and return statements are not
 * part of this listing.
 */
4658 ToString(const char *s)
4661 p = (char *)malloc(strlen(s) + 1);
4662 osi_Assert(p != NULL);
4667 /* Remove the FORCESALVAGE file */
/*
 * RemoveTheForce -- unlink "<path>/FORCESALVAGE" if it exists, but only when
 * ForceSalvage is set and Testing is off.
 *
 * NOTE(review): `target` is filled with strcpy/strcat; its declaration, and
 * therefore its size, is not part of this listing -- confirm that it can
 * hold path plus the 13-character suffix.
 */
4669 RemoveTheForce(char *path)
4672 struct afs_stat force; /* so we can use afs_stat to find it */
4673 strcpy(target,path);
4674 strcat(target,"/FORCESALVAGE");
4675 if (!Testing && ForceSalvage) {
4676 if (afs_stat(target,&force) == 0) unlink(target);
4680 #ifndef AFS_AIX32_ENV
4682 * UseTheForceLuke - see if we can use the force
/*
 * UseTheForceLuke (variant compiled when AFS_AIX32_ENV is not defined) --
 * returns non-zero when "<path>/FORCESALVAGE" exists according to
 * afs_stat().
 *
 * NOTE(review): `target` is filled with strcpy/strcat; its declaration is
 * not part of this listing -- confirm the buffer size.
 */
4685 UseTheForceLuke(char *path)
4687 struct afs_stat force;
4689 strcpy(target,path);
4690 strcat(target,"/FORCESALVAGE");
4692 return (afs_stat(target, &force) == 0);
4696 * UseTheForceLuke - see if we can use the force
4699 * The VRMIX fsck will not muck with the filesystem it is supposedly
4700 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4701 * muck directly with the root inode, which is within the normal
4703 * ListViceInodes() has a side effect of setting ForceSalvage if
4704 * it detects a need, based on root inode examination.
/*
 * UseTheForceLuke (the other preprocessor branch, presumably the AIX build)
 * -- always returns 0 in the visible code; `path` is not consulted.
 */
4707 UseTheForceLuke(char *path)
4710 return 0; /* sorry OB1 */
4715 /* NT support routines */
/* Cached path of the running executable; filled in on first use below. */
4717 static char execpathname[MAX_PATH];
/*
 * nt_SalvagePartition -- spawn a child salvager process for one partition
 * (NT builds).
 *
 * On first use the executable path is obtained with GetModuleFileName().  A
 * childJob_t is filled in with SALVAGER_MAGIC, the job number and the
 * partition name, and passed to spawnprocveb() together with save_args.
 *
 * NOTE(review): the failure test compares n against the literal 1023 while
 * the buffer length passed is MAX_PATH - 1 -- confirm the intended limit.
 * NOTE(review): strcpy() into job.cj_part has no visible length check on
 * partName.
 */
4719 nt_SalvagePartition(char *partName, int jobn)
4724 if (!*execpathname) {
4725 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
4726 if (!n || n == 1023)
4729 job.cj_magic = SALVAGER_MAGIC;
4730 job.cj_number = jobn;
4731 (void)strcpy(job.cj_part, partName);
4732 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage -- child-side setup for a spawned partition
 * salvage (NT builds).
 *
 * `datap` is interpreted as a childJob_t.  Its length and cj_magic are
 * validated first; the actions taken on a mismatch are not part of this
 * listing.  A log file named "<AFSDIR_SERVER_SLVGLOG_FILEPATH>.<n>" is then
 * opened for writing into the global logFile.  Presumably <n> is the job
 * number; the argument line is not visible -- confirm.
 */
4737 nt_SetupPartitionSalvage(void *datap, int len)
4739 childJob_t *jobp = (childJob_t *) datap;
4740 char logname[AFSDIR_PATH_MAX];
4742 if (len != sizeof(childJob_t))
4744 if (jobp->cj_magic != SALVAGER_MAGIC)
4749 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
4751 logFile = afs_fopen(logname, "w");
4759 #endif /* AFS_NT40_ENV */