2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
89 #include <afs/procmgmt.h>
93 #include <sys/param.h>
97 #endif /* ITIMER_REAL */
103 #include <sys/stat.h>
108 #include <WINNT/afsevent.h>
111 #define WCOREDUMP(x) ((x) & 0200)
114 #include <afs/afsint.h>
115 #include <afs/afs_assert.h>
116 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
117 #if defined(AFS_VFSINCL_ENV)
118 #include <sys/vnode.h>
120 #include <sys/fs/ufs_inode.h>
122 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
123 #include <ufs/ufs/dinode.h>
124 #include <ufs/ffs/fs.h>
126 #include <ufs/inode.h>
129 #else /* AFS_VFSINCL_ENV */
131 #include <ufs/inode.h>
132 #else /* AFS_OSF_ENV */
133 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_ARM_DARWIN_ENV)
134 #include <sys/inode.h>
137 #endif /* AFS_VFSINCL_ENV */
138 #endif /* AFS_SGI_ENV */
141 #include <sys/lockf.h>
145 #include <checklist.h>
147 #if defined(AFS_SGI_ENV)
152 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
155 #include <sys/mnttab.h>
156 #include <sys/mntent.h>
161 #endif /* AFS_SGI_ENV */
162 #endif /* AFS_HPUX_ENV */
167 #include <afs/osi_inode.h>
171 #include <afs/afsutil.h>
172 #include <afs/fileutil.h>
173 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
181 #include <afs/afssyscalls.h>
185 #include "partition.h"
186 #include "daemon_com.h"
188 #include "volume_inline.h"
189 #include "salvsync.h"
190 #include "viceinode.h"
192 #include "volinodes.h" /* header magic number, etc. stuff */
193 #include "vol-salvage.h"
195 #include "vol_internal.h"
197 #include <afs/prs_fs.h>
199 #ifdef FSSYNC_BUILD_CLIENT
200 #include "vg_cache.h"
207 /*@+fcnmacros +macrofcndecl@*/
210 extern off64_t afs_lseek(int FD, off64_t O, int F);
211 #endif /*S_SPLINT_S */
212 #define afs_lseek(FD, O, F) lseek64(FD, (off64_t) (O), F)
213 #define afs_stat stat64
214 #define afs_fstat fstat64
215 #define afs_open open64
216 #define afs_fopen fopen64
217 #else /* !O_LARGEFILE */
219 extern off_t afs_lseek(int FD, off_t O, int F);
220 #endif /*S_SPLINT_S */
221 #define afs_lseek(FD, O, F) lseek(FD, (off_t) (O), F)
222 #define afs_stat stat
223 #define afs_fstat fstat
224 #define afs_open open
225 #define afs_fopen fopen
226 #endif /* !O_LARGEFILE */
227 /*@=fcnmacros =macrofcndecl@*/
230 extern void *calloc();
232 static char *TimeStamp(time_t clock, int precision);
235 int debug; /* -d flag */
236 extern int Testing; /* -n flag */
237 int ListInodeOption; /* -i flag */
238 int ShowRootFiles; /* -r flag */
239 int RebuildDirs; /* -sal flag */
240 int Parallel = 4; /* -para X flag */
241 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
242 int forceR = 0; /* -b flag */
243 int ShowLog = 0; /* -showlog flag */
244 int ShowSuid = 0; /* -showsuid flag */
245 int ShowMounts = 0; /* -showmounts flag */
246 int orphans = ORPH_IGNORE; /* -orphans option */
251 int useSyslog = 0; /* -syslog flag */
252 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
261 #define MAXPARALLEL 32
263 int OKToZap; /* -o flag */
264 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
265 * in the volume header */
267 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
269 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
272 * information that is 'global' to a particular salvage job.
275 Device fileSysDevice; /**< The device number of the current partition
277 char fileSysPath[8]; /**< The path of the mounted partition currently
278 * being salvaged, i.e. the directory containing
279 * the volume headers */
280 char *fileSysPathName; /**< NT needs this to make name pretty log. */
281 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
282 int VGLinkH_cnt; /**< # of references to lnk handle. */
283 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
286 char *fileSysDeviceName; /**< The block device where the file system being
287 * salvaged was mounted */
288 char *filesysfulldev;
290 int VolumeChanged; /**< Set by any routine which would change the
291 * volume in a way which would require callbacks
292 * to be broken if the volume was put back on
293 * line by an active file server */
295 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
296 * header dealt with */
298 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
299 int inodeFd; /**< File descriptor for inode file */
301 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
302 int nVolumes; /**< Number of volumes (read-write and read-only)
303 * in volume summary */
304 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
307 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
308 * vnodes in the volume that
309 * we are currently looking
311 int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
312 * to contact the fileserver over FSYNC */
319 /* Forward declarations */
320 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
321 static int AskVolumeSummary(struct SalvInfo *salvinfo,
322 VolumeId singleVolumeNumber);
324 #ifdef AFS_DEMAND_ATTACH_FS
325 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
326 #endif /* AFS_DEMAND_ATTACH_FS */
328 /* Uniquifier stored in the Inode */
333 return (u & 0x3fffff);
335 #if defined(AFS_SGI_EXMAG)
336 return (u & SGI_UNIQMASK);
339 #endif /* AFS_SGI_EXMAG */
346 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
348 return 0; /* otherwise may be transient, e.g. EMFILE */
353 char *save_args[MAX_ARGS];
355 extern pthread_t main_thread;
356 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
360 * Get the salvage lock if not already held. Hold until process exits.
362 * @param[in] locktype READ_LOCK or WRITE_LOCK
365 _ObtainSalvageLock(int locktype)
367 struct VLockFile salvageLock;
372 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
374 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
377 "salvager: There appears to be another salvager running! "
382 "salvager: Error %d trying to acquire salvage lock! "
388 ObtainSalvageLock(void)
390 _ObtainSalvageLock(WRITE_LOCK);
393 ObtainSharedSalvageLock(void)
395 _ObtainSalvageLock(READ_LOCK);
399 #ifdef AFS_SGI_XFS_IOPS_ENV
400 /* Check if the given partition is mounted. For XFS, the root inode is not a
401 * constant. So we check the hard way.
404 IsPartitionMounted(char *part)
407 struct mntent *mntent;
409 osi_Assert(mntfp = setmntent(MOUNTED, "r"));
410 while (mntent = getmntent(mntfp)) {
411 if (!strcmp(part, mntent->mnt_dir))
416 return mntent ? 1 : 1;
419 /* Check if the given inode is the root of the filesystem. */
420 #ifndef AFS_SGI_XFS_IOPS_ENV
422 IsRootInode(struct afs_stat *status)
425 * The root inode is not a fixed value in XFS partitions. So we need to
426 * see if the partition is in the list of mounted partitions. This only
427 * affects the SalvageFileSys path, so we check there.
429 return (status->st_ino == ROOTINODE);
434 #ifndef AFS_NAMEI_ENV
435 /* We don't want to salvage big files filesystems, since we can't put volumes on
439 CheckIfBigFilesFS(char *mountPoint, char *devName)
441 struct superblock fs;
444 if (strncmp(devName, "/dev/", 5)) {
445 (void)sprintf(name, "/dev/%s", devName);
447 (void)strcpy(name, devName);
450 if (ReadSuper(&fs, name) < 0) {
451 Log("Unable to read superblock. Not salvaging partition %s.\n",
455 if (IsBigFilesFileSystem(&fs)) {
456 Log("Partition %s is a big files filesystem, not salvaging.\n",
466 #define HDSTR "\\Device\\Harddisk"
467 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
469 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
475 static int dowarn = 1;
477 if (!QueryDosDevice(p1->devName, res1, RES_LEN - 1))
479 if (strncmp(res1, HDSTR, HDLEN)) {
482 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
483 res1, HDSTR, p1->devName);
486 if (!QueryDosDevice(p2->devName, res2, RES_LEN - 1))
488 if (strncmp(res2, HDSTR, HDLEN)) {
491 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
492 res2, HDSTR, p2->devName);
496 return (0 == _strnicmp(res1, res2, RES_LEN - 1));
499 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
502 /* This assumes that two partitions with the same device number divided by
503 * PartsPerDisk are on the same disk.
506 SalvageFileSysParallel(struct DiskPartition64 *partP)
509 struct DiskPartition64 *partP;
510 int pid; /* Pid for this job */
511 int jobnumb; /* Log file job number */
512 struct job *nextjob; /* Next partition on disk to salvage */
514 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
515 struct job *thisjob = 0;
516 static int numjobs = 0;
517 static int jobcount = 0;
523 char logFileName[256];
527 /* We have a partition to salvage. Copy it into thisjob */
528 thisjob = (struct job *)malloc(sizeof(struct job));
530 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
533 memset(thisjob, 0, sizeof(struct job));
534 thisjob->partP = partP;
535 thisjob->jobnumb = jobcount;
537 } else if (jobcount == 0) {
538 /* We are asking to wait for all jobs (partp == 0), yet we never
541 Log("No file system partitions named %s* found; not salvaged\n",
542 VICE_PARTITION_PREFIX);
546 if (debug || Parallel == 1) {
548 SalvageFileSys(thisjob->partP, 0);
555 /* Check to see if thisjob is for a disk that we are already
556 * salvaging. If it is, link it in as the next job to do. The
557 * jobs array has 1 entry per disk being salvaged. numjobs is
558 * the total number of disks currently being salvaged. In
559 * order to keep the jobs array compact, when a disk is
560 * completed, the highest element in the jobs array is moved
561 * down to the now open slot.
563 for (j = 0; j < numjobs; j++) {
564 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
565 /* On same disk, add it to this list and return */
566 thisjob->nextjob = jobs[j]->nextjob;
567 jobs[j]->nextjob = thisjob;
574 /* Loop until we start thisjob or until all existing jobs are finished */
575 while (thisjob || (!partP && (numjobs > 0))) {
576 startjob = -1; /* No new job to start */
578 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
579 /* Either the max jobs are running or we have to wait for all
580 * the jobs to finish. In either case, we wait for at least one
581 * job to finish. When it's done, clean up after it.
583 pid = wait(&wstatus);
584 osi_Assert(pid != -1);
585 for (j = 0; j < numjobs; j++) { /* Find which job it is */
586 if (pid == jobs[j]->pid)
589 osi_Assert(j < numjobs);
590 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
591 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
594 numjobs--; /* job no longer running */
595 oldjob = jobs[j]; /* remember */
596 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
597 free(oldjob); /* free the old job */
599 /* If there is another partition on the disk to salvage, then
600 * say we will start it (startjob). If not, then put thisjob there
601 * and say we will start it.
603 if (jobs[j]) { /* Another partitions to salvage */
604 startjob = j; /* Will start it */
605 } else { /* There is not another partition to salvage */
607 jobs[j] = thisjob; /* Add thisjob */
609 startjob = j; /* Will start it */
611 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
612 startjob = -1; /* Don't start it - already running */
616 /* We don't have to wait for a job to complete */
618 jobs[numjobs] = thisjob; /* Add this job */
620 startjob = numjobs; /* Will start it */
624 /* Start up a new salvage job on a partition in job slot "startjob" */
625 if (startjob != -1) {
627 Log("Starting salvage of file system partition %s\n",
628 jobs[startjob]->partP->name);
630 /* For NT, we not only fork, but re-exec the salvager. Pass in the
631 * commands and pass the child job number via the data path.
634 nt_SalvagePartition(jobs[startjob]->partP->name,
635 jobs[startjob]->jobnumb);
636 jobs[startjob]->pid = pid;
641 jobs[startjob]->pid = pid;
647 for (fd = 0; fd < 16; fd++)
654 openlog("salvager", LOG_PID, useSyslogFacility);
658 (void)afs_snprintf(logFileName, sizeof logFileName,
660 AFSDIR_SERVER_SLVGLOG_FILEPATH,
661 jobs[startjob]->jobnumb);
662 logFile = afs_fopen(logFileName, "w");
667 SalvageFileSys1(jobs[startjob]->partP, 0);
672 } /* while ( thisjob || (!partP && numjobs > 0) ) */
674 /* If waited for all jobs to complete, now collect log files and return */
676 if (!useSyslog) /* if syslogging - no need to collect */
679 for (i = 0; i < jobcount; i++) {
680 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
681 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
682 if ((passLog = afs_fopen(logFileName, "r"))) {
683 while (fgets(buf, sizeof(buf), passLog)) {
688 (void)unlink(logFileName);
697 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
699 if (!canfork || debug || Fork() == 0) {
700 SalvageFileSys1(partP, singleVolumeNumber);
701 if (canfork && !debug) {
706 Wait("SalvageFileSys");
710 get_DevName(char *pbuffer, char *wpath)
712 char pbuf[128], *ptr;
713 strcpy(pbuf, pbuffer);
714 ptr = (char *)strrchr(pbuf, OS_DIRSEPC);
720 ptr = (char *)strrchr(pbuffer, OS_DIRSEPC);
722 strcpy(pbuffer, ptr + 1);
729 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
732 char inodeListPath[256];
733 FILE *inodeFile = NULL;
734 static char tmpDevName[100];
735 static char wpath[100];
736 struct VolumeSummary *vsp, *esp;
740 struct SalvInfo l_salvinfo;
741 struct SalvInfo *salvinfo = &l_salvinfo;
744 memset(salvinfo, 0, sizeof(*salvinfo));
751 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
752 Abort("Raced too many times with fileserver restarts while trying to "
753 "checkout/lock volumes; Aborted\n");
755 #ifdef AFS_DEMAND_ATTACH_FS
757 /* unlock all previous volume locks, since we're about to lock them
759 VLockFileReinit(&partP->volLockFile);
761 #endif /* AFS_DEMAND_ATTACH_FS */
763 salvinfo->fileSysPartition = partP;
764 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
765 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
768 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
769 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
770 name = partP->devName;
772 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
773 strcpy(tmpDevName, partP->devName);
774 name = get_DevName(tmpDevName, wpath);
775 salvinfo->fileSysDeviceName = name;
776 salvinfo->filesysfulldev = wpath;
779 if (singleVolumeNumber) {
780 #ifndef AFS_DEMAND_ATTACH_FS
781 /* only non-DAFS locks the partition when salvaging a single volume;
782 * DAFS will lock the individual volumes in the VG */
783 VLockPartition(partP->name);
784 #endif /* !AFS_DEMAND_ATTACH_FS */
788 /* salvageserver already setup fssync conn for us */
789 if ((programType != salvageServer) && !VConnectFS()) {
790 Abort("Couldn't connect to file server\n");
793 salvinfo->useFSYNC = 1;
794 AskOffline(salvinfo, singleVolumeNumber);
795 #ifdef AFS_DEMAND_ATTACH_FS
796 if (LockVolume(salvinfo, singleVolumeNumber)) {
799 #endif /* AFS_DEMAND_ATTACH_FS */
802 salvinfo->useFSYNC = 0;
803 VLockPartition(partP->name);
807 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
810 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
811 partP->name, name, (Testing ? "(READONLY mode)" : ""));
813 Log("***Forced salvage of all volumes on this partition***\n");
818 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
825 osi_Assert((dirp = opendir(salvinfo->fileSysPath)) != NULL);
826 while ((dp = readdir(dirp))) {
827 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
828 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
830 Log("Removing old salvager temp files %s\n", dp->d_name);
831 strcpy(npath, salvinfo->fileSysPath);
832 strcat(npath, OS_DIRSEP);
833 strcat(npath, dp->d_name);
839 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
841 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
842 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
844 snprintf(inodeListPath, 255, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
848 inodeFile = fopen(inodeListPath, "w+b");
850 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
853 /* Using nt_unlink here since we're really using the delete on close
854 * semantics of unlink. In most places in the salvager, we really do
855 * mean to unlink the file at that point. Those places have been
856 * modified to actually do that so that the NT crt can be used there.
858 * jaltman - On NT delete on close cannot be applied to a file while the
859 * process has an open file handle that does not have DELETE file
860 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
861 * delete privileges. As a result the nt_unlink() call will always
864 code = nt_unlink(inodeListPath);
866 code = unlink(inodeListPath);
869 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
872 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
876 salvinfo->inodeFd = fileno(inodeFile);
877 if (salvinfo->inodeFd == -1)
878 Abort("Temporary file %s is missing...\n", inodeListPath);
879 afs_lseek(salvinfo->inodeFd, 0L, SEEK_SET);
880 if (ListInodeOption) {
881 PrintInodeList(salvinfo);
884 /* enumerate volumes in the partition.
885 * figure out sets of read-only + rw volumes.
886 * salvage each set, read-only volumes first, then read-write.
887 * Fix up inodes on last volume in set (whether it is read-write
890 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
894 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
895 i < salvinfo->nVolumesInInodeFile; i = j) {
896 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
898 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
900 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
901 struct VolumeSummary *tsp;
902 /* Scan volume list (from partition root directory) looking for the
903 * current rw volume number in the volume list from the inode scan.
904 * If there is one here that is not in the inode volume list,
906 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
908 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
910 /* Now match up the volume summary info from the root directory with the
911 * entry in the volume list obtained from scanning inodes */
912 salvinfo->inodeSummary[j].volSummary = NULL;
913 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
914 if (tsp->header.id == vid) {
915 salvinfo->inodeSummary[j].volSummary = tsp;
921 /* Salvage the group of volumes (several read-only + 1 read/write)
922 * starting with the current read-only volume we're looking at.
924 SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
927 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
928 for (; vsp < esp; vsp++) {
930 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
933 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
934 RemoveTheForce(salvinfo->fileSysPath);
936 if (!Testing && singleVolumeNumber) {
937 #ifdef AFS_DEMAND_ATTACH_FS
938 /* unlock vol headers so the fs can attach them when we AskOnline */
939 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
940 #endif /* AFS_DEMAND_ATTACH_FS */
942 AskOnline(salvinfo, singleVolumeNumber);
944 /* Step through the volumeSummary list and set all volumes on-line.
945 * The volumes were taken off-line in GetVolumeSummary.
947 for (j = 0; j < salvinfo->nVolumes; j++) {
948 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
952 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
953 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
956 fclose(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
960 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
963 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, vsp->fileName);
966 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
969 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
971 Log("Error %ld destroying volume disk header for volume %lu\n",
972 afs_printable_int32_ld(code),
973 afs_printable_uint32_lu(vsp->header.id));
976 /* make sure we actually delete the fileName file; ENOENT
977 * is fine, since VDestroyVolumeDiskHeader probably already
979 if (unlink(path) && errno != ENOENT) {
980 Log("Unable to unlink %s (errno = %d)\n", path, errno);
987 CompareInodes(const void *_p1, const void *_p2)
989 const struct ViceInodeInfo *p1 = _p1;
990 const struct ViceInodeInfo *p2 = _p2;
991 if (p1->u.vnode.vnodeNumber == INODESPECIAL
992 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
993 VolumeId p1rwid, p2rwid;
995 (p1->u.vnode.vnodeNumber ==
996 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
998 (p2->u.vnode.vnodeNumber ==
999 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1000 if (p1rwid < p2rwid)
1002 if (p1rwid > p2rwid)
1004 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1005 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
1006 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1007 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1008 if (p1->u.vnode.volumeId == p1rwid)
1010 if (p2->u.vnode.volumeId == p2rwid)
1012 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1014 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1015 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1016 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1018 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1020 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1022 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1024 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1026 /* The following tests are reversed, so that the most desirable
1027 * of several similar inodes comes first */
1028 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1029 #ifdef AFS_3DISPARES
1030 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1031 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1034 #ifdef AFS_SGI_EXMAG
1035 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1036 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1041 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1042 #ifdef AFS_3DISPARES
1043 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1044 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1047 #ifdef AFS_SGI_EXMAG
1048 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1049 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1054 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1055 #ifdef AFS_3DISPARES
1056 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1057 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1060 #ifdef AFS_SGI_EXMAG
1061 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1062 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1067 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1068 #ifdef AFS_3DISPARES
1069 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1070 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1073 #ifdef AFS_SGI_EXMAG
1074 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1075 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1084 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1085 struct InodeSummary *summary)
1087 VolumeId volume = ip->u.vnode.volumeId;
1088 VolumeId rwvolume = volume;
1093 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1095 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1097 rwvolume = ip->u.special.parentId;
1098 /* This isn't quite right, as there could (in error) be different
1099 * parent inodes in different special vnodes */
1101 if (maxunique < ip->u.vnode.vnodeUniquifier)
1102 maxunique = ip->u.vnode.vnodeUniquifier;
1106 summary->volumeId = volume;
1107 summary->RWvolumeId = rwvolume;
1108 summary->nInodes = n;
1109 summary->nSpecialInodes = nSpecial;
1110 summary->maxUniquifier = maxunique;
1114 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1116 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1117 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1118 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1123 * Collect list of inodes in file named by path. If a truly fatal error,
1124 * unlink the file and abort. For lesser errors, return -1. The file will
1125 * be unlinked by the caller.
1128 GetInodeSummary(struct SalvInfo *salvinfo, FILE *inodeFile, VolumeId singleVolumeNumber)
1130 struct afs_stat status;
1133 struct ViceInodeInfo *ip, *ip_save;
1134 struct InodeSummary summary;
1135 char summaryFileName[50];
1138 char *dev = salvinfo->fileSysPath;
1139 char *wpath = salvinfo->fileSysPath;
1141 char *dev = salvinfo->fileSysDeviceName;
1142 char *wpath = salvinfo->filesysfulldev;
1144 char *part = salvinfo->fileSysPath;
1148 /* This file used to come from vfsck; cobble it up ourselves now... */
1150 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1151 singleVolumeNumber ? OnlyOneVolume : 0,
1152 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1154 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1157 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1159 if (forceSal && !ForceSalvage) {
1160 Log("***Forced salvage of all volumes on this partition***\n");
1163 fseek(inodeFile, 0L, SEEK_SET);
1164 salvinfo->inodeFd = fileno(inodeFile);
1165 if (salvinfo->inodeFd == -1 || afs_fstat(salvinfo->inodeFd, &status) == -1) {
1166 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1168 tdir = (tmpdir ? tmpdir : part);
1170 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1171 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp."));
1173 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1174 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1176 summaryFile = afs_fopen(summaryFileName, "a+");
1177 if (summaryFile == NULL) {
1178 Abort("Unable to create inode summary file\n");
1182 /* Using nt_unlink here since we're really using the delete on close
1183 * semantics of unlink. In most places in the salvager, we really do
1184 * mean to unlink the file at that point. Those places have been
1185 * modified to actually do that so that the NT crt can be used there.
1187 * jaltman - As commented elsewhere, this cannot work because fopen()
1188 * does not open files with DELETE and FILE_SHARE_DELETE.
1190 code = nt_unlink(summaryFileName);
1192 code = unlink(summaryFileName);
1195 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1198 if (!canfork || debug || Fork() == 0) {
1200 unsigned long st_size=(unsigned long) status.st_size;
1201 nInodes = st_size / sizeof(struct ViceInodeInfo);
1203 fclose(summaryFile);
1204 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1205 RemoveTheForce(salvinfo->fileSysPath);
1207 struct VolumeSummary *vsp;
1210 GetVolumeSummary(salvinfo, singleVolumeNumber);
1212 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1214 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1217 Log("%s vice inodes on %s; not salvaged\n",
1218 singleVolumeNumber ? "No applicable" : "No", dev);
1221 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1223 fclose(summaryFile);
1225 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1228 if (read(salvinfo->inodeFd, ip, st_size) != st_size) {
1229 fclose(summaryFile);
1230 Abort("Unable to read inode table; %s not salvaged\n", dev);
1232 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1233 if (afs_lseek(salvinfo->inodeFd, 0, SEEK_SET) == -1
1234 || write(salvinfo->inodeFd, ip, st_size) != st_size) {
1235 fclose(summaryFile);
1236 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1241 CountVolumeInodes(ip, nInodes, &summary);
1242 if (fwrite(&summary, sizeof(summary), 1, summaryFile) != 1) {
1243 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1244 fclose(summaryFile);
1247 summary.index += (summary.nInodes);
1248 nInodes -= summary.nInodes;
1249 ip += summary.nInodes;
1252 ip = ip_save = NULL;
1253 /* Following fflush is not fclose, because if it was debug mode would not work */
1254 if (fflush(summaryFile) == EOF || fsync(fileno(summaryFile)) == -1) {
1255 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1256 fclose(summaryFile);
1259 if (canfork && !debug) {
1264 if (Wait("Inode summary") == -1) {
1265 fclose(summaryFile);
1266 Exit(1); /* salvage of this partition aborted */
1269 osi_Assert(afs_fstat(fileno(summaryFile), &status) != -1);
1270 if (status.st_size != 0) {
1272 unsigned long st_status=(unsigned long)status.st_size;
1273 salvinfo->inodeSummary = (struct InodeSummary *)malloc(st_status);
1274 osi_Assert(salvinfo->inodeSummary != NULL);
1275 /* For GNU we need to do lseek to get the file pointer moved. */
1276 osi_Assert(afs_lseek(fileno(summaryFile), 0, SEEK_SET) == 0);
1277 ret = read(fileno(summaryFile), salvinfo->inodeSummary, st_status);
1278 osi_Assert(ret == st_status);
1280 salvinfo->nVolumesInInodeFile =(unsigned long)(status.st_size) / sizeof(struct InodeSummary);
1281 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1282 salvinfo->inodeSummary[i].volSummary = NULL;
1284 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)(status.st_size));
1285 fclose(summaryFile);
1289 /* Comparison routine for volume sort.
1290 This is set up so that a read-write volume comes immediately before
1291 any read-only clones of that volume */
/* Comparator over two struct VolumeSummary elements, taking them as
 * const void pointers. Volumes with different header.parent values are
 * ordered by parent id; within one parent, two volumes that are both
 * clones are ordered by header.id. */
1293 CompareVolumes(const void *_p1, const void *_p2)
1295 const struct VolumeSummary *p1 = _p1;
1296 const struct VolumeSummary *p2 = _p2;
/* Different parents: the smaller parent id sorts first. */
1297 if (p1->header.parent != p2->header.parent)
1298 return p1->header.parent < p2->header.parent ? -1 : 1;
/* Same parent from here on. A volume whose id equals its parent id is the
 * read-write volume.
 * NOTE(review): the statements executed by the next two tests are not
 * visible in this listing; going by the comment that precedes this
 * function, the read-write volume is expected to sort ahead of its
 * clones -- confirm. */
1299 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1301 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1303 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1307 * Gleans volumeSummary information by asking the fileserver
1309 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1310 * salvaging a whole partition
1312 * @return whether we obtained the volume summary information or not
1313 * @retval 0 success; we obtained the volume summary information
1314 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1316 * @retval 1 we did not get the volume summary information; either the
1317 * fileserver responded with an error, or we are not supposed to
1318 * ask the fileserver for the information (e.g. we are salvaging
1319 * the entire partition or we are not the salvageserver)
1321 * @note for non-DAFS, always returns 1
/* Try to build salvinfo->volumeSummaryp from the fileserver's view of the
 * volume group instead of scanning the partition.
 *
 * The body only does anything when both FSSYNC_BUILD_CLIENT and
 * AFS_DEMAND_ATTACH_FS are defined, the program is the salvageserver, and
 * a single volume (singleVolumeNumber != 0) is being salvaged. In that
 * case it:
 *   - queries the fileserver with FSYNC_VGCQuery, polling while the reply
 *     is SYNC_FAILED / FSYNC_PART_SCANNING;
 *   - treats SYNC_FAILED / FSYNC_UNKNOWN_VOLID as a reply with no members;
 *   - if the reply names a different RW volume than the one requested,
 *     schedules a salvage of that volume group and exits the process;
 *   - otherwise fills one VolumeSummary per reported member and sorts them.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * the return contract is the one given in the header comment above. */
1324 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1327 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1328 if (programType == salvageServer) {
1329 if (singleVolumeNumber) {
1330 FSSYNC_VGQry_response_t q_res;
1332 struct VolumeSummary *vsp;
1334 struct VolumeDiskHeader diskHdr;
1336 memset(&res, 0, sizeof(res));
/* First query; q_res receives the group membership, res the sync reply. */
1338 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1341 * We must wait for the partition to finish scanning before
1342 * can continue, since we will not know if we got the entire
1343 * VG membership unless the partition is fully scanned.
1344 * We could, in theory, just scan the partition ourselves if
1345 * the VG cache is not ready, but we would be doing the exact
1346 * same scan the fileserver is doing; it will almost always
1347 * be faster to wait for the fileserver. The only exceptions
1348 * are if the partition does not take very long to scan, and
1349 * in that case it's fast either way, so who cares?
1351 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1352 Log("waiting for fileserver to finish scanning partition %s...\n",
1353 salvinfo->fileSysPartition->name);
1355 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1356 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1357 * just so small partitions don't need to wait over 10
1358 * seconds every time, and large partitions are generally
1359 * polled only once every ten seconds. */
1360 sleep((i > 10) ? (i = 10) : i);
1362 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1366 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1367 /* This can happen if there's no header for the volume
1368 * we're salvaging, or no headers exist for the VG (if
1369 * we're salvaging an RW). Act as if we got a response
1370 * with no VG members. The headers may be created during
1371 * salvaging, if there are inodes in this VG. */
/* Fabricate an empty reply whose RW id is the requested volume. */
1373 memset(&q_res, 0, sizeof(q_res));
1374 q_res.rw = singleVolumeNumber;
1378 Log("fileserver refused VGCQuery request for volume %lu on "
1379 "partition %s, code %ld reason %ld\n",
1380 afs_printable_uint32_lu(singleVolumeNumber),
1381 salvinfo->fileSysPartition->name,
1382 afs_printable_int32_ld(code),
1383 afs_printable_int32_ld(res.hdr.reason));
/* The reply's RW id differs from the volume we were asked to salvage, so
 * the request was for a clone: hand the job over to a salvage of the
 * whole group and terminate this process. */
1387 if (q_res.rw != singleVolumeNumber) {
1388 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1389 afs_printable_uint32_lu(singleVolumeNumber),
1390 afs_printable_uint32_lu(q_res.rw));
1391 #ifdef SALVSYNC_BUILD_CLIENT
1392 if (SALVSYNC_LinkVolume(q_res.rw,
1394 salvinfo->fileSysPartition->name,
1396 Log("schedule request failed\n");
1398 #endif /* SALVSYNC_BUILD_CLIENT */
1399 Exit(SALSRV_EXIT_VOLGROUP_LINK);
/* Allocate room for VOL_VG_MAX_VOLS summaries, matching the bound of the
 * loop over q_res.children below. */
1402 salvinfo->volumeSummaryp = malloc(VOL_VG_MAX_VOLS * sizeof(struct VolumeSummary));
1403 osi_Assert(salvinfo->volumeSummaryp != NULL);
1405 salvinfo->nVolumes = 0;
1406 vsp = salvinfo->volumeSummaryp;
/* Walk the reported children; a zero entry marks an unused slot. */
1408 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1409 char name[VMAXPATHLEN];
1411 if (!q_res.children[i]) {
1415 /* AskOffline for singleVolumeNumber was called much earlier */
1416 if (q_res.children[i] != singleVolumeNumber) {
1417 AskOffline(salvinfo, q_res.children[i]);
1418 if (LockVolume(salvinfo, q_res.children[i])) {
/* Load the on-disk header of this member and record it in *vsp. */
1424 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1426 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1427 afs_printable_uint32_lu(q_res.children[i]));
1432 DiskToVolumeHeader(&vsp->header, &diskHdr);
1433 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1434 vsp->fileName = ToString(name);
1435 salvinfo->nVolumes++;
/* Sort the collected summaries.
 * NOTE(review): the comparator argument is on a line not visible here. */
1439 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1444 Log("Cannot get volume summary from fileserver; falling back to scanning "
1445 "entire partition\n");
1448 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
1453 * count how many volume headers are found by VWalkVolumeHeaders.
1455 * @param[in] dp the disk partition (unused)
1456 * @param[in] name full path to the .vol header (unused)
1457 * @param[in] hdr the header data (unused)
1458 * @param[in] last whether this is the last try or not (unused)
1459 * @param[in] rock actually an afs_int32*; the running count of how many
1460 * volumes we have found
/* Header callback that GetVolumeSummary passes to VWalkVolumeHeaders for
 * its counting pass over the partition. */
1465 CountHeader(struct DiskPartition64 *dp, const char *name,
1466 struct VolumeDiskHeader *hdr, int last, void *rock)
/* rock points at the caller's afs_int32 counter.
 * NOTE(review): the statement that updates *nvols and the return are not
 * visible in this listing. */
1468 afs_int32 *nvols = (afs_int32 *)rock;
1474 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
/* State shared between GetVolumeSummary and its VWalkVolumeHeaders
 * callbacks: GetVolumeSummary fills one of these in and passes its address
 * as the rock, and RecordHeader / UnlinkHeader cast the rock back to this
 * type. */
1477 struct SalvageScanParams {
1478 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1479 * vol id of the VG we're salvaging */
1480 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1481 * we're filling in */
1482 afs_int32 nVolumes; /**< # of vols we've encountered */
1483 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1484 * # of vols we've alloc'd memory for) */
1485 int retry; /**< do we need to retry vol lock/checkout? */
1486 struct SalvInfo *salvinfo; /**< salvage job info */
1490 * records volume summary info found from VWalkVolumeHeaders.
1492 * Found volumes are also taken offline if they are in the specific volume
1493 * group we are looking for.
1495 * @param[in] dp the disk partition
1496 * @param[in] name full path to the .vol header
1497 * @param[in] hdr the header data
1498 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1499 * @param[in] rock actually a struct SalvageScanParams*, containing the
1500 * information needed to record the volume summary data
1502 * @return operation status
1504 * @retval -1 volume locking raced with fileserver restart; checking out
1505 * and locking volumes needs to be retried
1506 * @retval 1 volume header is mis-named and should be deleted
/* VWalkVolumeHeaders header callback used by GetVolumeSummary's recording
 * pass. rock is a struct SalvageScanParams. The visible steps are:
 *   - convert the disk header into summary.header;
 *   - if the single volume being salvaged turns out to be a clone (its id
 *     matches but its parent does not), either schedule a salvage of the
 *     parent group and exit (salvageserver) or log that it is read-only;
 *   - for headers that belong to this salvage, compare the file's base
 *     name with the name VFORMAT produces for the header's id;
 *   - take the volume offline (and lock it under AFS_DEMAND_ATTACH_FS);
 *   - abort if more headers were found than were allocated for;
 *   - copy the finished summary into params->vsp.
 * NOTE(review): several statements (returns, counter updates, the badname
 * assignment) are on lines not visible in this listing. */
1509 RecordHeader(struct DiskPartition64 *dp, const char *name,
1510 struct VolumeDiskHeader *hdr, int last, void *rock)
1512 char nameShouldBe[64];
1513 struct SalvageScanParams *params;
1514 struct VolumeSummary summary;
1515 VolumeId singleVolumeNumber;
1516 struct SalvInfo *salvinfo;
1518 params = (struct SalvageScanParams *)rock;
1520 singleVolumeNumber = params->singleVolumeNumber;
1521 salvinfo = params->salvinfo;
1523 DiskToVolumeHeader(&summary.header, hdr);
/* The requested volume exists but is not its own parent, i.e. we were
 * asked to salvage a clone by itself. */
1525 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1526 && summary.header.parent != singleVolumeNumber) {
1528 if (programType == salvageServer) {
1529 #ifdef SALVSYNC_BUILD_CLIENT
1530 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1531 summary.header.id, summary.header.parent);
1532 if (SALVSYNC_LinkVolume(summary.header.parent,
1536 Log("schedule request failed\n");
1539 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1542 Log("%u is a read-only volume; not salvaged\n",
1543 singleVolumeNumber);
/* Only headers relevant to this salvage are recorded: every header for a
 * partition salvage, otherwise only those whose id or parent is the
 * volume being salvaged. */
1548 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1549 || summary.header.parent == singleVolumeNumber) {
1551 /* check if the header file is incorrectly named */
1553 const char *base = strrchr(name, OS_DIRSEPC);
/* Build the file name this header should have from its volume id. */
1560 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1561 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1564 if (strcmp(nameShouldBe, base)) {
1565 /* .vol file has wrong name; retry/delete */
1569 if (!badname || last) {
1570 /* only offline the volume if the header is good, or if this is
1571 * the last try looking at it; avoid AskOffline'ing the same vol
1574 if (singleVolumeNumber
1575 && summary.header.id != singleVolumeNumber) {
1576 /* don't offline singleVolumeNumber; we already did that
1579 AskOffline(salvinfo, summary.header.id);
1581 #ifdef AFS_DEMAND_ATTACH_FS
1583 /* don't lock the volume if the header is bad, since we're
1584 * about to delete it anyway. */
1585 if (LockVolume(salvinfo, summary.header.id)) {
1590 #endif /* AFS_DEMAND_ATTACH_FS */
/* The misnamed-header message is only logged on the last attempt, and
 * not in Showmode. */
1594 if (last && !Showmode) {
1595 Log("Volume header file %s is incorrectly named (should be %s "
1596 "not %s); %sdeleted (it will be recreated later, if "
1597 "necessary)\n", name, nameShouldBe, base,
1598 (Testing ? "it would have been " : ""));
1603 summary.fileName = ToString(base);
/* Guard against overrunning the array sized by the caller. */
1606 if (params->nVolumes > params->totalVolumes) {
1607 /* We found more volumes than we found on the first partition walk;
1608 * apparently something created a volume while we were
1609 * partition-salvaging, or we found more than 20 vols when salvaging a
1610 * particular volume. Abort if we detect this, since other programs
1611 * supposed to not touch the partition while it is partition-salvaging,
1612 * and we shouldn't find more than 20 vols in a VG.
1614 Abort("Found %ld vol headers, but should have found at most %ld! "
1615 "Make sure the volserver/fileserver are not running at the "
1616 "same time as a partition salvage\n",
1617 afs_printable_int32_ld(params->nVolumes),
1618 afs_printable_int32_ld(params->totalVolumes));
1621 memcpy(params->vsp, &summary, sizeof(summary));
1629 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1631 * If the header could not be read in at all, the header is always unlinked.
1632 * If instead RecordHeader said the header was bad (that is, the header file
1633 * is mis-named), we only unlink if we are doing a partition salvage, as
1634 * opposed to salvaging a specific volume group.
1636 * @param[in] dp the disk partition
1637 * @param[in] name full path to the .vol header
1638 * @param[in] hdr header data, or NULL if the header could not be read
1639 * @param[in] rock actually a struct SalvageScanParams*, with some information
/* VWalkVolumeHeaders callback for headers that were rejected; passed by
 * GetVolumeSummary alongside RecordHeader. rock is a struct
 * SalvageScanParams. When the dounlink flag ends up set, the header file
 * is removed with unlink() and a failure is only logged.
 * NOTE(review): the lines that declare and set dounlink (and any Testing
 * or Showmode handling) are not visible in this listing. */
1643 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1644 struct VolumeDiskHeader *hdr, void *rock)
1646 struct SalvageScanParams *params;
1649 params = (struct SalvageScanParams *)rock;
/* First case: the header could not be read at all. */
1652 /* no header; header is too bogus to read in at all */
1654 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1660 } else if (!params->singleVolumeNumber) {
1661 /* We were able to read in a header, but RecordHeader said something
1662 * was wrong with it. We only unlink those if we are doing a partition
1669 if (dounlink && unlink(name)) {
1670 Log("Error %d while trying to unlink %s\n", errno, name);
1675 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1676 * the fileserver for VG information, or by scanning the /vicepX partition.
1678 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1679 * are salvaging, or 0 if this is a partition
1682 * @return operation status
1684 * @retval -1 we raced with a fileserver restart; checking out and locking
1685 * volumes must be retried
/* Populate salvinfo->volumeSummaryp and salvinfo->nVolumes.
 *
 * AskVolumeSummary is tried first. If that does not settle the matter,
 * the partition's volume headers are walked with VWalkVolumeHeaders:
 * for a partition salvage a first pass with CountHeader sizes the array;
 * for a single volume group the array holds VOL_VG_MAX_VOLS entries.
 * A second pass with RecordHeader / UnlinkHeader records the summaries,
 * which are then sorted.
 *
 * NOTE(review): the return statements and a few other lines are not
 * visible in this listing; the return contract is the one given in the
 * header comment above. */
1688 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1690 afs_int32 nvols = 0;
1691 struct SalvageScanParams params;
1694 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1696 /* we successfully got the vol information from the fileserver; no
1697 * need to scan the partition */
1701 /* we need to retry volume checkout */
1705 if (!singleVolumeNumber) {
1706 /* Count how many volumes we have in /vicepX */
1707 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1710 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
/* Single volume group: size the array for the largest possible group. */
1715 nvols = VOL_VG_MAX_VOLS;
1718 salvinfo->volumeSummaryp = malloc(nvols * sizeof(struct VolumeSummary));
1719 osi_Assert(salvinfo->volumeSummaryp != NULL);
/* Set up the state shared with the RecordHeader / UnlinkHeader callbacks;
 * totalVolumes is the capacity RecordHeader checks nVolumes against. */
1721 params.singleVolumeNumber = singleVolumeNumber;
1722 params.vsp = salvinfo->volumeSummaryp;
1723 params.nVolumes = 0;
1724 params.totalVolumes = nvols;
1726 params.salvinfo = salvinfo;
1728 /* walk the partition directory of volume headers and record the info
1729 * about them; unlinking invalid headers */
1730 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1731 UnlinkHeader, &params);
1733 /* we apparently need to retry checking-out/locking volumes */
1737 Abort("Failed to get volume header summary\n");
1739 salvinfo->nVolumes = params.nVolumes;
/* Sort the recorded summaries.
 * NOTE(review): the comparator argument is on a line not visible here. */
1741 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1747 /* Find the link table. This should be associated with the RW volume or, if
1748 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
/* Search the special inodes of each of the nVols inode summaries in isp
 * for one of type VI_LINKTABLE and return its inode number.
 * allInodes is indexed with each summary's index field to reach that
 * volume's first inode record.
 * NOTE(review): the value returned when no link table inode is found is
 * on a line not visible in this listing. */
1751 FindLinkHandle(struct InodeSummary *isp, int nVols,
1752 struct ViceInodeInfo *allInodes)
1755 struct ViceInodeInfo *ip;
1757 for (i = 0; i < nVols; i++) {
/* First inode record belonging to volume i. */
1758 ip = allInodes + isp[i].index;
/* Only the leading nSpecialInodes records of the volume are examined. */
1759 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1760 if (ip[j].u.special.type == VI_LINKTABLE)
1761 return ip[j].inodeNumber;
/* Make sure the volume group has a link table inode and (re)initialise
 * its contents.
 *
 * If ino is not a valid inode, a new INODESPECIAL / VI_LINKTABLE inode is
 * created for the group's RW volume. The inode is then opened through
 * salvinfo->VGLinkH, truncated to a version stamp plus one short, and the
 * stamp (LINKTABLEMAGIC / LINKTABLEVERSION) is written at offset 0.
 * Every failure along the way calls Abort(). If a volume summary is
 * attached to isp, its header.linkTable is updated to the inode used.
 *
 * NOTE(review): some lines (local declarations, the assignment of the
 * IH_CREATE result, parts of conditions) are not visible in this
 * listing. */
1768 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1770 struct versionStamp version;
1773 if (!VALID_INO(ino))
1775 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
1776 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1777 if (!VALID_INO(ino))
1779 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1780 isp->RWvolumeId, errno);
1781 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1782 fdP = IH_OPEN(salvinfo->VGLinkH);
1784 Abort("Can't open link table for volume %u (error = %d)\n",
1785 isp->RWvolumeId, errno);
/* Size the table to just the version stamp plus a single short. */
1787 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1788 Abort("Can't truncate link table for volume %u (error = %d)\n",
1789 isp->RWvolumeId, errno);
1791 version.magic = LINKTABLEMAGIC;
1792 version.version = LINKTABLEVERSION;
/* Write the stamp at the start of the table. This is a write failure,
 * so the message says "write" rather than repeating the truncate text. */
1794 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1796 Abort("Can't write link table for volume %u (error = %d)\n",
1797 isp->RWvolumeId, errno);
1799 FDH_REALLYCLOSE(fdP);
1801 /* If the volume summary exits (i.e., the V*.vol header file exists),
1802 * then set this inode there as well.
1804 if (isp->volSummary)
1805 isp->volSummary->header.linkTable = ino;
/* Body of the thread start routine that SalvageVolumeGroup hands to
 * pthread_create as nt_SVG (NOTE(review): its signature line is not
 * visible in this listing). arg is the SVGParms_t filled in by
 * SalvageVolumeGroup; its three fields are forwarded unchanged to
 * DoSalvageVolumeGroup. */
1814 SVGParms_t *parms = (SVGParms_t *) arg;
1815 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
/* Entry point for salvaging one volume group (isp[0..nVols-1]).
 *
 * Resets the per-group fields of salvinfo, then, in the code visible
 * here, packs the arguments into an SVGParms_t, starts a joinable thread
 * running nt_SVG on it, and waits for that thread with pthread_join.
 * Failures of the pthread calls are logged.
 *
 * NOTE(review): the trailing #endif names AFS_NT40_ENV, so the thread
 * path is presumably the NT build only; the matching #ifdef/#else lines
 * and the non-NT path are not visible in this listing -- confirm. */
1820 SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1823 pthread_attr_t tattr;
1827 /* Initialize per volume global variables, even if later code does so */
1828 salvinfo->VolumeChanged = 0;
1829 salvinfo->VGLinkH = NULL;
1830 salvinfo->VGLinkH_cnt = 0;
1831 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
/* Bundle the arguments for the worker thread. parms is a local, which is
 * safe because this function joins the thread before returning. */
1833 parms.svgp_inodeSummaryp = isp;
1834 parms.svgp_count = nVols;
1835 parms.svgp_salvinfo = salvinfo;
1836 code = pthread_attr_init(&tattr);
1838 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1842 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1844 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1847 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1849 Log("Failed to create thread to salvage volume group %u\n",
/* Block until the worker has finished; its exit value is ignored. */
1853 (void)pthread_join(tid, NULL);
1855 #endif /* AFS_NT40_ENV */
/* Salvage the volume group described by isp[0..nVols-1].
 *
 * Visible outline:
 *   1. haveRWvolume is set when the first summary is its own RW volume
 *      and has at least one special inode.
 *   2. Unless ForceSalvage is set, QuickCheck is consulted first.
 *   3. When forking is allowed and debug is off, the work is done in a
 *      child while the parent waits.
 *   4. The group's ViceInodeInfo records are read from salvinfo->inodeFd.
 *   5. Under AFS_NAMEI_ENV the link table is located, or recreated via
 *      CreateLinkTable, and opened.
 *   6. Volumes are processed from the last summary down to salvageTo,
 *      each with a check pass (check == 1) followed by a real pass
 *      (check == 0) over the header file and the vnodes.
 *   7. Remaining non-zero link counts are driven to zero with IH_DEC /
 *      IH_INC; the link table inode itself is handled last.
 *   8. SalvageVolume runs on the group and the link handle is released.
 *
 * NOTE(review): many lines (returns, braces, several conditions and
 * preprocessor lines) are not visible in this listing. */
1858 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1860 struct ViceInodeInfo *inodes, *allInodes, *ip;
1861 int i, totalInodes, size, salvageTo;
1865 int dec_VGLinkH = 0;
1867 FdHandle_t *fdP = NULL;
1869 salvinfo->VGLinkH_cnt = 0;
/* The RW volume counts as present only if it owns special inodes. */
1870 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1871 && isp->nSpecialInodes > 0);
1872 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1873 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1876 if (ShowMounts && !haveRWvolume)
/* Fork() != 0 is the parent side: it only waits for the child. */
1878 if (canfork && !debug && Fork() != 0) {
1879 (void)Wait("Salvage volume group");
/* Total number of inode records across the whole group. */
1882 for (i = 0, totalInodes = 0; i < nVols; i++)
1883 totalInodes += isp[i].nInodes;
1884 size = totalInodes * sizeof(struct ViceInodeInfo);
1885 inodes = (struct ViceInodeInfo *)malloc(size);
1886 allInodes = inodes - isp->index; /* this would the base of all the inodes
1887 * for the partition, if all the inodes
1888 * had been read into memory */
/* Seek to this group's first record in the inode file and read them all. */
1889 osi_Assert(afs_lseek
1890 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1892 osi_Assert(read(salvinfo->inodeFd, inodes, size) == size);
1894 /* Don't try to salvage a read write volume if there isn't one on this
1896 salvageTo = haveRWvolume ? 0 : 1;
1898 #ifdef AFS_NAMEI_ENV
1899 ino = FindLinkHandle(isp, nVols, allInodes);
1900 if (VALID_INO(ino)) {
1901 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1902 fdP = IH_OPEN(salvinfo->VGLinkH);
1904 if (!VALID_INO(ino) || fdP == NULL) {
1905 Log("%s link table for volume %u.\n",
1906 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1908 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1911 struct ViceInodeInfo *ip;
1912 CreateLinkTable(salvinfo, isp, ino);
1913 fdP = IH_OPEN(salvinfo->VGLinkH);
1914 /* Sync fake 1 link counts to the link table, now that it exists */
1916 for (i = 0; i < nVols; i++) {
1917 ip = allInodes + isp[i].index;
1918 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1919 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1926 FDH_REALLYCLOSE(fdP);
1928 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1931 /* Salvage in reverse order--read/write volume last; this way any
1932 * Inodes not referenced by the time we salvage the read/write volume
1933 * can be picked up by the read/write volume */
1934 /* ACTUALLY, that's not done right now--the inodes just vanish */
/* i runs from the last summary down to salvageTo (0 when the RW volume
 * is present, 1 otherwise). */
1935 for (i = nVols - 1; i >= salvageTo; i--) {
1937 struct InodeSummary *lisp = &isp[i];
1938 #ifdef AFS_NAMEI_ENV
1939 /* If only the RO is present on this partition, the link table
1940 * shows up as a RW volume special file. Need to make sure the
1941 * salvager doesn't try to salvage the non-existent RW.
1943 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1944 /* If this only special inode is the link table, continue */
1945 if (inodes->u.special.type == VI_LINKTABLE) {
1952 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1953 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1954 /* Check inodes twice. The second time do things seriously. This
1955 * way the whole RO volume can be deleted, below, if anything goes wrong */
1956 for (check = 1; check >= 0; check--) {
1958 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
1960 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
1961 if (rw && deleteMe) {
1962 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1963 * volume won't be called */
1969 if (rw && check == 1)
1971 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
1972 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
1978 /* Fix actual inode counts */
1981 Log("totalInodes %d\n",totalInodes);
/* Visit every inode record of the group. A positive remaining linkCount
 * is removed with IH_DEC, a negative one is made up with IH_INC. */
1982 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1983 static int TraceBadLinkCounts = 0;
1984 #ifdef AFS_NAMEI_ENV
1985 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
1986 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
1987 VGLinkH_p1 = ip->u.param[0];
1988 continue; /* Deal with this last. */
1991 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1992 TraceBadLinkCounts--; /* Limit reports, per volume */
1993 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
1995 while (ip->linkCount > 0) {
1996 /* below used to assert, not break */
1998 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1999 Log("idec failed. inode %s errno %d\n",
2000 PrintInode(stmp, ip->inodeNumber), errno);
2006 while (ip->linkCount < 0) {
2007 /* these used to be asserts */
2009 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2010 Log("iinc failed. inode %s errno %d\n",
2011 PrintInode(stmp, ip->inodeNumber), errno);
2018 #ifdef AFS_NAMEI_ENV
/* Now settle the link table inode that the loop above skipped:
 * dec_VGLinkH is its linkCount minus salvinfo->VGLinkH_cnt. */
2019 while (dec_VGLinkH > 0) {
2020 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2021 Log("idec failed on link table, errno = %d\n", errno);
2025 while (dec_VGLinkH < 0) {
2026 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2027 Log("iinc failed on link table, errno = %d\n", errno);
2034 /* Directory consistency checks on the rw volume */
2036 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2037 IH_RELEASE(salvinfo->VGLinkH);
2039 if (canfork && !debug) {
/* Inspect the volume info header of each volume in the group without
 * doing a full salvage.
 *
 * For each summary the VolumeDiskData is read through the volumeInfo
 * inode named in the volume header. The header passes when the read is
 * complete, the magic is VOLUMEINFOMAGIC, dontSalvage is DONT_SALVAGE and
 * both needsSalvaged and destroyMe are zero. A passing header that is
 * still marked inUse is rewritten with inUse cleared and inService set.
 *
 * NOTE(review): the return statements are not visible in this listing.
 * DoSalvageVolumeGroup tests the result for non-zero when ForceSalvage is
 * off; presumably non-zero means the group needs no salvage -- confirm. */
2046 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2048 /* Check headers BEFORE forking */
2052 for (i = 0; i < nVols; i++) {
2053 struct VolumeSummary *vs = isp[i].volSummary;
2054 VolumeDiskData volHeader;
2056 /* Don't salvage just because phantom rw volume is there... */
2057 /* (If a read-only volume exists, read/write inodes must also exist) */
2058 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
/* Open the volume info inode named by this volume's header. */
2062 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2063 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2064 == sizeof(volHeader)
2065 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2066 && volHeader.dontSalvage == DONT_SALVAGE
2067 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
/* Clean header left marked in use: clear the flag and write it back. */
2068 if (volHeader.inUse != 0) {
2069 volHeader.inUse = 0;
2070 volHeader.inService = 1;
2072 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2073 != sizeof(volHeader)) {
2089 /* SalvageVolumeHeaderFile
2091 * Salvage the top level V*.vol header file. Make sure the special files
2092 * exist and that there are no duplicates.
2094 * Calls SalvageHeader for each possible type of volume special file.
/* Salvage the V*.vol header file of the volume described by isp.
 *
 * check selects a check-only pass (non-zero) or a repairing pass (zero);
 * RW says whether the volume is the read-write volume; deleteMe is passed
 * through to SalvageHeader.
 *
 * Visible outline:
 *   1. Note which special inodes are already referenced by the existing
 *      volume header (goodspecial[]), when a header and the skip array
 *      are both available.
 *   2. Start a fresh tempHeader carrying the magic, version, volume id
 *      and parent id.
 *   3. Look for adjacent special inodes of the same type; the one the
 *      existing header references is preferred, otherwise the salvage of
 *      the volume is abandoned.
 *   4. Record each usable special inode in tempHeader through stuff[],
 *      reporting out-of-range types and skipped duplicates.
 *   5. Run SalvageHeader on every special file type.
 *   6. If there is no header file, or it differs from tempHeader, write a
 *      new one (VCreateVolumeDiskHeader or VWriteVolumeDiskHeader).
 *   7. Initialise the volume info handle from the resulting header.
 *
 * NOTE(review): many lines (returns, braces, declarations, several
 * conditions) are not visible in this listing. */
2098 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2099 struct ViceInodeInfo *inodes, int RW,
2100 int check, int *deleteMe)
2103 struct ViceInodeInfo *ip;
2104 int allinodesobsolete = 1;
2105 struct VolumeDiskHeader diskHeader;
/* Set later to the routine that creates or rewrites the header file. */
2106 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2108 struct VolumeHeader tempHeader;
2109 struct afs_inode_info stuff[MAXINODETYPE];
2111 /* keeps track of special inodes that are probably 'good'; they are
2112 * referenced in the vol header, and are included in the given inodes
2117 } goodspecial[MAXINODETYPE];
2122 memset(goodspecial, 0, sizeof(goodspecial));
2124 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2126 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2128 Log("cannot allocate memory for inode skip array when salvaging "
2129 "volume %lu; not performing duplicate special inode recovery\n",
2130 afs_printable_uint32_lu(isp->volumeId));
2131 /* still try to perform the salvage; the skip array only does anything
2132 * if we detect duplicate special inodes */
2135 init_inode_info(&tempHeader, stuff);
2138 * First, look at the special inodes and see if any are referenced by
2139 * the existing volume header. If we find duplicate special inodes, we
2140 * can use this information to use the referenced inode (it's more
2141 * likely to be the 'good' one), and throw away the duplicates.
2143 if (isp->volSummary && skip) {
2144 /* use tempHeader, so we can use the stuff[] array to easily index
2145 * into the isp->volSummary special inodes */
2146 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2148 for (i = 0; i < isp->nSpecialInodes; i++) {
2149 ip = &inodes[isp->index + i];
2150 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2151 /* will get taken care of in a later loop */
/* This inode is the one the existing header names for its type. */
2154 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2155 goodspecial[ip->u.special.type-1].valid = 1;
2156 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
/* Rebuild tempHeader from scratch; the special inode fields are filled
 * in by the loops below. */
2161 memset(&tempHeader, 0, sizeof(tempHeader));
2162 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2163 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2164 tempHeader.id = isp->volumeId;
2165 tempHeader.parent = isp->RWvolumeId;
2167 /* Check for duplicates (inodes are sorted by type field) */
2168 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2169 ip = &inodes[isp->index + i];
2170 if (ip->u.special.type == (ip + 1)->u.special.type) {
2171 afs_ino_str_t stmp1, stmp2;
2173 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2174 /* Will be caught in the loop below */
2178 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2179 ip->u.special.type, isp->volumeId,
2180 PrintInode(stmp1, ip->inodeNumber),
2181 PrintInode(stmp2, (ip+1)->inodeNumber));
2183 if (skip && goodspecial[ip->u.special.type-1].valid) {
2184 Inode gi = goodspecial[ip->u.special.type-1].inode;
2187 Log("using special inode referenced by vol header (%s)\n",
2188 PrintInode(stmp1, gi));
2191 /* the volume header references some special inode of
2192 * this type in the inodes array; are we it? */
2193 if (ip->inodeNumber != gi) {
2195 } else if ((ip+1)->inodeNumber != gi) {
2196 /* in case this is the last iteration; we need to
2197 * make sure we check ip+1, too */
2202 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
/* Main pass over the special inodes: reject out-of-range types, report
 * the duplicates marked in skip[], and record the remaining inodes in
 * tempHeader through stuff[]. */
2210 for (i = 0; i < isp->nSpecialInodes; i++) {
2212 ip = &inodes[isp->index + i];
2213 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2215 Log("Rubbish header inode %s of type %d\n",
2216 PrintInode(stmp, ip->inodeNumber),
2217 ip->u.special.type);
2223 Log("Rubbish header inode %s of type %d; deleted\n",
2224 PrintInode(stmp, ip->inodeNumber),
2225 ip->u.special.type);
2226 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2227 if (skip && skip[i]) {
2228 if (orphans == ORPH_REMOVE) {
2229 Log("Removing orphan special inode %s of type %d\n",
2230 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2233 Log("Ignoring orphan special inode %s of type %d\n",
2234 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2235 /* fall through to the ip->linkCount--; line below */
2238 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2239 allinodesobsolete = 0;
2241 if (!check && ip->u.special.type != VI_LINKTABLE)
2242 ip->linkCount--; /* Keep the inode around */
2250 if (allinodesobsolete) {
2257 salvinfo->VGLinkH_cnt++; /* one for every header. */
2259 if (!RW && !check && isp->volSummary) {
2260 ClearROInUseBit(isp->volSummary);
/* Salvage each special file type in turn through SalvageHeader. */
2264 for (i = 0; i < MAXINODETYPE; i++) {
2265 if (stuff[i].inodeType == VI_LINKTABLE) {
2266 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2267 * And we may have recreated the link table earlier, so set the
2268 * RW header as well.
2270 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2271 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2275 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2279 if (isp->volSummary == NULL) {
2281 char headerName[64];
2282 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2283 (void)afs_snprintf(path, sizeof path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, headerName);
2285 Log("No header file for volume %u\n", isp->volumeId);
2289 Log("No header file for volume %u; %screating %s\n",
2290 isp->volumeId, (Testing ? "it would have been " : ""),
2292 isp->volSummary = (struct VolumeSummary *)
2293 malloc(sizeof(struct VolumeSummary));
2294 isp->volSummary->fileName = ToString(headerName);
2296 writefunc = VCreateVolumeDiskHeader;
2299 char headerName[64];
2300 /* hack: these two fields are obsolete... */
2301 isp->volSummary->header.volumeAcl = 0;
2302 isp->volSummary->header.volumeMountTable = 0;
2305 (&isp->volSummary->header, &tempHeader,
2306 sizeof(struct VolumeHeader))) {
2307 /* We often remove the name before calling us, so we make a fake one up */
2308 if (isp->volSummary->fileName) {
2309 strcpy(headerName, isp->volSummary->fileName);
2311 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2312 isp->volSummary->fileName = ToString(headerName);
2314 (void)afs_snprintf(path, sizeof path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, headerName);
2316 Log("Header file %s is damaged or no longer valid%s\n", path,
2317 (check ? "" : "; repairing"));
2321 writefunc = VWriteVolumeDiskHeader;
/* Adopt the rebuilt header as the in-memory volume header. */
2325 memcpy(&isp->volSummary->header, &tempHeader,
2326 sizeof(struct VolumeHeader));
2329 Log("It would have written a new header file for volume %u\n",
/* Convert to the on-disk layout and write it with the chosen routine. */
2333 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2334 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2336 Log("Error %ld writing volume header file for volume %lu\n",
2337 afs_printable_int32_ld(code),
2338 afs_printable_uint32_lu(diskHeader.id));
2343 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2344 isp->volSummary->header.volumeInfo);
/**
 * Check one special inode of a volume header and, when it is missing or
 * corrupted, recreate it.
 *
 * What the visible code does:
 *  - outside AFS_NAMEI_ENV the link table inode type (VI_LINKTABLE) is
 *    special-cased first (the action taken is not visible in this excerpt);
 *  - when *(sp->inode) is 0 the inode is reported missing and a new special
 *    inode is created with IH_CREATE;
 *  - the inode is opened; when the open fails, OKToZap is set and BadError(errno)
 *    holds, the volume is logged as being destroyed, otherwise Abort() is called;
 *  - sp->size bytes are read at offset 0 and the magic number is compared with
 *    sp->stamp.magic; on mismatch the header part is reported corrupted and the
 *    in-memory copy is zeroed so no garbage is interpreted later;
 *  - a VI_VOLINFO header whose destroyMe field equals DESTROY_ME is closed
 *    early (presumably *deleteMe is set there -- TODO confirm, the assignment
 *    is not visible here);
 *  - when recreation is needed and Testing is off, the file is truncated and
 *    rewritten: for VI_VOLINFO a fresh VolumeDiskData named "bogus.<id>" is
 *    built (out of service, not blessed, uniquifier = maxUniquifier + 1 + 1000),
 *    for every other type only the version stamp is written;
 *  - for VI_VOLINFO the resulting header is copied to salvinfo->VolInfo and the
 *    update (or creation) date is logged.
 *
 * @param salvinfo  salvage state; fileSysDevice/fileSysPath are used for inode
 *                  creation and VolInfo receives the volume info header
 * @param sp        description of the special inode (inodeType, inode,
 *                  description, size and stamp are read)
 * @param isp       inode summary of the volume (volumeId, RWvolumeId and
 *                  maxUniquifier are read)
 * @param check     check-only mode; the "Internal error: recreating volume
 *                  header (%s) in check mode" message shows recreation is not
 *                  expected in this mode
 * @param deleteMe  output flag -- NOTE(review): its assignments are on lines
 *                  not visible in this excerpt; confirm against the full file
 */
2349 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2350 struct InodeSummary *isp, int check, int *deleteMe)
2353 VolumeDiskData volumeInfo;
2354 struct versionStamp fileHeader;
2363 #ifndef AFS_NAMEI_ENV
2364 if (sp->inodeType == VI_LINKTABLE)
2367 if (*(sp->inode) == 0) {
2369 Log("Missing inode in volume header (%s)\n", sp->description);
2373 Log("Missing inode in volume header (%s); %s\n", sp->description,
2374 (Testing ? "it would have recreated it" : "recreating"));
2377 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2378 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2379 if (!VALID_INO(*(sp->inode)))
2381 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2382 sp->description, errno);
2387 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2388 fdP = IH_OPEN(specH);
2389 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2390 /* bail out early and destroy the volume */
2392 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2399 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2400 sp->description, errno);
2403 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2404 || header.fileHeader.magic != sp->stamp.magic)) {
2406 Log("Part of the header (%s) is corrupted\n", sp->description);
2407 FDH_REALLYCLOSE(fdP);
2411 Log("Part of the header (%s) is corrupted; recreating\n",
2414 /* header can be garbage; make sure we don't read garbage data from
2416 memset(&header, 0, sizeof(header));
2418 if (sp->inodeType == VI_VOLINFO
2419 && header.volumeInfo.destroyMe == DESTROY_ME) {
2422 FDH_REALLYCLOSE(fdP);
2426 if (recreate && !Testing) {
2429 ("Internal error: recreating volume header (%s) in check mode\n",
2431 nBytes = FDH_TRUNC(fdP, 0);
2433 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2434 sp->description, errno);
2436 /* The following code should be moved into vutil.c */
2437 if (sp->inodeType == VI_VOLINFO) {
2439 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2440 header.volumeInfo.stamp = sp->stamp;
2441 header.volumeInfo.id = isp->volumeId;
2442 header.volumeInfo.parentId = isp->RWvolumeId;
2443 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2444 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2445 isp->volumeId, isp->volumeId);
2446 header.volumeInfo.inService = 0;
2447 header.volumeInfo.blessed = 0;
2448 /* The + 1000 is a hack in case there are any files out in venus caches */
2449 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2450 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2451 header.volumeInfo.needsCallback = 0;
2452 gettimeofday(&tp, 0);
2453 header.volumeInfo.creationDate = tp.tv_sec;
2455 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2456 sizeof(header.volumeInfo), 0);
2457 if (nBytes != sizeof(header.volumeInfo)) {
2460 ("Unable to write volume header file (%s) (errno = %d)\n",
2461 sp->description, errno);
2462 Abort("Unable to write entire volume header file (%s)\n",
2466 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2467 if (nBytes != sizeof(sp->stamp)) {
2470 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2471 sp->description, errno);
2473 ("Unable to write entire version stamp in volume header file (%s)\n",
2478 FDH_REALLYCLOSE(fdP);
2480 if (sp->inodeType == VI_VOLINFO) {
2481 salvinfo->VolInfo = header.volumeInfo;
2485 if (salvinfo->VolInfo.updateDate) {
2486 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2488 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2489 salvinfo->VolInfo.id,
2490 (Testing ? "it would have been " : ""), update);
2492 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2494 Log("%s (%u) not updated (created %s)\n",
2495 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
/**
 * Salvage both vnode indexes (small, then large) of one volume.
 *
 * The inode slice handed to SalvageIndex() always comes from the read-write
 * volume's summary: it starts after the RW volume's special inodes
 * (rwIsp->index + rwIsp->nSpecialInodes) and holds
 * rwIsp->nInodes - rwIsp->nSpecialInodes entries.  RW is true when the
 * volume being salvaged (thisIsp) is the read-write volume itself.
 *
 * @param salvinfo  salvage state, passed through to SalvageIndex()
 * @param rwIsp     inode summary of the read-write volume
 * @param thisIsp   inode summary of the volume being salvaged; its
 *                  volSummary header supplies the two index inodes
 * @param inodes    inode table that rwIsp->index refers into
 * @param check     check-only flag, passed through to SalvageIndex(); when
 *                  set and the small index reports -1 the function stops
 *                  early (the statement itself is not visible in this
 *                  excerpt -- presumably it returns -1)
 *
 * @return 0 when both SalvageIndex() calls returned 0, otherwise -1
 */
2505 SalvageVnodes(struct SalvInfo *salvinfo,
2506 struct InodeSummary *rwIsp,
2507 struct InodeSummary *thisIsp,
2508 struct ViceInodeInfo *inodes, int check)
2510 int ilarge, ismall, ioffset, RW, nInodes;
2511 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2514 RW = (rwIsp == thisIsp);
2515 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2517 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2518 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2519 if (check && ismall == -1)
2522 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2523 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2524 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/**
 * Salvage one vnode index file by reconciling every vnode in it with the
 * list of inodes found on disk for the volume.
 *
 * The index file is opened through an inode handle and read as a stream of
 * vcp->diskSize records; the first record is skipped (the stream is
 * positioned at vcp->diskSize) and the file size is asserted to be an exact
 * multiple of the record size.  For each allocated vnode (type != vNull) the
 * visible code:
 *  - zeroes vnodes that carry inode number 0 or a bad magic number
 *    ("partially allocated" vnodes);
 *  - advances through the inode list (ip/nInodes) up to the entries whose
 *    vnodeNumber equals this vnode; for a read-write volume it prefers the
 *    inode whose number matches the one stored in the vnode, otherwise the
 *    first one in list order;
 *  - with a matching inode: repairs the uniquifier (compared through
 *    IUnique()), raises dataVersion to the inode's version where required,
 *    corrects the stored inode number and the length, and decrements
 *    ip->linkCount so the inode is kept;
 *  - without a matching inode: a vnode that lists an inode number, or is a
 *    directory, is logged and zeroed;
 *  - a changed vnode is written back with IH_IWRITE unless Testing is set,
 *    and salvinfo->VolumeChanged is raised so callbacks get broken.
 * The assertion !(vnodeChanged && check) shows that no modification is
 * expected to reach the write-back step in check mode.
 *
 * @param salvinfo    salvage state (fileSysDevice, VolumeChanged)
 * @param ino         inode of the vnode index file
 * @param class       vnode class of the index (selects VnodeClassInfo[class])
 * @param RW          non-zero when the volume is the read-write volume
 * @param ip          inode list to match against -- presumably sorted by
 *                    vnode number, since it is consumed in a single forward
 *                    pass; verify against the caller
 * @param nInodes     number of entries in ip
 * @param volSummary  volume summary; header.parent is used for the handle
 * @param check       check-only mode
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * SalvageVnodes() treats 0 as success and -1 as failure.
 */
2528 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2529 struct ViceInodeInfo *ip, int nInodes,
2530 struct VolumeSummary *volSummary, int check)
2532 char buf[SIZEOF_LARGEDISKVNODE];
2533 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2535 StreamHandle_t *file;
2536 struct VnodeClassInfo *vcp;
2538 afs_sfsize_t nVnodes;
2539 afs_fsize_t vnodeLength;
2541 afs_ino_str_t stmp1, stmp2;
2545 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2546 fdP = IH_OPEN(handle);
2547 osi_Assert(fdP != NULL);
2548 file = FDH_FDOPEN(fdP, "r+");
2549 osi_Assert(file != NULL);
2550 vcp = &VnodeClassInfo[class];
2551 size = OS_SIZE(fdP->fd_fd);
2552 osi_Assert(size != -1);
2553 nVnodes = (size / vcp->diskSize) - 1;
2555 osi_Assert((nVnodes + 1) * vcp->diskSize == size);
2556 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
2560 for (vnodeIndex = 0;
2561 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2562 nVnodes--, vnodeIndex++) {
2563 if (vnode->type != vNull) {
2564 int vnodeChanged = 0;
2565 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2566 if (VNDISK_GET_INO(vnode) == 0) {
2568 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2569 memset(vnode, 0, vcp->diskSize);
2573 if (vcp->magic != vnode->vnodeMagic) {
2574 /* bad magic #, probably partially created vnode */
2576 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2577 vnodeNumber, afs_printable_uint32_lu(vnode->vnodeMagic),
2578 afs_printable_uint32_lu(vcp->magic));
2579 memset(vnode, 0, vcp->diskSize);
2583 Log("Partially allocated vnode %d deleted.\n",
2585 memset(vnode, 0, vcp->diskSize);
2589 /* ****** Should do a bit more salvage here: e.g. make sure
2590 * vnode type matches what it should be given the index */
2591 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2592 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2593 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2594 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2601 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2602 /* The following doesn't work, because the version number
2603 * is not maintained correctly by the file server */
2604 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2605 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2607 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2613 /* For RW volume, look for vnode with matching inode number;
2614 * if no such match, take the first determined by our sort
2616 struct ViceInodeInfo *lip = ip;
2617 int lnInodes = nInodes;
2619 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2620 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2629 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2630 /* "Matching" inode */
2634 vu = vnode->uniquifier;
2635 iu = ip->u.vnode.vnodeUniquifier;
2636 vd = vnode->dataVersion;
2637 id = ip->u.vnode.inodeDataVersion;
2639 * Because of the possibility of the uniquifier overflows (> 4M)
2640 * we compare them modulo the low 22-bits; we shouldn't worry
2641 * about mismatching since they shouldn't to many old
2642 * uniquifiers of the same vnode...
2644 if (IUnique(vu) != IUnique(iu)) {
2646 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2649 vnode->uniquifier = iu;
2650 #ifdef AFS_3DISPARES
2651 vnode->dataVersion = (id >= vd ?
2654 1887437 ? vd : id) :
2657 1887437 ? id : vd));
2659 #if defined(AFS_SGI_EXMAG)
2660 vnode->dataVersion = (id >= vd ?
2663 15099494 ? vd : id) :
2666 15099494 ? id : vd));
2668 vnode->dataVersion = (id > vd ? id : vd);
2669 #endif /* AFS_SGI_EXMAG */
2670 #endif /* AFS_3DISPARES */
2673 /* don't bother checking for vd > id any more, since
2674 * partial file transfers always result in this state,
2675 * and you can't do much else anyway (you've already
2676 * found the best data you can) */
2677 #ifdef AFS_3DISPARES
2678 if (!vnodeIsDirectory(vnodeNumber)
2679 && ((vd < id && (id - vd) < 1887437)
2680 || ((vd > id && (vd - id) > 1887437)))) {
2682 #if defined(AFS_SGI_EXMAG)
2683 if (!vnodeIsDirectory(vnodeNumber)
2684 && ((vd < id && (id - vd) < 15099494)
2685 || ((vd > id && (vd - id) > 15099494)))) {
2687 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2688 #endif /* AFS_SGI_EXMAG */
2691 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2692 vnode->dataVersion = id;
2697 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2700 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2702 VNDISK_SET_INO(vnode, ip->inodeNumber);
2707 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2709 VNDISK_SET_INO(vnode, ip->inodeNumber);
2712 VNDISK_GET_LEN(vnodeLength, vnode);
2713 if (ip->byteCount != vnodeLength) {
2716 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2721 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2722 VNDISK_SET_LEN(vnode, ip->byteCount);
2726 ip->linkCount--; /* Keep the inode around */
2729 } else { /* no matching inode */
2731 if (VNDISK_GET_INO(vnode) != 0
2732 || vnode->type == vDirectory) {
2733 /* No matching inode--get rid of the vnode */
2735 if (VNDISK_GET_INO(vnode)) {
2737 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2741 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2746 if (VNDISK_GET_INO(vnode)) {
2748 time_t serverModifyTime = vnode->serverModifyTime;
2749 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2753 time_t serverModifyTime = vnode->serverModifyTime;
2754 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2757 memset(vnode, 0, vcp->diskSize);
2760 /* Should not reach here becuase we checked for
2761 * (inodeNumber == 0) above. And where we zero the vnode,
2762 * we also goto vnodeDone.
2766 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2770 } /* VNDISK_GET_INO(vnode) != 0 */
2772 osi_Assert(!(vnodeChanged && check));
2773 if (vnodeChanged && !Testing) {
2774 osi_Assert(IH_IWRITE
2775 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2776 (char *)vnode, vcp->diskSize)
2778 salvinfo->VolumeChanged = 1; /* For break call back */
2789 struct VnodeEssence *
2790 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2793 struct VnodeInfo *vip;
2796 class = vnodeIdToClass(vnodeNumber);
2797 vip = &salvinfo->vnodeInfo[class];
2798 offset = vnodeIdToBitNumber(vnodeNumber);
2799 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/**
 * Give a directory a private copy of its data inode before it is modified.
 *
 * Nothing is copied when the directory was already copied (dir->copied) or
 * when running in Testing mode (the action on that branch is not visible in
 * this excerpt -- presumably an early return).  Otherwise the visible code:
 *  - flushes the directory buffers (DFlush);
 *  - reads the directory's vnode from the large vnode index;
 *  - creates a new inode for it, bumping vnode.dataVersion in the same call;
 *  - copies the old inode's contents into the new one (CopyInode);
 *  - stores the new inode number in the vnode and writes the vnode back;
 *  - re-points dir->dirHandle at the new inode.
 * The original inode is intentionally left alone here because the directory
 * is still being scanned; SalvageDir() drops it later via IH_DEC.
 *
 * @param salvinfo  salvage state (vnodeInfo[vLarge].handle, fileSysDevice,
 *                  fileSysPath, VolumeChanged)
 * @param dir       directory being judged; dirHandle is updated in place
 */
2803 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2805 /* Copy the directory unconditionally if we are going to change it:
2806 * not just if was cloned.
2808 struct VnodeDiskObject vnode;
2809 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2810 Inode oldinode, newinode;
2813 if (dir->copied || Testing)
2815 DFlush(); /* Well justified paranoia... */
2818 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2819 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2821 osi_Assert(code == sizeof(vnode));
2822 oldinode = VNDISK_GET_INO(&vnode);
2823 /* Increment the version number by a whole lot to avoid problems with
2824 * clients that were promised new version numbers--but the file server
2825 * crashed before the versions were written to disk.
2828 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2829 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2831 osi_Assert(VALID_INO(newinode));
2832 osi_Assert(CopyInode(salvinfo->fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2834 VNDISK_SET_INO(&vnode, newinode);
2836 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2837 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2839 osi_Assert(code == sizeof(vnode));
2841 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2842 salvinfo->fileSysDevice, newinode,
2843 &salvinfo->VolumeChanged);
2844 /* Don't delete the original inode right away, because the directory is
2845 * still being scanned.
2851 * This function should either successfully create a new dir, or give up
2852 * and leave things the way they were. In particular, if it fails to write
2853 * the new dir properly, it should return w/o changing the reference to the
/**
 * Rebuild a damaged directory into a freshly created inode and switch the
 * directory over to it only when the rebuild verifies.
 *
 * Steps visible in the code:
 *  - read the directory's vnode from the large vnode index;
 *  - create a new inode (bumping vnode.dataVersion) and open a salvage
 *    directory handle (newdir) on it;
 *  - work out the uniquifier for "..": the parent's recorded unique when the
 *    parent vnode is known, otherwise the placeholder value 1;
 *  - run DirSalvage() from the old handle into newdir; if that fails the new
 *    inode's link count is dropped with IH_DEC and the failure is logged;
 *  - verify the result with DirOK(); a bad result also drops the new inode
 *    and keeps the old directory;
 *  - on success store the new inode number and length in the vnode, write the
 *    vnode back, force the data to disk (nt_sync or sync), make sure the old
 *    directory file is really closed, drop the old inode with IH_DEC and
 *    replace dir->dirHandle with newdir.
 *
 * @param salvinfo  salvage state (vnodeInfo[vLarge].handle, fileSysDevice,
 *                  fileSysPath, VolumeChanged)
 * @param dir       directory to rebuild; dirHandle is replaced on success
 */
2857 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2859 struct VnodeDiskObject vnode;
2860 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2861 Inode oldinode, newinode;
2866 afs_int32 parentUnique = 1;
2867 struct VnodeEssence *vnodeEssence;
2872 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2874 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2875 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2877 osi_Assert(lcode == sizeof(vnode));
2878 oldinode = VNDISK_GET_INO(&vnode);
2879 /* Increment the version number by a whole lot to avoid problems with
2880 * clients that were promised new version numbers--but the file server
2881 * crashed before the versions were written to disk.
2884 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2885 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2887 osi_Assert(VALID_INO(newinode));
2888 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2889 &salvinfo->VolumeChanged);
2891 /* Assign . and .. vnode numbers from dir and vnode.parent.
2892 * The uniquifier for . is in the vnode.
2893 * The uniquifier for .. might be set to a bogus value of 1 and
2894 * the salvager will later clean it up.
2896 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2897 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2900 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2902 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2907 /* didn't really build the new directory properly, let's just give up. */
2908 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2909 Log("Directory salvage returned code %d, continuing.\n", code);
2911 Log("also failed to decrement link count on new inode");
2915 Log("Checking the results of the directory salvage...\n");
2916 if (!DirOK(&newdir)) {
2917 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2918 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2919 osi_Assert(code == 0);
2923 VNDISK_SET_INO(&vnode, newinode);
2924 length = Length(&newdir);
2925 VNDISK_SET_LEN(&vnode, length);
2927 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2928 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2930 osi_Assert(lcode == sizeof(vnode));
2933 nt_sync(salvinfo->fileSysDevice);
2935 sync(); /* this is slow, but hopefully rarely called. We don't have
2936 * an open FD on the file itself to fsync.
2940 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
2942 /* make sure old directory file is really closed */
2943 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2944 FDH_REALLYCLOSE(fdP);
2946 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2947 osi_Assert(code == 0);
2948 dir->dirHandle = newdir;
2952 * arguments for JudgeEntry.
/*
 * Bundle handed to JudgeEntry() through its opaque first argument: SalvageDir()
 * fills in both members and passes the structure's address to EnumerateDir().
 */
2954 struct judgeEntry_params {
2955 struct DirSummary *dir; /**< directory we're examining entries in */
2956 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/**
 * Examine one directory entry and repair or delete it when it is inconsistent
 * with the vnode it names.  SalvageDir() passes this function to
 * EnumerateDir() as the per-entry callback.
 *
 * Checks performed by the visible code, in order:
 *  - the entry's vnode number must resolve through CheckVnodeNumber();
 *    otherwise the entry is logged as invalid and deleted;
 *  - outside AFS_NAMEI_ENV an entry whose vnode has inode number 0 is deleted;
 *  - an entry for a small (even-numbered) vnode with no link count, unique or
 *    mode bits is reported as invalid and deleted;
 *  - the entry's uniquifier must match the vnode's: "." and ".." of a vnode
 *    with unique 0 are skipped, other mismatches are either re-created with
 *    the vnode's unique or deleted (unique 0, or the directory is orphaned);
 *  - "." must name the directory itself and ".." its parent (or the directory
 *    itself when no parent is known); wrong entries are replaced;
 *  - names starting with ".__afs" are deleted and the vnode is marked
 *    unclaimed and to be deleted;
 *  - for all other entries: optional reporting of suid/sgid files, mount
 *    points and root-owned files; a symlink without execute bits whose
 *    contents do not look like a mount point is converted to a plain symbolic
 *    link by setting mode bits 0111; the name is remembered for large vnodes;
 *    the vnode's parent is reconciled with this directory (claimed here, or
 *    the entry is deleted when another directory already claimed it);
 *    finally the vnode is marked claimed and its link count is decremented.
 * Every modification of the directory is preceded by CopyOnWrite().
 *
 * @param arock        opaque pointer to a struct judgeEntry_params
 * @param name         name of the directory entry
 * @param vnodeNumber  vnode number stored in the entry
 *
 * NOTE(review): the remainder of the parameter list (the entry's uniquifier,
 * referenced as `unique` throughout) and the return type are on lines not
 * visible in this excerpt; the one visible return statement returns 0.
 */
2960 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
2963 struct judgeEntry_params *params = arock;
2964 struct DirSummary *dir = params->dir;
2965 struct SalvInfo *salvinfo = params->salvinfo;
2966 struct VnodeEssence *vnodeEssence;
2967 afs_int32 dirOrphaned, todelete;
2969 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
2971 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
2972 if (vnodeEssence == NULL) {
2974 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2977 CopyOnWrite(salvinfo, dir);
2978 osi_Assert(Delete(&dir->dirHandle, name) == 0);
2983 #ifndef AFS_NAMEI_ENV
2984 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2985 * mount inode for the partition. If this inode were deleted, it would crash
2988 if (vnodeEssence->InodeNumber == 0) {
2989 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2991 CopyOnWrite(salvinfo, dir);
2992 osi_Assert(Delete(&dir->dirHandle, name) == 0);
2999 if (!(vnodeNumber & 1) && !Showmode
3000 && !(vnodeEssence->count || vnodeEssence->unique
3001 || vnodeEssence->modeBits)) {
3002 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
3003 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3004 vnodeNumber, unique,
3005 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3009 CopyOnWrite(salvinfo, dir);
3010 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3016 /* Check if the Uniquifiers match. If not, change the directory entry
3017 * so its unique matches the vnode unique. Delete if the unique is zero
3018 * or if the directory is orphaned.
3020 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3021 if (!vnodeEssence->unique
3022 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3023 /* This is an orphaned directory. Don't delete the . or ..
3024 * entry. Otherwise, it will get created in the next
3025 * salvage and deleted again here. So Just skip it.
3030 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3033 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3037 fid.Vnode = vnodeNumber;
3038 fid.Unique = vnodeEssence->unique;
3039 CopyOnWrite(salvinfo, dir);
3040 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3042 osi_Assert(Create(&dir->dirHandle, name, &fid) == 0);
3045 return 0; /* no need to continue */
3048 if (strcmp(name, ".") == 0) {
3049 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3052 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3054 CopyOnWrite(salvinfo, dir);
3055 osi_Assert(Delete(&dir->dirHandle, ".") == 0);
3056 fid.Vnode = dir->vnodeNumber;
3057 fid.Unique = dir->unique;
3058 osi_Assert(Create(&dir->dirHandle, ".", &fid) == 0);
3061 vnodeNumber = fid.Vnode; /* Get the new Essence */
3062 unique = fid.Unique;
3063 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3066 } else if (strcmp(name, "..") == 0) {
3069 struct VnodeEssence *dotdot;
3070 pa.Vnode = dir->parent;
3071 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3072 osi_Assert(dotdot != NULL); /* XXX Should not be assert */
3073 pa.Unique = dotdot->unique;
3075 pa.Vnode = dir->vnodeNumber;
3076 pa.Unique = dir->unique;
3078 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3080 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3082 CopyOnWrite(salvinfo, dir);
3083 osi_Assert(Delete(&dir->dirHandle, "..") == 0);
3084 osi_Assert(Create(&dir->dirHandle, "..", &pa) == 0);
3087 vnodeNumber = pa.Vnode; /* Get the new Essence */
3089 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3091 dir->haveDotDot = 1;
3092 } else if (strncmp(name, ".__afs", 6) == 0) {
3094 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3097 CopyOnWrite(salvinfo, dir);
3098 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3100 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3101 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3104 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3105 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3106 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3107 && !(vnodeEssence->modeBits & 0111)) {
3108 afs_sfsize_t nBytes;
3114 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3115 vnodeEssence->InodeNumber);
3118 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3122 size = FDH_SIZE(fdP);
3124 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3125 FDH_REALLYCLOSE(fdP);
3132 nBytes = FDH_PREAD(fdP, buf, size, 0);
3133 if (nBytes == size) {
3135 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3136 Log("Volume %u (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3137 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3138 Testing ? "would convert" : "converted");
3139 vnodeEssence->modeBits |= 0111;
3140 vnodeEssence->changed = 1;
3141 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3142 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3143 dir->name ? dir->name : "??", name, buf);
3145 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3146 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3148 FDH_REALLYCLOSE(fdP);
3151 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3152 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3153 if (vnodeIdToClass(vnodeNumber) == vLarge
3154 && vnodeEssence->name == NULL) {
3156 if ((n = (char *)malloc(strlen(name) + 1)))
3158 vnodeEssence->name = n;
3161 /* The directory entry points to the vnode. Check to see if the
3162 * vnode points back to the directory. If not, then let the
3163 * directory claim it (else it might end up orphaned). Vnodes
3164 * already claimed by another directory are deleted from this
3165 * directory: hardlinks to the same vnode are not allowed
3166 * from different directories.
3168 if (vnodeEssence->parent != dir->vnodeNumber) {
3169 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3170 /* Vnode does not point back to this directory.
3171 * Orphaned dirs cannot claim a file (it may belong to
3172 * another non-orphaned dir).
3175 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3177 vnodeEssence->parent = dir->vnodeNumber;
3178 vnodeEssence->changed = 1;
3180 /* Vnode was claimed by another directory */
3183 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3184 } else if (vnodeNumber == 1) {
3185 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3187 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3191 CopyOnWrite(salvinfo, dir);
3192 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3197 /* This directory claims the vnode */
3198 vnodeEssence->claimed = 1;
3200 vnodeEssence->count--;
3205 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3206 VnodeClass class, Inode ino, Unique * maxu)
3208 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3209 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3210 char buf[SIZEOF_LARGEDISKVNODE];
3211 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3213 StreamHandle_t *file;
3218 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3219 fdP = IH_OPEN(vip->handle);
3220 osi_Assert(fdP != NULL);
3221 file = FDH_FDOPEN(fdP, "r+");
3222 osi_Assert(file != NULL);
3223 size = OS_SIZE(fdP->fd_fd);
3224 osi_Assert(size != -1);
3225 vip->nVnodes = (size / vcp->diskSize) - 1;
3226 if (vip->nVnodes > 0) {
3227 osi_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3228 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
3229 osi_Assert((vip->vnodes = (struct VnodeEssence *)
3230 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3231 if (class == vLarge) {
3232 osi_Assert((vip->inodes = (Inode *)
3233 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3242 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3243 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3244 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3245 nVnodes--, vnodeIndex++) {
3246 if (vnode->type != vNull) {
3247 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3248 afs_fsize_t vnodeLength;
3249 vip->nAllocatedVnodes++;
3250 vep->count = vnode->linkCount;
3251 VNDISK_GET_LEN(vnodeLength, vnode);
3252 vep->blockCount = nBlocks(vnodeLength);
3253 vip->volumeBlockCount += vep->blockCount;
3254 vep->parent = vnode->parent;
3255 vep->unique = vnode->uniquifier;
3256 if (*maxu < vnode->uniquifier)
3257 *maxu = vnode->uniquifier;
3258 vep->modeBits = vnode->modeBits;
3259 vep->InodeNumber = VNDISK_GET_INO(vnode);
3260 vep->type = vnode->type;
3261 vep->author = vnode->author;
3262 vep->owner = vnode->owner;
3263 vep->group = vnode->group;
3264 if (vnode->type == vDirectory) {
3265 if (class != vLarge) {
3266 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3267 vip->nAllocatedVnodes--;
3268 memset(vnode, 0, sizeof(vnode));
3269 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3270 vnodeIndexOffset(vcp, vnodeNumber),
3271 (char *)&vnode, sizeof(vnode));
3272 salvinfo->VolumeChanged = 1;
3274 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
3283 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3286 struct VnodeEssence *parentvp;
3292 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3293 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3294 strcat(path, OS_DIRSEP);
3295 strcat(path, vp->name);
3301 /* To determine if a vnode is orhpaned or not, the vnode and all its parent
3302 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
3305 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3307 struct VnodeEssence *vep;
3310 return (1); /* Vnode zero does not exist */
3312 return (0); /* The root dir vnode is always claimed */
3313 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3314 if (!vep || !vep->claimed)
3315 return (1); /* Vnode is not claimed - it is orphaned */
3317 return (IsVnodeOrphaned(salvinfo, vep->parent));
/**
 * Salvage the directory stored at slot i of the large vnode table, making
 * sure its parent directory is salvaged first.
 *
 * Steps visible in the code:
 *  - return at once when the slot is already marked salvaged; otherwise mark
 *    it salvaged before doing anything else, so the recursion below cannot
 *    revisit it;
 *  - return when no directory inode is recorded for the slot;
 *  - vnode 1 (the root) must not have a parent: a non-zero parent is cleared
 *    and the vnode flagged as changed;
 *  - a parent that has not been salvaged yet is salvaged through a recursive
 *    call (this happens before the static 'dir', 'dirHandle' and 'path'
 *    locals are filled in for the current directory);
 *  - a DirSummary is set up and the directory is checked with DirOK(), unless
 *    RebuildDirs is set without Testing, which forces a rebuild; a bad
 *    directory is rebuilt with CopyAndSalvage();
 *  - every entry is judged through EnumerateDir() with JudgeEntry();
 *  - when judging copied the directory (dir.copied) and Testing is off, the
 *    inode that was scanned is dropped with IH_DEC and the table is updated
 *    with the directory's current inode;
 *  - the summary of the root directory (vnode 1, unique 1) is copied to
 *    *rootdir after it has been judged.
 *
 * @param salvinfo      salvage state
 * @param name          passed through unchanged to the recursive call; its
 *                      other uses are on lines not visible in this excerpt
 * @param rwVid         read-write volume id, used when dropping the old inode
 * @param dirVnodeInfo  large-vnode table (vnodes[] and inodes[] are used)
 * @param alinkH        link handle stored in dir.ds_linkH
 * @param i             slot in dirVnodeInfo of the directory to salvage
 * @param rootdir       receives the root directory's DirSummary
 * @param rootdirfound  passed to the recursive call; presumably set when the
 *                      root summary is stored (assignment not visible here)
 */
3321 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3322 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3323 struct DirSummary *rootdir, int *rootdirfound)
3325 static struct DirSummary dir;
3326 static struct DirHandle dirHandle;
3327 struct VnodeEssence *parent;
3328 static char path[MAXPATHLEN];
3331 if (dirVnodeInfo->vnodes[i].salvaged)
3332 return; /* already salvaged */
3335 dirVnodeInfo->vnodes[i].salvaged = 1;
3337 if (dirVnodeInfo->inodes[i] == 0)
3338 return; /* Not allocated to a directory */
3340 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3341 if (dirVnodeInfo->vnodes[i].parent) {
3342 Log("Bad parent, vnode 1; %s...\n",
3343 (Testing ? "skipping" : "salvaging"));
3344 dirVnodeInfo->vnodes[i].parent = 0;
3345 dirVnodeInfo->vnodes[i].changed = 1;
3348 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3349 if (parent && parent->salvaged == 0)
3350 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3351 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3352 rootdir, rootdirfound);
3355 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3356 dir.unique = dirVnodeInfo->vnodes[i].unique;
3359 dir.parent = dirVnodeInfo->vnodes[i].parent;
3360 dir.haveDot = dir.haveDotDot = 0;
3361 dir.ds_linkH = alinkH;
3362 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3363 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
3365 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3368 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3369 (Testing ? "skipping" : "salvaging"));
3372 CopyAndSalvage(salvinfo, &dir);
3374 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3377 dirHandle = dir.dirHandle;
3380 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3381 &dirVnodeInfo->vnodes[i], path);
3384 /* If enumeration failed for random reasons, we will probably delete
3385 * too much stuff, so we guard against this instead.
3387 struct judgeEntry_params judge_params;
3388 judge_params.salvinfo = salvinfo;
3389 judge_params.dir = &dir;
3391 osi_Assert(EnumerateDir(&dirHandle, JudgeEntry, &judge_params) == 0);
3394 /* Delete the old directory if it was copied in order to salvage.
3395 * CopyOnWrite has written the new inode # to the disk, but we still
3396 * have the old one in our local structure here. Thus, we idec the
3400 if (dir.copied && !Testing) {
3401 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3402 osi_Assert(code == 0);
3403 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3406 /* Remember rootdir DirSummary _after_ it has been judged */
3407 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3408 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3416 * Get a new FID that can be used to create a new file.
3418 * @param[in] volHeader vol header for the volume
3419 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3420 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3421 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3422 * updated to the new max unique if we create a new
3426 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3427 VnodeClass class, AFSFid *afid, Unique *maxunique)
3430 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3431 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
3435 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3436 /* no free vnodes; make a new one */
3437 salvinfo->vnodeInfo[class].nVnodes++;
3438 salvinfo->vnodeInfo[class].vnodes =
3439 realloc(salvinfo->vnodeInfo[class].vnodes,
3440 sizeof(struct VnodeEssence) * (i+1));
3442 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
3445 afid->Vnode = bitNumberToVnodeNumber(i, class);
3447 if (volHeader->uniquifier < (*maxunique + 1)) {
3448 /* header uniq is bad; it will get bumped by 2000 later */
3449 afid->Unique = *maxunique + 1 + 2000;
3452 /* header uniq seems okay; just use that */
3453 afid->Unique = *maxunique = volHeader->uniquifier++;
/**
 * Create a vnode for a README file explaining not to use a recreated-root vol.
 *
 * Obtains a fresh small-vnode FID via GetNewFID(), creates an inode holding
 * the explanatory text, writes a matching small disk vnode into the small
 * vnode index, and mirrors the new vnode into the in-memory VnodeEssence
 * entry for that FID.
 *
 * @param[in] salvinfo  salvage job state (device, path and vnode tables)
 * @param[in] volHeader vol header for the volume
 * @param[in] alinkH ihandle for i/o for the volume
 * @param[in] vid volume id
 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
 *                         updated to the new max unique if we create a new
 *                         vnode
 * @param[out] afid FID for the new readme vnode
 * @param[out] ainode the inode for the new readme file
 *
 * @return operation status
 */
CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
             IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
    struct VnodeDiskObject *rvnode = NULL;
    IHandle_t *readmeH = NULL;
    struct VnodeEssence *vep;
    time_t now = time(NULL);

    /* Try to make the note brief, but informative. Only administrators should
     * be able to read this file at first, so we can hopefully assume they
     * know what AFS is, what a volume is, etc. */
"This volume has been salvaged, but has lost its original root directory.\n"
"The root directory that exists now has been recreated from orphan files\n"
"from the rest of the volume. This recreated root directory may interfere\n"
"with old cached data on clients, and there is no way the salvager can\n"
"reasonably prevent that. So, it is recommended that you do not continue to\n"
"use this volume, but only copy the salvaged data to a new volume.\n"
"Continuing to use this volume as it exists now may cause some clients to\n"
"behave oddly when accessing this volume.\n"
"\n\t -- Your friendly neighborhood OpenAFS salvager\n";
    /* ^ the person reading this probably just lost some data, so they could
     * use some cheering up. */

    /* -1 for the trailing NUL */
    length = sizeof(readme) - 1;

    /* Pick the FID first; vep then refers to the in-memory slot for it. */
    GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);

    vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];

    /* create the inode and write the contents */
    readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
                            salvinfo->fileSysPath, 0, vid,
                            afid->Vnode, afid->Unique, 1);
    if (!VALID_INO(readmeinode)) {
        Log("CreateReadme: readme IH_CREATE failed\n");

    IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
    bytes = IH_IWRITE(readmeH, 0, readme, length);
    IH_RELEASE(readmeH);

    if (bytes != length) {
        /* NOTE(review): the "expected" figure logged here is sizeof(readme),
         * one more than the length actually compared against above. */
        Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
            (int)sizeof(readme));

    /* create the vnode and write it out */
    rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
        /* NOTE(review): message names CreateRootDir although this is
         * CreateReadme. */
        Log("CreateRootDir: error alloc'ing memory\n");

    rvnode->type = vFile;
    rvnode->modeBits = 0777;
    rvnode->linkCount = 1;
    VNDISK_SET_LEN(rvnode, length);
    rvnode->uniquifier = afid->Unique;
    rvnode->dataVersion = 1;
    VNDISK_SET_INO(rvnode, readmeinode);
    rvnode->unixModifyTime = rvnode->serverModifyTime = now;
    rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;

    /* Store the disk vnode at this FID's offset in the small vnode index. */
    bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
                      vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
                      (char*)rvnode, SIZEOF_SMALLDISKVNODE);

    if (bytes != SIZEOF_SMALLDISKVNODE) {
        Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
            (int)SIZEOF_SMALLDISKVNODE);

    /* update VnodeEssence for new readme vnode */
    salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
    vep->blockCount = nBlocks(length);
    salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
    vep->parent = rvnode->parent;
    vep->unique = rvnode->uniquifier;
    vep->modeBits = rvnode->modeBits;
    vep->InodeNumber = VNDISK_GET_INO(rvnode);
    vep->type = rvnode->type;
    vep->author = rvnode->author;
    vep->owner = rvnode->owner;
    vep->group = rvnode->group;

    /* Hand the new inode back to the caller. */
    *ainode = readmeinode;

    /* Recovery: drop the link on the readme inode created above. */
    if (IH_DEC(alinkH, readmeinode, vid)) {
        Log("CreateReadme (recovery): IH_DEC failed\n");
/**
 * create a root dir for a volume that lacks one.
 *
 * Refuses to act when the volume has no vnodes at all or when vnode 1 is
 * already in use.  Otherwise it creates a README vnode via CreateReadme(),
 * creates a directory inode containing a "README.ROOTDIR" entry, writes a
 * large disk vnode for vnode 1, and fills in both the in-memory
 * VnodeEssence entry and the caller's DirSummary.
 *
 * @param[in] salvinfo  salvage job state (device, path and vnode tables)
 * @param[in] volHeader vol header for the volume
 * @param[in] alinkH ihandle for disk access for this volume group
 * @param[in] vid volume id we're dealing with
 * @param[out] rootdir populated with info about the new root dir
 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
 *                         updated to the new max unique if we create a new
 *                         vnode
 *
 * @return operation status
 */
CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
              IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
    /* decroot/decreadme gate the IH_DEC calls in the recovery code at the
     * bottom of the function. */
    int decroot = 0, decreadme = 0;
    AFSFid did, readmeid;
    struct VnodeDiskObject *rootvnode = NULL;
    struct acl_accessList *ACL;
    struct VnodeEssence *vep;
    time_t now = time(NULL);

    if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
        Log("Not creating new root dir; volume appears to lack any vnodes\n");

    if (!salvinfo->vnodeInfo[vLarge].vnodes) {
        /* We don't have any large vnodes in the volume; allocate room
         * for one so we can recreate the root dir */
        salvinfo->vnodeInfo[vLarge].nVnodes = 1;
        salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
        salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));

        osi_Assert(salvinfo->vnodeInfo[vLarge].vnodes);
        osi_Assert(salvinfo->vnodeInfo[vLarge].inodes);

    /* In-memory slots for vnode number 1. */
    vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
    ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
    if (vep->type != vNull) {
        Log("Not creating new root dir; existing vnode 1 is non-null\n");

    if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
                     &readmeinode) != 0) {

    /* set the DV to a very high number, so it is unlikely that we collide
     * with a cached DV */
    rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
    if (!VALID_INO(rootinode)) {
        Log("CreateRootDir: IH_CREATE failed\n");

    SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
                        rootinode, &salvinfo->VolumeChanged);
    /* The same FID is passed for both FID arguments of MakeDir. */
    if (MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
        Log("CreateRootDir: MakeDir failed\n");
    if (Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
        Log("CreateRootDir: Create failed\n");
    length = Length(&rootdir->dirHandle);
    DZap((void *)&rootdir->dirHandle);

    /* create the new root dir vnode */
    rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
        Log("CreateRootDir: malloc failed\n");

    /* only give 'rl' permissions to 'system:administrators'. We do this to
     * try to catch the attention of an administrator, that they should not
     * be writing to this directory or continue to use it. */
    ACL = VVnodeDiskACL(rootvnode);
    ACL->size = sizeof(struct acl_accessList);
    ACL->version = ACL_ACLVERSION;
    ACL->entries[0].id = -204; /* system:administrators */
    ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;

    rootvnode->type = vDirectory;
    rootvnode->cloned = 0;
    rootvnode->modeBits = 0777;
    rootvnode->linkCount = 2;
    VNDISK_SET_LEN(rootvnode, length);
    rootvnode->uniquifier = 1;
    rootvnode->dataVersion = dv;
    VNDISK_SET_INO(rootvnode, rootinode);
    rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
    rootvnode->author = 0;
    rootvnode->owner = 0;
    rootvnode->parent = 0;
    rootvnode->group = 0;
    rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;

    /* write it out to disk */
    bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
                      vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
                      (char*)rootvnode, SIZEOF_LARGEDISKVNODE);

    if (bytes != SIZEOF_LARGEDISKVNODE) {
        /* just cast to int and don't worry about printing real 64-bit ints;
         * a large disk vnode isn't anywhere near the 32-bit limit */
        Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
            (int)SIZEOF_LARGEDISKVNODE);

    /* update VnodeEssence for the new root vnode */
    salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
    vep->blockCount = nBlocks(length);
    salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
    vep->parent = rootvnode->parent;
    vep->unique = rootvnode->uniquifier;
    vep->modeBits = rootvnode->modeBits;
    vep->InodeNumber = VNDISK_GET_INO(rootvnode);
    vep->type = rootvnode->type;
    vep->author = rootvnode->author;
    vep->owner = rootvnode->owner;
    vep->group = rootvnode->group;

    /* update DirSummary for the new root vnode */
    rootdir->vnodeNumber = 1;
    rootdir->unique = 1;
    rootdir->haveDot = 1;
    rootdir->haveDotDot = 1;
    rootdir->rwVid = vid;
    rootdir->copied = 0;
    rootdir->parent = 0;
    /* NOTE(review): the strdup() result is stored without a NULL check. */
    rootdir->name = strdup(".");
    rootdir->vname = volHeader->name;
    rootdir->ds_linkH = alinkH;

    /* Recovery: release whichever inodes were created before the failure. */
    if (decroot && IH_DEC(alinkH, rootinode, vid)) {
        Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
    if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
        Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
/**
 * salvage a volume group.
 *
 * Reads the volume header, distils both vnode indexes into memory, salvages
 * every directory vnode, tries to recreate a missing root directory when
 * orphans are to be attached, attaches or removes orphaned vnodes, rewrites
 * every changed vnode, and finally writes the corrected volume header back.
 *
 * @param[in] salvinfo information for the current salvage job
 * @param[in] rwIsp inode summary for rw volume
 * @param[in] alinkH link table inode handle
 *
 * @return operation status
 */
SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
    /* This routine, for now, will only be called for read-write volumes */
    int BlocksInVolume = 0, FilesInVolume = 0;
    struct DirSummary rootdir, oldrootdir;
    struct VnodeInfo *dirVnodeInfo;
    struct VnodeDiskObject vnode;
    VolumeDiskData volHeader;
    int orphaned, rootdirfound = 0;
    Unique maxunique = 0; /* the maxUniquifier from the vnodes */
    afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
    struct VnodeEssence *vep;
    afs_sfsize_t nBytes;
    VnodeId LFVnode, ThisVnode;
    Unique LFUnique, ThisUnique;

    /* Load and sanity-check the on-disk volume header. */
    vid = rwIsp->volSummary->header.id;
    IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
    nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
    osi_Assert(nBytes == sizeof(volHeader));
    osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
    osi_Assert(volHeader.destroyMe != DESTROY_ME);
    /* (should not have gotten this far with DESTROY_ME flag still set!) */

    /* Both calls share maxunique, so it ends up covering both vnode classes. */
    DistilVnodeEssence(salvinfo, vid, vLarge,
                       rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
    DistilVnodeEssence(salvinfo, vid, vSmall,
                       rwIsp->volSummary->header.smallVnodeIndex, &maxunique);

    /* Salvage every large (directory) vnode; SalvageDir reports the root
     * directory through rootdir/rootdirfound. */
    dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
    for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
        SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
                   &rootdir, &rootdirfound);
    nt_sync(salvinfo->fileSysDevice);
    sync(); /* This used to be done lower level, for every dir */
    if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
        Log("Cannot find root directory for volume %lu; attempting to create "
            "a new one\n", afs_printable_uint32_lu(vid));
        code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
            salvinfo->VolumeChanged = 1;

    /* Parse each vnode looking for orphaned vnodes and
     * connect them to the tree as orphaned (if requested).
     */
    /* Snapshot rootdir so a later CopyOnWrite of it can be detected. */
    oldrootdir = rootdir;
    for (class = 0; class < nVNODECLASSES; class++) {
        for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
            vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
            ThisVnode = bitNumberToVnodeNumber(v, class);
            ThisUnique = vep->unique;

            if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
                continue; /* Ignore unused, claimed, and root vnodes */

            /* This vnode is orphaned. If it is a directory vnode, then the '..'
             * entry in this vnode had incremented the parent link count (In
             * JudgeEntry()). We need to go to the parent and decrement that
             * link count. But if the parent's unique is zero, then the parent
             * link count was not incremented in JudgeEntry().
             */
            if (class == vLarge) { /* directory vnode */
                /* NOTE(review): pv is used as an array index without a visible
                 * range check against nVnodes -- confirm vep->parent is
                 * validated elsewhere. */
                pv = vnodeIdToBitNumber(vep->parent);
                if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
                    if (vep->parent == 1 && newrootdir) {
                        /* this vnode's parent was the volume root, and
                         * we just created the volume root. So, the parent
                         * dir didn't exist during JudgeEntry, so the link
                         * count was not inc'd there, so don't dec it here.
                         */
                        salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
                continue; /* If no rootdir, can't attach orphaned files */

            /* Here we attach orphaned files and directories into the
             * root directory, LVVnode, making sure link counts stay correct.
             */
            if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
                LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
                LFUnique = rootdir.unique; /* Lost+Found uniquifier */

                /* Update this orphaned vnode's info. Its parent info and
                 * link count (do for orphaned directories and files).
                 */
                vep->parent = LFVnode; /* Parent is the root dir */
                vep->unique = LFUnique;
                vep->count--; /* Inc link count (root dir will pt to it) */

                /* If this orphaned vnode is a directory, change '..'.
                 * The name of the orphaned dir/file is unknown, so we
                 * build a unique name. No need to CopyOnWrite the directory
                 * since it is not connected to tree in BK or RO volume and
                 * won't be visible there.
                 */
                if (class == vLarge) {
                    /* Remove and recreate the ".." entry in this orphaned directory */
                    SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
                                        salvinfo->vnodeInfo[class].inodes[v],
                                        &salvinfo->VolumeChanged);
                    pa.Unique = LFUnique;
                    osi_Assert(Delete(&dh, "..") == 0);
                    osi_Assert(Create(&dh, "..", &pa) == 0);

                    /* The original parent's link count was decremented above.
                     * Here we increment the new parent's link count.
                     */
                    pv = vnodeIdToBitNumber(LFVnode);
                    salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;

                /* Go to the root dir and add this entry. The link count of the
                 * root dir was incremented when ".." was created. Try 10 times.
                 */
                for (j = 0; j < 10; j++) {
                    pa.Vnode = ThisVnode;
                    pa.Unique = ThisUnique;

                    (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
                                         vLarge) ? "__ORPHANDIR__" :
                                        "__ORPHANFILE__"), ThisVnode,
                    CopyOnWrite(salvinfo, &rootdir);
                    code = Create(&rootdir.dirHandle, npath, &pa);
                    ThisUnique += 50; /* Try creating a different file */
                osi_Assert(code == 0);
                Log("Attaching orphaned %s to volume's root dir as %s\n",
                    ((class == vLarge) ? "directory" : "file"), npath);
        } /* for each vnode in the class */
    } /* for each class of vnode */

    /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
    if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
            IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
        osi_Assert(code == 0);
        /* dirVnodeInfo->inodes[?] is not updated with new inode number */

    DFlush(); /* Flush the changes */
    if (!rootdirfound && (orphans == ORPH_ATTACH)) {
        Log("Cannot attach orphaned files and directories: Root directory not found\n");
        /* Downgrade so the write-out loop below does not expect attachment. */
        orphans = ORPH_IGNORE;

    /* Write out all changed vnodes. Orphaned files and directories
     * will get removed here also (if requested).
     */
    for (class = 0; class < nVNODECLASSES; class++) {
        afs_sfsize_t nVnodes = salvinfo->vnodeInfo[class].nVnodes;
        struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
        struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
        FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
        BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
        for (i = 0; i < nVnodes; i++) {
            struct VnodeEssence *vnp = &vnodes[i];
            VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);

            /* If the vnode is good but is unclaimed (not listed in
             * any directory entries), then it is orphaned.
             */
            if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
                vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */

            if (vnp->changed || vnp->count) {
                /* Read the on-disk vnode, apply the parent and link-count
                 * corrections, then write it back further down. */
                    IH_IREAD(salvinfo->vnodeInfo[class].handle,
                             vnodeIndexOffset(vcp, vnodeNumber),
                             (char *)&vnode, sizeof(vnode));
                osi_Assert(nBytes == sizeof(vnode));

                vnode.parent = vnp->parent;
                oldCount = vnode.linkCount;
                vnode.linkCount = vnode.linkCount - vnp->count;

                    orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
                    if (!vnp->todelete) {
                        /* Orphans should have already been attached (if requested) */
                        osi_Assert(orphans != ORPH_ATTACH);
                        oblocks += vnp->blockCount;
                    if (((orphans == ORPH_REMOVE) || vnp->todelete)
                        BlocksInVolume -= vnp->blockCount;
                        if (VNDISK_GET_INO(&vnode)) {
                                IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
                            osi_Assert(code == 0);
                        /* Zero the whole disk vnode. */
                        memset(&vnode, 0, sizeof(vnode));
                } else if (vnp->count) {
                        Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
                    vnode.modeBits = vnp->modeBits;
                vnode.dataVersion++;
                        IH_IWRITE(salvinfo->vnodeInfo[class].handle,
                                  vnodeIndexOffset(vcp, vnodeNumber),
                                  (char *)&vnode, sizeof(vnode));
                    osi_Assert(nBytes == sizeof(vnode));
                salvinfo->VolumeChanged = 1;
    if (!Showmode && ofiles) {
        Log("%s %d orphaned files and directories (approx. %u KB)\n",
             && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,

    /* Release the per-vnode name strings. */
    for (class = 0; class < nVNODECLASSES; class++) {
        struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
        for (i = 0; i < vip->nVnodes; i++)
            if (vip->vnodes[i].name)
                free(vip->vnodes[i].name);

    /* Set correct resource utilization statistics */
    volHeader.filecount = FilesInVolume;
    volHeader.diskused = BlocksInVolume;

    /* Make sure the uniquifer is big enough: maxunique is the real maxUniquifier */
    if (volHeader.uniquifier < (maxunique + 1)) {
            Log("Volume uniquifier is too low; fixed\n");
        /* Plus 2,000 in case there are workstations out there with
         * cached vnodes that have since been deleted
         */
        volHeader.uniquifier = (maxunique + 1 + 2000);

        Log("*** WARNING: Root directory recreated, but volume is fragile! "
            "Only use this salvaged volume to copy data to another volume; "
            "do not continue to use this volume (%lu) as-is.\n",
            afs_printable_uint32_lu(vid));

#ifdef FSSYNC_BUILD_CLIENT
    /* Ask the fileserver to break callbacks for the changed volume. */
    if (!Testing && salvinfo->VolumeChanged && salvinfo->useFSYNC) {
        afs_int32 fsync_code;

        fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
            Log("Error trying to tell the fileserver to break callbacks for "
                "changed volume %lu; error code %ld\n",
                afs_printable_uint32_lu(vid),
                afs_printable_int32_ld(fsync_code));
            salvinfo->VolumeChanged = 0;
#endif /* FSSYNC_BUILD_CLIENT */

    /* Turn off the inUse bit; the volume's been salvaged! */
    volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
    volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
    volHeader.inService = 1; /* allow service again */
    /* needsCallback is set only if VolumeChanged is still non-zero here. */
    volHeader.needsCallback = (salvinfo->VolumeChanged != 0);
    volHeader.dontSalvage = DONT_SALVAGE;
    salvinfo->VolumeChanged = 0;
        nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
        osi_Assert(nBytes == sizeof(volHeader));
        Log("%sSalvaged %s (%u): %d files, %d blocks\n",
            (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
            FilesInVolume, BlocksInVolume);
    IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
    IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
/*
 * Rewrite a volume's header with inUse and needsSalvaged cleared,
 * inService set, and dontSalvage set to DONT_SALVAGE.
 *
 * summary - volume summary whose volumeInfoHandle addresses the header
 *
 * The header read and its magic number are asserted, as is the write.
 */
ClearROInUseBit(struct VolumeSummary *summary)
    IHandle_t *h = summary->volumeInfoHandle;
    afs_sfsize_t nBytes;
    VolumeDiskData volHeader;

    nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
    osi_Assert(nBytes == sizeof(volHeader));
    osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
    volHeader.inUse = 0;
    volHeader.needsSalvaged = 0;
    volHeader.inService = 1;
    volHeader.dontSalvage = DONT_SALVAGE;
        nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
        osi_Assert(nBytes == sizeof(volHeader));
/*
 * Possibly delete the volume.
 *
 * salvinfo - salvage job state (partition and path)
 * isp      - inode summary of the volume under consideration
 * message  - prefix used in the "salvage was unsuccessful" log line
 * deleteMe - Always do so, only a partial volume.
 * check    - when non-zero, a read-write volume that is not being deleted
 *            does not cause an abort
 *
 * Read-only volumes, and any volume with deleteMe set, have their disk
 * header destroyed and header file unlinked.  Any other volume aborts the
 * salvage unless check is set.
 */
MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
               char *message, int deleteMe, int check)
    if (readOnly(isp) || deleteMe) {
        if (isp->volSummary && isp->volSummary->fileName) {
                    Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
                    Log("It will be deleted on this server (you may find it elsewhere)\n");
                    Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
                    Log("it will be deleted instead. It should be recloned.\n");
                /* NOTE(review): unbounded sprintf; the size of path is not
                 * visible here -- confirm it can hold fileSysPath plus
                 * fileName. */
                sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, isp->volSummary->fileName);
                code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
                    Log("Error %ld destroying volume disk header for volume %lu\n",
                        afs_printable_int32_ld(code),
                        afs_printable_uint32_lu(isp->volumeId));

                /* make sure we actually delete the fileName file; ENOENT
                 * is fine, since VDestroyVolumeDiskHeader probably already
                 * removed it */
                if (unlink(path) && errno != ENOENT) {
                    Log("Unable to unlink %s (errno = %d)\n", path, errno);
    } else if (!check) {
        Log("%s salvage was unsuccessful: read-write volume %u\n", message,
        Abort("Salvage of volume %u aborted\n", isp->volumeId);
#ifdef AFS_DEMAND_ATTACH_FS
/**
 * Locks a volume on disk for salvaging.
 *
 * After taking the lock and verifying the fileserver checkout, the volume
 * header's inUse field is set to programType and written back.
 *
 * @param[in] salvinfo  salvage job state (partition, device, path name)
 * @param[in] volumeId volume ID to lock
 *
 * @return operation status
 *
 * @retval -1 volume lock raced with a fileserver restart; all volumes must
 *            be checked out and locked again
 */
LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
    /* should always be WRITE_LOCK, but keep the lock-type logic all
     * in one place, in VVolLockType. Params will be ignored, but
     * try to provide what we're logically doing. */
    locktype = VVolLockType(V_VOLUPD, 1);

    /* Failure to take the lock aborts, with a distinct message for EBUSY. */
    code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
        if (code == EBUSY) {
            Abort("Someone else appears to be using volume %lu; Aborted\n",
                  afs_printable_uint32_lu(volumeId));
        Abort("Error %ld trying to lock volume %lu; Aborted\n",
              afs_printable_int32_ld(code),
              afs_printable_uint32_lu(volumeId));

    code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPathName, FSYNC_VOL_OFF, FSYNC_SALVAGE);
    if (code == SYNC_DENIED) {
        /* need to retry checking out volumes */
    if (code != SYNC_OK) {
        Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
              afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));

    /* set inUse = programType in the volume header to ensure that nobody
     * tries to use this volume again without salvaging, if we somehow crash
     * or otherwise exit before finishing the salvage.
     */
        struct VolumeHeader header;
        struct VolumeDiskHeader diskHeader;
        struct VolumeDiskData volHeader;

        code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
        DiskToVolumeHeader(&header, &diskHeader);

        IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
        if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
            volHeader.stamp.magic != VOLUMEINFOMAGIC) {
        volHeader.inUse = programType;

        /* If we can't re-write the header, bail out and error. We don't
         * assert when reading the header, since it's possible the
         * header isn't really there (when there's no data associated
         * with the volume; we just delete the vol header file in that
         * case). But if it's there enough that we can read it, but
         * somehow we cannot write to it to signify we're salvaging it,
         * we've got a big problem and we cannot continue. */
        osi_Assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
#endif /* AFS_DEMAND_ATTACH_FS */
/*
 * Ask the fileserver, via FSYNC, to take a volume offline for salvaging.
 *
 * salvinfo - salvage job state; supplies the partition name
 * volumeId - volume to take offline
 *
 * The request is attempted up to three times.  A denial or a protocol
 * mismatch aborts the salvage at once; so does a final status other than
 * SYNC_OK.
 */
AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
    memset(&res, 0, sizeof(res));

    for (i = 0; i < 3; i++) {
        code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
                           FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);

        if (code == SYNC_OK) {
        } else if (code == SYNC_DENIED) {
#ifdef DEMAND_ATTACH_ENABLE
            Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
            Log("AskOffline: file server denied offline request; a general salvage is required.\n");
            Abort("Salvage aborted\n");
        } else if (code == SYNC_BAD_COMMAND) {
            Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
#ifdef DEMAND_ATTACH_ENABLE
            Log("AskOffline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
            Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
            Abort("Salvage aborted\n");
            /* Any other failure: log it and close the FSYNC client
             * connection. */
            Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
            FSYNC_clientFinis();
    if (code != SYNC_OK) {
        Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
        Abort("Salvage aborted\n");
/*
 * Ask the fileserver, via FSYNC, to bring a volume back online.
 *
 * salvinfo - salvage job state; supplies the partition name
 * volumeId - volume to bring online
 *
 * The request is attempted up to three times.  No Abort() call is visible
 * in this function; failures are only logged.
 */
AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
    for (i = 0; i < 3; i++) {
        code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
                           FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);

        if (code == SYNC_OK) {
        } else if (code == SYNC_DENIED) {
            Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
        } else if (code == SYNC_BAD_COMMAND) {
            Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
#ifdef DEMAND_ATTACH_ENABLE
            Log("AskOnline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
            Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
            /* Any other failure: log it and close the FSYNC client
             * connection. */
            Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
            FSYNC_clientFinis();
4377 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4379 /* Volume parameter is passed in case iopen is upgraded in future to
4380 * require a volume Id to be passed
4383 IHandle_t *srcH, *destH;
4384 FdHandle_t *srcFdP, *destFdP;
4386 afs_foff_t size = 0;
4388 IH_INIT(srcH, device, rwvolume, inode1);
4389 srcFdP = IH_OPEN(srcH);
4390 osi_Assert(srcFdP != NULL);
4391 IH_INIT(destH, device, rwvolume, inode2);
4392 destFdP = IH_OPEN(destH);
4393 while ((nBytes = FDH_PREAD(srcFdP, buf, sizeof(buf), size)) > 0) {
4394 osi_Assert(FDH_PWRITE(destFdP, buf, nBytes, size) == nBytes);
4397 osi_Assert(nBytes == 0);
4398 FDH_REALLYCLOSE(srcFdP);
4399 FDH_REALLYCLOSE(destFdP);
/*
 * Log every record of the inode file open on salvinfo->inodeFd.
 *
 * The whole file (size taken from afs_fstat) is read into a heap buffer and
 * interpreted as an array of struct ViceInodeInfo; one line is logged per
 * record.  The stat, the allocation and the read are all asserted.
 */
PrintInodeList(struct SalvInfo *salvinfo)
    struct ViceInodeInfo *ip;
    struct ViceInodeInfo *buf;
    struct afs_stat status;
    osi_Assert(afs_fstat(salvinfo->inodeFd, &status) == 0);
    buf = (struct ViceInodeInfo *)malloc(status.st_size);
    osi_Assert(buf != NULL);
    nInodes = status.st_size / sizeof(struct ViceInodeInfo);
    osi_Assert(read(salvinfo->inodeFd, buf, status.st_size) == status.st_size);
    /* nInodes counts down to zero while ip walks the buffer. */
    for (ip = buf; nInodes--; ip++) {
        Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
            PrintInode(stmp, ip->inodeNumber), ip->linkCount,
            (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
            ip->u.param[2], ip->u.param[3]);
/*
 * Log one line for each of the salvinfo->nVolumesInInodeFile entries of
 * salvinfo->inodeSummary.
 */
PrintInodeSummary(struct SalvInfo *salvinfo)
    struct InodeSummary *isp;
    for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
        isp = &salvinfo->inodeSummary[i];
        Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/*
 * Log the header file name of each of the salvinfo->nVolumes entries
 * starting at salvinfo->volumeSummaryp.
 */
PrintVolumeSummary(struct SalvInfo *salvinfo)
    struct VolumeSummary *vsp;
    for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; vsp++, i++) {
        Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
    /* NT builds must never reach this point. */
    osi_Assert(0); /* Fork is never executed in the NT code path */
#ifdef AFS_DEMAND_ATTACH_FS
    /* In a salvageserver child (f == 0) the FSYNC connection is
     * re-established via VChildProcReconnectFS_r(). */
    if ((f == 0) && (programType == salvageServer)) {
        /* we are a salvageserver child */
#ifdef FSSYNC_BUILD_CLIENT
        VChildProcReconnectFS_r();
#ifdef SALVSYNC_BUILD_CLIENT
#endif /* AFS_DEMAND_ATTACH_FS */
#endif /* !AFS_NT40_ENV */
#ifdef AFS_DEMAND_ATTACH_FS
    /* Extra handling applies when running as the salvageserver. */
    if (programType == salvageServer) {
#ifdef SALVSYNC_BUILD_CLIENT
#ifdef FSSYNC_BUILD_CLIENT
#endif /* AFS_DEMAND_ATTACH_FS */
    /* A thread other than main_thread ends only itself, handing code back
     * as its exit value. */
    if (main_thread != pthread_self())
        pthread_exit((void *)code);
    /* Reap one child; wait() itself must not fail. */
    pid = wait(&status);
    osi_Assert(pid != -1);
    if (WCOREDUMP(status))
        Log("\"%s\" core dumped!\n", prog);
    /* True if the child was killed by a signal or exited non-zero. */
    if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/**
 * Format a time as a local-time string for log output.
 *
 * @param[in] clock      time to format
 * @param[in] precision  non-zero to include seconds ("MM/DD/YYYY HH:MM:SS"),
 *                       zero for minute resolution ("MM/DD/YYYY HH:MM")
 *
 * @return pointer to a static buffer holding the formatted time; the buffer
 *         is overwritten by the next call.  If the time cannot be converted
 *         or formatted, the buffer holds an empty string.
 */
char *
TimeStamp(time_t clock, int precision)
{
    static char timestamp[20];
    const char *fmt = precision ? "%m/%d/%Y %H:%M:%S" : "%m/%d/%Y %H:%M";
    struct tm *lt = localtime(&clock);

    /* localtime() can return NULL, and a zero return from strftime() leaves
     * the buffer contents unspecified; never hand back garbage. */
    if (lt == NULL || strftime(timestamp, sizeof(timestamp), fmt, lt) == 0) {
        timestamp[0] = '\0';
    }
    return timestamp;
}
/*
 * Set up logFile for the log at log_path.
 *
 * log_path - path of the salvager log
 *
 * The existing log is renamed to "<log_path>.old", then log_path is opened
 * for append.
 */
CheckLogFile(char * log_path)
    char oldSlvgLog[AFSDIR_PATH_MAX];
#ifndef AFS_NT40_ENV
    /* NOTE(review): unbounded copy plus append into an AFSDIR_PATH_MAX
     * buffer; no length check on log_path is visible. */
    strcpy(oldSlvgLog, log_path);
    strcat(oldSlvgLog, ".old");
        renamefile(log_path, oldSlvgLog);
        logFile = afs_fopen(log_path, "a");
        if (!logFile) { /* still nothing, use stdout */
#ifndef AFS_NAMEI_ENV
        AFS_DEBUG_IOPS_LOG(logFile);
#ifndef AFS_NT40_ENV
/*
 * Hard-link the log to a name carrying a local-time stamp:
 * "<log_path>.YYYY-MM-DD.HH:MM:SS".
 *
 * log_path - path of the existing log file
 */
TimeStampLogFile(char * log_path)
    char stampSlvgLog[AFSDIR_PATH_MAX];
    /* NOTE(review): the localtime() result is dereferenced below with no
     * visible NULL check. */
    lt = localtime(&now);
    (void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
                       "%s.%04d-%02d-%02d.%02d:%02d:%02d",
                       log_path, lt->tm_year + 1900,
                       lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
    /* try to link the logfile to a timestamped filename */
    /* if it fails, oh well, nothing we can do */
    link(log_path, stampSlvgLog);
#ifndef AFS_NT40_ENV
        /* With syslog in use there is no log file to display. */
        printf("Can't show log since using syslog.\n");
    /* Reopen the salvager log read-only and go through it line by line. */
    logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
        printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
    while (fgets(line, sizeof(line), logFile))
/*
 * printf-style logging.
 *
 * format - printf-style format string, followed by its arguments
 *
 * The message is formatted into a local buffer bounded by afs_vsnprintf.
 * It is then sent either to syslog at LOG_INFO or to logFile, where it is
 * prefixed with a seconds-resolution TimeStamp().
 */
Log(const char *format, ...)
    va_start(args, format);
    (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
#ifndef AFS_NT40_ENV
        /* "%s" keeps the already-formatted text from being re-interpreted
         * as a format string. */
        syslog(LOG_INFO, "%s", tmp);
        gettimeofday(&now, 0);
        fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * printf-style fatal-error report.
 *
 * format - printf-style format string, followed by its arguments
 *
 * The message is formatted like Log() and sent to syslog or logFile, but
 * written to logFile without a timestamp prefix.
 */
Abort(const char *format, ...)
    va_start(args, format);
    (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
#ifndef AFS_NT40_ENV
        syslog(LOG_INFO, "%s", tmp);
        fprintf(logFile, "%s", tmp);
/*
 * Return a freshly malloc'ed copy of the NUL-terminated string s.
 *
 * Allocation failure trips an assertion, so the result is never NULL.
 * The caller owns the returned memory.
 */
char *
ToString(const char *s)
{
    size_t nbytes = strlen(s) + 1;	/* include the terminating NUL */
    char *copy = (char *)malloc(nbytes);

    osi_Assert(copy != NULL);
    memcpy(copy, s, nbytes);
    return copy;
}
4668 /* Remove the FORCESALVAGE file */
4670 RemoveTheForce(char *path)
4673 struct afs_stat force; /* so we can use afs_stat to find it */
4674 strcpy(target,path);
4675 strcat(target,"/FORCESALVAGE");
4676 if (!Testing && ForceSalvage) {
4677 if (afs_stat(target,&force) == 0) unlink(target);
4681 #ifndef AFS_AIX32_ENV
4683 * UseTheForceLuke - see if we can use the force
4686 UseTheForceLuke(char *path)
4688 struct afs_stat force;
4690 strcpy(target,path);
4691 strcat(target,"/FORCESALVAGE");
4693 return (afs_stat(target, &force) == 0);
4697 * UseTheForceLuke - see if we can use the force
4700 * The VRMIX fsck will not muck with the filesystem it is supposedly
4701 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4702 * muck directly with the root inode, which is within the normal
4704 * ListViceInodes() has a side effect of setting ForceSalvage if
4705 * it detects a need, based on root inode examination.
4708 UseTheForceLuke(char *path)
4711 return 0; /* sorry OB1 */
4716 /* NT support routines */
4718 static char execpathname[MAX_PATH];
4720 nt_SalvagePartition(char *partName, int jobn)
4725 if (!*execpathname) {
4726 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
4727 if (!n || n == 1023)
4730 job.cj_magic = SALVAGER_MAGIC;
4731 job.cj_number = jobn;
4732 (void)strcpy(job.cj_part, partName);
4733 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * Child-side setup for a partition salvage (NT only).
 *
 * datap - job record passed by the parent; interpreted as a childJob_t
 * len   - size in bytes of that record
 *
 * The record's size and magic number are validated, then a per-job log file
 * named "<salvager log path>.<job number>" is opened for writing.
 */
nt_SetupPartitionSalvage(void *datap, int len)
    childJob_t *jobp = (childJob_t *) datap;
    char logname[AFSDIR_PATH_MAX];
    /* Reject records of the wrong size or with the wrong magic. */
    if (len != sizeof(childJob_t))
    if (jobp->cj_magic != SALVAGER_MAGIC)
    (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
    logFile = afs_fopen(logname, "w");
4760 #endif /* AFS_NT40_ENV */