2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
91 #include <sys/param.h>
95 #endif /* ITIMER_REAL */
101 #include <sys/stat.h>
106 #include <WINNT/afsevent.h>
109 #define WCOREDUMP(x) ((x) & 0200)
112 #include <afs/afsint.h>
113 #include <afs/afs_assert.h>
114 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
115 #if defined(AFS_VFSINCL_ENV)
116 #include <sys/vnode.h>
118 #include <sys/fs/ufs_inode.h>
120 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
121 #include <ufs/ufs/dinode.h>
122 #include <ufs/ffs/fs.h>
124 #include <ufs/inode.h>
127 #else /* AFS_VFSINCL_ENV */
129 #include <ufs/inode.h>
130 #else /* AFS_OSF_ENV */
131 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
132 #include <sys/inode.h>
135 #endif /* AFS_VFSINCL_ENV */
136 #endif /* AFS_SGI_ENV */
139 #include <sys/lockf.h>
143 #include <checklist.h>
145 #if defined(AFS_SGI_ENV)
150 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
153 #include <sys/mnttab.h>
154 #include <sys/mntent.h>
159 #endif /* AFS_SGI_ENV */
160 #endif /* AFS_HPUX_ENV */
165 #include <afs/osi_inode.h>
169 #include <afs/afsutil.h>
170 #include <afs/fileutil.h>
171 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
179 #include <afs/afssyscalls.h>
183 #include "partition.h"
184 #include "daemon_com.h"
186 #include "volume_inline.h"
187 #include "salvsync.h"
188 #include "viceinode.h"
190 #include "volinodes.h" /* header magic number, etc. stuff */
191 #include "vol-salvage.h"
193 #include "vol_internal.h"
195 #include <afs/prs_fs.h>
197 #ifdef FSSYNC_BUILD_CLIENT
198 #include "vg_cache.h"
205 /*@+fcnmacros +macrofcndecl@*/
208 extern off64_t afs_lseek(int FD, off64_t O, int F);
209 #endif /*S_SPLINT_S */
210 #define afs_lseek(FD, O, F) lseek64(FD, (off64_t) (O), F)
211 #define afs_stat stat64
212 #define afs_fstat fstat64
213 #define afs_open open64
214 #define afs_fopen fopen64
215 #else /* !O_LARGEFILE */
217 extern off_t afs_lseek(int FD, off_t O, int F);
218 #endif /*S_SPLINT_S */
219 #define afs_lseek(FD, O, F) lseek(FD, (off_t) (O), F)
220 #define afs_stat stat
221 #define afs_fstat fstat
222 #define afs_open open
223 #define afs_fopen fopen
224 #endif /* !O_LARGEFILE */
225 /*@=fcnmacros =macrofcndecl@*/
228 extern void *calloc();
230 static char *TimeStamp(time_t clock, int precision);
233 int debug; /* -d flag */
234 extern int Testing; /* -n flag */
235 int ListInodeOption; /* -i flag */
236 int ShowRootFiles; /* -r flag */
237 int RebuildDirs; /* -sal flag */
238 int Parallel = 4; /* -para X flag */
239 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
240 int forceR = 0; /* -b flag */
241 int ShowLog = 0; /* -showlog flag */
242 int ShowSuid = 0; /* -showsuid flag */
243 int ShowMounts = 0; /* -showmounts flag */
244 int orphans = ORPH_IGNORE; /* -orphans option */
249 int useSyslog = 0; /* -syslog flag */
250 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
259 #define MAXPARALLEL 32
261 int OKToZap; /* -o flag */
262 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
263 * in the volume header */
265 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
267 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
270 * information that is 'global' to a particular salvage job.
273 Device fileSysDevice; /**< The device number of the current partition
275 char fileSysPath[8]; /**< The path of the mounted partition currently
276 * being salvaged, i.e. the directory containing
277 * the volume headers */
278 char *fileSysPathName; /**< NT needs this to make name pretty log. */
279 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
280 int VGLinkH_cnt; /**< # of references to lnk handle. */
281 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
284 char *fileSysDeviceName; /**< The block device where the file system being
285 * salvaged was mounted */
286 char *filesysfulldev;
288 int VolumeChanged; /**< Set by any routine which would change the
289 * volume in a way which would require callbacks
290 * to be broken if the volume was put back on
291 * on line by an active file server */
293 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
294 * header dealt with */
296 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
297 int inodeFd; /**< File descriptor for inode file */
299 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
300 int nVolumes; /**< Number of volumes (read-write and read-only)
301 * in volume summary */
302 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
305 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
306 * vnodes in the volume that
307 * we are currently looking
315 /* Forward declarations */
316 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
317 static int AskVolumeSummary(struct SalvInfo *salvinfo,
318 VolumeId singleVolumeNumber);
320 #ifdef AFS_DEMAND_ATTACH_FS
321 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
322 #endif /* AFS_DEMAND_ATTACH_FS */
324 /* Uniquifier stored in the Inode */
329 return (u & 0x3fffff);
331 #if defined(AFS_SGI_EXMAG)
332 return (u & SGI_UNIQMASK);
335 #endif /* AFS_SGI_EXMAG */
342 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
344 return 0; /* otherwise may be transient, e.g. EMFILE */
349 char *save_args[MAX_ARGS];
351 extern pthread_t main_thread;
352 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
356 * Get the salvage lock if not already held. Hold until process exits.
358 * @param[in] locktype READ_LOCK or WRITE_LOCK
/*
 * Common implementation behind ObtainSalvageLock() and
 * ObtainSharedSalvageLock().
 *
 * Initialises a VLockFile on AFSDIR_SERVER_SLVGLOCK_FILEPATH and calls
 * VLockFileLock() with the caller-supplied lock type.  The two message
 * fragments below show that a failed lock attempt is reported either as
 * "another salvager running" or as a generic error carrying the code.
 *
 * locktype: passed straight through to VLockFileLock().
 *
 * NOTE(review): the lines that set `offset`/`nonblock`, test `code`, and
 * decide what happens after the messages are missing from this listing;
 * confirm the failure behaviour against the full source.
 */
361 _ObtainSalvageLock(int locktype)
363 struct VLockFile salvageLock;
368 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
370 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
373 "salvager: There appears to be another salvager running! "
378 "salvager: Error %d trying to acquire salvage lock! "
/* Take the salvage lock with WRITE_LOCK via _ObtainSalvageLock(). */
384 ObtainSalvageLock(void)
386 _ObtainSalvageLock(WRITE_LOCK);
/* Take the salvage lock with READ_LOCK via _ObtainSalvageLock(). */
389 ObtainSharedSalvageLock(void)
391 _ObtainSalvageLock(READ_LOCK);
395 #ifdef AFS_SGI_XFS_IOPS_ENV
396 /* Check if the given partition is mounted. For XFS, the root inode is not a
397 * constant. So we check the hard way.
/*
 * Walk the mount table opened with setmntent(MOUNTED, "r") and compare
 * each entry's mnt_dir with `part`.
 *
 * part: mount directory to look for.
 *
 * NOTE(review): the final return is `mntent ? 1 : 1`, i.e. both arms of
 * the conditional are 1, so the result as written does not depend on
 * whether a matching entry was found.  Confirm whether the "not found"
 * arm was meant to be 0 or whether the check was disabled on purpose.
 * NOTE(review): the setmntent() call sits inside osi_Assert(); if that
 * macro can be compiled out, the open would disappear with it -- verify.
 */
400 IsPartitionMounted(char *part)
403 struct mntent *mntent;
405 osi_Assert(mntfp = setmntent(MOUNTED, "r"));
406 while (mntent = getmntent(mntfp)) {
407 if (!strcmp(part, mntent->mnt_dir))
412 return mntent ? 1 : 1;
415 /* Check if the given inode is the root of the filesystem. */
416 #ifndef AFS_SGI_XFS_IOPS_ENV
/*
 * Report whether `status` describes the filesystem root inode, by
 * comparing st_ino with ROOTINODE.  Only compiled when
 * AFS_SGI_XFS_IOPS_ENV is not defined (see the #ifndef just above).
 *
 * status: stat result for the directory being tested.
 * Returns non-zero when st_ino equals ROOTINODE.
 */
418 IsRootInode(struct afs_stat *status)
421 * The root inode is not a fixed value in XFS partitions. So we need to
422 * see if the partition is in the list of mounted partitions. This only
423 * affects the SalvageFileSys path, so we check there.
425 return (status->st_ino == ROOTINODE);
430 #ifndef AFS_NAMEI_ENV
431 /* We don't want to salvage big files filesystems, since we can't put volumes on
/*
 * Inspect the superblock of the device behind a partition and log when the
 * partition is a "big files" filesystem or when the superblock cannot be
 * read.  Only compiled when AFS_NAMEI_ENV is not defined.
 *
 * mountPoint: partition mount point (its use is on lines not shown here;
 *             presumably the %s argument of the log messages -- confirm).
 * devName:    device name; "/dev/" is prepended unless already present.
 *
 * NOTE(review): `name` is filled with sprintf()/strcpy() and its declared
 * size is not visible in this listing; confirm it can hold "/dev/" plus
 * the longest devName.  The return values are also on missing lines.
 */
435 CheckIfBigFilesFS(char *mountPoint, char *devName)
437 struct superblock fs;
440 if (strncmp(devName, "/dev/", 5)) {
441 (void)sprintf(name, "/dev/%s", devName);
443 (void)strcpy(name, devName);
446 if (ReadSuper(&fs, name) < 0) {
447 Log("Unable to read superblock. Not salvaging partition %s.\n",
451 if (IsBigFilesFileSystem(&fs)) {
452 Log("Partition %s is a big files filesystem, not salvaging.\n",
462 #define HDSTR "\\Device\\Harddisk"
463 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
/*
 * Function form of SameDisk(), built on QueryDosDevice().
 *
 * For each partition the device name is resolved with QueryDosDevice();
 * the result is expected to start with HDSTR ("\Device\Harddisk") and the
 * digits that follow are parsed with atoi() as the disk number (d1, d2).
 * A result with a different prefix is logged as a warning.
 *
 * p1, p2: partitions to compare.
 *
 * NOTE(review): the comparison of d1/d2, the values returned on the
 * failure paths, and the use of the static `dowarn` flag (presumably
 * "warn only once") are on lines missing from this listing -- confirm.
 */
465 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
470 static int dowarn = 1;
472 if (!QueryDosDevice(p1->devName, res, RES_LEN - 1))
474 if (strncmp(res, HDSTR, HDLEN)) {
477 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
478 res, HDSTR, p1->devName);
482 d1 = atoi(&res[HDLEN]);
484 if (!QueryDosDevice(p2->devName, res, RES_LEN - 1))
486 if (strncmp(res, HDSTR, HDLEN)) {
489 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
490 res, HDSTR, p2->devName);
494 d2 = atoi(&res[HDLEN]);
499 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
502 /* This assumes that two partitions with the same device number divided by
503 * PartsPerDisk are on the same disk.
/*
 * Schedule the salvage of one partition, running jobs for different disks
 * in parallel and partitions on the same disk one after another.
 *
 * partP: partition to salvage, or 0 to wait for every outstanding job and
 *        then collect the per-job log files.
 *
 * Visible behaviour:
 *  - a `struct job` is allocated for partP and numbered with `jobcount`;
 *  - when `debug` is set or Parallel == 1, SalvageFileSys() is called
 *    directly instead of queueing;
 *  - a job whose partition satisfies SameDisk() with a running job is
 *    linked behind it through `nextjob`;
 *  - otherwise the loop waits (wait()) whenever numjobs >= Parallel or
 *    when draining, frees the finished job, and starts the next one;
 *  - a started job calls SalvageFileSys1() after redirecting logging to
 *    syslog or to a per-job log file;
 *  - when draining and not using syslog, each file
 *    "<AFSDIR_SERVER_SLVGLOG_FILEPATH>.<i>" is read back with fgets() and
 *    then unlinked.
 *
 * `jobs`, `numjobs` and `jobcount` are function-static, so state persists
 * between calls.
 *
 * NOTE(review): many control-flow lines (else branches, Fork(), Exit(),
 * the increments of numjobs/jobcount) are missing from this listing; the
 * summary above is limited to what the remaining lines show.
 */
506 SalvageFileSysParallel(struct DiskPartition64 *partP)
509 struct DiskPartition64 *partP;
510 int pid; /* Pid for this job */
511 int jobnumb; /* Log file job number */
512 struct job *nextjob; /* Next partition on disk to salvage */
514 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
515 struct job *thisjob = 0;
516 static int numjobs = 0;
517 static int jobcount = 0;
523 char logFileName[256];
527 /* We have a partition to salvage. Copy it into thisjob */
528 thisjob = (struct job *)malloc(sizeof(struct job));
530 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
533 memset(thisjob, 0, sizeof(struct job));
534 thisjob->partP = partP;
535 thisjob->jobnumb = jobcount;
537 } else if (jobcount == 0) {
538 /* We are asking to wait for all jobs (partp == 0), yet we never
541 Log("No file system partitions named %s* found; not salvaged\n",
542 VICE_PARTITION_PREFIX);
546 if (debug || Parallel == 1) {
548 SalvageFileSys(thisjob->partP, 0);
555 /* Check to see if thisjob is for a disk that we are already
556 * salvaging. If it is, link it in as the next job to do. The
557 * jobs array has 1 entry per disk being salvaged. numjobs is
558 * the total number of disks currently being salvaged. In
559 * order to keep the jobs array compact, when a disk is
560 * completed, the highest element in the jobs array is moved
561 * down to the now open slot.
563 for (j = 0; j < numjobs; j++) {
564 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
565 /* On same disk, add it to this list and return */
566 thisjob->nextjob = jobs[j]->nextjob;
567 jobs[j]->nextjob = thisjob;
574 /* Loop until we start thisjob or until all existing jobs are finished */
575 while (thisjob || (!partP && (numjobs > 0))) {
576 startjob = -1; /* No new job to start */
578 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
579 /* Either the max jobs are running or we have to wait for all
580 * the jobs to finish. In either case, we wait for at least one
581 * job to finish. When it's done, clean up after it.
583 pid = wait(&wstatus);
584 osi_Assert(pid != -1);
585 for (j = 0; j < numjobs; j++) { /* Find which job it is */
586 if (pid == jobs[j]->pid)
589 osi_Assert(j < numjobs);
590 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
591 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
594 numjobs--; /* job no longer running */
595 oldjob = jobs[j]; /* remember */
596 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
597 free(oldjob); /* free the old job */
599 /* If there is another partition on the disk to salvage, then
600 * say we will start it (startjob). If not, then put thisjob there
601 * and say we will start it.
603 if (jobs[j]) { /* Another partition to salvage */
604 startjob = j; /* Will start it */
605 } else { /* There is not another partition to salvage */
607 jobs[j] = thisjob; /* Add thisjob */
609 startjob = j; /* Will start it */
611 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
612 startjob = -1; /* Don't start it - already running */
616 /* We don't have to wait for a job to complete */
618 jobs[numjobs] = thisjob; /* Add this job */
620 startjob = numjobs; /* Will start it */
624 /* Start up a new salvage job on a partition in job slot "startjob" */
625 if (startjob != -1) {
627 Log("Starting salvage of file system partition %s\n",
628 jobs[startjob]->partP->name);
630 /* For NT, we not only fork, but re-exec the salvager. Pass in the
631 * commands and pass the child job number via the data path.
634 nt_SalvagePartition(jobs[startjob]->partP->name,
635 jobs[startjob]->jobnumb);
636 jobs[startjob]->pid = pid;
641 jobs[startjob]->pid = pid;
647 for (fd = 0; fd < 16; fd++)
654 openlog("salvager", LOG_PID, useSyslogFacility);
658 (void)afs_snprintf(logFileName, sizeof logFileName,
660 AFSDIR_SERVER_SLVGLOG_FILEPATH,
661 jobs[startjob]->jobnumb);
662 logFile = afs_fopen(logFileName, "w");
667 SalvageFileSys1(jobs[startjob]->partP, 0);
672 } /* while ( thisjob || (!partP && numjobs > 0) ) */
674 /* If waited for all jobs to complete, now collect log files and return */
676 if (!useSyslog) /* if syslogging - no need to collect */
679 for (i = 0; i < jobcount; i++) {
680 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
681 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
682 if ((passLog = afs_fopen(logFileName, "r"))) {
683 while (fgets(buf, sizeof(buf), passLog)) {
688 (void)unlink(logFileName);
/*
 * Salvage a partition (or a single volume on it) by calling
 * SalvageFileSys1(), either in this process or in a forked child.
 *
 * partP:              partition to salvage.
 * singleVolumeNumber: volume to salvage, or 0; passed through unchanged.
 *
 * The work runs in the current process when `canfork` is false or `debug`
 * is set; otherwise it runs in the process where Fork() returns 0.
 * Wait("SalvageFileSys") is the other visible call.
 *
 * NOTE(review): the else keyword and the child's exit call are on lines
 * missing from this listing; presumably the child exits and the parent
 * waits -- confirm.
 */
697 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
699 if (!canfork || debug || Fork() == 0) {
700 SalvageFileSys1(partP, singleVolumeNumber);
701 if (canfork && !debug) {
706 Wait("SalvageFileSys");
/*
 * Split a device path held in `pbuffer`.
 *
 * pbuffer: device path; rewritten in place to the text after its last '/'
 *          (strcpy(pbuffer, ptr + 1)).
 * wpath:   output buffer; its assignment is on lines not shown here
 *          (presumably receives the directory part taken from `pbuf` --
 *          confirm).
 *
 * NOTE(review): `pbuffer` is copied into the 128-byte local `pbuf` with an
 * unbounded strcpy().  The one caller visible in this file passes a
 * 100-byte buffer, which fits; confirm there are no other callers with
 * longer input.  The return statements are also missing from this listing.
 */
710 get_DevName(char *pbuffer, char *wpath)
712 char pbuf[128], *ptr;
713 strcpy(pbuf, pbuffer);
714 ptr = (char *)strrchr(pbuf, '/');
720 ptr = (char *)strrchr(pbuffer, '/');
722 strcpy(pbuffer, ptr + 1);
/*
 * Salvage one partition, or one volume group on it, in the current process.
 *
 * partP:              partition to work on.
 * singleVolumeNumber: volume to salvage, or 0 for the whole partition.
 *
 * Steps visible in this listing, in order:
 *  1. Zero a local SalvInfo and fill in partition, device and path fields.
 *  2. Abort once `tries` exceeds VOL_MAX_CHECKOUT_RETRIES (the retry loop
 *     itself is on missing lines).
 *  3. Single volume: connect to the fileserver unless running as the
 *     salvageserver, AskOffline() the volume and, for demand-attach,
 *     LockVolume() it.  Whole partition: VLockPartition().
 *  4. Set ForceSalvage from UseTheForceLuke().
 *  5. Remove leftover "salvage.inodes.*" / "salvage.temp.*" files found in
 *     the partition directory.
 *  6. Create the inode list file, unlink it straight away (it stays usable
 *     through the open FILE), and fill it via GetInodeSummary().
 *  7. GetVolumeSummary(), then walk the inode summaries grouped by
 *     RWvolumeId, deleting header files that have no inodes and calling
 *     SalvageVolumeGroup() per group.
 *  8. RemoveTheForce() for whole-partition runs, AskOnline() the volumes,
 *     log completion and close the inode file.
 *
 * NOTE(review): strncpy(inodeListPath, ..., 255) into a 256-byte automatic
 * array does not itself write a terminating NUL; no visible line sets
 * inodeListPath[255].  Confirm against the full source.
 * NOTE(review): on the branch that uses sprintf(), salvinfo->fileSysPath
 * receives "%s\\" unbounded; the field is declared elsewhere in this file
 * as char[8].  Confirm the path can never exceed that.
 * NOTE(review): `npath` is built with strcpy()/strcat() and its size is
 * not visible here.
 */
729 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
732 char inodeListPath[256];
733 FILE *inodeFile = NULL;
734 static char tmpDevName[100];
735 static char wpath[100];
736 struct VolumeSummary *vsp, *esp;
740 struct SalvInfo l_salvinfo;
741 struct SalvInfo *salvinfo = &l_salvinfo;
744 memset(salvinfo, 0, sizeof(*salvinfo));
751 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
752 Abort("Raced too many times with fileserver restarts while trying to "
753 "checkout/lock volumes; Aborted\n");
755 #ifdef AFS_DEMAND_ATTACH_FS
757 /* unlock all previous volume locks, since we're about to lock them
759 VLockFileReinit(&partP->volLockFile);
761 #endif /* AFS_DEMAND_ATTACH_FS */
763 salvinfo->fileSysPartition = partP;
764 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
765 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
768 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
769 (void)sprintf(salvinfo->fileSysPath, "%s\\", salvinfo->fileSysPathName);
770 name = partP->devName;
772 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
773 strcpy(tmpDevName, partP->devName);
774 name = get_DevName(tmpDevName, wpath);
775 salvinfo->fileSysDeviceName = name;
776 salvinfo->filesysfulldev = wpath;
779 if (singleVolumeNumber) {
780 #ifndef AFS_DEMAND_ATTACH_FS
781 /* only non-DAFS locks the partition when salvaging a single volume;
782 * DAFS will lock the individual volumes in the VG */
783 VLockPartition(partP->name);
784 #endif /* !AFS_DEMAND_ATTACH_FS */
788 /* salvageserver already setup fssync conn for us */
789 if ((programType != salvageServer) && !VConnectFS()) {
790 Abort("Couldn't connect to file server\n");
793 AskOffline(salvinfo, singleVolumeNumber);
794 #ifdef AFS_DEMAND_ATTACH_FS
795 if (LockVolume(salvinfo, singleVolumeNumber)) {
798 #endif /* AFS_DEMAND_ATTACH_FS */
801 VLockPartition(partP->name);
805 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
808 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
809 partP->name, name, (Testing ? "(READONLY mode)" : ""));
811 Log("***Forced salvage of all volumes on this partition***\n");
816 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
823 osi_Assert((dirp = opendir(salvinfo->fileSysPath)) != NULL);
824 while ((dp = readdir(dirp))) {
825 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
826 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
828 Log("Removing old salvager temp files %s\n", dp->d_name);
829 strcpy(npath, salvinfo->fileSysPath);
831 strcat(npath, dp->d_name);
837 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
839 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
840 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
842 snprintf(inodeListPath, 255, "%s/salvage.inodes.%s.%d", tdir, name,
846 inodeFile = fopen(inodeListPath, "w+b");
848 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
851 /* Using nt_unlink here since we're really using the delete on close
852 * semantics of unlink. In most places in the salvager, we really do
853 * mean to unlink the file at that point. Those places have been
854 * modified to actually do that so that the NT crt can be used there.
856 code = nt_unlink(inodeListPath);
858 code = unlink(inodeListPath);
861 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
864 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
868 salvinfo->inodeFd = fileno(inodeFile);
869 if (salvinfo->inodeFd == -1)
870 Abort("Temporary file %s is missing...\n", inodeListPath);
871 afs_lseek(salvinfo->inodeFd, 0L, SEEK_SET);
872 if (ListInodeOption) {
873 PrintInodeList(salvinfo);
876 /* enumerate volumes in the partition.
877 * figure out sets of read-only + rw volumes.
878 * salvage each set, read-only volumes first, then read-write.
879 * Fix up inodes on last volume in set (whether it is read-write
882 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
886 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
887 i < salvinfo->nVolumesInInodeFile; i = j) {
888 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
890 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
892 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
893 struct VolumeSummary *tsp;
894 /* Scan volume list (from partition root directory) looking for the
895 * current rw volume number in the volume list from the inode scan.
896 * If there is one here that is not in the inode volume list,
898 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
900 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
902 /* Now match up the volume summary info from the root directory with the
903 * entry in the volume list obtained from scanning inodes */
904 salvinfo->inodeSummary[j].volSummary = NULL;
905 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
906 if (tsp->header.id == vid) {
907 salvinfo->inodeSummary[j].volSummary = tsp;
913 /* Salvage the group of volumes (several read-only + 1 read/write)
914 * starting with the current read-only volume we're looking at.
916 SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
919 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
920 for (; vsp < esp; vsp++) {
922 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
925 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
926 RemoveTheForce(salvinfo->fileSysPath);
928 if (!Testing && singleVolumeNumber) {
929 #ifdef AFS_DEMAND_ATTACH_FS
930 /* unlock vol headers so the fs can attach them when we AskOnline */
931 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
932 #endif /* AFS_DEMAND_ATTACH_FS */
934 AskOnline(salvinfo, singleVolumeNumber);
936 /* Step through the volumeSummary list and set all volumes on-line.
937 * The volumes were taken off-line in GetVolumeSummary.
939 for (j = 0; j < salvinfo->nVolumes; j++) {
940 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
944 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
945 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
948 fclose(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
/*
 * Report and remove a volume header file that has no inode data behind it.
 *
 * salvinfo: salvage context; supplies fileSysPath and fileSysPartition.
 * vsp:      summary entry naming the header file and its id / parent.
 *
 * Logs the header path (worded "would have been deleted" when Testing is
 * set), calls VDestroyVolumeDiskHeader() and logs any error code, then
 * unlinks the path, treating ENOENT as success.
 *
 * NOTE(review): the guards deciding whether each step runs (e.g. on
 * Testing or on vsp->fileName) are on lines missing from this listing, as
 * is the declaration of `path`, which is filled by an unbounded sprintf().
 */
952 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
955 sprintf(path, "%s/%s", salvinfo->fileSysPath, vsp->fileName);
958 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
961 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
963 Log("Error %ld destroying volume disk header for volume %lu\n",
964 afs_printable_int32_ld(code),
965 afs_printable_uint32_lu(vsp->header.id));
968 /* make sure we actually delete the fileName file; ENOENT
969 * is fine, since VDestroyVolumeDiskHeader probably already
971 if (unlink(path) && errno != ENOENT) {
972 Log("Unable to unlink %s (errno = %d)\n", path, errno);
/*
 * Ordering function for struct ViceInodeInfo records; GetInodeSummary()
 * passes it to qsort().
 *
 * _p1, _p2: pointers to the two records being compared.
 *
 * Comparison keys, in the order the visible tests apply them:
 *  1. When either record is a special inode (vnodeNumber == INODESPECIAL),
 *     the read-write volume id: parentId for a special inode, volumeId
 *     otherwise.  Ties between two special inodes fall back to volumeId
 *     and special type; a special/ordinary pair is decided by whether the
 *     special inode's volume is the read-write volume.
 *  2. volumeId, then vnodeNumber.
 *  3. vnodeUniquifier, then inodeDataVersion, compared in reverse so the
 *     preferred inode sorts first.  Under AFS_3DISPARES / AFS_SGI_EXMAG an
 *     extra test treats "above 90% of the range" versus "below 10% of the
 *     range" as a counter wrap-around.
 *
 * NOTE(review): almost all of the `return -1` / `return 1` lines are
 * missing from this listing, so the exact sign of each branch cannot be
 * confirmed here.
 */
979 CompareInodes(const void *_p1, const void *_p2)
981 const struct ViceInodeInfo *p1 = _p1;
982 const struct ViceInodeInfo *p2 = _p2;
983 if (p1->u.vnode.vnodeNumber == INODESPECIAL
984 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
985 VolumeId p1rwid, p2rwid;
987 (p1->u.vnode.vnodeNumber ==
988 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
990 (p2->u.vnode.vnodeNumber ==
991 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
996 if (p1->u.vnode.vnodeNumber == INODESPECIAL
997 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
998 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
999 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1000 if (p1->u.vnode.volumeId == p1rwid)
1002 if (p2->u.vnode.volumeId == p2rwid)
1004 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1006 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1007 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1008 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1010 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1012 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1014 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1016 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1018 /* The following tests are reversed, so that the most desirable
1019 * of several similar inodes comes first */
1020 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1021 #ifdef AFS_3DISPARES
1022 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1023 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1026 #ifdef AFS_SGI_EXMAG
1027 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1028 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1033 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1034 #ifdef AFS_3DISPARES
1035 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1036 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1039 #ifdef AFS_SGI_EXMAG
1040 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1041 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1046 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1047 #ifdef AFS_3DISPARES
1048 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1049 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1052 #ifdef AFS_SGI_EXMAG
1053 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1054 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1059 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1060 #ifdef AFS_3DISPARES
1061 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1062 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1065 #ifdef AFS_SGI_EXMAG
1066 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1067 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/*
 * Summarise the run of consecutive inode records that share the volume id
 * of the first record.
 *
 * ip:        first record of the run.
 * maxInodes: upper bound on the number of records examined.
 * summary:   receives volumeId, RWvolumeId, nInodes, nSpecialInodes and
 *            maxUniquifier for the run.
 *
 * The scan stops when maxInodes records have been looked at or when a
 * record carries a different volumeId.  RWvolumeId defaults to the volume
 * id itself and is replaced by the parentId of a special inode when one
 * is seen; maxUniquifier tracks the largest vnodeUniquifier tested.
 *
 * NOTE(review): the counter increments, the advance of `ip`, and the
 * else that separates special from ordinary inodes are on lines missing
 * from this listing.
 */
1076 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1077 struct InodeSummary *summary)
1079 VolumeId volume = ip->u.vnode.volumeId;
1080 VolumeId rwvolume = volume;
1085 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1087 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1089 rwvolume = ip->u.special.parentId;
1090 /* This isn't quite right, as there could (in error) be different
1091 * parent inodes in different special vnodes */
1093 if (maxunique < ip->u.vnode.vnodeUniquifier)
1094 maxunique = ip->u.vnode.vnodeUniquifier;
1098 summary->volumeId = volume;
1099 summary->RWvolumeId = rwvolume;
1100 summary->nInodes = n;
1101 summary->nSpecialInodes = nSpecial;
1102 summary->maxUniquifier = maxunique;
/*
 * Inode filter that GetInodeSummary() hands to ListViceInodes() when a
 * single volume is being salvaged.
 *
 * inodeinfo:          inode record under test.
 * singleVolumeNumber: volume id being salvaged.
 * rock:               not referenced by the visible lines.
 *
 * Returns non-zero when the record belongs to singleVolumeNumber: special
 * inodes are matched on parentId, all others on volumeId.
 */
1106 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1108 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1109 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1110 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1115 * Collect list of inodes in file named by path. If a truly fatal error,
1116 * unlink the file and abort. For lesser errors, return -1. The file will
1117 * be unlinked by the caller.
/*
 * Build the sorted inode table and the per-volume inode summaries for the
 * partition described by salvinfo.
 *
 * salvinfo:           salvage context; inodeFd, inodeSummary and
 *                     nVolumesInInodeFile are written here.
 * inodeFile:          open temporary file that receives the raw inode
 *                     records from ListViceInodes().
 * singleVolumeNumber: volume being salvaged, or 0 for the whole partition;
 *                     non-zero selects the OnlyOneVolume filter.
 *
 * Steps visible in this listing:
 *  1. ListViceInodes() writes the inode records to inodeFile.
 *  2. A summary file "<tdir>/salvage.temp.<pid>" (or a _tempnam() name) is
 *     created and unlinked straight away.
 *  3. In the worker (this process, or the child when Fork() is used), the
 *     whole inode table is read into memory, sorted with CompareInodes,
 *     written back to the start of the inode file, and CountVolumeInodes()
 *     emits one InodeSummary record per volume into the summary file.
 *  4. After Wait("Inode summary"), the summary file is read in full into
 *     salvinfo->inodeSummary and every volSummary pointer is set to NULL.
 *
 * NOTE(review): summaryFileName is 50 bytes and, on the _tempnam() branch,
 * is filled by an unbounded strcpy(); confirm the generated name fits.
 * NOTE(review): status.st_size is narrowed to unsigned long before it is
 * used for the allocation and for read()/write(); on platforms where
 * unsigned long is 32 bits a very large inode file would be truncated.
 * NOTE(review): return statements and several branches are on lines
 * missing from this listing; the existing header comment mentions -1 for
 * lesser errors.
 */
1120 GetInodeSummary(struct SalvInfo *salvinfo, FILE *inodeFile, VolumeId singleVolumeNumber)
1122 struct afs_stat status;
1125 struct ViceInodeInfo *ip, *ip_save;
1126 struct InodeSummary summary;
1127 char summaryFileName[50];
1130 char *dev = salvinfo->fileSysPath;
1131 char *wpath = salvinfo->fileSysPath;
1133 char *dev = salvinfo->fileSysDeviceName;
1134 char *wpath = salvinfo->filesysfulldev;
1136 char *part = salvinfo->fileSysPath;
1140 /* This file used to come from vfsck; cobble it up ourselves now... */
1142 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1143 singleVolumeNumber ? OnlyOneVolume : 0,
1144 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1146 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1149 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1151 if (forceSal && !ForceSalvage) {
1152 Log("***Forced salvage of all volumes on this partition***\n");
1155 fseek(inodeFile, 0L, SEEK_SET);
1156 salvinfo->inodeFd = fileno(inodeFile);
1157 if (salvinfo->inodeFd == -1 || afs_fstat(salvinfo->inodeFd, &status) == -1) {
1158 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1160 tdir = (tmpdir ? tmpdir : part);
1162 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1163 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp"));
1165 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1166 "%s/salvage.temp.%d", tdir, getpid());
1168 summaryFile = afs_fopen(summaryFileName, "a+");
1169 if (summaryFile == NULL) {
1170 Abort("Unable to create inode summary file\n");
1174 /* Using nt_unlink here since we're really using the delete on close
1175 * semantics of unlink. In most places in the salvager, we really do
1176 * mean to unlink the file at that point. Those places have been
1177 * modified to actually do that so that the NT crt can be used there.
1179 code = nt_unlink(summaryFileName);
1181 code = unlink(summaryFileName);
1184 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1187 if (!canfork || debug || Fork() == 0) {
1189 unsigned long st_size=(unsigned long) status.st_size;
1190 nInodes = st_size / sizeof(struct ViceInodeInfo);
1192 fclose(summaryFile);
1193 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1194 RemoveTheForce(salvinfo->fileSysPath);
1196 struct VolumeSummary *vsp;
1199 GetVolumeSummary(salvinfo, singleVolumeNumber);
1201 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1203 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1206 Log("%s vice inodes on %s; not salvaged\n",
1207 singleVolumeNumber ? "No applicable" : "No", dev);
1210 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1212 fclose(summaryFile);
1214 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1217 if (read(salvinfo->inodeFd, ip, st_size) != st_size) {
1218 fclose(summaryFile);
1219 Abort("Unable to read inode table; %s not salvaged\n", dev);
1221 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1222 if (afs_lseek(salvinfo->inodeFd, 0, SEEK_SET) == -1
1223 || write(salvinfo->inodeFd, ip, st_size) != st_size) {
1224 fclose(summaryFile);
1225 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1230 CountVolumeInodes(ip, nInodes, &summary);
1231 if (fwrite(&summary, sizeof(summary), 1, summaryFile) != 1) {
1232 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1233 fclose(summaryFile);
1236 summary.index += (summary.nInodes);
1237 nInodes -= summary.nInodes;
1238 ip += summary.nInodes;
1241 ip = ip_save = NULL;
1242 /* Following fflush is not fclose, because if it was debug mode would not work */
1243 if (fflush(summaryFile) == EOF || fsync(fileno(summaryFile)) == -1) {
1244 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1245 fclose(summaryFile);
1248 if (canfork && !debug) {
1253 if (Wait("Inode summary") == -1) {
1254 fclose(summaryFile);
1255 Exit(1); /* salvage of this partition aborted */
1258 osi_Assert(afs_fstat(fileno(summaryFile), &status) != -1);
1259 if (status.st_size != 0) {
1261 unsigned long st_status=(unsigned long)status.st_size;
1262 salvinfo->inodeSummary = (struct InodeSummary *)malloc(st_status);
1263 osi_Assert(salvinfo->inodeSummary != NULL);
1264 /* For GNU we need to do lseek to get the file pointer moved. */
1265 osi_Assert(afs_lseek(fileno(summaryFile), 0, SEEK_SET) == 0);
1266 ret = read(fileno(summaryFile), salvinfo->inodeSummary, st_status);
1267 osi_Assert(ret == st_status);
1269 salvinfo->nVolumesInInodeFile =(unsigned long)(status.st_size) / sizeof(struct InodeSummary);
1270 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1271 salvinfo->inodeSummary[i].volSummary = NULL;
1273 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)(status.st_size));
1274 fclose(summaryFile);
1278 /* Comparison routine for volume sort.
1279 This is set up so that a read-write volume comes immediately before
1280 any read-only clones of that volume */
/* Both arguments are read as const struct VolumeSummary pointers.
 * Ordering, as far as this excerpt shows:
 *   1. entries with different header.parent sort by ascending parent;
 *   2. within one parent, an entry whose header.id equals header.parent is
 *      treated as the read-write volume (the values returned by those two
 *      branches are on lines not visible in this excerpt);
 *   3. two read-only entries sort by ascending header.id; equal ids yield 1,
 *      not 0.
 */
1282 CompareVolumes(const void *_p1, const void *_p2)
1284 const struct VolumeSummary *p1 = _p1;
1285 const struct VolumeSummary *p2 = _p2;
1286 if (p1->header.parent != p2->header.parent)
1287 return p1->header.parent < p2->header.parent ? -1 : 1;
1288 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1290 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1292 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1296 * Gleans volumeSummary information by asking the fileserver
1298 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1299 * salvaging a whole partition
1301 * @return whether we obtained the volume summary information or not
1302 * @retval 0 success; we obtained the volume summary information
1303 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1305 * @retval 1 we did not get the volume summary information; either the
1306 * fileserver responded with an error, or we are not supposed to
1307 * ask the fileserver for the information (e.g. we are salvaging
1308 * the entire partition or we are not the salvageserver)
1310 * @note for non-DAFS, always returns 1
/* Builds salvinfo->volumeSummaryp from the volume group information held by
 * the fileserver instead of scanning the partition.
 *
 * Everything visible here is compiled only when both FSSYNC_BUILD_CLIENT and
 * AFS_DEMAND_ATTACH_FS are defined, and only acts when programType is
 * salvageServer and singleVolumeNumber is nonzero.  The visible flow is:
 *  - query the volume group with FSYNC_VGCQuery, polling again (with a sleep
 *    that ramps from 1 up to 10 seconds) for as long as the reply is
 *    SYNC_FAILED with reason FSYNC_PART_SCANNING;
 *  - treat SYNC_FAILED with reason FSYNC_UNKNOWN_VOLID as an empty response
 *    whose rw id is singleVolumeNumber;
 *  - if the reported rw id differs from singleVolumeNumber, log that, call
 *    SALVSYNC_LinkVolume (only when SALVSYNC_BUILD_CLIENT is defined) and
 *    Exit(SALSRV_EXIT_VOLGROUP_LINK);
 *  - otherwise allocate VOL_VG_MAX_VOLS summaries and walk q_res.children[]:
 *    members other than singleVolumeNumber are taken offline and locked, the
 *    disk header is read and converted into the summary, the external file
 *    name is recorded and nVolumes is incremented; the summaries are then
 *    sorted with qsort.
 * NOTE(review): several control-flow lines (braces, returns, else arms) are
 * not visible in this excerpt, so the exact branch nesting and the return
 * values should be confirmed against the complete source.
 */
1313 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1316 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1317 if (programType == salvageServer) {
1318 if (singleVolumeNumber) {
1319 FSSYNC_VGQry_response_t q_res;
1321 struct VolumeSummary *vsp;
1323 struct VolumeDiskHeader diskHdr;
1325 memset(&res, 0, sizeof(res));
1327 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1330 * We must wait for the partition to finish scanning before
1331 * can continue, since we will not know if we got the entire
1332 * VG membership unless the partition is fully scanned.
1333 * We could, in theory, just scan the partition ourselves if
1334 * the VG cache is not ready, but we would be doing the exact
1335 * same scan the fileserver is doing; it will almost always
1336 * be faster to wait for the fileserver. The only exceptions
1337 * are if the partition does not take very long to scan, and
1338 * in that case it's fast either way, so who cares?
1340 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1341 Log("waiting for fileserver to finish scanning partition %s...\n",
1342 salvinfo->fileSysPartition->name);
1344 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1345 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1346 * just so small partitions don't need to wait over 10
1347 * seconds every time, and large partitions are generally
1348 * polled only once every ten seconds. */
1349 sleep((i > 10) ? (i = 10) : i);
1351 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1355 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1356 /* This can happen if there's no header for the volume
1357 * we're salvaging, or no headers exist for the VG (if
1358 * we're salvaging an RW). Act as if we got a response
1359 * with no VG members. The headers may be created during
1360 * salvaging, if there are inodes in this VG. */
1362 memset(&q_res, 0, sizeof(q_res));
1363 q_res.rw = singleVolumeNumber;
1367 Log("fileserver refused VGCQuery request for volume %lu on "
1368 "partition %s, code %ld reason %ld\n",
1369 afs_printable_uint32_lu(singleVolumeNumber),
1370 salvinfo->fileSysPartition->name,
1371 afs_printable_int32_ld(code),
1372 afs_printable_int32_ld(res.hdr.reason));
1376 if (q_res.rw != singleVolumeNumber) {
1377 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1378 afs_printable_uint32_lu(singleVolumeNumber),
1379 afs_printable_uint32_lu(q_res.rw));
1380 #ifdef SALVSYNC_BUILD_CLIENT
1381 if (SALVSYNC_LinkVolume(q_res.rw,
1383 salvinfo->fileSysPartition->name,
1385 Log("schedule request failed\n");
1387 #endif /* SALVSYNC_BUILD_CLIENT */
1388 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1391 salvinfo->volumeSummaryp = malloc(VOL_VG_MAX_VOLS * sizeof(struct VolumeSummary));
1392 osi_Assert(salvinfo->volumeSummaryp != NULL);
1394 salvinfo->nVolumes = 0;
1395 vsp = salvinfo->volumeSummaryp;
1397 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1398 char name[VMAXPATHLEN];
1400 if (!q_res.children[i]) {
1404 /* AskOffline for singleVolumeNumber was called much earlier */
1405 if (q_res.children[i] != singleVolumeNumber) {
1406 AskOffline(salvinfo, q_res.children[i]);
1407 if (LockVolume(salvinfo, q_res.children[i])) {
1413 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1415 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1416 afs_printable_uint32_lu(q_res.children[i]));
1421 DiskToVolumeHeader(&vsp->header, &diskHdr);
1422 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1423 vsp->fileName = ToString(name);
1424 salvinfo->nVolumes++;
1428 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1433 Log("Cannot get volume summary from fileserver; falling back to scanning "
1434 "entire partition\n");
1437 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
1442 * count how many volume headers are found by VWalkVolumeHeaders.
1444 * @param[in] dp the disk partition (unused)
1445 * @param[in] name full path to the .vol header (unused)
1446 * @param[in] hdr the header data (unused)
1447 * @param[in] last whether this is the last try or not (unused)
1448 * @param[in] rock actually an afs_int32*; the running count of how many
1449 * volumes we have found
/* Header-walk callback: GetVolumeSummary hands it to VWalkVolumeHeaders to
 * count the volume headers on the partition before the summary array is
 * allocated.  rock is reinterpreted as an afs_int32 counter; the statement
 * that updates the counter and the return are not visible in this excerpt.
 */
1454 CountHeader(struct DiskPartition64 *dp, const char *name,
1455 struct VolumeDiskHeader *hdr, int last, void *rock)
1457 afs_int32 *nvols = (afs_int32 *)rock;
1463 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
/* Bundle of state passed as the opaque rock argument to the RecordHeader and
 * UnlinkHeader callbacks.  GetVolumeSummary fills it in before walking the
 * volume headers and copies nVolumes back into salvinfo afterwards.
 */
1466 struct SalvageScanParams {
1467 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1468 * vol id of the VG we're salvaging */
1469 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1470 * we're filling in */
1471 afs_int32 nVolumes; /**< # of vols we've encountered */
1472 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1473 * # of vols we've alloc'd memory for) */
1474 int retry; /**< do we need to retry vol lock/checkout? */
1475 struct SalvInfo *salvinfo; /**< salvage job info */
1479 * records volume summary info found from VWalkVolumeHeaders.
1481 * Found volumes are also taken offline if they are in the specific volume
1482 * group we are looking for.
1484 * @param[in] dp the disk partition
1485 * @param[in] name full path to the .vol header
1486 * @param[in] hdr the header data
1487 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1488 * @param[in] rock actually a struct SalvageScanParams*, containing the
1489 * information needed to record the volume summary data
1491 * @return operation status
1493 * @retval -1 volume locking raced with fileserver restart; checking out
1494 * and locking volumes needs to be retried
1495 * @retval 1 volume header is mis-named and should be deleted
/* Header-walk callback that turns one on-disk volume header into a
 * struct VolumeSummary stored through params->vsp.
 *
 * Visible behaviour:
 *  - rock is cast to struct SalvageScanParams; hdr is converted with
 *    DiskToVolumeHeader into a local summary.
 *  - When a single volume is being salvaged and this header is that volume
 *    but its parent is a different id (a clone), the salvageserver logs the
 *    situation, calls SALVSYNC_LinkVolume (under SALVSYNC_BUILD_CLIENT) and
 *    exits with SALSRV_EXIT_VOLGROUP_LINK; the other visible path logs that
 *    a read-only volume is not salvaged.
 *  - For headers in scope (partition salvage, or id/parent matching the
 *    requested volume) the file name after the last slash is compared with
 *    the name VFORMAT would produce for the header id.
 *  - Volumes other than singleVolumeNumber are taken offline with
 *    AskOffline, and locked with LockVolume under AFS_DEMAND_ATTACH_FS.
 *  - A mis-named header is reported on the last try unless Showmode is set.
 *  - Finding more headers than params->totalVolumes aborts the salvage;
 *    otherwise the summary is copied into params->vsp.
 * NOTE(review): the lines that set badname, advance vsp/nVolumes and return
 * are not visible in this excerpt; confirm against the complete source.
 */
1498 RecordHeader(struct DiskPartition64 *dp, const char *name,
1499 struct VolumeDiskHeader *hdr, int last, void *rock)
1501 char nameShouldBe[64];
1502 struct SalvageScanParams *params;
1503 struct VolumeSummary summary;
1504 VolumeId singleVolumeNumber;
1505 struct SalvInfo *salvinfo;
1507 params = (struct SalvageScanParams *)rock;
1509 singleVolumeNumber = params->singleVolumeNumber;
1510 salvinfo = params->salvinfo;
1512 DiskToVolumeHeader(&summary.header, hdr);
1514 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1515 && summary.header.parent != singleVolumeNumber) {
1517 if (programType == salvageServer) {
1518 #ifdef SALVSYNC_BUILD_CLIENT
1519 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1520 summary.header.id, summary.header.parent);
1521 if (SALVSYNC_LinkVolume(summary.header.parent,
1525 Log("schedule request failed\n");
1528 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1531 Log("%u is a read-only volume; not salvaged\n",
1532 singleVolumeNumber);
1537 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1538 || summary.header.parent == singleVolumeNumber) {
1540 /* check if the header file is incorrectly named */
1542 const char *base = strrchr(name, '/');
1549 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1550 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1553 if (strcmp(nameShouldBe, base)) {
1554 /* .vol file has wrong name; retry/delete */
1558 if (!badname || last) {
1559 /* only offline the volume if the header is good, or if this is
1560 * the last try looking at it; avoid AskOffline'ing the same vol
1563 if (singleVolumeNumber
1564 && summary.header.id != singleVolumeNumber) {
1565 /* don't offline singleVolumeNumber; we already did that
1568 AskOffline(salvinfo, summary.header.id);
1570 #ifdef AFS_DEMAND_ATTACH_FS
1572 /* don't lock the volume if the header is bad, since we're
1573 * about to delete it anyway. */
1574 if (LockVolume(salvinfo, summary.header.id)) {
1579 #endif /* AFS_DEMAND_ATTACH_FS */
1583 if (last && !Showmode) {
1584 Log("Volume header file %s is incorrectly named (should be %s "
1585 "not %s); %sdeleted (it will be recreated later, if "
1586 "necessary)\n", name, nameShouldBe, base,
1587 (Testing ? "it would have been " : ""));
1592 summary.fileName = ToString(base);
1595 if (params->nVolumes > params->totalVolumes) {
1596 /* We found more volumes than we found on the first partition walk;
1597 * apparently something created a volume while we were
1598 * partition-salvaging, or we found more than 20 vols when salvaging a
1599 * particular volume. Abort if we detect this, since other programs
1600 * supposed to not touch the partition while it is partition-salvaging,
1601 * and we shouldn't find more than 20 vols in a VG.
1603 Abort("Found %ld vol headers, but should have found at most %ld! "
1604 "Make sure the volserver/fileserver are not running at the "
1605 "same time as a partition salvage\n",
1606 afs_printable_int32_ld(params->nVolumes),
1607 afs_printable_int32_ld(params->totalVolumes));
1610 memcpy(params->vsp, &summary, sizeof(summary));
1618 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1620 * If the header could not be read in at all, the header is always unlinked.
1621 * If instead RecordHeader said the header was bad (that is, the header file
1622 * is mis-named), we only unlink if we are doing a partition salvage, as
1623 * opposed to salvaging a specific volume group.
1625 * @param[in] dp the disk partition
1626 * @param[in] name full path to the .vol header
1627 * @param[in] hdr header data, or NULL if the header could not be read
1628 * @param[in] rock actually a struct SalvageScanParams*, with some information
/* Header-walk callback, passed to VWalkVolumeHeaders together with
 * RecordHeader, that decides whether a bad header file is removed.
 * rock is cast to struct SalvageScanParams.  The visible branches cover a
 * header that could not be read at all (logged as not legitimate) and a
 * readable header seen during a partition salvage (singleVolumeNumber is
 * zero).  When dounlink ends up set, unlink(name) is called and a failure is
 * logged with errno.
 * NOTE(review): the lines that assign dounlink and honour Testing are not
 * visible in this excerpt.
 */
1632 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1633 struct VolumeDiskHeader *hdr, void *rock)
1635 struct SalvageScanParams *params;
1638 params = (struct SalvageScanParams *)rock;
1641 /* no header; header is too bogus to read in at all */
1643 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1649 } else if (!params->singleVolumeNumber) {
1650 /* We were able to read in a header, but RecordHeader said something
1651 * was wrong with it. We only unlink those if we are doing a partition
1658 if (dounlink && unlink(name)) {
1659 Log("Error %d while trying to unlink %s\n", errno, name);
1664 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1665 * the fileserver for VG information, or by scanning the /vicepX partition.
1667 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1668 * are salvaging, or 0 if this is a partition
1671 * @return operation status
1673 * @retval -1 we raced with a fileserver restart; checking out and locking
1674 * volumes must be retried
/* Fills salvinfo->volumeSummaryp and salvinfo->nVolumes.
 *
 * The fileserver is asked first through AskVolumeSummary.  When that does
 * not supply the information, the partition is scanned instead:
 *  - for a partition salvage (singleVolumeNumber is 0) the headers are first
 *    counted with a CountHeader walk; for a single volume group the array is
 *    sized at VOL_VG_MAX_VOLS;
 *  - the summary array is allocated and asserted non-NULL;
 *  - a struct SalvageScanParams is filled in and its address is passed as the
 *    rock for a second walk using RecordHeader and UnlinkHeader;
 *  - the recorded count is stored in salvinfo->nVolumes and the summaries
 *    are sorted with qsort.
 * NOTE(review): the tests on code and the return statements are on lines not
 * visible in this excerpt; confirm them against the complete source.
 */
1677 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1679 afs_int32 nvols = 0;
1680 struct SalvageScanParams params;
1683 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1685 /* we successfully got the vol information from the fileserver; no
1686 * need to scan the partition */
1690 /* we need to retry volume checkout */
1694 if (!singleVolumeNumber) {
1695 /* Count how many volumes we have in /vicepX */
1696 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1699 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1704 nvols = VOL_VG_MAX_VOLS;
1707 salvinfo->volumeSummaryp = malloc(nvols * sizeof(struct VolumeSummary));
1708 osi_Assert(salvinfo->volumeSummaryp != NULL);
1710 params.singleVolumeNumber = singleVolumeNumber;
1711 params.vsp = salvinfo->volumeSummaryp;
1712 params.nVolumes = 0;
1713 params.totalVolumes = nvols;
1715 params.salvinfo = salvinfo;
1717 /* walk the partition directory of volume headers and record the info
1718 * about them; unlinking invalid headers */
1719 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1720 UnlinkHeader, &params);
1722 /* we apparently need to retry checking-out/locking volumes */
1726 Abort("Failed to get volume header summary\n");
1728 salvinfo->nVolumes = params.nVolumes;
1730 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1736 /* Find the link table. This should be associated with the RW volume or, if
1737 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
/* Searches the special inodes of every volume in the group for the link
 * table.  For each of the nVols summaries, the entries examined are the
 * first nSpecialInodes records starting at allInodes + isp[i].index; the
 * inodeNumber of the first record whose u.special.type is VI_LINKTABLE is
 * returned.  The value returned when nothing matches is on a line not
 * visible in this excerpt.
 */
1740 FindLinkHandle(struct InodeSummary *isp, int nVols,
1741 struct ViceInodeInfo *allInodes)
1744 struct ViceInodeInfo *ip;
1746 for (i = 0; i < nVols; i++) {
1747 ip = allInodes + isp[i].index;
1748 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1749 if (ip[j].u.special.type == VI_LINKTABLE)
1750 return ip[j].inodeNumber;
/* Creates (if ino is not already valid) and initialises the link table for
 * the volume group described by isp.
 *
 * Visible steps: create a VI_LINKTABLE special inode when needed, point
 * salvinfo->VGLinkH at it and open it, truncate the file to
 * sizeof(version) + sizeof(short), write the LINKTABLEMAGIC /
 * LINKTABLEVERSION stamp at offset 0, close the handle, and record the inode
 * in the volume summary header when one is attached to isp.  Each failure
 * aborts the salvage with a message naming the step that failed and errno.
 * NOTE(review): some lines (the assignment of the created inode, the
 * comparison on the write result, the return) are not visible in this
 * excerpt.
 */
1757 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1759 struct versionStamp version;
1762 if (!VALID_INO(ino))
1764 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
1765 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1766 if (!VALID_INO(ino))
1768 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1769 isp->RWvolumeId, errno);
1770 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1771 fdP = IH_OPEN(salvinfo->VGLinkH);
1773 Abort("Can't open link table for volume %u (error = %d)\n",
1774 isp->RWvolumeId, errno);
1776 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1777 Abort("Can't truncate link table for volume %u (error = %d)\n",
1778 isp->RWvolumeId, errno);
/* stamp the freshly truncated table with its magic/version header */
1780 version.magic = LINKTABLEMAGIC;
1781 version.version = LINKTABLEVERSION;
1783 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1785 Abort("Can't write link table for volume %u (error = %d)\n",
1786 isp->RWvolumeId, errno);
1788 FDH_REALLYCLOSE(fdP);
1790 /* If the volume summary exists (i.e., the V*.vol header file exists),
1791 * then set this inode there as well.
1793 if (isp->volSummary)
1794 isp->volSummary->header.linkTable = ino;
1803 SVGParms_t *parms = (SVGParms_t *) arg;
1804 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
/* Entry point for salvaging one volume group.
 *
 * The visible code resets the per-group fields of salvinfo (VolumeChanged,
 * VGLinkH, VGLinkH_cnt, VolInfo), packs isp, nVols and salvinfo into parms,
 * and runs nt_SVG on a joinable pthread, waiting for it with pthread_join.
 * Failures of the pthread calls are logged with the RW volume id.
 * NOTE(review): the closing "#endif" names AFS_NT40_ENV, so the thread path
 * is presumably Windows-only and the alternative branch is on lines not
 * visible in this excerpt; confirm against the complete source.
 */
1809 SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1812 pthread_attr_t tattr;
1816 /* Initialize per volume global variables, even if later code does so */
1817 salvinfo->VolumeChanged = 0;
1818 salvinfo->VGLinkH = NULL;
1819 salvinfo->VGLinkH_cnt = 0;
1820 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1822 parms.svgp_inodeSummaryp = isp;
1823 parms.svgp_count = nVols;
1824 parms.svgp_salvinfo = salvinfo;
1825 code = pthread_attr_init(&tattr);
1827 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1831 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1833 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1836 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1838 Log("Failed to create thread to salvage volume group %u\n",
1842 (void)pthread_join(tid, NULL);
1844 #endif /* AFS_NT40_ENV */
/* Does the actual work of salvaging one volume group (isp[0..nVols-1]).
 *
 * Visible flow:
 *  - haveRWvolume is set when the first summary is the RW volume and has
 *    special inodes; unless ForceSalvage is set, QuickCheck is consulted
 *    first.
 *  - When forking is allowed and debug is off, the work runs in a child and
 *    the parent waits for it.
 *  - The inode records of the whole group (sum of nInodes) are read from
 *    salvinfo->inodeFd starting at record isp->index; allInodes is biased so
 *    that summary index fields can be used directly.
 *  - Under AFS_NAMEI_ENV the link table is located with FindLinkHandle or
 *    recreated with CreateLinkTable, in which case every non-special inode
 *    gets a link count of 1 through namei_SetLinkCount.
 *  - Volumes are processed from the last summary down to salvageTo, each in
 *    two passes (check = 1, then check = 0) through SalvageVolumeHeaderFile
 *    and SalvageVnodes, with MaybeZapVolume called on failure.
 *  - Remaining link-count differences are applied with IH_DEC / IH_INC, the
 *    link table inode itself being handled last.
 *  - SalvageVolume is called for the directory checks and the link handle is
 *    released.
 * NOTE(review): many control-flow lines (returns, breaks, braces, several
 * declarations) are not visible in this excerpt; confirm details against the
 * complete source.
 */
1847 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1849 struct ViceInodeInfo *inodes, *allInodes, *ip;
1850 int i, totalInodes, size, salvageTo;
1854 int dec_VGLinkH = 0;
1856 FdHandle_t *fdP = NULL;
1858 salvinfo->VGLinkH_cnt = 0;
1859 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1860 && isp->nSpecialInodes > 0);
1861 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1862 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1865 if (ShowMounts && !haveRWvolume)
1867 if (canfork && !debug && Fork() != 0) {
1868 (void)Wait("Salvage volume group");
1871 for (i = 0, totalInodes = 0; i < nVols; i++)
1872 totalInodes += isp[i].nInodes;
1873 size = totalInodes * sizeof(struct ViceInodeInfo);
1874 inodes = (struct ViceInodeInfo *)malloc(size);
1875 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1876 * for the partition, if all the inodes
1877 * had been read into memory */
1878 osi_Assert(afs_lseek
1879 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1881 osi_Assert(read(salvinfo->inodeFd, inodes, size) == size);
1883 /* Don't try to salvage a read write volume if there isn't one on this
1885 salvageTo = haveRWvolume ? 0 : 1;
1887 #ifdef AFS_NAMEI_ENV
1888 ino = FindLinkHandle(isp, nVols, allInodes);
1889 if (VALID_INO(ino)) {
1890 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1891 fdP = IH_OPEN(salvinfo->VGLinkH);
1893 if (!VALID_INO(ino) || fdP == NULL) {
1894 Log("%s link table for volume %u.\n",
1895 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1897 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1900 struct ViceInodeInfo *ip;
1901 CreateLinkTable(salvinfo, isp, ino);
1902 fdP = IH_OPEN(salvinfo->VGLinkH);
1903 /* Sync fake 1 link counts to the link table, now that it exists */
1905 for (i = 0; i < nVols; i++) {
1906 ip = allInodes + isp[i].index;
1907 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1908 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1915 FDH_REALLYCLOSE(fdP);
1917 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1920 /* Salvage in reverse order--read/write volume last; this way any
1921 * Inodes not referenced by the time we salvage the read/write volume
1922 * can be picked up by the read/write volume */
1923 /* ACTUALLY, that's not done right now--the inodes just vanish */
1924 for (i = nVols - 1; i >= salvageTo; i--) {
1926 struct InodeSummary *lisp = &isp[i];
1927 #ifdef AFS_NAMEI_ENV
1928 /* If only the RO is present on this partition, the link table
1929 * shows up as a RW volume special file. Need to make sure the
1930 * salvager doesn't try to salvage the non-existent RW.
1932 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1933 /* If this only special inode is the link table, continue */
1934 if (inodes->u.special.type == VI_LINKTABLE) {
1941 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1942 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1943 /* Check inodes twice. The second time do things seriously. This
1944 * way the whole RO volume can be deleted, below, if anything goes wrong */
1945 for (check = 1; check >= 0; check--) {
1947 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
1949 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
1950 if (rw && deleteMe) {
1951 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1952 * volume won't be called */
1958 if (rw && check == 1)
1960 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
1961 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
1967 /* Fix actual inode counts */
1970 Log("totalInodes %d\n",totalInodes);
1971 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1972 static int TraceBadLinkCounts = 0;
1973 #ifdef AFS_NAMEI_ENV
1974 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
1975 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
1976 VGLinkH_p1 = ip->u.param[0];
1977 continue; /* Deal with this last. */
1980 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1981 TraceBadLinkCounts--; /* Limit reports, per volume */
1982 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
1984 while (ip->linkCount > 0) {
1985 /* below used to assert, not break */
1987 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1988 Log("idec failed. inode %s errno %d\n",
1989 PrintInode(stmp, ip->inodeNumber), errno);
1995 while (ip->linkCount < 0) {
1996 /* these used to be asserts */
1998 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1999 Log("iinc failed. inode %s errno %d\n",
2000 PrintInode(stmp, ip->inodeNumber), errno);
2007 #ifdef AFS_NAMEI_ENV
2008 while (dec_VGLinkH > 0) {
2009 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2010 Log("idec failed on link table, errno = %d\n", errno);
2014 while (dec_VGLinkH < 0) {
2015 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2016 Log("iinc failed on link table, errno = %d\n", errno);
2023 /* Directory consistency checks on the rw volume */
2025 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2026 IH_RELEASE(salvinfo->VGLinkH);
2028 if (canfork && !debug) {
/* Inspects the volume info header of every volume in the group before any
 * fork happens; DoSalvageVolumeGroup consults it when ForceSalvage is off.
 *
 * For each summary the VolumeDiskData is read through a handle on
 * vs->header.volumeInfo.  The visible acceptance test requires a full-size
 * read, stamp.magic equal to VOLUMEINFOMAGIC, dontSalvage equal to
 * DONT_SALVAGE, and both needsSalvaged and destroyMe equal to 0.  A header
 * that passes but is still marked inUse has inUse cleared and inService set,
 * and is written back with IH_IWRITE.
 * NOTE(review): the return statements and the handling of a missing
 * volSummary are on lines not visible in this excerpt; presumably a nonzero
 * result lets the caller skip the salvage -- confirm against the complete
 * source.
 */
2035 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2037 /* Check headers BEFORE forking */
2041 for (i = 0; i < nVols; i++) {
2042 struct VolumeSummary *vs = isp[i].volSummary;
2043 VolumeDiskData volHeader;
2045 /* Don't salvage just because phantom rw volume is there... */
2046 /* (If a read-only volume exists, read/write inodes must also exist) */
2047 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2051 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2052 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2053 == sizeof(volHeader)
2054 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2055 && volHeader.dontSalvage == DONT_SALVAGE
2056 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2057 if (volHeader.inUse != 0) {
2058 volHeader.inUse = 0;
2059 volHeader.inService = 1;
2061 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2062 != sizeof(volHeader)) {
2078 /* SalvageVolumeHeaderFile
2080 * Salvage the top level V*.vol header file. Make sure the special files
2081 * exist and that there are no duplicates.
2083 * Calls SalvageHeader for each possible type of volume special file.
/* Rebuilds the in-memory volume header for one volume from its special
 * inodes and, when it differs from what is on disk, rewrites the V*.vol
 * header file.
 *
 * isp       summary of the volume being processed
 * inodes    inode array indexed by isp->index + i for the special inodes
 * RW        nonzero when the volume is the read-write volume
 * check     nonzero on the first, check-only pass made by the caller; zero on
 *           the pass that applies changes
 * deleteMe  passed through to SalvageHeader
 *
 * Visible steps:
 *  - a skip[] array (one entry per special inode) is allocated; if that
 *    fails, duplicate special inode recovery is disabled but the salvage
 *    continues;
 *  - special inodes already referenced by the existing header are remembered
 *    in goodspecial[] so that, when duplicates of one type are found, the
 *    referenced inode is preferred;
 *  - inodes whose type is outside 1..MAXINODETYPE are reported as rubbish;
 *    the remaining non-obsolete ones are entered into tempHeader through the
 *    stuff[] table;
 *  - SalvageHeader is called for each special inode type;
 *  - when no volume summary exists one is allocated and the header is created
 *    with VCreateVolumeDiskHeader; when the existing header differs from
 *    tempHeader it is rewritten with VWriteVolumeDiskHeader.
 * The newly allocated VolumeSummary is asserted non-NULL before it is used,
 * in the same way other allocations in this file are checked.
 * NOTE(review): numerous control-flow lines (returns, braces, Testing and
 * Showmode guards) are not visible in this excerpt; confirm against the
 * complete source.
 */
2087 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2088 struct ViceInodeInfo *inodes, int RW,
2089 int check, int *deleteMe)
2092 struct ViceInodeInfo *ip;
2093 int allinodesobsolete = 1;
2094 struct VolumeDiskHeader diskHeader;
2095 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2097 struct VolumeHeader tempHeader;
2098 struct afs_inode_info stuff[MAXINODETYPE];
2100 /* keeps track of special inodes that are probably 'good'; they are
2101 * referenced in the vol header, and are included in the given inodes
2106 } goodspecial[MAXINODETYPE];
2111 memset(goodspecial, 0, sizeof(goodspecial));
2113 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2115 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2117 Log("cannot allocate memory for inode skip array when salvaging "
2118 "volume %lu; not performing duplicate special inode recovery\n",
2119 afs_printable_uint32_lu(isp->volumeId));
2120 /* still try to perform the salvage; the skip array only does anything
2121 * if we detect duplicate special inodes */
2124 init_inode_info(&tempHeader, stuff);
2127 * First, look at the special inodes and see if any are referenced by
2128 * the existing volume header. If we find duplicate special inodes, we
2129 * can use this information to use the referenced inode (it's more
2130 * likely to be the 'good' one), and throw away the duplicates.
2132 if (isp->volSummary && skip) {
2133 /* use tempHeader, so we can use the stuff[] array to easily index
2134 * into the isp->volSummary special inodes */
2135 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2137 for (i = 0; i < isp->nSpecialInodes; i++) {
2138 ip = &inodes[isp->index + i];
2139 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2140 /* will get taken care of in a later loop */
2143 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2144 goodspecial[ip->u.special.type-1].valid = 1;
2145 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2150 memset(&tempHeader, 0, sizeof(tempHeader));
2151 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2152 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2153 tempHeader.id = isp->volumeId;
2154 tempHeader.parent = isp->RWvolumeId;
2156 /* Check for duplicates (inodes are sorted by type field) */
2157 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2158 ip = &inodes[isp->index + i];
2159 if (ip->u.special.type == (ip + 1)->u.special.type) {
2160 afs_ino_str_t stmp1, stmp2;
2162 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2163 /* Will be caught in the loop below */
2167 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2168 ip->u.special.type, isp->volumeId,
2169 PrintInode(stmp1, ip->inodeNumber),
2170 PrintInode(stmp2, (ip+1)->inodeNumber));
2172 if (skip && goodspecial[ip->u.special.type-1].valid) {
2173 Inode gi = goodspecial[ip->u.special.type-1].inode;
2176 Log("using special inode referenced by vol header (%s)\n",
2177 PrintInode(stmp1, gi));
2180 /* the volume header references some special inode of
2181 * this type in the inodes array; are we it? */
2182 if (ip->inodeNumber != gi) {
2184 } else if ((ip+1)->inodeNumber != gi) {
2185 /* in case this is the last iteration; we need to
2186 * make sure we check ip+1, too */
2191 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2199 for (i = 0; i < isp->nSpecialInodes; i++) {
2201 ip = &inodes[isp->index + i];
2202 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2204 Log("Rubbish header inode %s of type %d\n",
2205 PrintInode(stmp, ip->inodeNumber),
2206 ip->u.special.type);
2212 Log("Rubbish header inode %s of type %d; deleted\n",
2213 PrintInode(stmp, ip->inodeNumber),
2214 ip->u.special.type);
2215 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2216 if (skip && skip[i]) {
2217 if (orphans == ORPH_REMOVE) {
2218 Log("Removing orphan special inode %s of type %d\n",
2219 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2222 Log("Ignoring orphan special inode %s of type %d\n",
2223 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2224 /* fall through to the ip->linkCount--; line below */
2227 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2228 allinodesobsolete = 0;
2230 if (!check && ip->u.special.type != VI_LINKTABLE)
2231 ip->linkCount--; /* Keep the inode around */
2239 if (allinodesobsolete) {
2246 salvinfo->VGLinkH_cnt++; /* one for every header. */
2248 if (!RW && !check && isp->volSummary) {
2249 ClearROInUseBit(isp->volSummary);
2253 for (i = 0; i < MAXINODETYPE; i++) {
2254 if (stuff[i].inodeType == VI_LINKTABLE) {
2255 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2256 * And we may have recreated the link table earlier, so set the
2257 * RW header as well.
2259 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2260 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2264 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2268 if (isp->volSummary == NULL) {
2270 char headerName[64];
2271 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2272 (void)afs_snprintf(path, sizeof path, "%s/%s", salvinfo->fileSysPath, headerName);
2274 Log("No header file for volume %u\n", isp->volumeId);
2278 Log("No header file for volume %u; %screating %s\n",
2279 isp->volumeId, (Testing ? "it would have been " : ""),
2281 isp->volSummary = (struct VolumeSummary *)
2282 malloc(sizeof(struct VolumeSummary));
/* the summary is dereferenced immediately below; stop here on allocation failure */
osi_Assert(isp->volSummary != NULL);
2283 isp->volSummary->fileName = ToString(headerName);
2285 writefunc = VCreateVolumeDiskHeader;
2288 char headerName[64];
2289 /* hack: these two fields are obsolete... */
2290 isp->volSummary->header.volumeAcl = 0;
2291 isp->volSummary->header.volumeMountTable = 0;
2294 (&isp->volSummary->header, &tempHeader,
2295 sizeof(struct VolumeHeader))) {
2296 /* We often remove the name before calling us, so we make a fake one up */
2297 if (isp->volSummary->fileName) {
2298 strcpy(headerName, isp->volSummary->fileName);
2300 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2301 isp->volSummary->fileName = ToString(headerName);
2303 (void)afs_snprintf(path, sizeof path, "%s/%s", salvinfo->fileSysPath, headerName);
2305 Log("Header file %s is damaged or no longer valid%s\n", path,
2306 (check ? "" : "; repairing"));
2310 writefunc = VWriteVolumeDiskHeader;
2314 memcpy(&isp->volSummary->header, &tempHeader,
2315 sizeof(struct VolumeHeader));
2318 Log("It would have written a new header file for volume %u\n",
2322 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2323 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2325 Log("Error %ld writing volume header file for volume %lu\n",
2326 afs_printable_int32_ld(code),
2327 afs_printable_uint32_lu(diskHeader.id));
2332 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2333 isp->volSummary->header.volumeInfo);
/*
 * Check one special header inode of a volume, described by sp, and
 * recreate it when it is missing or its contents are not trustworthy.
 *
 * A zero inode number in the volume header is reported and, on the
 * recreating path, replaced by a new INODESPECIAL inode from IH_CREATE.
 * The inode is then opened and sp->size bytes are read; a short read or a
 * magic number different from sp->stamp.magic marks the header as
 * corrupted.  When recreation is wanted and Testing is off, the file is
 * truncated and rewritten: a VI_VOLINFO inode receives a fresh placeholder
 * VolumeDiskData, while the other write path stores only the version
 * stamp.  For VI_VOLINFO the resulting volume info is also copied into
 * salvinfo->VolInfo and its update or creation time is logged.
 *
 * NOTE(review): the way check and deleteMe influence the return value is
 * not visible in these lines -- confirm against the complete function.
 */
2338 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2339 struct InodeSummary *isp, int check, int *deleteMe)
2342 VolumeDiskData volumeInfo;
2343 struct versionStamp fileHeader;
2352 #ifndef AFS_NAMEI_ENV
2353 if (sp->inodeType == VI_LINKTABLE)
2356 if (*(sp->inode) == 0) {
2358 Log("Missing inode in volume header (%s)\n", sp->description);
2362 Log("Missing inode in volume header (%s); %s\n", sp->description,
2363 (Testing ? "it would have recreated it" : "recreating"));
2366 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2367 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2368 if (!VALID_INO(*(sp->inode)))
2370 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2371 sp->description, errno);
/* Open the special inode; the reads and rewrites below all go through fdP. */
2376 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2377 fdP = IH_OPEN(specH);
2378 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2379 /* bail out early and destroy the volume */
2381 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2388 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2389 sp->description, errno);
2392 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2393 || header.fileHeader.magic != sp->stamp.magic)) {
2395 Log("Part of the header (%s) is corrupted\n", sp->description);
2396 FDH_REALLYCLOSE(fdP);
2400 Log("Part of the header (%s) is corrupted; recreating\n",
2403 /* header can be garbage; make sure we don't read garbage data from
2405 memset(&header, 0, sizeof(header));
2407 if (sp->inodeType == VI_VOLINFO
2408 && header.volumeInfo.destroyMe == DESTROY_ME) {
2411 FDH_REALLYCLOSE(fdP);
2415 if (recreate && !Testing) {
2418 ("Internal error: recreating volume header (%s) in check mode\n",
2420 nBytes = FDH_TRUNC(fdP, 0);
2422 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2423 sp->description, errno);
2425 /* The following code should be moved into vutil.c */
2426 if (sp->inodeType == VI_VOLINFO) {
/* Placeholder volume info: named bogus.ID, out of service and unblessed. */
2428 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2429 header.volumeInfo.stamp = sp->stamp;
2430 header.volumeInfo.id = isp->volumeId;
2431 header.volumeInfo.parentId = isp->RWvolumeId;
2432 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2433 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2434 isp->volumeId, isp->volumeId);
2435 header.volumeInfo.inService = 0;
2436 header.volumeInfo.blessed = 0;
2437 /* The + 1000 is a hack in case there are any files out in venus caches */
2438 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2439 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2440 header.volumeInfo.needsCallback = 0;
2441 gettimeofday(&tp, 0);
2442 header.volumeInfo.creationDate = tp.tv_sec;
2444 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2445 sizeof(header.volumeInfo), 0);
2446 if (nBytes != sizeof(header.volumeInfo)) {
2449 ("Unable to write volume header file (%s) (errno = %d)\n",
2450 sp->description, errno);
2451 Abort("Unable to write entire volume header file (%s)\n",
/* Other write path: only the version stamp is stored at offset 0. */
2455 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2456 if (nBytes != sizeof(sp->stamp)) {
2459 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2460 sp->description, errno);
2462 ("Unable to write entire version stamp in volume header file (%s)\n",
2467 FDH_REALLYCLOSE(fdP);
/* Keep the volume info in salvinfo and log when it was updated or created. */
2469 if (sp->inodeType == VI_VOLINFO) {
2470 salvinfo->VolInfo = header.volumeInfo;
2474 if (salvinfo->VolInfo.updateDate) {
2475 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2477 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2478 salvinfo->VolInfo.id,
2479 (Testing ? "it would have been " : ""), update);
2481 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2483 Log("%s (%u) not updated (created %s)\n",
2484 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
/*
 * Salvage both vnode indexes of the volume described by thisIsp.
 *
 * The inode list handed to SalvageIndex is the part of inodes[] that
 * follows the special inodes of rwIsp; RW is true only when thisIsp is the
 * read-write volume itself.  The small index is processed before the large
 * one.  Returns 0 when both ismall and ilarge are 0, otherwise -1.
 * NOTE(review): ismall and ilarge are presumably the two SalvageIndex
 * results; the assignments are not visible in these lines.
 */
2494 SalvageVnodes(struct SalvInfo *salvinfo,
2495 struct InodeSummary *rwIsp,
2496 struct InodeSummary *thisIsp,
2497 struct ViceInodeInfo *inodes, int check)
2499 int ilarge, ismall, ioffset, RW, nInodes;
2500 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2503 RW = (rwIsp == thisIsp);
2504 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2506 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2507 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
/* In check mode a failing small index is tested before the large index is looked at. */
2508 if (check && ismall == -1)
2511 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2512 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2513 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * Reconcile the vnode index file ino (of the given class) with the inodes
 * listed in ip[0..nInodes).
 *
 * Every allocated vnode is read in index order.  A vnode with inode number
 * 0 or with the wrong magic can be cleared.  Otherwise the inode list is
 * advanced to the entries carrying the same vnode number, and the vnode is
 * corrected from the chosen inode: uniquifier, data version, inode number
 * and length.  That inode has its linkCount decremented so it is kept.  A
 * vnode that names an inode, or is a directory, but has no matching inode
 * is cleared.  Modified vnodes are written back to the index unless
 * Testing is set, and salvinfo->VolumeChanged is raised.
 *
 * The assertion near the end of the loop body requires that no vnode was
 * marked changed while check is set.
 * NOTE(review): several guards and the return value are not visible in
 * these lines -- confirm against the complete function.
 */
2517 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2518 struct ViceInodeInfo *ip, int nInodes,
2519 struct VolumeSummary *volSummary, int check)
2521 char buf[SIZEOF_LARGEDISKVNODE];
2522 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2524 StreamHandle_t *file;
2525 struct VnodeClassInfo *vcp;
2527 afs_sfsize_t nVnodes;
2528 afs_fsize_t vnodeLength;
2530 afs_ino_str_t stmp1, stmp2;
2534 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2535 fdP = IH_OPEN(handle);
2536 osi_Assert(fdP != NULL);
2537 file = FDH_FDOPEN(fdP, "r+");
2538 osi_Assert(file != NULL);
2539 vcp = &VnodeClassInfo[class];
2540 size = OS_SIZE(fdP->fd_fd);
2541 osi_Assert(size != -1);
/*
 * The first record-sized slot of the file is not counted as a vnode and
 * is skipped by the seek below (presumably it holds the index header).
 */
2542 nVnodes = (size / vcp->diskSize) - 1;
2544 osi_Assert((nVnodes + 1) * vcp->diskSize == size);
2545 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
2549 for (vnodeIndex = 0;
2550 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2551 nVnodes--, vnodeIndex++) {
2552 if (vnode->type != vNull) {
2553 int vnodeChanged = 0;
2554 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2555 if (VNDISK_GET_INO(vnode) == 0) {
2557 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2558 memset(vnode, 0, vcp->diskSize);
2562 if (vcp->magic != vnode->vnodeMagic) {
2563 /* bad magic #, probably partially created vnode */
2564 Log("Partially allocated vnode %d deleted.\n",
2566 memset(vnode, 0, vcp->diskSize);
2570 /* ****** Should do a bit more salvage here: e.g. make sure
2571 * vnode type matches what it should be given the index */
/* Pass over inodes that belong to vnode numbers below the current one. */
2572 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2573 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2574 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2575 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2582 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2583 /* The following doesn't work, because the version number
2584 * is not maintained correctly by the file server */
2585 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2586 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2588 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2594 /* For RW volume, look for vnode with matching inode number;
2595 * if no such match, take the first determined by our sort
2597 struct ViceInodeInfo *lip = ip;
2598 int lnInodes = nInodes;
2600 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2601 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2610 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2611 /* "Matching" inode */
2615 vu = vnode->uniquifier;
2616 iu = ip->u.vnode.vnodeUniquifier;
2617 vd = vnode->dataVersion;
2618 id = ip->u.vnode.inodeDataVersion;
2620 * Because of the possibility of the uniquifier overflows (> 4M)
2621 * we compare them modulo the low 22-bits; we shouldn't worry
2622 * about mismatching since they shouldn't to many old
2623 * uniquifiers of the same vnode...
2625 if (IUnique(vu) != IUnique(iu)) {
2627 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2630 vnode->uniquifier = iu;
2631 #ifdef AFS_3DISPARES
2632 vnode->dataVersion = (id >= vd ?
2635 1887437 ? vd : id) :
2638 1887437 ? id : vd));
2640 #if defined(AFS_SGI_EXMAG)
2641 vnode->dataVersion = (id >= vd ?
2644 15099494 ? vd : id) :
2647 15099494 ? id : vd));
2649 vnode->dataVersion = (id > vd ? id : vd);
2650 #endif /* AFS_SGI_EXMAG */
2651 #endif /* AFS_3DISPARES */
2654 /* don't bother checking for vd > id any more, since
2655 * partial file transfers always result in this state,
2656 * and you can't do much else anyway (you've already
2657 * found the best data you can) */
2658 #ifdef AFS_3DISPARES
2659 if (!vnodeIsDirectory(vnodeNumber)
2660 && ((vd < id && (id - vd) < 1887437)
2661 || ((vd > id && (vd - id) > 1887437)))) {
2663 #if defined(AFS_SGI_EXMAG)
2664 if (!vnodeIsDirectory(vnodeNumber)
2665 && ((vd < id && (id - vd) < 15099494)
2666 || ((vd > id && (vd - id) > 15099494)))) {
2668 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2669 #endif /* AFS_SGI_EXMAG */
2672 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2673 vnode->dataVersion = id;
/* Make the vnode refer to the inode that was selected above. */
2678 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2681 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2683 VNDISK_SET_INO(vnode, ip->inodeNumber);
2688 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2690 VNDISK_SET_INO(vnode, ip->inodeNumber);
/* The byte count of the inode overrides the length stored in the vnode. */
2693 VNDISK_GET_LEN(vnodeLength, vnode);
2694 if (ip->byteCount != vnodeLength) {
2697 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2702 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2703 VNDISK_SET_LEN(vnode, ip->byteCount);
2707 ip->linkCount--; /* Keep the inode around */
2710 } else { /* no matching inode */
2712 if (VNDISK_GET_INO(vnode) != 0
2713 || vnode->type == vDirectory) {
2714 /* No matching inode--get rid of the vnode */
2716 if (VNDISK_GET_INO(vnode)) {
2718 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2722 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2727 if (VNDISK_GET_INO(vnode)) {
2729 time_t serverModifyTime = vnode->serverModifyTime;
2730 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2734 time_t serverModifyTime = vnode->serverModifyTime;
2735 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2738 memset(vnode, 0, vcp->diskSize);
2741 /* Should not reach here because we checked for
2742 * (inodeNumber == 0) above. And where we zero the vnode,
2743 * we also goto vnodeDone.
2747 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2751 } /* VNDISK_GET_INO(vnode) != 0 */
/*
 * A check pass must not have marked the vnode as changed.  Changed vnodes
 * go back to their own slot in the index unless Testing is set.
 */
2753 osi_Assert(!(vnodeChanged && check));
2754 if (vnodeChanged && !Testing) {
2755 osi_Assert(IH_IWRITE
2756 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2757 (char *)vnode, vcp->diskSize)
2759 salvinfo->VolumeChanged = 1; /* For break call back */
/*
 * Look up the in-memory VnodeEssence entry for vnodeNumber.
 *
 * The vnode class selects the table in salvinfo->vnodeInfo and the bit
 * number is the index into it.  Returns NULL when that index is at or
 * beyond vip->nVnodes, otherwise a pointer to the entry.
 */
2770 struct VnodeEssence *
2771 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2774 struct VnodeInfo *vip;
2777 class = vnodeIdToClass(vnodeNumber);
2778 vip = &salvinfo->vnodeInfo[class];
2779 offset = vnodeIdToBitNumber(vnodeNumber);
2780 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * Give dir its own copy of the directory inode before it is modified.
 *
 * Skipped when dir->copied is already set or when Testing.  Otherwise the
 * directory vnode is read from the large vnode index, a new inode is
 * created with IH_CREATE (the vnode dataVersion is increased inside that
 * call), the old contents are duplicated with CopyInode, the vnode is
 * pointed at the new inode and written back, and dir->dirHandle is
 * re-targeted at the new inode.  The old inode is left in place here
 * because the directory is still being scanned.
 */
2784 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2786 /* Copy the directory unconditionally if we are going to change it:
2787 * not just if was cloned.
2789 struct VnodeDiskObject vnode;
2790 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2791 Inode oldinode, newinode;
2794 if (dir->copied || Testing)
2796 DFlush(); /* Well justified paranoia... */
2799 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2800 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2802 osi_Assert(code == sizeof(vnode));
2803 oldinode = VNDISK_GET_INO(&vnode);
2804 /* Increment the version number by a whole lot to avoid problems with
2805 * clients that were promised new version numbers--but the file server
2806 * crashed before the versions were written to disk.
2809 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2810 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2812 osi_Assert(VALID_INO(newinode));
2813 osi_Assert(CopyInode(salvinfo->fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2815 VNDISK_SET_INO(&vnode, newinode);
2817 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2818 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2820 osi_Assert(code == sizeof(vnode));
2822 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2823 salvinfo->fileSysDevice, newinode,
2824 &salvinfo->VolumeChanged);
2825 /* Don't delete the original inode right away, because the directory is
2826 * still being scanned.
2832 * This function should either successfully create a new dir, or give up
2833 * and leave things the way they were. In particular, if it fails to write
2834 * the new dir properly, it should return w/o changing the reference to the
/*
 * Rebuild a damaged directory into a brand new inode.
 *
 * The directory vnode is read, a new inode is created, and DirSalvage
 * copies what it can from dir->dirHandle into it, using dir->vnodeNumber
 * for the directory itself and vnode.parent (or the directory itself when
 * parent is 0) for the parent entry.  If DirSalvage or the following DirOK
 * check fails, the new inode is released with IH_DEC.  On success the
 * vnode gets the new inode and length, the change is synced, the old inode
 * is released and dir->dirHandle is replaced by the new handle.
 */
2838 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2840 struct VnodeDiskObject vnode;
2841 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2842 Inode oldinode, newinode;
2847 afs_int32 parentUnique = 1;
2848 struct VnodeEssence *vnodeEssence;
2853 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2855 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2856 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2858 osi_Assert(lcode == sizeof(vnode));
2859 oldinode = VNDISK_GET_INO(&vnode);
2860 /* Increment the version number by a whole lot to avoid problems with
2861 * clients that were promised new version numbers--but the file server
2862 * crashed before the versions were written to disk.
2865 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2866 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2868 osi_Assert(VALID_INO(newinode));
2869 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2870 &salvinfo->VolumeChanged);
2872 /* Assign . and .. vnode numbers from dir and vnode.parent.
2873 * The uniquifier for . is in the vnode.
2874 * The uniquifier for .. might be set to a bogus value of 1 and
2875 * the salvager will later clean it up.
2877 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2878 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2881 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2883 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2888 /* didn't really build the new directory properly, let's just give up. */
2889 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2890 Log("Directory salvage returned code %d, continuing.\n", code);
2892 Log("also failed to decrement link count on new inode");
/* Second line of defence: the rebuilt directory must itself pass DirOK. */
2896 Log("Checking the results of the directory salvage...\n");
2897 if (!DirOK(&newdir)) {
2898 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2899 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2900 osi_Assert(code == 0);
/* Rebuilt directory accepted: record its inode and length in the vnode. */
2904 VNDISK_SET_INO(&vnode, newinode);
2905 length = Length(&newdir);
2906 VNDISK_SET_LEN(&vnode, length);
2908 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2909 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2911 osi_Assert(lcode == sizeof(vnode));
2914 nt_sync(salvinfo->fileSysDevice);
2916 sync(); /* this is slow, but hopefully rarely called. We don't have
2917 * an open FD on the file itself to fsync.
2921 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
2923 /* make sure old directory file is really closed */
2924 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2925 FDH_REALLYCLOSE(fdP);
/* Release the old directory inode and switch dir over to the new handle. */
2927 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2928 osi_Assert(code == 0);
2929 dir->dirHandle = newdir;
2933 * arguments for JudgeEntry.
/* Bundle handed to JudgeEntry through its untyped arock parameter. */
2935 struct judgeEntry_params {
2936 struct DirSummary *dir; /**< directory we're examining entries in */
2937 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/*
 * Examine one directory entry (name, vnodeNumber, unique) of params->dir
 * and repair or remove it when it disagrees with the vnode tables.
 *
 * arock is a struct judgeEntry_params carrying the directory and the
 * SalvInfo.  The checks visible here are, in order:
 *  - the vnode number must be known to CheckVnodeNumber;
 *  - the entry uniquifier must match the vnode uniquifier, else the entry
 *    is deleted and, unless it is to be dropped, created again with the
 *    vnode value;
 *  - the two dot entries are rewritten when they name the wrong vnode;
 *  - names starting with .__afs are deleted and their vnode is marked
 *    unclaimed and todelete;
 *  - for all other names: reporting of suid, mount point and root-owned
 *    files, recording of a name for large-class vnodes, and resolution of
 *    the parent claim on the vnode.
 * Every modification goes through CopyOnWrite first.  The last visible
 * statement decrements vnodeEssence->count.
 * NOTE(review): the Testing guards around the deletions and the final
 * return value are not visible in these lines.
 */
2941 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
2944 struct judgeEntry_params *params = arock;
2945 struct DirSummary *dir = params->dir;
2946 struct SalvInfo *salvinfo = params->salvinfo;
2947 struct VnodeEssence *vnodeEssence;
2948 afs_int32 dirOrphaned, todelete;
2950 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
/* An entry naming a vnode number outside the loaded tables is removed. */
2952 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
2953 if (vnodeEssence == NULL) {
2955 Log("dir vnode %u: invalid entry deleted: %s/%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2958 CopyOnWrite(salvinfo, dir);
2959 osi_Assert(Delete(&dir->dirHandle, name) == 0);
2964 #ifndef AFS_NAMEI_ENV
2965 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2966 * mount inode for the partition. If this inode were deleted, it would crash
2969 if (vnodeEssence->InodeNumber == 0) {
2970 Log("dir vnode %d: invalid entry: %s/%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2972 CopyOnWrite(salvinfo, dir);
2973 osi_Assert(Delete(&dir->dirHandle, name) == 0);
2980 if (!(vnodeNumber & 1) && !Showmode
2981 && !(vnodeEssence->count || vnodeEssence->unique
2982 || vnodeEssence->modeBits)) {
2983 Log("dir vnode %u: invalid entry: %s/%s (vnode %u, unique %u)%s\n",
2984 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
2985 vnodeNumber, unique,
2986 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
2990 CopyOnWrite(salvinfo, dir);
2991 osi_Assert(Delete(&dir->dirHandle, name) == 0);
2997 /* Check if the Uniquifiers match. If not, change the directory entry
2998 * so its unique matches the vnode unique. Delete if the unique is zero
2999 * or if the directory is orphaned.
3001 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3002 if (!vnodeEssence->unique
3003 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3004 /* This is an orphaned directory. Don't delete the . or ..
3005 * entry. Otherwise, it will get created in the next
3006 * salvage and deleted again here. So Just skip it.
3011 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3014 Log("dir vnode %u: %s/%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3018 fid.Vnode = vnodeNumber;
3019 fid.Unique = vnodeEssence->unique;
3020 CopyOnWrite(salvinfo, dir);
3021 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3023 osi_Assert(Create(&dir->dirHandle, name, &fid) == 0);
3026 return 0; /* no need to continue */
/* The single-dot entry must name this directory itself. */
3029 if (strcmp(name, ".") == 0) {
3030 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3033 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3035 CopyOnWrite(salvinfo, dir);
3036 osi_Assert(Delete(&dir->dirHandle, ".") == 0);
3037 fid.Vnode = dir->vnodeNumber;
3038 fid.Unique = dir->unique;
3039 osi_Assert(Create(&dir->dirHandle, ".", &fid) == 0);
3042 vnodeNumber = fid.Vnode; /* Get the new Essence */
3043 unique = fid.Unique;
3044 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
/*
 * The double-dot entry must name dir->parent with the uniquifier found in
 * the vnode table; the alternative target is the directory itself
 * (presumably used when there is no parent -- the test is not visible).
 */
3047 } else if (strcmp(name, "..") == 0) {
3050 struct VnodeEssence *dotdot;
3051 pa.Vnode = dir->parent;
3052 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3053 osi_Assert(dotdot != NULL); /* XXX Should not be assert */
3054 pa.Unique = dotdot->unique;
3056 pa.Vnode = dir->vnodeNumber;
3057 pa.Unique = dir->unique;
3059 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3061 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3063 CopyOnWrite(salvinfo, dir);
3064 osi_Assert(Delete(&dir->dirHandle, "..") == 0);
3065 osi_Assert(Create(&dir->dirHandle, "..", &pa) == 0);
3068 vnodeNumber = pa.Vnode; /* Get the new Essence */
3070 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3072 dir->haveDotDot = 1;
/* Names with the .__afs prefix are removed and their vnode is scheduled for deletion. */
3073 } else if (strncmp(name, ".__afs", 6) == 0) {
3075 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3078 CopyOnWrite(salvinfo, dir);
3079 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3081 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3082 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3085 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3086 Log("FOUND suid/sgid file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/*
 * A symlink without any execute bit is handled as a mount point.  Its
 * contents must begin with # or % and end with a dot; if not, 0111 is
 * added to the mode bits and the vnode is flagged as changed.
 */
3087 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3088 && !(vnodeEssence->modeBits & 0111)) {
3095 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3096 vnodeEssence->InodeNumber);
3099 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3103 size = FDH_SIZE(fdP);
3105 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3106 FDH_REALLYCLOSE(fdP);
3113 nBytes = FDH_PREAD(fdP, buf, size, 0);
3114 if (nBytes == size) {
3116 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3117 Log("Volume %u (%s) mount point %s/%s to '%s' invalid, %s to symbolic link\n",
3118 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3119 Testing ? "would convert" : "converted");
3120 vnodeEssence->modeBits |= 0111;
3121 vnodeEssence->changed = 1;
3122 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s/%s to '%s'\n",
3123 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3124 dir->name ? dir->name : "??", name, buf);
3126 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3127 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3129 FDH_REALLYCLOSE(fdP);
3132 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3133 Log("FOUND root file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/* Keep the first name seen for a large-class vnode; GetDirName reads it. */
3134 if (vnodeIdToClass(vnodeNumber) == vLarge
3135 && vnodeEssence->name == NULL) {
3137 if ((n = (char *)malloc(strlen(name) + 1)))
3139 vnodeEssence->name = n;
3142 /* The directory entry points to the vnode. Check to see if the
3143 * vnode points back to the directory. If not, then let the
3144 * directory claim it (else it might end up orphaned). Vnodes
3145 * already claimed by another directory are deleted from this
3146 * directory: hardlinks to the same vnode are not allowed
3147 * from different directories.
3149 if (vnodeEssence->parent != dir->vnodeNumber) {
3150 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3151 /* Vnode does not point back to this directory.
3152 * Orphaned dirs cannot claim a file (it may belong to
3153 * another non-orphaned dir).
3156 Log("dir vnode %u: %s/%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3158 vnodeEssence->parent = dir->vnodeNumber;
3159 vnodeEssence->changed = 1;
3161 /* Vnode was claimed by another directory */
3164 Log("dir vnode %u: %s/%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3165 } else if (vnodeNumber == 1) {
3166 Log("dir vnode %d: %s/%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3168 Log("dir vnode %u: %s/%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3172 CopyOnWrite(salvinfo, dir);
3173 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3178 /* This directory claims the vnode */
3179 vnodeEssence->claimed = 1;
/* One expected link is accounted for by this directory entry. */
3181 vnodeEssence->count--;
/*
 * Load a condensed in-memory summary (VnodeEssence) of every allocated
 * vnode of one index file.
 *
 * The index inode ino of volume rwVId is opened into vip->handle and the
 * table size is derived from the file length.  vip->vnodes is allocated
 * for all classes and vip->inodes for vLarge only.  For each vnode whose
 * type is not vNull the link count, block count, parent, uniquifier, mode
 * bits, inode number, type, author, owner and group are recorded, *maxu is
 * raised to the largest uniquifier seen, and vip->volumeBlockCount and
 * vip->nAllocatedVnodes are accumulated.
 *
 * A directory vnode found in a class other than vLarge is not counted and
 * its on-disk record is zeroed; for vLarge the directory inode number is
 * remembered in vip->inodes.
 */
3186 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3187 VnodeClass class, Inode ino, Unique * maxu)
3189 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3190 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3191 char buf[SIZEOF_LARGEDISKVNODE];
3192 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3194 StreamHandle_t *file;
3199 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3200 fdP = IH_OPEN(vip->handle);
3201 osi_Assert(fdP != NULL);
3202 file = FDH_FDOPEN(fdP, "r+");
3203 osi_Assert(file != NULL);
3204 size = OS_SIZE(fdP->fd_fd);
3205 osi_Assert(size != -1);
/* The first record-sized slot of the index is not counted as a vnode. */
3206 vip->nVnodes = (size / vcp->diskSize) - 1;
3207 if (vip->nVnodes > 0) {
3208 osi_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3209 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
3210 osi_Assert((vip->vnodes = (struct VnodeEssence *)
3211 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3212 if (class == vLarge) {
3213 osi_Assert((vip->inodes = (Inode *)
3214 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3223 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3224 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3225 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3226 nVnodes--, vnodeIndex++) {
3227 if (vnode->type != vNull) {
3228 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3229 afs_fsize_t vnodeLength;
3230 vip->nAllocatedVnodes++;
3231 vep->count = vnode->linkCount;
3232 VNDISK_GET_LEN(vnodeLength, vnode);
3233 vep->blockCount = nBlocks(vnodeLength);
3234 vip->volumeBlockCount += vep->blockCount;
3235 vep->parent = vnode->parent;
3236 vep->unique = vnode->uniquifier;
3237 if (*maxu < vnode->uniquifier)
3238 *maxu = vnode->uniquifier;
3239 vep->modeBits = vnode->modeBits;
3240 vep->InodeNumber = VNDISK_GET_INO(vnode);
3241 vep->type = vnode->type;
3242 vep->author = vnode->author;
3243 vep->owner = vnode->owner;
3244 vep->group = vnode->group;
3245 if (vnode->type == vDirectory) {
3246 if (class != vLarge) {
3247 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3248 vip->nAllocatedVnodes--;
/*
 * vnode is a pointer into buf, so sizeof(vnode) is only the size of a
 * pointer.  Clear and write back the whole on-disk record
 * (vcp->diskSize bytes), and pass the record itself rather than the
 * address of the pointer, in the same way SalvageIndex does.
 */
3249 memset(vnode, 0, vcp->diskSize);
3250 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3251 vnodeIndexOffset(vcp, vnodeNumber),
3252 (char *)vnode, vcp->diskSize);
3253 salvinfo->VolumeChanged = 1;
3255 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * Build the path of vnode vp into path by first resolving its parent
 * chain recursively and then appending vp->name.  The recursion is only
 * entered when vp has both a parent and a recorded name and the parent is
 * known to CheckVnodeNumber.
 * NOTE(review): the base case and the return value are not visible in
 * these lines.
 */
3264 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3267 struct VnodeEssence *parentvp;
3273 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3274 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3276 strcat(path, vp->name);
3282 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3283 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
/*
 * Walk from vnode upwards through the vep->parent links.  Returns 1 as
 * soon as a vnode on the way is vnode zero, is unknown to
 * CheckVnodeNumber, or is not claimed; returns 0 when the walk reaches
 * the root directory vnode.
 */
3286 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3288 struct VnodeEssence *vep;
3291 return (1); /* Vnode zero does not exist */
3293 return (0); /* The root dir vnode is always claimed */
3294 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3295 if (!vep || !vep->claimed)
3296 return (1); /* Vnode is not claimed - it is orphaned */
3298 return (IsVnodeOrphaned(salvinfo, vep->parent));
/*
 * Salvage the directory stored in slot i of dirVnodeInfo.
 *
 * The slot is marked salvaged first, so each directory is handled once.
 * Slots without a directory inode are skipped.  Vnode 1 gets its parent
 * forced to 0; for other directories the parent is salvaged first through
 * a recursive call.  A DirSummary is then filled in, the directory is
 * checked with DirOK (or treated as bad outright when RebuildDirs is set
 * and Testing is not), rebuilt with CopyAndSalvage when needed, and every
 * entry is passed to JudgeEntry through EnumerateDir.  If the directory
 * was copied during judging, the old inode is released.  The summary of
 * vnode 1 with unique 1 is copied to rootdir.
 *
 * dir, dirHandle and path are static; the recursive call for the parent
 * is made before any of them is filled in for this directory.
 */
3302 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3303 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3304 struct DirSummary *rootdir, int *rootdirfound)
3306 static struct DirSummary dir;
3307 static struct DirHandle dirHandle;
3308 struct VnodeEssence *parent;
3309 static char path[MAXPATHLEN];
3312 if (dirVnodeInfo->vnodes[i].salvaged)
3313 return; /* already salvaged */
3316 dirVnodeInfo->vnodes[i].salvaged = 1;
3318 if (dirVnodeInfo->inodes[i] == 0)
3319 return; /* Not allocated to a directory */
3321 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3322 if (dirVnodeInfo->vnodes[i].parent) {
3323 Log("Bad parent, vnode 1; %s...\n",
3324 (Testing ? "skipping" : "salvaging"));
3325 dirVnodeInfo->vnodes[i].parent = 0;
3326 dirVnodeInfo->vnodes[i].changed = 1;
/* Salvage the parent directory first when that has not happened yet. */
3329 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3330 if (parent && parent->salvaged == 0)
3331 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3332 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3333 rootdir, rootdirfound);
3336 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3337 dir.unique = dirVnodeInfo->vnodes[i].unique;
3340 dir.parent = dirVnodeInfo->vnodes[i].parent;
3341 dir.haveDot = dir.haveDotDot = 0;
3342 dir.ds_linkH = alinkH;
3343 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3344 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
/* With RebuildDirs set and Testing off, DirOK is bypassed and dirok is 0. */
3346 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3349 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3350 (Testing ? "skipping" : "salvaging"));
3353 CopyAndSalvage(salvinfo, &dir);
3355 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3358 dirHandle = dir.dirHandle;
3361 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3362 &dirVnodeInfo->vnodes[i], path);
3365 /* If enumeration failed for random reasons, we will probably delete
3366 * too much stuff, so we guard against this instead.
3368 struct judgeEntry_params judge_params;
3369 judge_params.salvinfo = salvinfo;
3370 judge_params.dir = &dir;
3372 osi_Assert(EnumerateDir(&dirHandle, JudgeEntry, &judge_params) == 0);
3375 /* Delete the old directory if it was copied in order to salvage.
3376 * CopyOnWrite has written the new inode # to the disk, but we still
3377 * have the old one in our local structure here. Thus, we idec the
3381 if (dir.copied && !Testing) {
3382 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3383 osi_Assert(code == 0);
3384 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3387 /* Remember rootdir DirSummary _after_ it has been judged */
3388 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3389 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3397 * Get a new FID that can be used to create a new file.
3399 * @param[in] volHeader vol header for the volume
3400 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3401 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3402 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3403 * updated to the new max unique if we create a new
/*
 * Implementation notes: the VnodeEssence table of the class is scanned for
 * the first entry of type vNull.  When none exists the table is grown by
 * one entry with realloc and the new entry is given type vNull.  The vnode
 * number is derived from the chosen index.  The uniquifier comes from
 * *maxunique + 1 + 2000 when the header value is not above *maxunique,
 * and from the header uniquifier (which is then incremented) otherwise.
 *
 * NOTE(review): in the lines visible here the realloc result is stored
 * straight into the table pointer without a NULL check, and only the type
 * field of the new entry is initialised -- confirm against the complete
 * function.
 */
3407 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3408 VnodeClass class, AFSFid *afid, Unique *maxunique)
3411 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3412 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
3416 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3417 /* no free vnodes; make a new one */
3418 salvinfo->vnodeInfo[class].nVnodes++;
3419 salvinfo->vnodeInfo[class].vnodes =
3420 realloc(salvinfo->vnodeInfo[class].vnodes,
3421 sizeof(struct VnodeEssence) * (i+1));
3423 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
3426 afid->Vnode = bitNumberToVnodeNumber(i, class);
3428 if (volHeader->uniquifier < (*maxunique + 1)) {
3429 /* header uniq is bad; it will get bumped by 2000 later */
3430 afid->Unique = *maxunique + 1 + 2000;
3433 /* header uniq seems okay; just use that */
3434 afid->Unique = *maxunique = volHeader->uniquifier++;
3439 * Create a vnode for a README file explaining not to use a recreated-root vol.
3441 * @param[in] volHeader vol header for the volume
3442 * @param[in] alinkH ihandle for i/o for the volume
3443 * @param[in] vid volume id
3444 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3445 * updated to the new max unique if we create a new
3447 * @param[out] afid FID for the new readme vnode
3448 * @param[out] ainode the inode for the new readme file
3450 * @return operation status
/*
 * CreateReadme -- create the small-vnode README file for a recreated root
 * directory.
 *
 * Steps visible here: obtain a fresh FID in the vSmall class (GetNewFID),
 * create an inode for it and write the readme text into it, build a
 * zero-filled small disk vnode describing that file and write it to the
 * small vnode index, then mirror the new vnode into the in-memory
 * VnodeEssence entry and the per-class counters.  The new inode number is
 * stored through *ainode; the recovery code drops the inode with IH_DEC.
 *
 * NOTE(review): several short lines (declarations, braces, goto/return
 * statements) are not present in this listing, so the exact error-path
 * control flow should be confirmed against the full file.
 */
3455 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3456 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3460 struct VnodeDiskObject *rvnode = NULL;
3462 IHandle_t *readmeH = NULL;
3463 struct VnodeEssence *vep;
3465 time_t now = time(NULL);
3467 /* Try to make the note brief, but informative. Only administrators should
3468 * be able to read this file at first, so we can hopefully assume they
3469 * know what AFS is, what a volume is, etc. */
3471 "This volume has been salvaged, but has lost its original root directory.\n"
3472 "The root directory that exists now has been recreated from orphan files\n"
3473 "from the rest of the volume. This recreated root directory may interfere\n"
3474 "with old cached data on clients, and there is no way the salvager can\n"
3475 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3476 "use this volume, but only copy the salvaged data to a new volume.\n"
3477 "Continuing to use this volume as it exists now may cause some clients to\n"
3478 "behave oddly when accessing this volume.\n"
3479 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3480 /* ^ the person reading this probably just lost some data, so they could
3481 * use some cheering up. */
3483 /* -1 for the trailing NUL */
3484 length = sizeof(readme) - 1;
3486 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
3488 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3490 /* create the inode and write the contents */
3491 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3492 salvinfo->fileSysPath, 0, vid,
3493 afid->Vnode, afid->Unique, 1);
3494 if (!VALID_INO(readmeinode)) {
3495 Log("CreateReadme: readme IH_CREATE failed\n");
3499 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3500 bytes = IH_IWRITE(readmeH, 0, readme, length);
3501 IH_RELEASE(readmeH);
3503 if (bytes != length) {
/* NOTE(review): the check above compares against length (sizeof(readme) - 1),
 * but the message reports sizeof(readme) as the expected count. */
3504 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3505 (int)sizeof(readme));
3509 /* create the vnode and write it out */
3510 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
/* NOTE(review): the message below names CreateRootDir although it is logged
 * from CreateReadme. */
3512 Log("CreateRootDir: error alloc'ing memory\n");
3516 rvnode->type = vFile;
3518 rvnode->modeBits = 0777;
3519 rvnode->linkCount = 1;
3520 VNDISK_SET_LEN(rvnode, length);
3521 rvnode->uniquifier = afid->Unique;
3522 rvnode->dataVersion = 1;
3523 VNDISK_SET_INO(rvnode, readmeinode);
3524 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3529 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
3531 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3532 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3533 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3535 if (bytes != SIZEOF_SMALLDISKVNODE) {
3536 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3537 (int)SIZEOF_SMALLDISKVNODE);
3541 /* update VnodeEssence for new readme vnode */
3542 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
3544 vep->blockCount = nBlocks(length);
3545 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3546 vep->parent = rvnode->parent;
3547 vep->unique = rvnode->uniquifier;
3548 vep->modeBits = rvnode->modeBits;
3549 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3550 vep->type = rvnode->type;
3551 vep->author = rvnode->author;
3552 vep->owner = rvnode->owner;
3553 vep->group = rvnode->group;
3563 *ainode = readmeinode;
3568 if (IH_DEC(alinkH, readmeinode, vid)) {
3569 Log("CreateReadme (recovery): IH_DEC failed\n");
3581 * create a root dir for a volume that lacks one.
3583 * @param[in] volHeader vol header for the volume
3584 * @param[in] alinkH ihandle for disk access for this volume group
3585 * @param[in] vid volume id we're dealing with
3586 * @param[out] rootdir populated with info about the new root dir
3587 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3588 * updated to the new max unique if we create a new
3591 * @return operation status
/*
 * CreateRootDir -- fabricate a root directory (vnode 1) for a volume whose
 * root could not be found.
 *
 * Visible flow:
 *  - refuse (with a log message) when neither vnode class has a vnode
 *    array, or when the existing large vnode 1 is not vNull;
 *  - if only the large-vnode array is missing, allocate one-entry
 *    vnodes/inodes arrays for vLarge (asserted);
 *  - create the readme file via CreateReadme, create the directory inode,
 *    initialise it with MakeDir and add a "README.ROOTDIR" entry;
 *  - build the large disk vnode (ACL entry 0 grants only read+lookup to
 *    id -204, link count 2, uniquifier 1) and write it at the index offset
 *    of vnode 1;
 *  - update the VnodeEssence entry, the per-class counters and *rootdir.
 * The recovery code decrements the root and readme inodes only when the
 * decroot / decreadme flags are set.
 *
 * NOTE(review): the result of strdup(".") is stored in rootdir->name with
 * no check visible in this listing.  Short lines (braces, gotos, returns)
 * are absent here, so confirm error-path details against the full file.
 */
3596 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3597 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
3601 int decroot = 0, decreadme = 0;
3602 AFSFid did, readmeid;
3605 struct VnodeDiskObject *rootvnode = NULL;
3606 struct acl_accessList *ACL;
3609 struct VnodeEssence *vep;
3611 time_t now = time(NULL);
3613 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3614 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3618 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3619 /* We don't have any large vnodes in the volume; allocate room
3620 * for one so we can recreate the root dir */
3621 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3622 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3623 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3625 osi_Assert(salvinfo->vnodeInfo[vLarge].vnodes);
3626 osi_Assert(salvinfo->vnodeInfo[vLarge].inodes);
3629 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3630 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3631 if (vep->type != vNull) {
3632 Log("Not creating new root dir; existing vnode 1 is non-null\n");
3636 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3637 &readmeinode) != 0) {
3642 /* set the DV to a very high number, so it is unlikely that we collide
3643 * with a cached DV */
3646 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3648 if (!VALID_INO(rootinode)) {
3649 Log("CreateRootDir: IH_CREATE failed\n");
3654 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3655 rootinode, &salvinfo->VolumeChanged);
3659 if (MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3660 Log("CreateRootDir: MakeDir failed\n");
3663 if (Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3664 Log("CreateRootDir: Create failed\n");
3668 length = Length(&rootdir->dirHandle);
3669 DZap((void *)&rootdir->dirHandle);
3671 /* create the new root dir vnode */
3672 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
3674 Log("CreateRootDir: malloc failed\n");
3678 /* only give 'rl' permissions to 'system:administrators'. We do this to
3679 * try to catch the attention of an administrator, that they should not
3680 * be writing to this directory or continue to use it. */
3681 ACL = VVnodeDiskACL(rootvnode);
3682 ACL->size = sizeof(struct acl_accessList);
3683 ACL->version = ACL_ACLVERSION;
3687 ACL->entries[0].id = -204; /* system:administrators */
3688 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
3690 rootvnode->type = vDirectory;
3691 rootvnode->cloned = 0;
3692 rootvnode->modeBits = 0777;
3693 rootvnode->linkCount = 2;
3694 VNDISK_SET_LEN(rootvnode, length);
3695 rootvnode->uniquifier = 1;
3696 rootvnode->dataVersion = dv;
3697 VNDISK_SET_INO(rootvnode, rootinode);
3698 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3699 rootvnode->author = 0;
3700 rootvnode->owner = 0;
3701 rootvnode->parent = 0;
3702 rootvnode->group = 0;
3703 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3705 /* write it out to disk */
3706 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3707 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3708 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3710 if (bytes != SIZEOF_LARGEDISKVNODE) {
3711 /* just cast to int and don't worry about printing real 64-bit ints;
3712 * a large disk vnode isn't anywhere near the 32-bit limit */
3713 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3714 (int)SIZEOF_LARGEDISKVNODE);
3718 /* update VnodeEssence for the new root vnode */
3719 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3721 vep->blockCount = nBlocks(length);
3722 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3723 vep->parent = rootvnode->parent;
3724 vep->unique = rootvnode->uniquifier;
3725 vep->modeBits = rootvnode->modeBits;
3726 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3727 vep->type = rootvnode->type;
3728 vep->author = rootvnode->author;
3729 vep->owner = rootvnode->owner;
3730 vep->group = rootvnode->group;
3740 /* update DirSummary for the new root vnode */
3741 rootdir->vnodeNumber = 1;
3742 rootdir->unique = 1;
3743 rootdir->haveDot = 1;
3744 rootdir->haveDotDot = 1;
3745 rootdir->rwVid = vid;
3746 rootdir->copied = 0;
3747 rootdir->parent = 0;
3748 rootdir->name = strdup(".");
3749 rootdir->vname = volHeader->name;
3750 rootdir->ds_linkH = alinkH;
3757 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3758 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3760 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3761 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3771 * salvage a volume group.
3773 * @param[in] salvinfo information for the current salvage job
3774 * @param[in] rwIsp inode summary for rw volume
3775 * @param[in] alinkH link table inode handle
3777 * @return operation status
/*
 * SalvageVolume -- salvage one read-write volume (the first comment in the
 * body notes that only read-write volumes are passed in for now).
 *
 * Phases visible in this listing:
 *  1. Read the volume header through a new ihandle and assert its size,
 *     its magic, and that destroyMe is not DESTROY_ME.
 *  2. Run DistilVnodeEssence for the large and small vnode indexes
 *     (collecting maxunique), then SalvageDir for every large vnode.
 *  3. If no root directory was found, orphans == ORPH_ATTACH and Testing
 *     is off, try CreateRootDir.
 *  4. Walk every vnode of every class; unused, claimed and root vnodes are
 *     skipped.  The remaining orphans are attached to the root directory
 *     under generated __ORPHANDIR__ / __ORPHANFILE__ names when attaching
 *     is requested.
 *  5. Re-read and rewrite each vnode whose essence is marked changed or
 *     has a non-zero link-count correction; orphans are removed here when
 *     requested.
 *  6. Free cached vnode names, update filecount / diskused / uniquifier in
 *     the header, ask the fileserver to break callbacks (FSYNC client
 *     builds), clear the in-use / needs-salvage flags, write the header
 *     back and release the vnode index handles.
 *
 * NOTE(review): many short lines (braces, declarations, returns, some
 * comment terminators) are missing from this listing; this description
 * covers only what is visible and should be confirmed against the full
 * file.
 */
3781 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3783 /* This routine, for now, will only be called for read-write volumes */
3785 int BlocksInVolume = 0, FilesInVolume = 0;
3787 struct DirSummary rootdir, oldrootdir;
3788 struct VnodeInfo *dirVnodeInfo;
3789 struct VnodeDiskObject vnode;
3790 VolumeDiskData volHeader;
3792 int orphaned, rootdirfound = 0;
3793 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3794 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3795 struct VnodeEssence *vep;
3798 afs_sfsize_t nBytes;
3800 VnodeId LFVnode, ThisVnode;
3801 Unique LFUnique, ThisUnique;
3805 vid = rwIsp->volSummary->header.id;
3806 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3807 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3808 osi_Assert(nBytes == sizeof(volHeader));
3809 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3810 osi_Assert(volHeader.destroyMe != DESTROY_ME);
3811 /* (should not have gotten this far with DESTROY_ME flag still set!) */
3813 DistilVnodeEssence(salvinfo, vid, vLarge,
3814 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
3815 DistilVnodeEssence(salvinfo, vid, vSmall,
3816 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
3818 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
3819 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3820 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
3821 &rootdir, &rootdirfound);
3824 nt_sync(salvinfo->fileSysDevice);
3826 sync(); /* This used to be done lower level, for every dir */
3833 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
3835 Log("Cannot find root directory for volume %lu; attempting to create "
3836 "a new one\n", afs_printable_uint32_lu(vid));
3838 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
3843 salvinfo->VolumeChanged = 1;
3847 /* Parse each vnode looking for orphaned vnodes and
3848 * connect them to the tree as orphaned (if requested).
3850 oldrootdir = rootdir;
3851 for (class = 0; class < nVNODECLASSES; class++) {
3852 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
3853 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
3854 ThisVnode = bitNumberToVnodeNumber(v, class);
3855 ThisUnique = vep->unique;
3857 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3858 continue; /* Ignore unused, claimed, and root vnodes */
3860 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3861 * entry in this vnode had incremented the parent link count (In
3862 * JudgeEntry()). We need to go to the parent and decrement that
3863 * link count. But if the parent's unique is zero, then the parent
3864 * link count was not incremented in JudgeEntry().
3866 if (class == vLarge) { /* directory vnode */
3867 pv = vnodeIdToBitNumber(vep->parent);
3868 if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
3869 if (vep->parent == 1 && newrootdir) {
3870 /* this vnode's parent was the volume root, and
3871 * we just created the volume root. So, the parent
3872 * dir didn't exist during JudgeEntry, so the link
3873 * count was not inc'd there, so don't dec it here.
3879 salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
3885 continue; /* If no rootdir, can't attach orphaned files */
3887 /* Here we attach orphaned files and directories into the
3888 * root directory, LVVnode, making sure link counts stay correct.
3890 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3891 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3892 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3894 /* Update this orphaned vnode's info. Its parent info and
3895 * link count (do for orphaned directories and files).
3897 vep->parent = LFVnode; /* Parent is the root dir */
3898 vep->unique = LFUnique;
3901 vep->count--; /* Inc link count (root dir will pt to it) */
3903 /* If this orphaned vnode is a directory, change '..'.
3904 * The name of the orphaned dir/file is unknown, so we
3905 * build a unique name. No need to CopyOnWrite the directory
3906 * since it is not connected to tree in BK or RO volume and
3907 * won't be visible there.
3909 if (class == vLarge) {
3913 /* Remove and recreate the ".." entry in this orphaned directory */
3914 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
3915 salvinfo->vnodeInfo[class].inodes[v],
3916 &salvinfo->VolumeChanged);
3918 pa.Unique = LFUnique;
3919 osi_Assert(Delete(&dh, "..") == 0);
3920 osi_Assert(Create(&dh, "..", &pa) == 0);
3922 /* The original parent's link count was decremented above.
3923 * Here we increment the new parent's link count.
3925 pv = vnodeIdToBitNumber(LFVnode);
3926 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
3930 /* Go to the root dir and add this entry. The link count of the
3931 * root dir was incremented when ".." was created. Try 10 times.
3933 for (j = 0; j < 10; j++) {
3934 pa.Vnode = ThisVnode;
3935 pa.Unique = ThisUnique;
3937 (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
3939 vLarge) ? "__ORPHANDIR__" :
3940 "__ORPHANFILE__"), ThisVnode,
3943 CopyOnWrite(salvinfo, &rootdir);
3944 code = Create(&rootdir.dirHandle, npath, &pa);
3948 ThisUnique += 50; /* Try creating a different file */
3950 osi_Assert(code == 0);
3951 Log("Attaching orphaned %s to volume's root dir as %s\n",
3952 ((class == vLarge) ? "directory" : "file"), npath);
3954 } /* for each vnode in the class */
3955 } /* for each class of vnode */
3957 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
3959 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
3961 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
3963 osi_Assert(code == 0);
3964 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
3967 DFlush(); /* Flush the changes */
3968 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
3969 Log("Cannot attach orphaned files and directories: Root directory not found\n");
3970 orphans = ORPH_IGNORE;
3973 /* Write out all changed vnodes. Orphaned files and directories
3974 * will get removed here also (if requested).
3976 for (class = 0; class < nVNODECLASSES; class++) {
3977 int nVnodes = salvinfo->vnodeInfo[class].nVnodes;
3978 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3979 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
3980 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
3981 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
3982 for (i = 0; i < nVnodes; i++) {
3983 struct VnodeEssence *vnp = &vnodes[i];
3984 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
3986 /* If the vnode is good but is unclaimed (not listed in
3987 * any directory entries), then it is orphaned.
3990 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
3991 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
3995 if (vnp->changed || vnp->count) {
3998 IH_IREAD(salvinfo->vnodeInfo[class].handle,
3999 vnodeIndexOffset(vcp, vnodeNumber),
4000 (char *)&vnode, sizeof(vnode));
4001 osi_Assert(nBytes == sizeof(vnode));
4003 vnode.parent = vnp->parent;
4004 oldCount = vnode.linkCount;
4005 vnode.linkCount = vnode.linkCount - vnp->count;
4008 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4010 if (!vnp->todelete) {
4011 /* Orphans should have already been attached (if requested) */
4012 osi_Assert(orphans != ORPH_ATTACH);
4013 oblocks += vnp->blockCount;
4016 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4018 BlocksInVolume -= vnp->blockCount;
4020 if (VNDISK_GET_INO(&vnode)) {
4022 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4023 osi_Assert(code == 0);
4025 memset(&vnode, 0, sizeof(vnode));
4027 } else if (vnp->count) {
4029 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4032 vnode.modeBits = vnp->modeBits;
4035 vnode.dataVersion++;
4038 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4039 vnodeIndexOffset(vcp, vnodeNumber),
4040 (char *)&vnode, sizeof(vnode));
4041 osi_Assert(nBytes == sizeof(vnode));
4043 salvinfo->VolumeChanged = 1;
4047 if (!Showmode && ofiles) {
4048 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4050 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
4054 for (class = 0; class < nVNODECLASSES; class++) {
4055 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4056 for (i = 0; i < vip->nVnodes; i++)
4057 if (vip->vnodes[i].name)
4058 free(vip->vnodes[i].name);
4065 /* Set correct resource utilization statistics */
4066 volHeader.filecount = FilesInVolume;
4067 volHeader.diskused = BlocksInVolume;
4069 /* Make sure the uniquifer is big enough: maxunique is the real maxUniquifier */
4070 if (volHeader.uniquifier < (maxunique + 1)) {
4072 Log("Volume uniquifier is too low; fixed\n");
4073 /* Plus 2,000 in case there are workstations out there with
4074 * cached vnodes that have since been deleted
4076 volHeader.uniquifier = (maxunique + 1 + 2000);
4080 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4081 "Only use this salvaged volume to copy data to another volume; "
4082 "do not continue to use this volume (%lu) as-is.\n",
4083 afs_printable_uint32_lu(vid));
4086 #ifdef FSSYNC_BUILD_CLIENT
4087 if (!Testing && salvinfo->VolumeChanged) {
4088 afs_int32 fsync_code;
4090 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4092 Log("Error trying to tell the fileserver to break callbacks for "
4093 "changed volume %lu; error code %ld\n",
4094 afs_printable_uint32_lu(vid),
4095 afs_printable_int32_ld(fsync_code));
4097 salvinfo->VolumeChanged = 0;
4100 #endif /* FSSYNC_BUILD_CLIENT */
4102 /* Turn off the inUse bit; the volume's been salvaged! */
4103 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4104 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4105 volHeader.inService = 1; /* allow service again */
4106 volHeader.needsCallback = (salvinfo->VolumeChanged != 0);
4107 volHeader.dontSalvage = DONT_SALVAGE;
4108 salvinfo->VolumeChanged = 0;
4110 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4111 osi_Assert(nBytes == sizeof(volHeader));
4114 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
4115 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
4116 FilesInVolume, BlocksInVolume);
4119 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4120 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
/*
 * ClearROInUseBit -- reset the state flags in a volume's on-disk header.
 *
 * Reads the VolumeDiskData through summary->volumeInfoHandle, asserts the
 * read size and VOLUMEINFOMAGIC, clears inUse and needsSalvaged, sets
 * inService and dontSalvage (DONT_SALVAGE), and writes the header back,
 * asserting that the whole structure was written.
 */
4126 ClearROInUseBit(struct VolumeSummary *summary)
4128 IHandle_t *h = summary->volumeInfoHandle;
4129 afs_sfsize_t nBytes;
4131 VolumeDiskData volHeader;
4133 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4134 osi_Assert(nBytes == sizeof(volHeader));
4135 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
4136 volHeader.inUse = 0;
4137 volHeader.needsSalvaged = 0;
4138 volHeader.inService = 1;
4139 volHeader.dontSalvage = DONT_SALVAGE;
4141 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4142 osi_Assert(nBytes == sizeof(volHeader));
4147 * Possibly delete the volume.
4149 * deleteMe - Always do so, only a partial volume.
/*
 * MaybeZapVolume -- delete a volume that is read-only or flagged deleteMe;
 * otherwise, unless check is set, log that the salvage failed and Abort.
 *
 * In the delete case (only when the volume summary has a fileName) the
 * visible code logs why the volume is going away, destroys the volume disk
 * header with VDestroyVolumeDiskHeader, and unlinks the header file,
 * tolerating ENOENT.
 *
 * NOTE(review): path is filled with sprintf(); its declaration is not in
 * this listing, so confirm the buffer is large enough for
 * fileSysPath + "/" + fileName.
 */
4152 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4153 char *message, int deleteMe, int check)
4155 if (readOnly(isp) || deleteMe) {
4156 if (isp->volSummary && isp->volSummary->fileName) {
4159 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
4161 Log("It will be deleted on this server (you may find it elsewhere)\n");
4164 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
4166 Log("it will be deleted instead. It should be recloned.\n");
4171 sprintf(path, "%s/%s", salvinfo->fileSysPath, isp->volSummary->fileName);
4173 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4175 Log("Error %ld destroying volume disk header for volume %lu\n",
4176 afs_printable_int32_ld(code),
4177 afs_printable_uint32_lu(isp->volumeId));
4180 /* make sure we actually delete the fileName file; ENOENT
4181 * is fine, since VDestroyVolumeDiskHeader probably already
4183 if (unlink(path) && errno != ENOENT) {
4184 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4188 } else if (!check) {
4189 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
4191 Abort("Salvage of volume %u aborted\n", isp->volumeId);
4195 #ifdef AFS_DEMAND_ATTACH_FS
4197 * Locks a volume on disk for salvaging.
4199 * @param[in] volumeId volume ID to lock
4201 * @return operation status
4203 * @retval -1 volume lock raced with a fileserver restart; all volumes must
4204 * be checked out and locked again
/*
 * LockVolume -- take the on-disk lock for a volume that is about to be
 * salvaged and mark its header as in use by this program.
 *
 * Visible flow: ask VVolLockType for the lock type, lock with
 * VLockVolumeByIdNB (EBUSY and any other failure both end in Abort), then
 * verify the checkout with FSYNC_VerifyCheckout.  SYNC_DENIED means the
 * checkout has to be retried; any other non-OK code aborts.  Finally the
 * volume disk header is read and converted with DiskToVolumeHeader, the
 * VolumeDiskData is read and magic-checked, inUse is set to programType,
 * and the write-back is asserted.
 *
 * NOTE(review): the return statements are not in this listing; confirm the
 * exact return values against the full file.
 */
4209 LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
4214 /* should always be WRITE_LOCK, but keep the lock-type logic all
4215 * in one place, in VVolLockType. Params will be ignored, but
4216 * try to provide what we're logically doing. */
4217 locktype = VVolLockType(V_VOLUPD, 1);
4219 code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
4221 if (code == EBUSY) {
4222 Abort("Someone else appears to be using volume %lu; Aborted\n",
4223 afs_printable_uint32_lu(volumeId));
4225 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4226 afs_printable_int32_ld(code),
4227 afs_printable_uint32_lu(volumeId));
4230 code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPathName, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4231 if (code == SYNC_DENIED) {
4232 /* need to retry checking out volumes */
4235 if (code != SYNC_OK) {
4236 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4237 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4240 /* set inUse = programType in the volume header to ensure that nobody
4241 * tries to use this volume again without salvaging, if we somehow crash
4242 * or otherwise exit before finishing the salvage.
4246 struct VolumeHeader header;
4247 struct VolumeDiskHeader diskHeader;
4248 struct VolumeDiskData volHeader;
4250 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
4255 DiskToVolumeHeader(&header, &diskHeader);
4257 IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
4258 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4259 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4265 volHeader.inUse = programType;
4267 /* If we can't re-write the header, bail out and error. We don't
4268 * assert when reading the header, since it's possible the
4269 * header isn't really there (when there's no data associated
4270 * with the volume; we just delete the vol header file in that
4271 * case). But if it's there enough that we can read it, but
4272 * somehow we cannot write to it to signify we're salvaging it,
4273 * we've got a big problem and we cannot continue. */
4274 osi_Assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
4281 #endif /* AFS_DEMAND_ATTACH_FS */
/*
 * AskOffline -- ask the fileserver (FSYNC_VOL_OFF) to take a volume offline
 * so that it can be salvaged.
 *
 * Makes up to three FSYNC_VolOp attempts.  SYNC_DENIED and SYNC_BAD_COMMAND
 * are logged and end in Abort; other failures are logged as "trying again"
 * and the FSYNC client is shut down with FSYNC_clientFinis.  If the code is
 * still not SYNC_OK after the loop, the salvage is aborted.
 *
 * NOTE(review): any reconnect step after FSYNC_clientFinis() is not in
 * this listing -- confirm against the full file.
 */
4284 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
4289 memset(&res, 0, sizeof(res));
4291 for (i = 0; i < 3; i++) {
4292 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4293 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4295 if (code == SYNC_OK) {
4297 } else if (code == SYNC_DENIED) {
4298 #ifdef DEMAND_ATTACH_ENABLE
4299 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4301 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4303 Abort("Salvage aborted\n");
4304 } else if (code == SYNC_BAD_COMMAND) {
4305 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
4307 #ifdef DEMAND_ATTACH_ENABLE
4308 Log("AskOffline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
4310 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4312 Abort("Salvage aborted\n");
4315 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4316 FSYNC_clientFinis();
4320 if (code != SYNC_OK) {
4321 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4322 Abort("Salvage aborted\n");
/*
 * AskOnline -- ask the fileserver (FSYNC_VOL_ON) to bring a volume back
 * online.
 *
 * Makes up to three FSYNC_VolOp attempts, logging a message for
 * SYNC_DENIED, SYNC_BAD_COMMAND and other failures.  No Abort call is
 * visible in this function.
 *
 * NOTE(review): the generic-failure message near the end says "take volume
 * offline" although this is the online request; the wording looks
 * copy-pasted and may deserve a fix.
 */
4327 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4331 for (i = 0; i < 3; i++) {
4332 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4333 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4335 if (code == SYNC_OK) {
4337 } else if (code == SYNC_DENIED) {
4338 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4339 } else if (code == SYNC_BAD_COMMAND) {
4340 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4342 #ifdef DEMAND_ATTACH_ENABLE
4343 Log("AskOnline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
4345 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4350 Log("AskOnline: request for fileserver to take volume offline failed; trying again...\n");
4351 FSYNC_clientFinis();
/*
 * CopyInode -- copy the contents of inode1 into inode2 on the same device.
 *
 * Both inodes are opened through ihandles built with rwvolume as the volume
 * id.  Data is moved in buf-sized pieces with FDH_PREAD / FDH_PWRITE; every
 * write is asserted to be complete, and the loop must end with a zero-byte
 * read (a negative read result trips the assertion).  Both descriptors are
 * then closed with FDH_REALLYCLOSE.
 *
 * NOTE(review): srcFdP is asserted non-NULL, but no matching check on
 * destFdP is visible in this listing.
 */
4358 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4360 /* Volume parameter is passed in case iopen is upgraded in future to
4361 * require a volume Id to be passed
4364 IHandle_t *srcH, *destH;
4365 FdHandle_t *srcFdP, *destFdP;
4367 afs_foff_t size = 0;
4369 IH_INIT(srcH, device, rwvolume, inode1);
4370 srcFdP = IH_OPEN(srcH);
4371 osi_Assert(srcFdP != NULL);
4372 IH_INIT(destH, device, rwvolume, inode2);
4373 destFdP = IH_OPEN(destH);
4374 while ((nBytes = FDH_PREAD(srcFdP, buf, sizeof(buf), size)) > 0) {
4375 osi_Assert(FDH_PWRITE(destFdP, buf, nBytes, size) == nBytes);
4378 osi_Assert(nBytes == 0);
4379 FDH_REALLYCLOSE(srcFdP);
4380 FDH_REALLYCLOSE(destFdP);
/*
 * PrintInodeList -- log every record of the salvager's inode file.
 *
 * Sizes a buffer from afs_fstat() on salvinfo->inodeFd, reads the whole
 * file into it with a single read() call, and logs the inode number, link
 * count, byte count and the four params of each ViceInodeInfo record.  The
 * stat, the allocation and the read are all asserted.
 *
 * NOTE(review): no free(buf) is visible in this listing -- confirm it is
 * released in the full file.
 */
4387 PrintInodeList(struct SalvInfo *salvinfo)
4389 struct ViceInodeInfo *ip;
4390 struct ViceInodeInfo *buf;
4391 struct afs_stat status;
4395 osi_Assert(afs_fstat(salvinfo->inodeFd, &status) == 0);
4396 buf = (struct ViceInodeInfo *)malloc(status.st_size);
4397 osi_Assert(buf != NULL);
4398 nInodes = status.st_size / sizeof(struct ViceInodeInfo);
4399 osi_Assert(read(salvinfo->inodeFd, buf, status.st_size) == status.st_size);
4400 for (ip = buf; nInodes--; ip++) {
4401 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
4402 PrintInode(stmp, ip->inodeNumber), ip->linkCount,
4403 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
4404 ip->u.param[2], ip->u.param[3]);
/*
 * PrintInodeSummary -- log one line per entry of salvinfo->inodeSummary
 * (nVolumesInInodeFile entries): volume id, RW volume id, index, inode
 * counts and max uniquifier.
 */
4410 PrintInodeSummary(struct SalvInfo *salvinfo)
4413 struct InodeSummary *isp;
4415 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4416 isp = &salvinfo->inodeSummary[i];
4417 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/*
 * PrintVolumeSummary -- log the header file name of each of the
 * salvinfo->nVolumes entries starting at salvinfo->volumeSummaryp.
 */
4422 PrintVolumeSummary(struct SalvInfo *salvinfo)
4425 struct VolumeSummary *vsp;
4427 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; vsp++, i++) {
4428 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
/*
 * Fork -- the function header is not present in this listing; the name is
 * taken from the comment on the assertion below.
 *
 * On the NT code path this only asserts, since it must never run there.
 * With AFS_DEMAND_ATTACH_FS, when f is 0 and the program is a
 * salvageserver, the process is a salvageserver child and (in FSSYNC
 * client builds) reconnects to the fileserver with
 * VChildProcReconnectFS_r().
 *
 * NOTE(review): f presumably holds the fork() result, and the
 * SALVSYNC_BUILD_CLIENT section has a body that is not shown here --
 * confirm both against the full file.
 */
4438 osi_Assert(0); /* Fork is never executed in the NT code path */
4442 #ifdef AFS_DEMAND_ATTACH_FS
4443 if ((f == 0) && (programType == salvageServer)) {
4444 /* we are a salvageserver child */
4445 #ifdef FSSYNC_BUILD_CLIENT
4446 VChildProcReconnectFS_r();
4448 #ifdef SALVSYNC_BUILD_CLIENT
4452 #endif /* AFS_DEMAND_ATTACH_FS */
4453 #endif /* !AFS_NT40_ENV */
/*
 * NOTE(review): the function header is not present in this listing; this
 * appears to be the salvager's process/thread exit wrapper -- confirm.
 *
 * Visible behaviour: under AFS_DEMAND_ATTACH_FS there is a
 * salvageserver-only section guarded by the SALVSYNC / FSSYNC client
 * macros (its statements are not shown here), and when the calling thread
 * is not main_thread it terminates itself with pthread_exit(), passing
 * code cast to a pointer.
 */
4463 #ifdef AFS_DEMAND_ATTACH_FS
4464 if (programType == salvageServer) {
4465 #ifdef SALVSYNC_BUILD_CLIENT
4468 #ifdef FSSYNC_BUILD_CLIENT
4472 #endif /* AFS_DEMAND_ATTACH_FS */
4475 if (main_thread != pthread_self())
4476 pthread_exit((void *)code);
/*
 * NOTE(review): the function header is not present in this listing.
 *
 * Waits for any child process with wait(), asserts that one was reaped,
 * logs when the child (named by prog) dumped core, and tests whether it
 * was killed by a signal or exited with a non-zero status; the action
 * taken in that case is not visible here.
 */
4489 pid = wait(&status);
4490 osi_Assert(pid != -1);
4491 if (WCOREDUMP(status))
4492 Log("\"%s\" core dumped!\n", prog);
4493 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp -- format clock as local time, either "MM/DD/YYYY HH:MM:SS"
 * or "MM/DD/YYYY HH:MM".
 *
 * The text is written into a 20-byte static buffer, so each call
 * overwrites the result of the previous one.
 *
 * NOTE(review): the choice between the two formats presumably depends on
 * precision, and the static buffer is presumably what is returned; those
 * lines are not in this listing.  The localtime() result is used without a
 * visible NULL check.
 */
4499 TimeStamp(time_t clock, int precision)
4502 static char timestamp[20];
4503 lt = localtime(&clock);
4505 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M:%S", lt);
4507 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M", lt);
/*
 * CheckLogFile -- open the salvager log file, keeping the previous one.
 *
 * The existing log is renamed to "<log_path>.old" and log_path is opened
 * in append mode; if that still yields no stream, the comment below says
 * stdout is used.  When AFS_NAMEI_ENV is not defined the stream is also
 * passed to AFS_DEBUG_IOPS_LOG.
 *
 * NOTE(review): oldSlvgLog is built with strcpy/strcat into an
 * AFSDIR_PATH_MAX buffer and no length check is visible in this listing.
 */
4512 CheckLogFile(char * log_path)
4514 char oldSlvgLog[AFSDIR_PATH_MAX];
4516 #ifndef AFS_NT40_ENV
4523 strcpy(oldSlvgLog, log_path);
4524 strcat(oldSlvgLog, ".old");
4526 renamefile(log_path, oldSlvgLog);
4527 logFile = afs_fopen(log_path, "a");
4529 if (!logFile) { /* still nothing, use stdout */
4533 #ifndef AFS_NAMEI_ENV
4534 AFS_DEBUG_IOPS_LOG(logFile);
4539 #ifndef AFS_NT40_ENV
/*
 * TimeStampLogFile -- hard-link the log file to a name that carries a
 * local date and time: "<log_path>.YYYY-MM-DD.HH:MM:SS".
 *
 * The link() result is deliberately ignored (see the comment below).
 *
 * NOTE(review): now is presumably set to the current time on a line that
 * is not in this listing, and the last format argument (seconds) is also
 * not shown -- confirm against the full file.
 */
4541 TimeStampLogFile(char * log_path)
4543 char stampSlvgLog[AFSDIR_PATH_MAX];
4548 lt = localtime(&now);
4549 (void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
4550 "%s.%04d-%02d-%02d.%02d:%02d:%02d",
4551 log_path, lt->tm_year + 1900,
4552 lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
4555 /* try to link the logfile to a timestamped filename */
4556 /* if it fails, oh well, nothing we can do */
4557 link(log_path, stampSlvgLog);
/*
 * NOTE(review): the function header is not present in this listing; this
 * looks like the routine that dumps the salvage log -- confirm the name.
 *
 * Visible behaviour: outside NT builds there is a message for the case
 * where the log cannot be shown because syslog is in use; otherwise
 * AFSDIR_SERVER_SLVGLOG_FILEPATH is opened for reading (a failure is
 * reported with a message) and the file is read line by line with fgets().
 */
4566 #ifndef AFS_NT40_ENV
4568 printf("Can't show log since using syslog.\n");
4579 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
4582 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
4585 while (fgets(line, sizeof(line), logFile))
/*
 * Log -- printf-style logging for the salvager.
 *
 * The message is formatted into tmp with afs_vsnprintf (bounded by
 * sizeof tmp).  Outside NT builds it can be sent to syslog at LOG_INFO;
 * otherwise it is written to logFile prefixed with a TimeStamp() of the
 * current time of day.
 *
 * NOTE(review): the condition that selects syslog versus logFile is not in
 * this listing.
 */
4592 Log(const char *format, ...)
4598 va_start(args, format);
4599 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4601 #ifndef AFS_NT40_ENV
4603 syslog(LOG_INFO, "%s", tmp);
4607 gettimeofday(&now, 0);
4608 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort -- printf-style fatal message.
 *
 * The message is formatted into tmp with afs_vsnprintf (bounded by
 * sizeof tmp) and then either sent to syslog at LOG_INFO (non-NT builds)
 * or written to logFile without a timestamp prefix.
 *
 * NOTE(review): the statements that actually terminate the salvage, and
 * the syslog/logFile selection, are not in this listing.
 */
4614 Abort(const char *format, ...)
4619 va_start(args, format);
4620 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4622 #ifndef AFS_NT40_ENV
4624 syslog(LOG_INFO, "%s", tmp);
4628 fprintf(logFile, "%s", tmp);
/*
 * ToString -- allocate strlen(s) + 1 bytes for a copy of s and assert that
 * the allocation succeeded.
 *
 * NOTE(review): the copy and the return are not in this listing;
 * presumably the new buffer receives s and is returned to the caller, who
 * would then own it -- confirm.
 */
4640 ToString(const char *s)
4643 p = (char *)malloc(strlen(s) + 1);
4644 osi_Assert(p != NULL);
4649 /* Remove the FORCESALVAGE file */
/*
 * RemoveTheForce -- remove "<path>/FORCESALVAGE".
 *
 * The file is unlinked only when Testing is off, ForceSalvage is set and
 * afs_stat() finds it.
 *
 * NOTE(review): target is built with strcpy/strcat; its declaration is not
 * in this listing, so confirm it can hold path plus "/FORCESALVAGE".  The
 * unlink() result is ignored.
 */
4651 RemoveTheForce(char *path)
4654 struct afs_stat force; /* so we can use afs_stat to find it */
4655 strcpy(target,path);
4656 strcat(target,"/FORCESALVAGE");
4657 if (!Testing && ForceSalvage) {
4658 if (afs_stat(target,&force) == 0) unlink(target);
4662 #ifndef AFS_AIX32_ENV
4664 * UseTheForceLuke - see if we can use the force
/*
 * UseTheForceLuke (variant compiled when AFS_AIX32_ENV is not defined) --
 * return non-zero when "<path>/FORCESALVAGE" exists, i.e. when afs_stat()
 * on it succeeds.
 *
 * NOTE(review): target is built with strcpy/strcat; its declaration is not
 * in this listing, so confirm the buffer size.
 */
4667 UseTheForceLuke(char *path)
4669 struct afs_stat force;
4671 strcpy(target,path);
4672 strcat(target,"/FORCESALVAGE");
4674 return (afs_stat(target, &force) == 0);
4678 * UseTheForceLuke - see if we can use the force
4681 * The VRMIX fsck will not muck with the filesystem it is supposedly
4682 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4683 * muck directly with the root inode, which is within the normal
4685 * ListViceInodes() has a side effect of setting ForceSalvage if
4686 * it detects a need, based on root inode examination.
/*
 * UseTheForceLuke (alternate variant) -- always reports 0; path is not
 * examined in the visible code.
 */
4689 UseTheForceLuke(char *path)
4692 return 0; /* sorry OB1 */
4697 /* NT support routines */
/*
 * execpathname caches the path of the running executable;
 * nt_SalvagePartition fills it on first use with GetModuleFileName() and
 * then spawns a child salvager for partName, passing a childJob_t
 * (magic, job number, partition name) through spawnprocveb().
 *
 * NOTE(review): the failure test compares n against the literal 1023
 * rather than the MAX_PATH - 1 limit passed to GetModuleFileName(), and
 * partName is copied into job.cj_part with an unbounded strcpy -- confirm
 * both against the full file.
 */
4699 static char execpathname[MAX_PATH];
4701 nt_SalvagePartition(char *partName, int jobn)
4706 if (!*execpathname) {
4707 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
4708 if (!n || n == 1023)
4711 job.cj_magic = SALVAGER_MAGIC;
4712 job.cj_number = jobn;
4713 (void)strcpy(job.cj_part, partName);
4714 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage -- child-side setup for a spawned partition
 * salvage.
 *
 * datap is treated as a childJob_t; the visible code checks that len
 * matches sizeof(childJob_t) and that cj_magic is SALVAGER_MAGIC, builds
 * a log name of the form "<AFSDIR_SERVER_SLVGLOG_FILEPATH>.<n>" and opens
 * it for writing as logFile.
 *
 * NOTE(review): what happens when either check fails, and the value used
 * for <n>, are on lines not shown in this listing.
 */
4719 nt_SetupPartitionSalvage(void *datap, int len)
4721 childJob_t *jobp = (childJob_t *) datap;
4722 char logname[AFSDIR_PATH_MAX];
4724 if (len != sizeof(childJob_t))
4726 if (jobp->cj_magic != SALVAGER_MAGIC)
4731 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
4733 logFile = afs_fopen(logname, "w");
4741 #endif /* AFS_NT40_ENV */