2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
91 #include <sys/param.h>
95 #endif /* ITIMER_REAL */
101 #include <sys/stat.h>
106 #include <WINNT/afsevent.h>
109 #define WCOREDUMP(x) ((x) & 0200)
112 #include <afs/afsint.h>
113 #include <afs/assert.h>
114 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
115 #if defined(AFS_VFSINCL_ENV)
116 #include <sys/vnode.h>
118 #include <sys/fs/ufs_inode.h>
120 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
121 #include <ufs/ufs/dinode.h>
122 #include <ufs/ffs/fs.h>
124 #include <ufs/inode.h>
127 #else /* AFS_VFSINCL_ENV */
129 #include <ufs/inode.h>
130 #else /* AFS_OSF_ENV */
131 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
132 #include <sys/inode.h>
135 #endif /* AFS_VFSINCL_ENV */
136 #endif /* AFS_SGI_ENV */
139 #include <sys/lockf.h>
143 #include <checklist.h>
145 #if defined(AFS_SGI_ENV)
150 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
153 #include <sys/mnttab.h>
154 #include <sys/mntent.h>
159 #endif /* AFS_SGI_ENV */
160 #endif /* AFS_HPUX_ENV */
165 #include <afs/osi_inode.h>
169 #include <afs/afsutil.h>
170 #include <afs/fileutil.h>
171 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
179 #include <afs/afssyscalls.h>
183 #include "partition.h"
184 #include "daemon_com.h"
186 #include "volume_inline.h"
187 #include "salvsync.h"
188 #include "viceinode.h"
190 #include "volinodes.h" /* header magic number, etc. stuff */
191 #include "vol-salvage.h"
193 #include "vol_internal.h"
195 #include <afs/prs_fs.h>
197 #ifdef FSSYNC_BUILD_CLIENT
198 #include "vg_cache.h"
205 /*@+fcnmacros +macrofcndecl@*/
208 extern off64_t afs_lseek(int FD, off64_t O, int F);
209 #endif /*S_SPLINT_S */
210 #define afs_lseek(FD, O, F) lseek64(FD, (off64_t) (O), F)
211 #define afs_stat stat64
212 #define afs_fstat fstat64
213 #define afs_open open64
214 #define afs_fopen fopen64
215 #else /* !O_LARGEFILE */
217 extern off_t afs_lseek(int FD, off_t O, int F);
218 #endif /*S_SPLINT_S */
219 #define afs_lseek(FD, O, F) lseek(FD, (off_t) (O), F)
220 #define afs_stat stat
221 #define afs_fstat fstat
222 #define afs_open open
223 #define afs_fopen fopen
224 #endif /* !O_LARGEFILE */
225 /*@=fcnmacros =macrofcndecl@*/
228 extern void *calloc();
230 static char *TimeStamp(time_t clock, int precision);
233 int debug; /* -d flag */
234 extern int Testing; /* -n flag */
235 int ListInodeOption; /* -i flag */
236 int ShowRootFiles; /* -r flag */
237 int RebuildDirs; /* -sal flag */
238 int Parallel = 4; /* -para X flag */
239 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
240 int forceR = 0; /* -b flag */
241 int ShowLog = 0; /* -showlog flag */
242 int ShowSuid = 0; /* -showsuid flag */
243 int ShowMounts = 0; /* -showmounts flag */
244 int orphans = ORPH_IGNORE; /* -orphans option */
249 int useSyslog = 0; /* -syslog flag */
250 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
259 #define MAXPARALLEL 32
261 int OKToZap; /* -o flag */
262 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
263 * in the volume header */
265 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
267 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
270 * information that is 'global' to a particular salvage job.
273 Device fileSysDevice; /**< The device number of the current partition
275 char fileSysPath[8]; /**< The path of the mounted partition currently
276 * being salvaged, i.e. the directory containing
277 * the volume headers */
278 char *fileSysPathName; /**< NT needs this to make name pretty log. */
279 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
280 int VGLinkH_cnt; /**< # of references to lnk handle. */
281 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
284 char *fileSysDeviceName; /**< The block device where the file system being
285 * salvaged was mounted */
286 char *filesysfulldev;
288 int VolumeChanged; /**< Set by any routine which would change the
289 * volume in a way which would require callbacks
290 * to be broken if the volume was put back
291 * on line by an active file server */
293 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
294 * header dealt with */
296 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
297 int inodeFd; /**< File descriptor for inode file */
299 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
300 int nVolumes; /**< Number of volumes (read-write and read-only)
301 * in volume summary */
302 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
305 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
306 * vnodes in the volume that
307 * we are currently looking
315 /* Forward declarations */
316 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
317 static int AskVolumeSummary(struct SalvInfo *salvinfo,
318 VolumeId singleVolumeNumber);
320 #ifdef AFS_DEMAND_ATTACH_FS
321 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
322 #endif /* AFS_DEMAND_ATTACH_FS */
324 /* Uniquifier stored in the Inode */
329 return (u & 0x3fffff);
331 #if defined(AFS_SGI_EXMAG)
332 return (u & SGI_UNIQMASK);
335 #endif /* AFS_SGI_EXMAG */
342 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
344 return 0; /* otherwise may be transient, e.g. EMFILE */
349 char *save_args[MAX_ARGS];
351 extern pthread_t main_thread;
352 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
356 * Get the salvage lock if not already held. Hold until process exits.
358 * @param[in] locktype READ_LOCK or WRITE_LOCK
361 _ObtainSalvageLock(int locktype)
363 struct VLockFile salvageLock;
368 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
370 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
373 "salvager: There appears to be another salvager running! "
378 "salvager: Error %d trying to acquire salvage lock! "
384 ObtainSalvageLock(void)
386 _ObtainSalvageLock(WRITE_LOCK);
389 ObtainSharedSalvageLock(void)
391 _ObtainSalvageLock(READ_LOCK);
395 #ifdef AFS_SGI_XFS_IOPS_ENV
396 /* Check if the given partition is mounted. For XFS, the root inode is not a
397 * constant. So we check the hard way.
400 IsPartitionMounted(char *part)
403 struct mntent *mntent;
405 assert(mntfp = setmntent(MOUNTED, "r"));
406 while (mntent = getmntent(mntfp)) {
407 if (!strcmp(part, mntent->mnt_dir))
/* Return 1 only when the scan stopped on an entry whose mnt_dir equals
 * `part`, and 0 when getmntent() ran off the end of the mount table and
 * left mntent NULL.  The previous `? 1 : 1` yielded 1 on both arms, so
 * every partition was reported as mounted.
 * NOTE(review): the statement under the strcmp test is not visible in this
 * view -- presumably a break; confirm before relying on the non-NULL case. */
412 return mntent ? 1 : 0;
415 /* Check if the given inode is the root of the filesystem. */
416 #ifndef AFS_SGI_XFS_IOPS_ENV
418 IsRootInode(struct afs_stat *status)
421 * The root inode is not a fixed value in XFS partitions. So we need to
422 * see if the partition is in the list of mounted partitions. This only
423 * affects the SalvageFileSys path, so we check there.
425 return (status->st_ino == ROOTINODE);
430 #ifndef AFS_NAMEI_ENV
431 /* We don't want to salvage big files filesystems, since we can't put volumes on
435 CheckIfBigFilesFS(char *mountPoint, char *devName)
437 struct superblock fs;
440 if (strncmp(devName, "/dev/", 5)) {
441 (void)sprintf(name, "/dev/%s", devName);
443 (void)strcpy(name, devName);
446 if (ReadSuper(&fs, name) < 0) {
447 Log("Unable to read superblock. Not salvaging partition %s.\n",
451 if (IsBigFilesFileSystem(&fs)) {
452 Log("Partition %s is a big files filesystem, not salvaging.\n",
462 #define HDSTR "\\Device\\Harddisk"
463 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
465 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
470 static int dowarn = 1;
472 if (!QueryDosDevice(p1->devName, res, RES_LEN - 1))
474 if (strncmp(res, HDSTR, HDLEN)) {
477 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
478 res, HDSTR, p1->devName);
482 d1 = atoi(&res[HDLEN]);
484 if (!QueryDosDevice(p2->devName, res, RES_LEN - 1))
486 if (strncmp(res, HDSTR, HDLEN)) {
489 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
490 res, HDSTR, p2->devName);
494 d2 = atoi(&res[HDLEN]);
499 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
502 /* This assumes that two partitions with the same device number divided by
503 * PartsPerDisk are on the same disk.
506 SalvageFileSysParallel(struct DiskPartition64 *partP)
509 struct DiskPartition64 *partP;
510 int pid; /* Pid for this job */
511 int jobnumb; /* Log file job number */
512 struct job *nextjob; /* Next partition on disk to salvage */
514 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
515 struct job *thisjob = 0;
516 static int numjobs = 0;
517 static int jobcount = 0;
523 char logFileName[256];
527 /* We have a partition to salvage. Copy it into thisjob */
528 thisjob = (struct job *)malloc(sizeof(struct job));
530 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
533 memset(thisjob, 0, sizeof(struct job));
534 thisjob->partP = partP;
535 thisjob->jobnumb = jobcount;
537 } else if (jobcount == 0) {
538 /* We are asking to wait for all jobs (partp == 0), yet we never
541 Log("No file system partitions named %s* found; not salvaged\n",
542 VICE_PARTITION_PREFIX);
546 if (debug || Parallel == 1) {
548 SalvageFileSys(thisjob->partP, 0);
555 /* Check to see if thisjob is for a disk that we are already
556 * salvaging. If it is, link it in as the next job to do. The
557 * jobs array has 1 entry per disk being salvaged. numjobs is
558 * the total number of disks currently being salvaged. In
559 * order to keep the jobs array compact, when a disk is
560 * completed, the highest element in the jobs array is moved
561 * down to the now open slot.
563 for (j = 0; j < numjobs; j++) {
564 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
565 /* On same disk, add it to this list and return */
566 thisjob->nextjob = jobs[j]->nextjob;
567 jobs[j]->nextjob = thisjob;
574 /* Loop until we start thisjob or until all existing jobs are finished */
575 while (thisjob || (!partP && (numjobs > 0))) {
576 startjob = -1; /* No new job to start */
578 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
579 /* Either the max jobs are running or we have to wait for all
580 * the jobs to finish. In either case, we wait for at least one
581 * job to finish. When it's done, clean up after it.
583 pid = wait(&wstatus);
585 for (j = 0; j < numjobs; j++) { /* Find which job it is */
586 if (pid == jobs[j]->pid)
590 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
591 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
594 numjobs--; /* job no longer running */
595 oldjob = jobs[j]; /* remember */
596 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
597 free(oldjob); /* free the old job */
599 /* If there is another partition on the disk to salvage, then
600 * say we will start it (startjob). If not, then put thisjob there
601 * and say we will start it.
603 if (jobs[j]) { /* Another partitions to salvage */
604 startjob = j; /* Will start it */
605 } else { /* There is not another partition to salvage */
607 jobs[j] = thisjob; /* Add thisjob */
609 startjob = j; /* Will start it */
611 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
612 startjob = -1; /* Don't start it - already running */
616 /* We don't have to wait for a job to complete */
618 jobs[numjobs] = thisjob; /* Add this job */
620 startjob = numjobs; /* Will start it */
624 /* Start up a new salvage job on a partition in job slot "startjob" */
625 if (startjob != -1) {
627 Log("Starting salvage of file system partition %s\n",
628 jobs[startjob]->partP->name);
630 /* For NT, we not only fork, but re-exec the salvager. Pass in the
631 * commands and pass the child job number via the data path.
634 nt_SalvagePartition(jobs[startjob]->partP->name,
635 jobs[startjob]->jobnumb);
636 jobs[startjob]->pid = pid;
641 jobs[startjob]->pid = pid;
647 for (fd = 0; fd < 16; fd++)
654 openlog("salvager", LOG_PID, useSyslogFacility);
658 (void)afs_snprintf(logFileName, sizeof logFileName,
660 AFSDIR_SERVER_SLVGLOG_FILEPATH,
661 jobs[startjob]->jobnumb);
662 logFile = afs_fopen(logFileName, "w");
667 SalvageFileSys1(jobs[startjob]->partP, 0);
672 } /* while ( thisjob || (!partP && numjobs > 0) ) */
674 /* If waited for all jobs to complete, now collect log files and return */
676 if (!useSyslog) /* if syslogging - no need to collect */
679 for (i = 0; i < jobcount; i++) {
680 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
681 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
682 if ((passLog = afs_fopen(logFileName, "r"))) {
683 while (fgets(buf, sizeof(buf), passLog)) {
688 (void)unlink(logFileName);
697 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
699 if (!canfork || debug || Fork() == 0) {
700 SalvageFileSys1(partP, singleVolumeNumber);
701 if (canfork && !debug) {
706 Wait("SalvageFileSys");
710 get_DevName(char *pbuffer, char *wpath)
712 char pbuf[128], *ptr;
713 strcpy(pbuf, pbuffer);
714 ptr = (char *)strrchr(pbuf, '/');
720 ptr = (char *)strrchr(pbuffer, '/');
722 strcpy(pbuffer, ptr + 1);
729 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
732 char inodeListPath[256];
733 FILE *inodeFile = NULL;
734 static char tmpDevName[100];
735 static char wpath[100];
736 struct VolumeSummary *vsp, *esp;
740 struct SalvInfo l_salvinfo;
741 struct SalvInfo *salvinfo = &l_salvinfo;
744 memset(salvinfo, 0, sizeof(*salvinfo));
751 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
752 Abort("Raced too many times with fileserver restarts while trying to "
753 "checkout/lock volumes; Aborted\n");
755 #ifdef AFS_DEMAND_ATTACH_FS
757 /* unlock all previous volume locks, since we're about to lock them
759 VLockFileReinit(&partP->volLockFile);
761 #endif /* AFS_DEMAND_ATTACH_FS */
763 salvinfo->fileSysPartition = partP;
764 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
765 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
768 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
769 (void)sprintf(salvinfo->fileSysPath, "%s\\", salvinfo->fileSysPathName);
770 name = partP->devName;
772 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
773 strcpy(tmpDevName, partP->devName);
774 name = get_DevName(tmpDevName, wpath);
775 salvinfo->fileSysDeviceName = name;
776 salvinfo->filesysfulldev = wpath;
779 if (singleVolumeNumber) {
780 #ifndef AFS_DEMAND_ATTACH_FS
781 /* only non-DAFS locks the partition when salvaging a single volume;
782 * DAFS will lock the individual volumes in the VG */
783 VLockPartition(partP->name);
784 #endif /* !AFS_DEMAND_ATTACH_FS */
788 /* salvageserver already setup fssync conn for us */
789 if ((programType != salvageServer) && !VConnectFS()) {
790 Abort("Couldn't connect to file server\n");
793 AskOffline(salvinfo, singleVolumeNumber);
794 #ifdef AFS_DEMAND_ATTACH_FS
795 if (LockVolume(salvinfo, singleVolumeNumber)) {
798 #endif /* AFS_DEMAND_ATTACH_FS */
801 VLockPartition(partP->name);
805 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
808 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
809 partP->name, name, (Testing ? "(READONLY mode)" : ""));
811 Log("***Forced salvage of all volumes on this partition***\n");
816 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
823 assert((dirp = opendir(salvinfo->fileSysPath)) != NULL);
824 while ((dp = readdir(dirp))) {
825 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
826 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
828 Log("Removing old salvager temp files %s\n", dp->d_name);
829 strcpy(npath, salvinfo->fileSysPath);
831 strcat(npath, dp->d_name);
837 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
839 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
840 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
842 snprintf(inodeListPath, 255, "%s/salvage.inodes.%s.%d", tdir, name,
846 inodeFile = fopen(inodeListPath, "w+b");
848 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
851 /* Using nt_unlink here since we're really using the delete on close
852 * semantics of unlink. In most places in the salvager, we really do
853 * mean to unlink the file at that point. Those places have been
854 * modified to actually do that so that the NT crt can be used there.
856 code = nt_unlink(inodeListPath);
858 code = unlink(inodeListPath);
861 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
864 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
868 salvinfo->inodeFd = fileno(inodeFile);
869 if (salvinfo->inodeFd == -1)
870 Abort("Temporary file %s is missing...\n", inodeListPath);
871 afs_lseek(salvinfo->inodeFd, 0L, SEEK_SET);
872 if (ListInodeOption) {
873 PrintInodeList(salvinfo);
876 /* enumerate volumes in the partition.
877 * figure out sets of read-only + rw volumes.
878 * salvage each set, read-only volumes first, then read-write.
879 * Fix up inodes on last volume in set (whether it is read-write
882 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
886 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
887 i < salvinfo->nVolumesInInodeFile; i = j) {
888 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
890 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
892 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
893 struct VolumeSummary *tsp;
894 /* Scan volume list (from partition root directory) looking for the
895 * current rw volume number in the volume list from the inode scan.
896 * If there is one here that is not in the inode volume list,
898 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
900 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
902 /* Now match up the volume summary info from the root directory with the
903 * entry in the volume list obtained from scanning inodes */
904 salvinfo->inodeSummary[j].volSummary = NULL;
905 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
906 if (tsp->header.id == vid) {
907 salvinfo->inodeSummary[j].volSummary = tsp;
913 /* Salvage the group of volumes (several read-only + 1 read/write)
914 * starting with the current read-only volume we're looking at.
916 SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
919 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
920 for (; vsp < esp; vsp++) {
922 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
925 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
926 RemoveTheForce(salvinfo->fileSysPath);
928 if (!Testing && singleVolumeNumber) {
929 #ifdef AFS_DEMAND_ATTACH_FS
930 /* unlock vol headers so the fs can attach them when we AskOnline */
931 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
932 #endif /* AFS_DEMAND_ATTACH_FS */
934 AskOnline(salvinfo, singleVolumeNumber);
936 /* Step through the volumeSummary list and set all volumes on-line.
937 * The volumes were taken off-line in GetVolumeSummary.
939 for (j = 0; j < salvinfo->nVolumes; j++) {
940 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
944 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
945 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
948 fclose(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
952 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
955 sprintf(path, "%s/%s", salvinfo->fileSysPath, vsp->fileName);
958 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
961 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
963 Log("Error %ld destroying volume disk header for volume %lu\n",
964 afs_printable_int32_ld(code),
965 afs_printable_uint32_lu(vsp->header.id));
968 /* make sure we actually delete the fileName file; ENOENT
969 * is fine, since VDestroyVolumeDiskHeader probably already
971 if (unlink(path) && errno != ENOENT) {
972 Log("Unable to unlink %s (errno = %d)\n", path, errno);
979 CompareInodes(const void *_p1, const void *_p2)
981 const struct ViceInodeInfo *p1 = _p1;
982 const struct ViceInodeInfo *p2 = _p2;
983 if (p1->u.vnode.vnodeNumber == INODESPECIAL
984 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
985 VolumeId p1rwid, p2rwid;
987 (p1->u.vnode.vnodeNumber ==
988 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
990 (p2->u.vnode.vnodeNumber ==
991 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
996 if (p1->u.vnode.vnodeNumber == INODESPECIAL
997 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
998 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
999 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1000 if (p1->u.vnode.volumeId == p1rwid)
1002 if (p2->u.vnode.volumeId == p2rwid)
1004 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1006 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1007 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1008 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1010 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1012 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1014 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1016 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1018 /* The following tests are reversed, so that the most desirable
1019 * of several similar inodes comes first */
1020 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1021 #ifdef AFS_3DISPARES
1022 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1023 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1026 #ifdef AFS_SGI_EXMAG
1027 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1028 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1033 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1034 #ifdef AFS_3DISPARES
1035 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1036 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1039 #ifdef AFS_SGI_EXMAG
1040 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1041 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1046 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1047 #ifdef AFS_3DISPARES
1048 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1049 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1052 #ifdef AFS_SGI_EXMAG
1053 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1054 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1059 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1060 #ifdef AFS_3DISPARES
1061 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1062 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1065 #ifdef AFS_SGI_EXMAG
1066 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1067 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1076 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1077 struct InodeSummary *summary)
1079 VolumeId volume = ip->u.vnode.volumeId;
1080 VolumeId rwvolume = volume;
1085 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1087 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1089 rwvolume = ip->u.special.parentId;
1090 /* This isn't quite right, as there could (in error) be different
1091 * parent inodes in different special vnodes */
1093 if (maxunique < ip->u.vnode.vnodeUniquifier)
1094 maxunique = ip->u.vnode.vnodeUniquifier;
1098 summary->volumeId = volume;
1099 summary->RWvolumeId = rwvolume;
1100 summary->nInodes = n;
1101 summary->nSpecialInodes = nSpecial;
1102 summary->maxUniquifier = maxunique;
1106 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1108 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1109 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1110 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1115 * Collect list of inodes in file named by path. If a truly fatal error,
1116 * unlink the file and abort. For lesser errors, return -1. The file will
1117 * be unlinked by the caller.
1120 GetInodeSummary(struct SalvInfo *salvinfo, FILE *inodeFile, VolumeId singleVolumeNumber)
1122 struct afs_stat status;
1125 struct ViceInodeInfo *ip, *ip_save;
1126 struct InodeSummary summary;
1127 char summaryFileName[50];
1130 char *dev = salvinfo->fileSysPath;
1131 char *wpath = salvinfo->fileSysPath;
1133 char *dev = salvinfo->fileSysDeviceName;
1134 char *wpath = salvinfo->filesysfulldev;
1136 char *part = salvinfo->fileSysPath;
1140 /* This file used to come from vfsck; cobble it up ourselves now... */
1142 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1143 singleVolumeNumber ? OnlyOneVolume : 0,
1144 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1146 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1149 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1151 if (forceSal && !ForceSalvage) {
1152 Log("***Forced salvage of all volumes on this partition***\n");
1155 fseek(inodeFile, 0L, SEEK_SET);
1156 salvinfo->inodeFd = fileno(inodeFile);
1157 if (salvinfo->inodeFd == -1 || afs_fstat(salvinfo->inodeFd, &status) == -1) {
1158 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1160 tdir = (tmpdir ? tmpdir : part);
1162 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1163 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp"));
1165 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1166 "%s/salvage.temp.%d", tdir, getpid());
1168 summaryFile = afs_fopen(summaryFileName, "a+");
1169 if (summaryFile == NULL) {
1170 Abort("Unable to create inode summary file\n");
1174 /* Using nt_unlink here since we're really using the delete on close
1175 * semantics of unlink. In most places in the salvager, we really do
1176 * mean to unlink the file at that point. Those places have been
1177 * modified to actually do that so that the NT crt can be used there.
1179 code = nt_unlink(summaryFileName);
1181 code = unlink(summaryFileName);
1184 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1187 if (!canfork || debug || Fork() == 0) {
1189 unsigned long st_size=(unsigned long) status.st_size;
1190 nInodes = st_size / sizeof(struct ViceInodeInfo);
1192 fclose(summaryFile);
1193 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1194 RemoveTheForce(salvinfo->fileSysPath);
1196 struct VolumeSummary *vsp;
1199 GetVolumeSummary(salvinfo, singleVolumeNumber);
1201 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1203 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1206 Log("%s vice inodes on %s; not salvaged\n",
1207 singleVolumeNumber ? "No applicable" : "No", dev);
1210 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1212 fclose(summaryFile);
1214 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1217 if (read(salvinfo->inodeFd, ip, st_size) != st_size) {
1218 fclose(summaryFile);
1219 Abort("Unable to read inode table; %s not salvaged\n", dev);
1221 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1222 if (afs_lseek(salvinfo->inodeFd, 0, SEEK_SET) == -1
1223 || write(salvinfo->inodeFd, ip, st_size) != st_size) {
1224 fclose(summaryFile);
1225 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1230 CountVolumeInodes(ip, nInodes, &summary);
1231 if (fwrite(&summary, sizeof(summary), 1, summaryFile) != 1) {
1232 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1233 fclose(summaryFile);
1236 summary.index += (summary.nInodes);
1237 nInodes -= summary.nInodes;
1238 ip += summary.nInodes;
1241 ip = ip_save = NULL;
1242 /* Following fflush is not fclose, because if it was debug mode would not work */
1243 if (fflush(summaryFile) == EOF || fsync(fileno(summaryFile)) == -1) {
1244 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1245 fclose(summaryFile);
1248 if (canfork && !debug) {
1253 if (Wait("Inode summary") == -1) {
1254 fclose(summaryFile);
1255 Exit(1); /* salvage of this partition aborted */
1258 assert(afs_fstat(fileno(summaryFile), &status) != -1);
1259 if (status.st_size != 0) {
1261 unsigned long st_status=(unsigned long)status.st_size;
1262 salvinfo->inodeSummary = (struct InodeSummary *)malloc(st_status);
1263 assert(salvinfo->inodeSummary != NULL);
1264 /* For GNU we need to do lseek to get the file pointer moved. */
1265 assert(afs_lseek(fileno(summaryFile), 0, SEEK_SET) == 0);
1266 ret = read(fileno(summaryFile), salvinfo->inodeSummary, st_status);
1267 assert(ret == st_status);
1269 salvinfo->nVolumesInInodeFile =(unsigned long)(status.st_size) / sizeof(struct InodeSummary);
1270 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1271 salvinfo->inodeSummary[i].volSummary = NULL;
1273 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)(status.st_size));
1274 fclose(summaryFile);
1278 /* Comparison routine for volume sort.
1279 This is set up so that a read-write volume comes immediately before
1280 any read-only clones of that volume */
/*
 * qsort-compatible comparator for two struct VolumeSummary entries.
 *
 * Ordering is decided in three stages:
 *   1. entries with different header.parent are ordered by parent id;
 *   2. with equal parents, each entry is tested for being the read-write
 *      volume (header.id == header.parent);
 *   3. otherwise both entries are read-only and are ordered by header.id.
 * The visible return statements yield -1 or 1.
 */
1282 CompareVolumes(const void *_p1, const void *_p2)
1284 const struct VolumeSummary *p1 = _p1;
1285 const struct VolumeSummary *p2 = _p2;
1286 if (p1->header.parent != p2->header.parent)
1287 return p1->header.parent < p2->header.parent ? -1 : 1;
1288 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1290 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1292 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1296 * Gleans volumeSummary information by asking the fileserver
1298 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1299 * salvaging a whole partition
1301 * @return whether we obtained the volume summary information or not
1302 * @retval 0 success; we obtained the volume summary information
1303 * @retval -1 we raced with a fileserver restart; volume locks and checkout must be retried
1305 * @retval 1 we did not get the volume summary information; either the
1306 * fileserver responded with an error, or we are not supposed to
1307 * ask the fileserver for the information (e.g. we are salvaging
1308 * the entire partition or we are not the salvageserver)
1310 * @note for non-DAFS, always returns 1
/*
 * Ask the fileserver for the members of the volume group that contains
 * singleVolumeNumber and build salvinfo->volumeSummaryp from the answer.
 *
 * The body is compiled only when FSSYNC_BUILD_CLIENT and
 * AFS_DEMAND_ATTACH_FS are both defined, and it only does work when
 * programType == salvageServer and singleVolumeNumber is non-zero.
 *
 * Visible steps:
 *   - issue FSYNC_VGCQuery for the partition and volume; while the answer is
 *     SYNC_FAILED with reason FSYNC_PART_SCANNING, sleep (1s, 2s, ... capped
 *     at 10s) and query again;
 *   - SYNC_FAILED with reason FSYNC_UNKNOWN_VOLID is turned into an empty
 *     response whose rw member is singleVolumeNumber;
 *   - a refused request is logged together with its code and reason;
 *   - if the reported rw volume is not singleVolumeNumber, the request was
 *     for a clone: log it, try SALVSYNC_LinkVolume (only when built with
 *     SALVSYNC_BUILD_CLIENT) and Exit(SALSRV_EXIT_VOLGROUP_LINK);
 *   - allocate VOL_VG_MAX_VOLS summaries and walk q_res.children[]; each
 *     child other than singleVolumeNumber is taken offline and locked, its
 *     disk header is read and converted with DiskToVolumeHeader, its external
 *     name is recorded and salvinfo->nVolumes is incremented;
 *   - the collected summaries are sorted with qsort;
 *   - the final Log reports the fallback to scanning the whole partition.
 */
1313 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1316 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1317 if (programType == salvageServer) {
1318 if (singleVolumeNumber) {
1319 FSSYNC_VGQry_response_t q_res;
1321 struct VolumeSummary *vsp;
1323 struct VolumeDiskHeader diskHdr;
1325 memset(&res, 0, sizeof(res));
1327 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1330 * We must wait for the partition to finish scanning before
1331 * can continue, since we will not know if we got the entire
1332 * VG membership unless the partition is fully scanned.
1333 * We could, in theory, just scan the partition ourselves if
1334 * the VG cache is not ready, but we would be doing the exact
1335 * same scan the fileserver is doing; it will almost always
1336 * be faster to wait for the fileserver. The only exceptions
1337 * are if the partition does not take very long to scan, and
1338 * in that case it's fast either way, so who cares?
1340 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1341 Log("waiting for fileserver to finish scanning partition %s...\n",
1342 salvinfo->fileSysPartition->name);
1344 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1345 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1346 * just so small partitions don't need to wait over 10
1347 * seconds every time, and large partitions are generally
1348 * polled only once every ten seconds. */
1349 sleep((i > 10) ? (i = 10) : i);
1351 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1355 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1356 /* This can happen if there's no header for the volume
1357 * we're salvaging, or no headers exist for the VG (if
1358 * we're salvaging an RW). Act as if we got a response
1359 * with no VG members. The headers may be created during
1360 * salvaging, if there are inodes in this VG. */
1362 memset(&q_res, 0, sizeof(q_res));
1363 q_res.rw = singleVolumeNumber;
1367 Log("fileserver refused VGCQuery request for volume %lu on "
1368 "partition %s, code %ld reason %ld\n",
1369 afs_printable_uint32_lu(singleVolumeNumber),
1370 salvinfo->fileSysPartition->name,
1371 afs_printable_int32_ld(code),
1372 afs_printable_int32_ld(res.hdr.reason));
1376 if (q_res.rw != singleVolumeNumber) {
1377 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1378 afs_printable_uint32_lu(singleVolumeNumber),
1379 afs_printable_uint32_lu(q_res.rw));
1380 #ifdef SALVSYNC_BUILD_CLIENT
1381 if (SALVSYNC_LinkVolume(q_res.rw,
1383 salvinfo->fileSysPartition->name,
1385 Log("schedule request failed\n");
1387 #endif /* SALVSYNC_BUILD_CLIENT */
1388 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1391 salvinfo->volumeSummaryp = malloc(VOL_VG_MAX_VOLS * sizeof(struct VolumeSummary));
1392 assert(salvinfo->volumeSummaryp != NULL);
1394 salvinfo->nVolumes = 0;
1395 vsp = salvinfo->volumeSummaryp;
1397 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1398 char name[VMAXPATHLEN];
1400 if (!q_res.children[i]) {
1404 /* AskOffline for singleVolumeNumber was called much earlier */
1405 if (q_res.children[i] != singleVolumeNumber) {
1406 AskOffline(salvinfo, q_res.children[i]);
1407 if (LockVolume(salvinfo, q_res.children[i])) {
1413 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1415 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1416 afs_printable_uint32_lu(q_res.children[i]));
1421 DiskToVolumeHeader(&vsp->header, &diskHdr);
1422 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1423 vsp->fileName = ToString(name);
1424 salvinfo->nVolumes++;
1428 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1433 Log("Cannot get volume summary from fileserver; falling back to scanning "
1434 "entire partition\n");
1437 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
1442 * count how many volume headers are found by VWalkVolumeHeaders.
1444 * @param[in] dp the disk partition (unused)
1445 * @param[in] name full path to the .vol header (unused)
1446 * @param[in] hdr the header data (unused)
1447 * @param[in] last whether this is the last try or not (unused)
1448 * @param[in] rock actually an afs_int32*; the running count of how many
1449 * volumes we have found
/*
 * Header callback handed to VWalkVolumeHeaders by GetVolumeSummary.
 * The rock argument is interpreted as a pointer to an afs_int32 counter
 * (nvols); dp, name, hdr and last are not referenced by the visible code.
 */
1454 CountHeader(struct DiskPartition64 *dp, const char *name,
1455 struct VolumeDiskHeader *hdr, int last, void *rock)
1457 afs_int32 *nvols = (afs_int32 *)rock;
1463 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
/*
 * Scan state shared between GetVolumeSummary and its header-walk callbacks.
 * GetVolumeSummary fills one of these in and passes its address as the rock
 * argument; RecordHeader and UnlinkHeader cast the rock back to this type.
 */
1466 struct SalvageScanParams {
1467 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1468 * vol id of the VG we're salvaging */
1469 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1470 * we're filling in */
1471 afs_int32 nVolumes; /**< # of vols we've encountered */
1472 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1473 * # of vols we've alloc'd memory for) */
1474 int retry; /**< do we need to retry vol lock/checkout? */
1475 struct SalvInfo *salvinfo; /**< salvage job info */
1479 * records volume summary info found from VWalkVolumeHeaders.
1481 * Found volumes are also taken offline if they are in the specific volume
1482 * group we are looking for.
1484 * @param[in] dp the disk partition
1485 * @param[in] name full path to the .vol header
1486 * @param[in] hdr the header data
1487 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1488 * @param[in] rock actually a struct SalvageScanParams*, containing the
1489 * information needed to record the volume summary data
1491 * @return operation status
1493 * @retval -1 volume locking raced with fileserver restart; checking out
1494 * and locking volumes needs to be retried
1495 * @retval 1 volume header is mis-named and should be deleted
/*
 * Header callback for VWalkVolumeHeaders that records one volume header
 * into the summary array referenced by the rock (struct SalvageScanParams).
 *
 * Visible steps:
 *   - convert the on-disk header into a local VolumeSummary with
 *     DiskToVolumeHeader;
 *   - if a single volume was requested and this header is that volume but
 *     its parent is a different id, the request named a clone: as the
 *     salvageserver (with SALVSYNC_BUILD_CLIENT) log it, call
 *     SALVSYNC_LinkVolume for the parent and
 *     Exit(SALSRV_EXIT_VOLGROUP_LINK); the other visible message reports a
 *     read-only volume as not salvaged;
 *   - a header is considered only for a partition salvage or when its id or
 *     parent equals singleVolumeNumber;
 *   - the expected file name is built from VFORMAT and compared with the
 *     basename of name to detect mis-named header files;
 *   - when the name is good, or this is the last try, volumes other than
 *     singleVolumeNumber are taken offline with AskOffline and, under
 *     AFS_DEMAND_ATTACH_FS, locked with LockVolume;
 *   - on the last try a mis-named header is logged unless Showmode is set;
 *   - the basename is stored as summary.fileName, params->nVolumes is checked
 *     against params->totalVolumes (Abort when exceeded) and the summary is
 *     copied to params->vsp.
 */
1498 RecordHeader(struct DiskPartition64 *dp, const char *name,
1499 struct VolumeDiskHeader *hdr, int last, void *rock)
1501 char nameShouldBe[64];
1502 struct SalvageScanParams *params;
1503 struct VolumeSummary summary;
1504 VolumeId singleVolumeNumber;
1505 struct SalvInfo *salvinfo;
1507 params = (struct SalvageScanParams *)rock;
1509 singleVolumeNumber = params->singleVolumeNumber;
1510 salvinfo = params->salvinfo;
1512 DiskToVolumeHeader(&summary.header, hdr);
1514 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1515 && summary.header.parent != singleVolumeNumber) {
1517 if (programType == salvageServer) {
1518 #ifdef SALVSYNC_BUILD_CLIENT
1519 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1520 summary.header.id, summary.header.parent);
1521 if (SALVSYNC_LinkVolume(summary.header.parent,
1525 Log("schedule request failed\n");
1528 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1531 Log("%u is a read-only volume; not salvaged\n",
1532 singleVolumeNumber);
1537 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1538 || summary.header.parent == singleVolumeNumber) {
1540 /* check if the header file is incorrectly named */
1542 const char *base = strrchr(name, '/');
1549 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1550 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1553 if (strcmp(nameShouldBe, base)) {
1554 /* .vol file has wrong name; retry/delete */
1558 if (!badname || last) {
1559 /* only offline the volume if the header is good, or if this is
1560 * the last try looking at it; avoid AskOffline'ing the same vol
1563 if (singleVolumeNumber
1564 && summary.header.id != singleVolumeNumber) {
1565 /* don't offline singleVolumeNumber; we already did that
1568 AskOffline(salvinfo, summary.header.id);
1570 #ifdef AFS_DEMAND_ATTACH_FS
1572 /* don't lock the volume if the header is bad, since we're
1573 * about to delete it anyway. */
1574 if (LockVolume(salvinfo, summary.header.id)) {
1579 #endif /* AFS_DEMAND_ATTACH_FS */
1583 if (last && !Showmode) {
1584 Log("Volume header file %s is incorrectly named (should be %s "
1585 "not %s); %sdeleted (it will be recreated later, if "
1586 "necessary)\n", name, nameShouldBe, base,
1587 (Testing ? "it would have been " : ""));
1592 summary.fileName = ToString(base);
1595 if (params->nVolumes > params->totalVolumes) {
1596 /* We found more volumes than we found on the first partition walk;
1597 * apparently something created a volume while we were
1598 * partition-salvaging, or we found more than 20 vols when salvaging a
1599 * particular volume. Abort if we detect this, since other programs
1600 * supposed to not touch the partition while it is partition-salvaging,
1601 * and we shouldn't find more than 20 vols in a VG.
1603 Abort("Found %ld vol headers, but should have found at most %ld! "
1604 "Make sure the volserver/fileserver are not running at the "
1605 "same time as a partition salvage\n",
1606 afs_printable_int32_ld(params->nVolumes),
1607 afs_printable_int32_ld(params->totalVolumes));
1610 memcpy(params->vsp, &summary, sizeof(summary));
1618 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1620 * If the header could not be read in at all, the header is always unlinked.
1621 * If instead RecordHeader said the header was bad (that is, the header file
1622 * is mis-named), we only unlink if we are doing a partition salvage, as
1623 * opposed to salvaging a specific volume group.
1625 * @param[in] dp the disk partition
1626 * @param[in] name full path to the .vol header
1627 * @param[in] hdr header data, or NULL if the header could not be read
1628 * @param[in] rock actually a struct SalvageScanParams*, with some information
/*
 * Unlink callback for VWalkVolumeHeaders; rock is a struct SalvageScanParams.
 *
 * Two cases are distinguished: the header could not be read at all (logged
 * as not a legitimate volume header file), or a header was read but flagged
 * as bad, which is only acted on for a partition salvage
 * (params->singleVolumeNumber == 0).  When dounlink ends up set, the file is
 * removed with unlink() and a failure is logged with errno.
 */
1632 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1633 struct VolumeDiskHeader *hdr, void *rock)
1635 struct SalvageScanParams *params;
1638 params = (struct SalvageScanParams *)rock;
1641 /* no header; header is too bogus to read in at all */
1643 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1649 } else if (!params->singleVolumeNumber) {
1650 /* We were able to read in a header, but RecordHeader said something
1651 * was wrong with it. We only unlink those if we are doing a partition
1658 if (dounlink && unlink(name)) {
1659 Log("Error %d while trying to unlink %s\n", errno, name);
1664 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1665 * the fileserver for VG information, or by scanning the /vicepX partition.
1667 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1668 * are salvaging, or 0 if this is a partition salvage
1671 * @return operation status
1673 * @retval -1 we raced with a fileserver restart; checking out and locking
1674 * volumes must be retried
/*
 * Fill salvinfo->volumeSummaryp and salvinfo->nVolumes for this salvage.
 *
 * AskVolumeSummary is tried first.  When the partition has to be scanned
 * instead, nvols comes from a CountHeader walk for a partition salvage
 * (singleVolumeNumber == 0) or is VOL_VG_MAX_VOLS.  An array of nvols
 * summaries is allocated and the headers are walked with
 * RecordHeader/UnlinkHeader, which receive a struct SalvageScanParams as
 * their rock.  The recorded summaries are finally sorted with qsort.
 *
 * Abort() is called when the directory cannot be read or the header walk
 * fails.
 */
1677 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1679 afs_int32 nvols = 0;
1680 struct SalvageScanParams params;
1683 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1685 /* we successfully got the vol information from the fileserver; no
1686 * need to scan the partition */
1690 /* we need to retry volume checkout */
1694 if (!singleVolumeNumber) {
1695 /* Count how many volumes we have in /vicepX */
1696 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1699 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1704 nvols = VOL_VG_MAX_VOLS;
1707 salvinfo->volumeSummaryp = malloc(nvols * sizeof(struct VolumeSummary));
1708 assert(salvinfo->volumeSummaryp != NULL);
1710 params.singleVolumeNumber = singleVolumeNumber;
1711 params.vsp = salvinfo->volumeSummaryp;
1712 params.nVolumes = 0;
1713 params.totalVolumes = nvols;
1715 params.salvinfo = salvinfo;
1717 /* walk the partition directory of volume headers and record the info
1718 * about them; unlinking invalid headers */
1719 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1720 UnlinkHeader, &params);
1722 /* we apparently need to retry checking-out/locking volumes */
1726 Abort("Failed to get volume header summary\n");
1728 salvinfo->nVolumes = params.nVolumes;
1730 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1736 /* Find the link table. This should be associated with the RW volume or, if
1737 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
/*
 * Search the special inodes of nVols volumes for a link table.
 *
 * For each summary isp[i], the inodes starting at allInodes + isp[i].index
 * are scanned over the first nSpecialInodes entries; the inodeNumber of the
 * first entry whose u.special.type is VI_LINKTABLE is returned.
 */
1740 FindLinkHandle(struct InodeSummary *isp, int nVols,
1741 struct ViceInodeInfo *allInodes)
1744 struct ViceInodeInfo *ip;
1746 for (i = 0; i < nVols; i++) {
1747 ip = allInodes + isp[i].index;
1748 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1749 if (ip[j].u.special.type == VI_LINKTABLE)
1750 return ip[j].inodeNumber;
/*
 * Create (if needed) and initialise the link table of a volume group.
 *
 * When ino is not a valid inode, a new INODESPECIAL/VI_LINKTABLE inode is
 * created with IH_CREATE.  The inode is then attached to salvinfo->VGLinkH,
 * opened, truncated to sizeof(struct versionStamp) + sizeof(short) and
 * stamped with LINKTABLEMAGIC/LINKTABLEVERSION.  If the volume has a
 * VolumeSummary, its header.linkTable is updated as well.
 *
 * Every failure (allocate, open, truncate, write) ends in Abort(); the write
 * failure is reported as a write failure rather than repeating the truncate
 * message.
 */
1757 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1759 struct versionStamp version;
1762 if (!VALID_INO(ino))
1764 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
1765 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1766 if (!VALID_INO(ino))
1768 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1769 isp->RWvolumeId, errno);
1770 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1771 fdP = IH_OPEN(salvinfo->VGLinkH);
1773 Abort("Can't open link table for volume %u (error = %d)\n",
1774 isp->RWvolumeId, errno);
1776 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1777 Abort("Can't truncate link table for volume %u (error = %d)\n",
1778 isp->RWvolumeId, errno);
1780 version.magic = LINKTABLEMAGIC;
1781 version.version = LINKTABLEVERSION;
1783 if (FDH_WRITE(fdP, (char *)&version, sizeof(version))
1785 Abort("Can't write link table for volume %u (error = %d)\n",
1786 isp->RWvolumeId, errno);
1788 FDH_REALLYCLOSE(fdP);
1790 /* If the volume summary exists (i.e., the V*.vol header file exists),
1791 * then set this inode there as well.
1793 if (isp->volSummary)
1794 isp->volSummary->header.linkTable = ino;
/*
 * Thread entry used by SalvageVolumeGroup: unpack the SVGParms_t and run
 * DoSalvageVolumeGroup.  The salvinfo pointer is read from svgp_salvinfo,
 * the member SalvageVolumeGroup stores it in.
 */
1803 SVGParms_t *parms = (SVGParms_t *) arg;
1804 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * Variant of SalvageVolumeGroup that runs the salvage in a joinable thread
 * (this code sits in the section closed by the AFS_NT40_ENV endif).
 *
 * The per-volume fields of salvinfo (VolumeChanged, VGLinkH, VGLinkH_cnt,
 * VolInfo) are reset, the arguments are packed into an SVGParms_t, and
 * nt_SVG is started with pthread_create and then joined.  Failures of
 * pthread_attr_init, pthread_attr_setdetachstate or pthread_create are
 * logged with the RW volume id.
 */
1809 SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1812 pthread_attr_t tattr;
1816 /* Initialize per volume global variables, even if later code does so */
1817 salvinfo->VolumeChanged = 0;
1818 salvinfo->VGLinkH = NULL;
1819 salvinfo->VGLinkH_cnt = 0;
1820 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1822 parms.svgp_inodeSummaryp = isp;
1823 parms.svgp_count = nVols;
1824 parms.svgp_salvinfo = salvinfo;
1825 code = pthread_attr_init(&tattr);
1827 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1831 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1833 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1836 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1838 Log("Failed to create thread to salvage volume group %u\n",
1842 (void)pthread_join(tid, NULL);
1844 #endif /* AFS_NT40_ENV */
/*
 * Salvage one volume group: the nVols inode summaries starting at isp.
 *
 * Visible flow:
 *   - haveRWvolume is set when the first summary is the read-write volume
 *     (volumeId == RWvolumeId) and has special inodes;
 *   - QuickCheck is consulted when ShowMounts is off or no read-write volume
 *     is present, and ForceSalvage is not set;
 *   - when canfork && !debug, Fork() is called and the branch where it
 *     returns non-zero only calls Wait();
 *   - the ViceInodeInfo records of the group are read from salvinfo->inodeFd
 *     into a freshly allocated array; allInodes is that array rebased by
 *     isp->index so that summary indexes can be applied to it directly;
 *   - under AFS_NAMEI_ENV the link table inode is located with
 *     FindLinkHandle and opened, or recreated with CreateLinkTable, after
 *     which the SetLinkCount call is made with count 1 for every non-special
 *     inode of the group;
 *   - volumes are processed from the last summary down to salvageTo, each in
 *     two passes (check = 1, then check = 0) through SalvageVolumeHeaderFile
 *     and SalvageVnodes, with MaybeZapVolume called on failure;
 *   - remaining non-zero ip->linkCount values are worked off with IH_DEC and
 *     IH_INC, the link table inode itself being adjusted last;
 *   - SalvageVolume is called for the directory consistency checks and the
 *     link handle is released.
 */
1847 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1849 struct ViceInodeInfo *inodes, *allInodes, *ip;
1850 int i, totalInodes, size, salvageTo;
1854 int dec_VGLinkH = 0;
1856 FdHandle_t *fdP = NULL;
1858 salvinfo->VGLinkH_cnt = 0;
1859 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1860 && isp->nSpecialInodes > 0);
1861 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1862 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1865 if (ShowMounts && !haveRWvolume)
1867 if (canfork && !debug && Fork() != 0) {
1868 (void)Wait("Salvage volume group");
1871 for (i = 0, totalInodes = 0; i < nVols; i++)
1872 totalInodes += isp[i].nInodes;
1873 size = totalInodes * sizeof(struct ViceInodeInfo);
1874 inodes = (struct ViceInodeInfo *)malloc(size);
1875 allInodes = inodes - isp->index; /* this would the base of all the inodes
1876 * for the partition, if all the inodes
1877 * had been read into memory */
1879 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1881 assert(read(salvinfo->inodeFd, inodes, size) == size);
1883 /* Don't try to salvage a read write volume if there isn't one on this
1885 salvageTo = haveRWvolume ? 0 : 1;
1887 #ifdef AFS_NAMEI_ENV
1888 ino = FindLinkHandle(isp, nVols, allInodes);
1889 if (VALID_INO(ino)) {
1890 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1891 fdP = IH_OPEN(salvinfo->VGLinkH);
1893 if (!VALID_INO(ino) || fdP == NULL) {
1894 Log("%s link table for volume %u.\n",
1895 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1897 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1900 struct ViceInodeInfo *ip;
1901 CreateLinkTable(salvinfo, isp, ino);
1902 fdP = IH_OPEN(salvinfo->VGLinkH);
1903 /* Sync fake 1 link counts to the link table, now that it exists */
1905 for (i = 0; i < nVols; i++) {
1906 ip = allInodes + isp[i].index;
1907 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1909 nt_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1911 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1919 FDH_REALLYCLOSE(fdP);
1921 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1924 /* Salvage in reverse order--read/write volume last; this way any
1925 * Inodes not referenced by the time we salvage the read/write volume
1926 * can be picked up by the read/write volume */
1927 /* ACTUALLY, that's not done right now--the inodes just vanish */
1928 for (i = nVols - 1; i >= salvageTo; i--) {
1930 struct InodeSummary *lisp = &isp[i];
1931 #ifdef AFS_NAMEI_ENV
1932 /* If only the RO is present on this partition, the link table
1933 * shows up as a RW volume special file. Need to make sure the
1934 * salvager doesn't try to salvage the non-existent RW.
1936 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1937 /* If this only special inode is the link table, continue */
1938 if (inodes->u.special.type == VI_LINKTABLE) {
1945 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1946 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1947 /* Check inodes twice. The second time do things seriously. This
1948 * way the whole RO volume can be deleted, below, if anything goes wrong */
1949 for (check = 1; check >= 0; check--) {
1951 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
1953 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
1954 if (rw && deleteMe) {
1955 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1956 * volume won't be called */
1962 if (rw && check == 1)
1964 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
1965 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
1971 /* Fix actual inode counts */
1973 Log("totalInodes %d\n",totalInodes);
1974 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1975 static int TraceBadLinkCounts = 0;
1976 #ifdef AFS_NAMEI_ENV
1977 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
1978 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
1979 VGLinkH_p1 = ip->u.param[0];
1980 continue; /* Deal with this last. */
1983 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1984 TraceBadLinkCounts--; /* Limit reports, per volume */
1985 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(NULL, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
1987 while (ip->linkCount > 0) {
1988 /* below used to assert, not break */
1990 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1991 Log("idec failed. inode %s errno %d\n",
1992 PrintInode(NULL, ip->inodeNumber), errno);
1998 while (ip->linkCount < 0) {
1999 /* these used to be asserts */
2001 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2002 Log("iinc failed. inode %s errno %d\n",
2003 PrintInode(NULL, ip->inodeNumber), errno);
2010 #ifdef AFS_NAMEI_ENV
2011 while (dec_VGLinkH > 0) {
2012 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2013 Log("idec failed on link table, errno = %d\n", errno);
2017 while (dec_VGLinkH < 0) {
2018 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2019 Log("iinc failed on link table, errno = %d\n", errno);
2026 /* Directory consistency checks on the rw volume */
2028 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2029 IH_RELEASE(salvinfo->VGLinkH);
2031 if (canfork && !debug) {
/*
 * Quick look at the volume info headers of a volume group.
 *
 * For each of the nVols summaries the VolumeDiskData is read through an
 * inode handle built from the header parent and volumeInfo inode.  A header
 * passes when the full structure is read, its magic is VOLUMEINFOMAGIC,
 * dontSalvage is DONT_SALVAGE, and needsSalvaged and destroyMe are both 0.
 * A passing header that still has inUse set gets inUse cleared and
 * inService set to 1, and is written back with IH_IWRITE.
 *
 * The first summary is special-cased when it has no special inodes and the
 * group has more than one volume (a phantom read-write volume).
 */
2038 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2040 /* Check headers BEFORE forking */
2044 for (i = 0; i < nVols; i++) {
2045 struct VolumeSummary *vs = isp[i].volSummary;
2046 VolumeDiskData volHeader;
2048 /* Don't salvage just because phantom rw volume is there... */
2049 /* (If a read-only volume exists, read/write inodes must also exist) */
2050 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2054 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2055 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2056 == sizeof(volHeader)
2057 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2058 && volHeader.dontSalvage == DONT_SALVAGE
2059 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2060 if (volHeader.inUse != 0) {
2061 volHeader.inUse = 0;
2062 volHeader.inService = 1;
2064 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2065 != sizeof(volHeader)) {
2081 /* SalvageVolumeHeaderFile
2083 * Salvage the top level V*.vol header file. Make sure the special files
2084 * exist and that there are no duplicates.
2086 * Calls SalvageHeader for each possible type of volume special file.
/*
 * Salvage the V*.vol header of one volume from its special inodes.
 *
 * Parameters:
 *   salvinfo  salvage job state (VGLinkH, VGLinkH_cnt and the fileSys*
 *             members are used here)
 *   isp       inode summary of the volume; isp->volSummary may be NULL
 *   inodes    inode array, indexed through isp->index
 *   RW        zero for a read-only volume (its in-use bit is then cleared)
 *   check     non-zero for the check-only pass, zero for the repairing pass
 *   deleteMe  passed through to SalvageHeader
 *
 * Visible flow:
 *   - note which special inodes the existing header already references
 *     (goodspecial[]), using a skip[] array sized by isp->nSpecialInodes;
 *   - build a fresh tempHeader (magic, version, id, parent);
 *   - scan adjacent special inodes for duplicate types and prefer the one
 *     the header references, otherwise the salvage of the volume is aborted;
 *   - for each special inode: report rubbish types, handle skipped orphans
 *     according to the orphans setting, or store the inode number into
 *     tempHeader through stuff[]; when !check, linkCount is decremented for
 *     inodes other than the link table to keep them around;
 *   - run SalvageHeader over every stuff[] entry, first copying the VG link
 *     table inode into the header when that inode is valid;
 *   - when no header file exists, allocate a VolumeSummary and select
 *     VCreateVolumeDiskHeader; for an existing header that has to be
 *     rewritten, select VWriteVolumeDiskHeader;
 *   - copy tempHeader into isp->volSummary->header, write it through
 *     writefunc and IH_INIT the volume info handle.
 *
 * The VolumeSummary allocation is asserted non-NULL before it is
 * dereferenced, in line with the other allocations in this file.
 */
2090 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2091 struct ViceInodeInfo *inodes, int RW,
2092 int check, int *deleteMe)
2095 struct ViceInodeInfo *ip;
2096 int allinodesobsolete = 1;
2097 struct VolumeDiskHeader diskHeader;
2098 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2101 /* keeps track of special inodes that are probably 'good'; they are
2102 * referenced in the vol header, and are included in the given inodes
2107 } goodspecial[MAXINODETYPE];
2112 memset(goodspecial, 0, sizeof(goodspecial));
2114 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2116 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2118 Log("cannot allocate memory for inode skip array when salvaging "
2119 "volume %lu; not performing duplicate special inode recovery\n",
2120 afs_printable_uint32_lu(isp->volumeId));
2121 /* still try to perform the salvage; the skip array only does anything
2122 * if we detect duplicate special inodes */
2126 * First, look at the special inodes and see if any are referenced by
2127 * the existing volume header. If we find duplicate special inodes, we
2128 * can use this information to use the referenced inode (it's more
2129 * likely to be the 'good' one), and throw away the duplicates.
2131 if (isp->volSummary && skip) {
2132 /* use tempHeader, so we can use the stuff[] array to easily index
2133 * into the isp->volSummary special inodes */
2134 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2136 for (i = 0; i < isp->nSpecialInodes; i++) {
2137 ip = &inodes[isp->index + i];
2138 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2139 /* will get taken care of in a later loop */
2142 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2143 goodspecial[ip->u.special.type-1].valid = 1;
2144 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2149 memset(&tempHeader, 0, sizeof(tempHeader));
2150 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2151 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2152 tempHeader.id = isp->volumeId;
2153 tempHeader.parent = isp->RWvolumeId;
2155 /* Check for duplicates (inodes are sorted by type field) */
2156 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2157 ip = &inodes[isp->index + i];
2158 if (ip->u.special.type == (ip + 1)->u.special.type) {
2159 afs_ino_str_t stmp1, stmp2;
2161 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2162 /* Will be caught in the loop below */
2166 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2167 ip->u.special.type, isp->volumeId,
2168 PrintInode(stmp1, ip->inodeNumber),
2169 PrintInode(stmp2, (ip+1)->inodeNumber));
2171 if (skip && goodspecial[ip->u.special.type-1].valid) {
2172 Inode gi = goodspecial[ip->u.special.type-1].inode;
2175 Log("using special inode referenced by vol header (%s)\n",
2176 PrintInode(stmp1, gi));
2179 /* the volume header references some special inode of
2180 * this type in the inodes array; are we it? */
2181 if (ip->inodeNumber != gi) {
2183 } else if ((ip+1)->inodeNumber != gi) {
2184 /* in case this is the last iteration; we need to
2185 * make sure we check ip+1, too */
2190 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2198 for (i = 0; i < isp->nSpecialInodes; i++) {
2199 ip = &inodes[isp->index + i];
2200 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2202 Log("Rubbish header inode %s of type %d\n",
2203 PrintInode(NULL, ip->inodeNumber),
2204 ip->u.special.type);
2210 Log("Rubbish header inode %s of type %d; deleted\n",
2211 PrintInode(NULL, ip->inodeNumber),
2212 ip->u.special.type);
2213 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2214 if (skip && skip[i]) {
2215 if (orphans == ORPH_REMOVE) {
2216 Log("Removing orphan special inode %s of type %d\n",
2217 PrintInode(NULL, ip->inodeNumber), ip->u.special.type);
2220 Log("Ignoring orphan special inode %s of type %d\n",
2221 PrintInode(NULL, ip->inodeNumber), ip->u.special.type);
2222 /* fall through to the ip->linkCount--; line below */
2225 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2226 allinodesobsolete = 0;
2228 if (!check && ip->u.special.type != VI_LINKTABLE)
2229 ip->linkCount--; /* Keep the inode around */
2237 if (allinodesobsolete) {
2244 salvinfo->VGLinkH_cnt++; /* one for every header. */
2246 if (!RW && !check && isp->volSummary) {
2247 ClearROInUseBit(isp->volSummary);
2251 for (i = 0; i < MAXINODETYPE; i++) {
2252 if (stuff[i].inodeType == VI_LINKTABLE) {
2253 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2254 * And we may have recreated the link table earlier, so set the
2255 * RW header as well.
2257 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2258 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2262 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2266 if (isp->volSummary == NULL) {
2268 char headerName[64];
2269 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2270 (void)afs_snprintf(path, sizeof path, "%s/%s", salvinfo->fileSysPath, headerName);
2272 Log("No header file for volume %u\n", isp->volumeId);
2276 Log("No header file for volume %u; %screating %s\n",
2277 isp->volumeId, (Testing ? "it would have been " : ""),
2279 isp->volSummary = (struct VolumeSummary *)
2280 malloc(sizeof(struct VolumeSummary));
/* the summary is dereferenced on the next line, so fail loudly on OOM */
assert(isp->volSummary != NULL);
2281 isp->volSummary->fileName = ToString(headerName);
2283 writefunc = VCreateVolumeDiskHeader;
2286 char headerName[64];
2287 /* hack: these two fields are obsolete... */
2288 isp->volSummary->header.volumeAcl = 0;
2289 isp->volSummary->header.volumeMountTable = 0;
2292 (&isp->volSummary->header, &tempHeader,
2293 sizeof(struct VolumeHeader))) {
2294 /* We often remove the name before calling us, so we make a fake one up */
2295 if (isp->volSummary->fileName) {
2296 strcpy(headerName, isp->volSummary->fileName);
2298 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2299 isp->volSummary->fileName = ToString(headerName);
2301 (void)afs_snprintf(path, sizeof path, "%s/%s", salvinfo->fileSysPath, headerName);
2303 Log("Header file %s is damaged or no longer valid%s\n", path,
2304 (check ? "" : "; repairing"));
2308 writefunc = VWriteVolumeDiskHeader;
2312 memcpy(&isp->volSummary->header, &tempHeader,
2313 sizeof(struct VolumeHeader));
2316 Log("It would have written a new header file for volume %u\n",
2320 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2321 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2323 Log("Error %ld writing volume header file for volume %lu\n",
2324 afs_printable_int32_ld(code),
2325 afs_printable_uint32_lu(diskHeader.id));
2330 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2331 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader: examine one special inode that makes up part of a volume
 * header and repair it when it is missing or damaged.
 *
 * salvinfo - state of the current salvage job; salvinfo->VolInfo receives a
 *            copy of the volume info when sp describes the VI_VOLINFO part
 * sp       - description of the header part being examined: pointer to the
 *            inode number, inode type, expected size, version stamp and a
 *            printable description used in log messages
 * isp      - inode summary of the volume; volumeId, RWvolumeId and
 *            maxUniquifier are read from it
 * check    - check-mode flag; recreating a header part while it is set is
 *            reported as an internal error
 * deleteMe - NOTE(review): presumably set when the volume info carries the
 *            DESTROY_ME mark; the assignment is not visible in this excerpt
 *
 * A part is treated as corrupt when fewer than sp->size bytes can be read
 * or when the magic number read differs from sp->stamp.magic.  A recreated
 * VI_VOLINFO part is named bogus.<volumeId>, is out of service and not
 * blessed, and gets a uniquifier of maxUniquifier + 1 + 1000.  Every other
 * recreated part only receives its version stamp.
 *
 * NOTE(review): the return statements are not visible in this excerpt, so
 * the return convention is not described here.
 */
2336 SalvageHeader(struct SalvInfo *salvinfo, struct stuff *sp,
2337 struct InodeSummary *isp, int check, int *deleteMe)
2340 VolumeDiskData volumeInfo;
2341 struct versionStamp fileHeader;
2350 #ifndef AFS_NAMEI_ENV
2351 if (sp->inodeType == VI_LINKTABLE)
/* An inode number of zero means the header has no inode for this part. */
2354 if (*(sp->inode) == 0) {
2356 Log("Missing inode in volume header (%s)\n", sp->description);
2360 Log("Missing inode in volume header (%s); %s\n", sp->description,
2361 (Testing ? "it would have recreated it" : "recreating"));
2364 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2365 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2366 if (!VALID_INO(*(sp->inode)))
2368 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2369 sp->description, errno);
/* Open the special inode through a handle of its own. */
2374 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2375 fdP = IH_OPEN(specH);
2376 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2377 /* bail out early and destroy the volume */
2379 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2386 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2387 sp->description, errno);
2390 && (FDH_READ(fdP, (char *)&header, sp->size) != sp->size
2391 || header.fileHeader.magic != sp->stamp.magic)) {
2393 Log("Part of the header (%s) is corrupted\n", sp->description);
2394 FDH_REALLYCLOSE(fdP);
2398 Log("Part of the header (%s) is corrupted; recreating\n",
2401 /* header can be garbage; make sure we don't read garbage data from
2403 memset(&header, 0, sizeof(header));
2405 if (sp->inodeType == VI_VOLINFO
2406 && header.volumeInfo.destroyMe == DESTROY_ME) {
2409 FDH_REALLYCLOSE(fdP);
2413 if (recreate && !Testing) {
2416 ("Internal error: recreating volume header (%s) in check mode\n",
2418 nBytes = FDH_TRUNC(fdP, 0);
2420 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2421 sp->description, errno);
2423 /* The following code should be moved into vutil.c */
2424 if (sp->inodeType == VI_VOLINFO) {
2426 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2427 header.volumeInfo.stamp = sp->stamp;
2428 header.volumeInfo.id = isp->volumeId;
2429 header.volumeInfo.parentId = isp->RWvolumeId;
2430 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2431 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2432 isp->volumeId, isp->volumeId);
2433 header.volumeInfo.inService = 0;
2434 header.volumeInfo.blessed = 0;
2435 /* The + 1000 is a hack in case there are any files out in venus caches */
2436 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2437 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2438 header.volumeInfo.needsCallback = 0;
2439 gettimeofday(&tp, 0);
2440 header.volumeInfo.creationDate = tp.tv_sec;
2441 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
2443 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
2444 sp->description, errno);
2447 FDH_WRITE(fdP, (char *)&header.volumeInfo,
2448 sizeof(header.volumeInfo));
2449 if (nBytes != sizeof(header.volumeInfo)) {
2452 ("Unable to write volume header file (%s) (errno = %d)\n",
2453 sp->description, errno);
2454 Abort("Unable to write entire volume header file (%s)\n",
/* Write only the version stamp (sp->stamp) at the start of the file. */
2458 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
2460 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
2461 sp->description, errno);
2463 nBytes = FDH_WRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp));
2464 if (nBytes != sizeof(sp->stamp)) {
2467 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2468 sp->description, errno);
2470 ("Unable to write entire version stamp in volume header file (%s)\n",
2475 FDH_REALLYCLOSE(fdP);
/* Keep the volume info for the rest of the salvage and log when the volume
 * was last updated, or created when no update date is recorded. */
2477 if (sp->inodeType == VI_VOLINFO) {
2478 salvinfo->VolInfo = header.volumeInfo;
2482 if (salvinfo->VolInfo.updateDate) {
2483 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2485 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2486 salvinfo->VolInfo.id,
2487 (Testing ? "it would have been " : ""), update);
2489 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2491 Log("%s (%u) not updated (created %s)\n",
2492 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
2502 SalvageVnodes(struct SalvInfo *salvinfo,
2503 struct InodeSummary *rwIsp,
2504 struct InodeSummary *thisIsp,
2505 struct ViceInodeInfo *inodes, int check)
2507 int ilarge, ismall, ioffset, RW, nInodes;
2508 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2511 RW = (rwIsp == thisIsp);
2512 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2514 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2515 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2516 if (check && ismall == -1)
2519 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2520 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2521 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex: reconcile one vnode index file with the inodes that were
 * found on disk for the volume.
 *
 * salvinfo   - state of the current salvage job; VolumeChanged is set when
 *              a vnode is rewritten
 * ino        - inode of the vnode index file to process
 * class      - vSmall or vLarge; selects the VnodeClassInfo entry that
 *              supplies the on-disk vnode size and the expected magic
 * RW         - nonzero when the read/write volume is being processed (see
 *              how SalvageVnodes computes it)
 * ip/nInodes - inode list for the volume; it is consumed in increasing
 *              vnodeNumber order while the index is walked
 * volSummary - summary of the volume that owns the index
 * check      - check-mode flag; an assert fires if a vnode was changed
 *              while it is set
 *
 * For every allocated vnode the code deletes it when it has no inode
 * number or a wrong magic, picks the matching inode, and corrects the
 * uniquifier, data version, inode number and length from that inode.
 * Vnodes with an inode number (or directory vnodes) that have no matching
 * inode are deleted.  Changed vnodes are written back unless Testing is
 * set.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * SalvageVnodes compares the result against 0 and -1.
 */
2525 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2526 struct ViceInodeInfo *ip, int nInodes,
2527 struct VolumeSummary *volSummary, int check)
2529 VolumeId volumeNumber;
2530 char buf[SIZEOF_LARGEDISKVNODE];
2531 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2533 StreamHandle_t *file;
2534 struct VnodeClassInfo *vcp;
2536 afs_sfsize_t nVnodes;
2537 afs_fsize_t vnodeLength;
2539 afs_ino_str_t stmp1, stmp2;
2543 volumeNumber = volSummary->header.id;
2544 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2545 fdP = IH_OPEN(handle);
2546 assert(fdP != NULL);
2547 file = FDH_FDOPEN(fdP, "r+");
2548 assert(file != NULL);
2549 vcp = &VnodeClassInfo[class];
2550 size = OS_SIZE(fdP->fd_fd);
/* The first diskSize bytes of the index are left out of the vnode count
 * and are skipped by the seek below. */
2552 nVnodes = (size / vcp->diskSize) - 1;
2554 assert((nVnodes + 1) * vcp->diskSize == size);
2555 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
/* Walk the index one vnode slot at a time. */
2559 for (vnodeIndex = 0;
2560 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2561 nVnodes--, vnodeIndex++) {
2562 if (vnode->type != vNull) {
2563 int vnodeChanged = 0;
2564 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2565 /* Log programs that belong to root (potentially suid root);
2566 * don't bother for read-only or backup volumes */
2567 #ifdef notdef /* This is done elsewhere */
2568 if (ShowRootFiles && RW && vnode->owner == 0 && vnodeNumber != 1)
2569 Log("OWNER IS ROOT %s %u dir %u vnode %u author %u owner %u mode %o\n", salvinfo->VolInfo.name, volumeNumber, vnode->parent, vnodeNumber, vnode->author, vnode->owner, vnode->modeBits);
2571 if (VNDISK_GET_INO(vnode) == 0) {
2573 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2574 memset(vnode, 0, vcp->diskSize);
2578 if (vcp->magic != vnode->vnodeMagic) {
2579 /* bad magic #, probably partially created vnode */
2580 Log("Partially allocated vnode %d deleted.\n",
2582 memset(vnode, 0, vcp->diskSize);
2586 /* ****** Should do a bit more salvage here: e.g. make sure
2587 * vnode type matches what it should be given the index */
2588 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2589 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2590 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2591 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2598 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2599 /* The following doesn't work, because the version number
2600 * is not maintained correctly by the file server */
2601 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2602 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2604 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2610 /* For RW volume, look for vnode with matching inode number;
2611 * if no such match, take the first determined by our sort
2613 struct ViceInodeInfo *lip = ip;
2614 int lnInodes = nInodes;
2616 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2617 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2626 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2627 /* "Matching" inode */
2631 vu = vnode->uniquifier;
2632 iu = ip->u.vnode.vnodeUniquifier;
2633 vd = vnode->dataVersion;
2634 id = ip->u.vnode.inodeDataVersion;
2636 * Because of the possibility of the uniquifier overflows (> 4M)
2637 * we compare them modulo the low 22-bits; we shouldn't worry
2638 * about mismatching since they shouldn't to many old
2639 * uniquifiers of the same vnode...
2641 if (IUnique(vu) != IUnique(iu)) {
2643 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2646 vnode->uniquifier = iu;
2647 #ifdef AFS_3DISPARES
2648 vnode->dataVersion = (id >= vd ?
2651 1887437 ? vd : id) :
2654 1887437 ? id : vd));
2656 #if defined(AFS_SGI_EXMAG)
2657 vnode->dataVersion = (id >= vd ?
2660 15099494 ? vd : id) :
2663 15099494 ? id : vd));
2665 vnode->dataVersion = (id > vd ? id : vd);
2666 #endif /* AFS_SGI_EXMAG */
2667 #endif /* AFS_3DISPARES */
2670 /* don't bother checking for vd > id any more, since
2671 * partial file transfers always result in this state,
2672 * and you can't do much else anyway (you've already
2673 * found the best data you can) */
2674 #ifdef AFS_3DISPARES
2675 if (!vnodeIsDirectory(vnodeNumber)
2676 && ((vd < id && (id - vd) < 1887437)
2677 || ((vd > id && (vd - id) > 1887437)))) {
2679 #if defined(AFS_SGI_EXMAG)
2680 if (!vnodeIsDirectory(vnodeNumber)
2681 && ((vd < id && (id - vd) < 15099494)
2682 || ((vd > id && (vd - id) > 15099494)))) {
2684 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2685 #endif /* AFS_SGI_EXMAG */
2688 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2689 vnode->dataVersion = id;
2694 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2697 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2699 VNDISK_SET_INO(vnode, ip->inodeNumber);
2704 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2706 VNDISK_SET_INO(vnode, ip->inodeNumber);
2709 VNDISK_GET_LEN(vnodeLength, vnode);
2710 if (ip->byteCount != vnodeLength) {
2713 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2718 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2719 VNDISK_SET_LEN(vnode, ip->byteCount);
2723 ip->linkCount--; /* Keep the inode around */
2726 } else { /* no matching inode */
2727 if (VNDISK_GET_INO(vnode) != 0
2728 || vnode->type == vDirectory) {
2729 /* No matching inode--get rid of the vnode */
2731 if (VNDISK_GET_INO(vnode)) {
2733 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)));
2737 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2742 if (VNDISK_GET_INO(vnode)) {
2744 time_t serverModifyTime = vnode->serverModifyTime;
2745 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2749 time_t serverModifyTime = vnode->serverModifyTime;
2750 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2753 memset(vnode, 0, vcp->diskSize);
2756 /* Should not reach here because we checked for
2757 * (inodeNumber == 0) above. And where we zero the vnode,
2758 * we also goto vnodeDone.
2762 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2766 } /* VNDISK_GET_INO(vnode) != 0 */
/* Check mode must never have modified a vnode; the write-back below is
 * also skipped in Testing mode. */
2768 assert(!(vnodeChanged && check));
2769 if (vnodeChanged && !Testing) {
2771 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2772 (char *)vnode, vcp->diskSize)
2774 salvinfo->VolumeChanged = 1; /* For break call back */
2785 struct VnodeEssence *
2786 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2789 struct VnodeInfo *vip;
2792 class = vnodeIdToClass(vnodeNumber);
2793 vip = &salvinfo->vnodeInfo[class];
2794 offset = vnodeIdToBitNumber(vnodeNumber);
2795 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
2799 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2801 /* Copy the directory unconditionally if we are going to change it:
2802 * not just if was cloned.
2804 struct VnodeDiskObject vnode;
2805 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2806 Inode oldinode, newinode;
2809 if (dir->copied || Testing)
2811 DFlush(); /* Well justified paranoia... */
2814 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2815 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2817 assert(code == sizeof(vnode));
2818 oldinode = VNDISK_GET_INO(&vnode);
2819 /* Increment the version number by a whole lot to avoid problems with
2820 * clients that were promised new version numbers--but the file server
2821 * crashed before the versions were written to disk.
2824 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2825 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2827 assert(VALID_INO(newinode));
2828 assert(CopyInode(salvinfo->fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2830 VNDISK_SET_INO(&vnode, newinode);
2832 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2833 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2835 assert(code == sizeof(vnode));
2837 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2838 salvinfo->fileSysDevice, newinode,
2839 &salvinfo->VolumeChanged);
2840 /* Don't delete the original inode right away, because the directory is
2841 * still being scanned.
2847 * This function should either successfully create a new dir, or give up
2848 * and leave things the way they were. In particular, if it fails to write
2849 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage: rebuild a damaged directory into a newly created inode.
 *
 * The directory vnode is read from the large vnode index, a new inode is
 * created with the data version advanced, and DirSalvage() copies the
 * usable entries from the old directory into the new one.  The entries for
 * . and .. are taken from the directory itself and from vnode.parent; the
 * uniquifier for .. falls back to 1 when the parent is unknown.  The result
 * is verified with DirOK().  On success the vnode gets the new inode number
 * and the new length and is written back, the link count of the old inode
 * is decremented, and dir->dirHandle is switched to the new directory.
 *
 * NOTE(review): after a failed salvage, code is overwritten by the IH_DEC()
 * result before it is logged, so the message that reports the directory
 * salvage code actually prints the IH_DEC() result.
 * NOTE(review): the handle returned by IH_OPEN() near the end is passed to
 * FDH_REALLYCLOSE() without a NULL check.
 */
2853 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2855 struct VnodeDiskObject vnode;
2856 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2857 Inode oldinode, newinode;
2862 afs_int32 parentUnique = 1;
2863 struct VnodeEssence *vnodeEssence;
2868 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2870 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2871 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2873 assert(lcode == sizeof(vnode));
2874 oldinode = VNDISK_GET_INO(&vnode);
2875 /* Increment the version number by a whole lot to avoid problems with
2876 * clients that were promised new version numbers--but the file server
2877 * crashed before the versions were written to disk.
2880 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2881 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2883 assert(VALID_INO(newinode));
2884 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2885 &salvinfo->VolumeChanged);
2887 /* Assign . and .. vnode numbers from dir and vnode.parent.
2888 * The uniquifier for . is in the vnode.
2889 * The uniquifier for .. might be set to a bogus value of 1 and
2890 * the salvager will later clean it up.
2892 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2893 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2896 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2898 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2903 /* didn't really build the new directory properly, let's just give up. */
2904 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2905 Log("Directory salvage returned code %d, continuing.\n", code);
2907 Log("also failed to decrement link count on new inode");
2911 Log("Checking the results of the directory salvage...\n");
2912 if (!DirOK(&newdir)) {
2913 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2914 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
/* The new directory is good: point the vnode at it and record its length. */
2919 VNDISK_SET_INO(&vnode, newinode);
2920 length = Length(&newdir);
2921 VNDISK_SET_LEN(&vnode, length);
2923 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2924 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2926 assert(lcode == sizeof(vnode));
2929 nt_sync(salvinfo->fileSysDevice);
2931 sync(); /* this is slow, but hopefully rarely called. We don't have
2932 * an open FD on the file itself to fsync.
2936 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
2938 /* make sure old directory file is really closed */
2939 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2940 FDH_REALLYCLOSE(fdP);
2942 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2944 dir->dirHandle = newdir;
2948 * arguments for JudgeEntry.
/* Bundle that SalvageDir() fills in and hands to EnumerateDir(); JudgeEntry()
 * receives it back through its untyped first argument. */
2950 struct judgeEntry_params {
2951 struct DirSummary *dir; /**< directory we're examining entries in */
2952 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/*
 * JudgeEntry: examine a single directory entry.  SalvageDir() passes this
 * function to EnumerateDir(), which calls it with each entry of the
 * directory being salvaged.
 *
 * arock       - pointer to a struct judgeEntry_params (directory summary
 *               and salvage job state)
 * name        - name of the directory entry
 * vnodeNumber - vnode number stored in the entry
 * unique      - uniquifier stored in the entry (used throughout the body;
 *               its declaration is not visible in this excerpt)
 *
 * Summary of what the visible code does:
 *  - entries whose vnode number has no VnodeEssence, that have no inode
 *    (non-namei builds), or that refer to an unused vnode are deleted from
 *    the directory after CopyOnWrite();
 *  - an entry whose uniquifier differs from the vnode is recreated with the
 *    vnode's uniquifier, or deleted when the vnode's uniquifier is zero or
 *    the directory is orphaned;
 *  - wrong . and .. entries are replaced;
 *  - entries whose name starts with .__afs are deleted and the vnode is
 *    marked for deletion;
 *  - any other entry is checked for suid/sgid bits, mount point syntax and
 *    root ownership, and its parent pointer is reconciled with this
 *    directory;
 *  - finally the vnode is marked as claimed and its expected link count is
 *    decremented.
 *
 * Every directory modification is guarded by an assert() on the result of
 * Delete() or Create().
 *
 * NOTE(review): most return statements are not visible in this excerpt, so
 * the return convention is not described here.
 */
2956 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
2959 struct judgeEntry_params *params = arock;
2960 struct DirSummary *dir = params->dir;
2961 struct SalvInfo *salvinfo = params->salvinfo;
2962 struct VnodeEssence *vnodeEssence;
2963 afs_int32 dirOrphaned, todelete;
/* Orphaned directories may not claim vnodes or repair uniquifiers below. */
2965 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
2967 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
2968 if (vnodeEssence == NULL) {
2970 Log("dir vnode %u: invalid entry deleted: %s/%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2973 CopyOnWrite(salvinfo, dir);
2974 assert(Delete(&dir->dirHandle, name) == 0);
2979 #ifndef AFS_NAMEI_ENV
2980 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2981 * mount inode for the partition. If this inode were deleted, it would crash
2984 if (vnodeEssence->InodeNumber == 0) {
2985 Log("dir vnode %d: invalid entry: %s/%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2987 CopyOnWrite(salvinfo, dir);
2988 assert(Delete(&dir->dirHandle, name) == 0);
2995 if (!(vnodeNumber & 1) && !Showmode
2996 && !(vnodeEssence->count || vnodeEssence->unique
2997 || vnodeEssence->modeBits)) {
2998 Log("dir vnode %u: invalid entry: %s/%s (vnode %u, unique %u)%s\n",
2999 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3000 vnodeNumber, unique,
3001 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3005 CopyOnWrite(salvinfo, dir);
3006 assert(Delete(&dir->dirHandle, name) == 0);
3012 /* Check if the Uniquifiers match. If not, change the directory entry
3013 * so its unique matches the vnode unique. Delete if the unique is zero
3014 * or if the directory is orphaned.
3016 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3017 if (!vnodeEssence->unique
3018 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3019 /* This is an orphaned directory. Don't delete the . or ..
3020 * entry. Otherwise, it will get created in the next
3021 * salvage and deleted again here. So Just skip it.
3026 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3029 Log("dir vnode %u: %s/%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3033 fid.Vnode = vnodeNumber;
3034 fid.Unique = vnodeEssence->unique;
3035 CopyOnWrite(salvinfo, dir);
3036 assert(Delete(&dir->dirHandle, name) == 0);
3038 assert(Create(&dir->dirHandle, name, &fid) == 0);
3041 return 0; /* no need to continue */
/* The . entry must name this directory itself. */
3044 if (strcmp(name, ".") == 0) {
3045 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3048 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3050 CopyOnWrite(salvinfo, dir);
3051 assert(Delete(&dir->dirHandle, ".") == 0);
3052 fid.Vnode = dir->vnodeNumber;
3053 fid.Unique = dir->unique;
3054 assert(Create(&dir->dirHandle, ".", &fid) == 0);
3057 vnodeNumber = fid.Vnode; /* Get the new Essence */
3058 unique = fid.Unique;
3059 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3062 } else if (strcmp(name, "..") == 0) {
3065 struct VnodeEssence *dotdot;
3066 pa.Vnode = dir->parent;
3067 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3068 assert(dotdot != NULL); /* XXX Should not be assert */
3069 pa.Unique = dotdot->unique;
3071 pa.Vnode = dir->vnodeNumber;
3072 pa.Unique = dir->unique;
3074 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3076 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3078 CopyOnWrite(salvinfo, dir);
3079 assert(Delete(&dir->dirHandle, "..") == 0);
3080 assert(Create(&dir->dirHandle, "..", &pa) == 0);
3083 vnodeNumber = pa.Vnode; /* Get the new Essence */
3085 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3087 dir->haveDotDot = 1;
3088 } else if (strncmp(name, ".__afs", 6) == 0) {
3090 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3093 CopyOnWrite(salvinfo, dir);
3094 assert(Delete(&dir->dirHandle, name) == 0);
3096 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3097 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3100 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3101 Log("FOUND suid/sgid file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/* A symlink without any execute bit is examined as a mount point. */
3102 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3103 && !(vnodeEssence->modeBits & 0111)) {
3110 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3111 vnodeEssence->InodeNumber);
3114 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3118 size = FDH_SIZE(fdP);
3120 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3121 FDH_REALLYCLOSE(fdP);
3128 nBytes = FDH_READ(fdP, buf, size);
3129 if (nBytes == size) {
3131 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3132 Log("Volume %u (%s) mount point %s/%s to '%s' invalid, %s to symbolic link\n",
3133 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3134 Testing ? "would convert" : "converted");
3135 vnodeEssence->modeBits |= 0111;
3136 vnodeEssence->changed = 1;
3137 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s/%s to '%s'\n",
3138 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3139 dir->name ? dir->name : "??", name, buf);
3141 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3142 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3144 FDH_REALLYCLOSE(fdP);
3147 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3148 Log("FOUND root file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/* Remember the entry name for vnodes of the large class that have none yet. */
3149 if (vnodeIdToClass(vnodeNumber) == vLarge
3150 && vnodeEssence->name == NULL) {
3152 if ((n = (char *)malloc(strlen(name) + 1)))
3154 vnodeEssence->name = n;
3157 /* The directory entry points to the vnode. Check to see if the
3158 * vnode points back to the directory. If not, then let the
3159 * directory claim it (else it might end up orphaned). Vnodes
3160 * already claimed by another directory are deleted from this
3161 * directory: hardlinks to the same vnode are not allowed
3162 * from different directories.
3164 if (vnodeEssence->parent != dir->vnodeNumber) {
3165 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3166 /* Vnode does not point back to this directory.
3167 * Orphaned dirs cannot claim a file (it may belong to
3168 * another non-orphaned dir).
3171 Log("dir vnode %u: %s/%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3173 vnodeEssence->parent = dir->vnodeNumber;
3174 vnodeEssence->changed = 1;
3176 /* Vnode was claimed by another directory */
3179 Log("dir vnode %u: %s/%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3180 } else if (vnodeNumber == 1) {
3181 Log("dir vnode %d: %s/%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3183 Log("dir vnode %u: %s/%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3187 CopyOnWrite(salvinfo, dir);
3188 assert(Delete(&dir->dirHandle, name) == 0);
3193 /* This directory claims the vnode */
3194 vnodeEssence->claimed = 1;
3196 vnodeEssence->count--;
3201 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3202 VnodeClass class, Inode ino, Unique * maxu)
3204 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3205 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3206 char buf[SIZEOF_LARGEDISKVNODE];
3207 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3209 StreamHandle_t *file;
3214 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3215 fdP = IH_OPEN(vip->handle);
3216 assert(fdP != NULL);
3217 file = FDH_FDOPEN(fdP, "r+");
3218 assert(file != NULL);
3219 size = OS_SIZE(fdP->fd_fd);
3221 vip->nVnodes = (size / vcp->diskSize) - 1;
3222 if (vip->nVnodes > 0) {
3223 assert((vip->nVnodes + 1) * vcp->diskSize == size);
3224 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
3225 assert((vip->vnodes = (struct VnodeEssence *)
3226 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3227 if (class == vLarge) {
3228 assert((vip->inodes = (Inode *)
3229 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3238 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3239 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3240 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3241 nVnodes--, vnodeIndex++) {
3242 if (vnode->type != vNull) {
3243 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3244 afs_fsize_t vnodeLength;
3245 vip->nAllocatedVnodes++;
3246 vep->count = vnode->linkCount;
3247 VNDISK_GET_LEN(vnodeLength, vnode);
3248 vep->blockCount = nBlocks(vnodeLength);
3249 vip->volumeBlockCount += vep->blockCount;
3250 vep->parent = vnode->parent;
3251 vep->unique = vnode->uniquifier;
3252 if (*maxu < vnode->uniquifier)
3253 *maxu = vnode->uniquifier;
3254 vep->modeBits = vnode->modeBits;
3255 vep->InodeNumber = VNDISK_GET_INO(vnode);
3256 vep->type = vnode->type;
3257 vep->author = vnode->author;
3258 vep->owner = vnode->owner;
3259 vep->group = vnode->group;
3260 if (vnode->type == vDirectory) {
3261 if (class != vLarge) {
3262 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3263 vip->nAllocatedVnodes--;
3264 memset(vnode, 0, sizeof(vnode));
3265 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3266 vnodeIndexOffset(vcp, vnodeNumber),
3267 (char *)&vnode, sizeof(vnode));
3268 salvinfo->VolumeChanged = 1;
3270 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
3279 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3282 struct VnodeEssence *parentvp;
3288 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3289 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3291 strcat(path, vp->name);
3297 /* To determine if a vnode is orhpaned or not, the vnode and all its parent
3298 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
3301 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3303 struct VnodeEssence *vep;
3306 return (1); /* Vnode zero does not exist */
3308 return (0); /* The root dir vnode is always claimed */
3309 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3310 if (!vep || !vep->claimed)
3311 return (1); /* Vnode is not claimed - it is orphaned */
3313 return (IsVnodeOrphaned(salvinfo, vep->parent));
/*
 * SalvageDir: salvage the directory stored at slot i of the large vnode
 * table.
 *
 * salvinfo     - state of the current salvage job
 * name         - passed on unchanged to the recursive call for the parent
 * rwVid        - id of the read/write volume, used when the old directory
 *                inode is released
 * dirVnodeInfo - large vnode table (vnodes[] and inodes[] are indexed by i)
 * alinkH       - handle stored in the directory summary as ds_linkH
 * i            - slot of the directory in dirVnodeInfo
 * rootdir      - receives a copy of the directory summary when the
 *                directory is vnode 1 with uniquifier 1
 * rootdirfound - NOTE(review): presumably set together with rootdir; the
 *                assignment is not visible in this excerpt
 *
 * A slot is processed only once (the salvaged flag) and only when it has a
 * directory inode.  The parent directory is salvaged first when it has not
 * been handled yet.  The directory is then checked with DirOK(), rebuilt by
 * CopyAndSalvage() when it is bad, and every entry is passed to
 * JudgeEntry() through EnumerateDir().
 *
 * dir, dirHandle and path are static; they are filled in only after the
 * recursive call for the parent has returned.
 */
3317 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3318 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3319 struct DirSummary *rootdir, int *rootdirfound)
3321 static struct DirSummary dir;
3322 static struct DirHandle dirHandle;
3323 struct VnodeEssence *parent;
3324 static char path[MAXPATHLEN];
3327 if (dirVnodeInfo->vnodes[i].salvaged)
3328 return; /* already salvaged */
3331 dirVnodeInfo->vnodes[i].salvaged = 1;
3333 if (dirVnodeInfo->inodes[i] == 0)
3334 return; /* Not allocated to a directory */
/* The root directory (vnode 1) must not have a parent. */
3336 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3337 if (dirVnodeInfo->vnodes[i].parent) {
3338 Log("Bad parent, vnode 1; %s...\n",
3339 (Testing ? "skipping" : "salvaging"));
3340 dirVnodeInfo->vnodes[i].parent = 0;
3341 dirVnodeInfo->vnodes[i].changed = 1;
/* Salvage the parent directory first when that has not happened yet. */
3344 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3345 if (parent && parent->salvaged == 0)
3346 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3347 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3348 rootdir, rootdirfound);
3351 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3352 dir.unique = dirVnodeInfo->vnodes[i].unique;
3355 dir.parent = dirVnodeInfo->vnodes[i].parent;
3356 dir.haveDot = dir.haveDotDot = 0;
3357 dir.ds_linkH = alinkH;
3358 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3359 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
/* With RebuildDirs set (and not Testing) the directory is treated as bad
 * without being checked. */
3361 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3364 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3365 (Testing ? "skipping" : "salvaging"));
3368 CopyAndSalvage(salvinfo, &dir);
3370 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3373 dirHandle = dir.dirHandle;
3376 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3377 &dirVnodeInfo->vnodes[i], path);
3380 /* If enumeration failed for random reasons, we will probably delete
3381 * too much stuff, so we guard against this instead.
3383 struct judgeEntry_params judge_params;
3384 judge_params.salvinfo = salvinfo;
3385 judge_params.dir = &dir;
3387 assert(EnumerateDir(&dirHandle, JudgeEntry, &judge_params) == 0);
3390 /* Delete the old directory if it was copied in order to salvage.
3391 * CopyOnWrite has written the new inode # to the disk, but we still
3392 * have the old one in our local structure here. Thus, we idec the
3396 if (dir.copied && !Testing) {
3397 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3399 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3402 /* Remember rootdir DirSummary _after_ it has been judged */
3403 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3404 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3412 * Get a new FID that can be used to create a new file.
3414 * @param[in] volHeader vol header for the volume
3415 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3416 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3417 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3418 * updated to the new max unique if we create a new
3422 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3423 VnodeClass class, AFSFid *afid, Unique *maxunique)
3426 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3427 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
3431 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3432 /* no free vnodes; make a new one */
3433 salvinfo->vnodeInfo[class].nVnodes++;
3434 salvinfo->vnodeInfo[class].vnodes =
3435 realloc(salvinfo->vnodeInfo[class].vnodes,
3436 sizeof(struct VnodeEssence) * (i+1));
3438 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
3441 afid->Vnode = bitNumberToVnodeNumber(i, class);
3443 if (volHeader->uniquifier < (*maxunique + 1)) {
3444 /* header uniq is bad; it will get bumped by 2000 later */
3445 afid->Unique = *maxunique + 1 + 2000;
3448 /* header uniq seems okay; just use that */
3449 afid->Unique = *maxunique = volHeader->uniquifier++;
3454 * Create a vnode for a README file explaining not to use a recreated-root vol.
3456 * @param[in] volHeader vol header for the volume
3457 * @param[in] alinkH ihandle for i/o for the volume
3458 * @param[in] vid volume id
3459 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3460 * updated to the new max unique if we create a new
3462 * @param[out] afid FID for the new readme vnode
3463 * @param[out] ainode the inode for the new readme file
3465 * @return operation status
/*
 * CreateReadme: create the README file that is placed in a recreated root
 * directory.
 *
 * Obtains a small-vnode FID from GetNewFID, creates an inode for it with
 * IH_CREATE, writes the readme text into that inode, writes a matching
 * small disk vnode (type vFile, mode 0777, link count 1, data version 1)
 * into the small vnode index, and fills in the in-memory VnodeEssence for
 * the new vnode.  *ainode is set to the new inode.  The recovery code at
 * the end drops the inode again with IH_DEC.
 *
 * NOTE(review): this listing omits some short source lines (returns, gotos,
 * labels, short assignments), so the exact error paths are not visible.
 */
3470 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3471 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3475 struct VnodeDiskObject *rvnode = NULL;
3477 IHandle_t *readmeH = NULL;
3478 struct VnodeEssence *vep;
3480 time_t now = time(NULL);
3482 /* Try to make the note brief, but informative. Only administrators should
3483 * be able to read this file at first, so we can hopefully assume they
3484 * know what AFS is, what a volume is, etc. */
3486 "This volume has been salvaged, but has lost its original root directory.\n"
3487 "The root directory that exists now has been recreated from orphan files\n"
3488 "from the rest of the volume. This recreated root directory may interfere\n"
3489 "with old cached data on clients, and there is no way the salvager can\n"
3490 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3491 "use this volume, but only copy the salvaged data to a new volume.\n"
3492 "Continuing to use this volume as it exists now may cause some clients to\n"
3493 "behave oddly when accessing this volume.\n"
3494 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3495 /* ^ the person reading this probably just lost some data, so they could
3496 * use some cheering up. */
3498 /* -1 for the trailing NUL */
3499 length = sizeof(readme) - 1;
3501 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
3503 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3505 /* create the inode and write the contents */
3506 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3507 salvinfo->fileSysPath, 0, vid,
3508 afid->Vnode, afid->Unique, 1);
3509 if (!VALID_INO(readmeinode)) {
3510 Log("CreateReadme: readme IH_CREATE failed\n");
3514 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3515 bytes = IH_IWRITE(readmeH, 0, readme, length);
3516 IH_RELEASE(readmeH);
3518 if (bytes != length) {
/* report the size that was actually requested: length excludes the NUL */
3519 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3520 (int)length);
3524 /* create the vnode and write it out */
3525 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
3527 Log("CreateReadme: error alloc'ing memory\n");
3531 rvnode->type = vFile;
3533 rvnode->modeBits = 0777;
3534 rvnode->linkCount = 1;
3535 VNDISK_SET_LEN(rvnode, length);
3536 rvnode->uniquifier = afid->Unique;
3537 rvnode->dataVersion = 1;
3538 VNDISK_SET_INO(rvnode, readmeinode);
3539 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3544 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
3546 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3547 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3548 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3550 if (bytes != SIZEOF_SMALLDISKVNODE) {
3551 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3552 (int)SIZEOF_SMALLDISKVNODE);
3556 /* update VnodeEssence for new readme vnode */
3557 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
3559 vep->blockCount = nBlocks(length);
3560 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3561 vep->parent = rvnode->parent;
3562 vep->unique = rvnode->uniquifier;
3563 vep->modeBits = rvnode->modeBits;
3564 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3565 vep->type = rvnode->type;
3566 vep->author = rvnode->author;
3567 vep->owner = rvnode->owner;
3568 vep->group = rvnode->group;
3578 *ainode = readmeinode;
3583 if (IH_DEC(alinkH, readmeinode, vid)) {
3584 Log("CreateReadme (recovery): IH_DEC failed\n");
3596 * create a root dir for a volume that lacks one.
3598 * @param[in] volHeader vol header for the volume
3599 * @param[in] alinkH ihandle for disk access for this volume group
3600 * @param[in] vid volume id we're dealing with
3601 * @param[out] rootdir populated with info about the new root dir
3602 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3603 * updated to the new max unique if we create a new
3606 * @return operation status
/*
 * CreateRootDir: recreate vnode 1 (the root directory) for a volume that
 * has lost it.
 *
 * Visible steps, in order:
 *  - refuse (with a log message) when neither vnode class has a vnodes
 *    array, or when the large-vnode slot for vnode 1 is not vNull;
 *  - when the large class has no vnodes array, allocate one-entry vnodes
 *    and inodes arrays for it;
 *  - create the README vnode through CreateReadme;
 *  - create the directory inode with IH_CREATE, build the directory with
 *    MakeDir and add a "README.ROOTDIR" entry pointing at the README FID;
 *  - build a large disk vnode (type vDirectory, mode 0777, link count 2,
 *    uniquifier 1, data version dv) whose ACL entry 0 grants READ|LOOKUP
 *    to id -204, and write it at the index offset of vnode 1;
 *  - update the in-memory VnodeEssence and the caller's DirSummary.
 * The recovery code at the end releases the root and readme inodes with
 * IH_DEC when decroot / decreadme are set.
 *
 * NOTE(review): this listing omits some short source lines (returns, gotos,
 * labels, short assignments such as the value given to dv), so the exact
 * error paths are not visible here.
 * NOTE(review): the strdup(".") result stored in rootdir->name is not
 * checked in the visible lines.
 */
3611 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3612 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
3616 int decroot = 0, decreadme = 0;
3617 AFSFid did, readmeid;
3620 struct VnodeDiskObject *rootvnode = NULL;
3621 struct acl_accessList *ACL;
3624 struct VnodeEssence *vep;
3626 time_t now = time(NULL);
3628 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3629 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3633 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3634 /* We don't have any large vnodes in the volume; allocate room
3635 * for one so we can recreate the root dir */
3636 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3637 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3638 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3640 assert(salvinfo->vnodeInfo[vLarge].vnodes);
3641 assert(salvinfo->vnodeInfo[vLarge].inodes);
3644 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3645 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3646 if (vep->type != vNull) {
3647 Log("Not creating new root dir; existing vnode 1 is non-null\n");
3651 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3652 &readmeinode) != 0) {
3657 /* set the DV to a very high number, so it is unlikely that we collide
3658 * with a cached DV */
3661 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3663 if (!VALID_INO(rootinode)) {
3664 Log("CreateRootDir: IH_CREATE failed\n");
3669 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3670 rootinode, &salvinfo->VolumeChanged);
3674 if (MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3675 Log("CreateRootDir: MakeDir failed\n");
3678 if (Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3679 Log("CreateRootDir: Create failed\n");
3683 length = Length(&rootdir->dirHandle);
3684 DZap((void *)&rootdir->dirHandle);
3686 /* create the new root dir vnode */
3687 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
3689 Log("CreateRootDir: malloc failed\n");
3693 /* only give 'rl' permissions to 'system:administrators'. We do this to
3694 * try to catch the attention of an administrator, that they should not
3695 * be writing to this directory or continue to use it. */
3696 ACL = VVnodeDiskACL(rootvnode);
3697 ACL->size = sizeof(struct acl_accessList);
3698 ACL->version = ACL_ACLVERSION;
3702 ACL->entries[0].id = -204; /* system:administrators */
3703 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
3705 rootvnode->type = vDirectory;
3706 rootvnode->cloned = 0;
3707 rootvnode->modeBits = 0777;
3708 rootvnode->linkCount = 2;
3709 VNDISK_SET_LEN(rootvnode, length);
3710 rootvnode->uniquifier = 1;
3711 rootvnode->dataVersion = dv;
3712 VNDISK_SET_INO(rootvnode, rootinode);
3713 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3714 rootvnode->author = 0;
3715 rootvnode->owner = 0;
3716 rootvnode->parent = 0;
3717 rootvnode->group = 0;
3718 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3720 /* write it out to disk */
3721 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3722 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3723 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3725 if (bytes != SIZEOF_LARGEDISKVNODE) {
3726 /* just cast to int and don't worry about printing real 64-bit ints;
3727 * a large disk vnode isn't anywhere near the 32-bit limit */
3728 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3729 (int)SIZEOF_LARGEDISKVNODE);
3733 /* update VnodeEssence for the new root vnode */
3734 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3736 vep->blockCount = nBlocks(length);
3737 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3738 vep->parent = rootvnode->parent;
3739 vep->unique = rootvnode->uniquifier;
3740 vep->modeBits = rootvnode->modeBits;
3741 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3742 vep->type = rootvnode->type;
3743 vep->author = rootvnode->author;
3744 vep->owner = rootvnode->owner;
3745 vep->group = rootvnode->group;
3755 /* update DirSummary for the new root vnode */
3756 rootdir->vnodeNumber = 1;
3757 rootdir->unique = 1;
3758 rootdir->haveDot = 1;
3759 rootdir->haveDotDot = 1;
3760 rootdir->rwVid = vid;
3761 rootdir->copied = 0;
3762 rootdir->parent = 0;
3763 rootdir->name = strdup(".");
3764 rootdir->vname = volHeader->name;
3765 rootdir->ds_linkH = alinkH;
3772 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3773 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3775 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3776 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3786 * salvage a volume group.
3788 * @param[in] salvinfo information for the current salvage job
3789 * @param[in] rwIsp inode summary for rw volume
3790 * @param[in] alinkH link table inode handle
3792 * @return operation status
/*
 * SalvageVolume: salvage the vnode indexes and directory tree of one
 * read-write volume.
 *
 * Visible phases, in order:
 *  1. read the VolumeDiskData header and assert its size, magic and that
 *     destroyMe is not DESTROY_ME;
 *  2. DistilVnodeEssence for the large and small vnode indexes, collecting
 *     maxunique;
 *  3. SalvageDir for every large (directory) vnode, then sync;
 *  4. when no root directory was found, orphans == ORPH_ATTACH and not
 *     Testing, try CreateRootDir;
 *  5. walk every vnode that is in use, unclaimed and not vnode 1; fix the
 *     old parent's link-count correction and, when attaching is requested,
 *     re-parent the vnode under the root directory as
 *     "__ORPHANDIR__.<vnode>.<unique>" / "__ORPHANFILE__.<vnode>.<unique>";
 *  6. rewrite every vnode whose essence is marked changed or carries a
 *     non-zero count, deleting orphans when requested;
 *  7. free saved names, update filecount/diskused and the uniquifier in the
 *     header, ask the fileserver to break callbacks, clear the in-use and
 *     needs-salvage flags and write the header back.
 *
 * In this function a VnodeEssence "count" is a correction that is
 * subtracted from the on-disk link count (vnode.linkCount - vnp->count in
 * phase 6), so count-- raises the resulting link count and count++ lowers
 * it.
 *
 * NOTE(review): this listing omits some short source lines (braces, else,
 * break/continue, short conditions and declarations), so several of the
 * conditions guarding the statements below are not visible here.
 * NOTE(review): Delete()/Create() are called inside assert(); confirm that
 * the assert macro used by this file is never compiled out.
 */
3796 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3798 /* This routine, for now, will only be called for read-write volumes */
3800 int BlocksInVolume = 0, FilesInVolume = 0;
3802 struct DirSummary rootdir, oldrootdir;
3803 struct VnodeInfo *dirVnodeInfo;
3804 struct VnodeDiskObject vnode;
3805 VolumeDiskData volHeader;
3807 int orphaned, rootdirfound = 0;
3808 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3809 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3810 struct VnodeEssence *vep;
3813 afs_sfsize_t nBytes;
3815 VnodeId LFVnode, ThisVnode;
3816 Unique LFUnique, ThisUnique;
3820 vid = rwIsp->volSummary->header.id;
3821 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3822 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3823 assert(nBytes == sizeof(volHeader));
3824 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3825 assert(volHeader.destroyMe != DESTROY_ME);
3826 /* (should not have gotten this far with DESTROY_ME flag still set!) */
3828 DistilVnodeEssence(salvinfo, vid, vLarge,
3829 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
3830 DistilVnodeEssence(salvinfo, vid, vSmall,
3831 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
3833 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
3834 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3835 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
3836 &rootdir, &rootdirfound);
3839 nt_sync(salvinfo->fileSysDevice);
3841 sync(); /* This used to be done lower level, for every dir */
3848 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
3850 Log("Cannot find root directory for volume %lu; attempting to create "
3851 "a new one\n", afs_printable_uint32_lu(vid));
3853 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
3858 salvinfo->VolumeChanged = 1;
3862 /* Parse each vnode looking for orphaned vnodes and
3863 * connect them to the tree as orphaned (if requested).
3865 oldrootdir = rootdir;
3866 for (class = 0; class < nVNODECLASSES; class++) {
3867 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
3868 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
3869 ThisVnode = bitNumberToVnodeNumber(v, class);
3870 ThisUnique = vep->unique;
3872 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3873 continue; /* Ignore unused, claimed, and root vnodes */
3875 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3876 * entry in this vnode had incremented the parent link count (In
3877 * JudgeEntry()). We need to go to the parent and decrement that
3878 * link count. But if the parent's unique is zero, then the parent
3879 * link count was not incremented in JudgeEntry().
3881 if (class == vLarge) { /* directory vnode */
3882 pv = vnodeIdToBitNumber(vep->parent);
3883 if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
3884 if (vep->parent == 1 && newrootdir) {
3885 /* this vnode's parent was the volume root, and
3886 * we just created the volume root. So, the parent
3887 * dir didn't exist during JudgeEntry, so the link
3888 * count was not inc'd there, so don't dec it here.
3894 salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
3900 continue; /* If no rootdir, can't attach orphaned files */
3902 /* Here we attach orphaned files and directories into the
3903 * root directory, LFVnode, making sure link counts stay correct.
3905 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3906 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3907 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3909 /* Update this orphaned vnode's info. Its parent info and
3910 * link count (do for orphaned directories and files).
3912 vep->parent = LFVnode; /* Parent is the root dir */
3913 vep->unique = LFUnique;
3916 vep->count--; /* Inc link count (root dir will pt to it); count is subtracted from linkCount when the vnode is rewritten */
3918 /* If this orphaned vnode is a directory, change '..'.
3919 * The name of the orphaned dir/file is unknown, so we
3920 * build a unique name. No need to CopyOnWrite the directory
3921 * since it is not connected to tree in BK or RO volume and
3922 * won't be visible there.
3924 if (class == vLarge) {
3928 /* Remove and recreate the ".." entry in this orphaned directory */
3929 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
3930 salvinfo->vnodeInfo[class].inodes[v],
3931 &salvinfo->VolumeChanged);
3933 pa.Unique = LFUnique;
3934 assert(Delete(&dh, "..") == 0);
3935 assert(Create(&dh, "..", &pa) == 0);
3937 /* The original parent's link count was decremented above.
3938 * Here we increment the new parent's link count.
3940 pv = vnodeIdToBitNumber(LFVnode);
3941 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
3945 /* Go to the root dir and add this entry. The link count of the
3946 * root dir was incremented when ".." was created. Try 10 times.
3948 for (j = 0; j < 10; j++) {
3949 pa.Vnode = ThisVnode;
3950 pa.Unique = ThisUnique;
3952 (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
3954 vLarge) ? "__ORPHANDIR__" :
3955 "__ORPHANFILE__"), ThisVnode,
3958 CopyOnWrite(salvinfo, &rootdir);
3959 code = Create(&rootdir.dirHandle, npath, &pa);
3963 ThisUnique += 50; /* Try creating a different file */
3966 Log("Attaching orphaned %s to volume's root dir as %s\n",
3967 ((class == vLarge) ? "directory" : "file"), npath);
3969 } /* for each vnode in the class */
3970 } /* for each class of vnode */
3972 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
3974 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
3976 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
3979 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
3982 DFlush(); /* Flush the changes */
3983 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
3984 Log("Cannot attach orphaned files and directories: Root directory not found\n");
3985 orphans = ORPH_IGNORE;
3988 /* Write out all changed vnodes. Orphaned files and directories
3989 * will get removed here also (if requested).
3991 for (class = 0; class < nVNODECLASSES; class++) {
3992 int nVnodes = salvinfo->vnodeInfo[class].nVnodes;
3993 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3994 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
3995 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
3996 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
3997 for (i = 0; i < nVnodes; i++) {
3998 struct VnodeEssence *vnp = &vnodes[i];
3999 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
4001 /* If the vnode is good but is unclaimed (not listed in
4002 * any directory entries), then it is orphaned.
4005 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
4006 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
4010 if (vnp->changed || vnp->count) {
4013 IH_IREAD(salvinfo->vnodeInfo[class].handle,
4014 vnodeIndexOffset(vcp, vnodeNumber),
4015 (char *)&vnode, sizeof(vnode));
4016 assert(nBytes == sizeof(vnode));
4018 vnode.parent = vnp->parent;
4019 oldCount = vnode.linkCount;
4020 vnode.linkCount = vnode.linkCount - vnp->count;
4023 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4025 if (!vnp->todelete) {
4026 /* Orphans should have already been attached (if requested) */
4027 assert(orphans != ORPH_ATTACH);
4028 oblocks += vnp->blockCount;
4031 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4033 BlocksInVolume -= vnp->blockCount;
4035 if (VNDISK_GET_INO(&vnode)) {
4037 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4040 memset(&vnode, 0, sizeof(vnode));
4042 } else if (vnp->count) {
4044 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4047 vnode.modeBits = vnp->modeBits;
4050 vnode.dataVersion++;
4053 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4054 vnodeIndexOffset(vcp, vnodeNumber),
4055 (char *)&vnode, sizeof(vnode));
4056 assert(nBytes == sizeof(vnode));
4058 salvinfo->VolumeChanged = 1;
4062 if (!Showmode && ofiles) {
4063 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4065 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
4069 for (class = 0; class < nVNODECLASSES; class++) {
4070 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4071 for (i = 0; i < vip->nVnodes; i++)
4072 if (vip->vnodes[i].name)
4073 free(vip->vnodes[i].name);
4080 /* Set correct resource utilization statistics */
4081 volHeader.filecount = FilesInVolume;
4082 volHeader.diskused = BlocksInVolume;
4084 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
4085 if (volHeader.uniquifier < (maxunique + 1)) {
4087 Log("Volume uniquifier is too low; fixed\n");
4088 /* Plus 2,000 in case there are workstations out there with
4089 * cached vnodes that have since been deleted
4091 volHeader.uniquifier = (maxunique + 1 + 2000);
4095 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4096 "Only use this salvaged volume to copy data to another volume; "
4097 "do not continue to use this volume (%lu) as-is.\n",
4098 afs_printable_uint32_lu(vid));
4101 #ifdef FSSYNC_BUILD_CLIENT
4102 if (!Testing && salvinfo->VolumeChanged) {
4103 afs_int32 fsync_code;
4105 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4107 Log("Error trying to tell the fileserver to break callbacks for "
4108 "changed volume %lu; error code %ld\n",
4109 afs_printable_uint32_lu(vid),
4110 afs_printable_int32_ld(fsync_code));
4112 salvinfo->VolumeChanged = 0;
4115 #endif /* FSSYNC_BUILD_CLIENT */
4117 /* Turn off the inUse bit; the volume's been salvaged! */
4118 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4119 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4120 volHeader.inService = 1; /* allow service again */
4121 volHeader.needsCallback = (salvinfo->VolumeChanged != 0);
4122 volHeader.dontSalvage = DONT_SALVAGE;
4123 salvinfo->VolumeChanged = 0;
4125 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4126 assert(nBytes == sizeof(volHeader));
4129 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
4130 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
4131 FilesInVolume, BlocksInVolume);
4134 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4135 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
/*
 * ClearROInUseBit: mark a volume header as no longer in use.
 *
 * Reads the VolumeDiskData through summary->volumeInfoHandle, asserts the
 * read size and the VOLUMEINFOMAGIC stamp, clears inUse and needsSalvaged,
 * sets inService and dontSalvage = DONT_SALVAGE, and writes the header
 * back (asserting the write size).
 *
 * NOTE(review): this listing omits some short source lines, so a condition
 * guarding the write-back may exist that is not visible here -- confirm.
 */
4141 ClearROInUseBit(struct VolumeSummary *summary)
4143 IHandle_t *h = summary->volumeInfoHandle;
4144 afs_sfsize_t nBytes;
4146 VolumeDiskData volHeader;
4148 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4149 assert(nBytes == sizeof(volHeader));
4150 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
4151 volHeader.inUse = 0;
4152 volHeader.needsSalvaged = 0;
4153 volHeader.inService = 1;
4154 volHeader.dontSalvage = DONT_SALVAGE;
4156 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4157 assert(nBytes == sizeof(volHeader));
4162 * Possibly delete the volume.
4164 * deleteMe - Always do so, only a partial volume.
/*
 * MaybeZapVolume: delete a volume that cannot be salvaged, or abort.
 *
 * For a read-only volume, or when deleteMe is set, and the volume has a
 * header file name: logs the reason (partial volume vs. read-only volume
 * that should be recloned), destroys the volume disk header with
 * VDestroyVolumeDiskHeader, and then unlinks the header file itself,
 * treating ENOENT as success.  Otherwise, when check is zero, logs that
 * the salvage of the read-write volume failed and calls Abort().
 *
 * NOTE(review): this listing omits some short source lines, so the
 * conditions selecting between the two log messages are not visible.
 */
4167 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4168 char *message, int deleteMe, int check)
4170 if (readOnly(isp) || deleteMe) {
4171 if (isp->volSummary && isp->volSummary->fileName) {
4174 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
4176 Log("It will be deleted on this server (you may find it elsewhere)\n");
4179 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
4181 Log("it will be deleted instead. It should be recloned.\n");
/* NOTE(review): unbounded sprintf; the declaration of path is not visible
 * in this listing -- confirm it can hold fileSysPath + "/" + fileName. */
4186 sprintf(path, "%s/%s", salvinfo->fileSysPath, isp->volSummary->fileName);
4188 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4190 Log("Error %ld destroying volume disk header for volume %lu\n",
4191 afs_printable_int32_ld(code),
4192 afs_printable_uint32_lu(isp->volumeId));
4195 /* make sure we actually delete the fileName file; ENOENT
4196 * is fine, since VDestroyVolumeDiskHeader probably already
4198 if (unlink(path) && errno != ENOENT) {
4199 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4203 } else if (!check) {
4204 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
4206 Abort("Salvage of volume %u aborted\n", isp->volumeId);
4210 #ifdef AFS_DEMAND_ATTACH_FS
4212 * Locks a volume on disk for salvaging.
4214 * @param[in] volumeId volume ID to lock
4216 * @return operation status
4218 * @retval -1 volume lock raced with a fileserver restart; all volumes must be
4219 * checked out and locked again
/*
 * LockVolume: take the on-disk lock for volumeId and mark the volume as in
 * use by this program.
 *
 * Visible steps, in order:
 *  - obtain the lock type from VVolLockType(V_VOLUPD, 1) and try a
 *    non-blocking lock with VLockVolumeByIdNB; Abort() on failure, with a
 *    dedicated message for EBUSY;
 *  - confirm with FSYNC_VerifyCheckout that the volume is checked out for
 *    salvage; SYNC_DENIED means the checkout must be retried, any other
 *    non-OK code aborts;
 *  - read the volume disk header and the VolumeDiskData, set
 *    volHeader.inUse = programType and write it back.
 *
 * NOTE(review): this listing omits some short source lines (returns, else,
 * braces), so the return statements are not visible here.
 * NOTE(review): the final IH_IWRITE is performed inside assert(); confirm
 * that the assert macro used by this file is never compiled out.
 */
4224 LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
4229 /* should always be WRITE_LOCK, but keep the lock-type logic all
4230 * in one place, in VVolLockType. Params will be ignored, but
4231 * try to provide what we're logically doing. */
4232 locktype = VVolLockType(V_VOLUPD, 1);
4234 code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
4236 if (code == EBUSY) {
4237 Abort("Someone else appears to be using volume %lu; Aborted\n",
4238 afs_printable_uint32_lu(volumeId));
4240 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4241 afs_printable_int32_ld(code),
4242 afs_printable_uint32_lu(volumeId));
4245 code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPathName, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4246 if (code == SYNC_DENIED) {
4247 /* need to retry checking out volumes */
4250 if (code != SYNC_OK) {
4251 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4252 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4255 /* set inUse = programType in the volume header to ensure that nobody
4256 * tries to use this volume again without salvaging, if we somehow crash
4257 * or otherwise exit before finishing the salvage.
4261 struct VolumeHeader header;
4262 struct VolumeDiskHeader diskHeader;
4263 struct VolumeDiskData volHeader;
4265 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
4270 DiskToVolumeHeader(&header, &diskHeader);
4272 IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
4273 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4274 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4280 volHeader.inUse = programType;
4282 /* If we can't re-write the header, bail out and error. We don't
4283 * assert when reading the header, since it's possible the
4284 * header isn't really there (when there's no data associated
4285 * with the volume; we just delete the vol header file in that
4286 * case). But if it's there enough that we can read it, but
4287 * somehow we cannot write to it to signify we're salvaging it,
4288 * we've got a big problem and we cannot continue. */
4289 assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
4296 #endif /* AFS_DEMAND_ATTACH_FS */
/*
 * AskOffline: ask the fileserver to take volumeId offline for salvaging.
 *
 * Sends FSYNC_VOL_OFF / FSYNC_SALVAGE through FSYNC_VolOp, up to three
 * times.  SYNC_DENIED and SYNC_BAD_COMMAND are logged and end in Abort();
 * any other failure is logged as "trying again" and followed by
 * FSYNC_clientFinis().  If the last code is still not SYNC_OK the salvage
 * is aborted.
 *
 * NOTE(review): this listing omits some short source lines (break, else,
 * the reconnect after FSYNC_clientFinis, ...), so the exact retry flow is
 * not visible here.
 */
4299 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
4304 memset(&res, 0, sizeof(res));
4306 for (i = 0; i < 3; i++) {
4307 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4308 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4310 if (code == SYNC_OK) {
4312 } else if (code == SYNC_DENIED) {
4313 #ifdef DEMAND_ATTACH_ENABLE
4314 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4316 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4318 Abort("Salvage aborted\n");
4319 } else if (code == SYNC_BAD_COMMAND) {
4320 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
4322 #ifdef DEMAND_ATTACH_ENABLE
4323 Log("AskOffline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
4325 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4327 Abort("Salvage aborted\n");
4330 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4331 FSYNC_clientFinis();
4335 if (code != SYNC_OK) {
4336 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4337 Abort("Salvage aborted\n");
/*
 * AskOnline: ask the fileserver to bring volumeId back online.
 *
 * Sends FSYNC_VOL_ON / FSYNC_WHATEVER through FSYNC_VolOp, up to three
 * times.  SYNC_DENIED and SYNC_BAD_COMMAND are logged; any other failure is
 * logged as "trying again" and followed by FSYNC_clientFinis().
 *
 * NOTE(review): this listing omits some short source lines (break, else,
 * the reconnect after FSYNC_clientFinis, ...), so the exact retry flow is
 * not visible here.
 */
4342 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4346 for (i = 0; i < 3; i++) {
4347 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4348 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4350 if (code == SYNC_OK) {
4352 } else if (code == SYNC_DENIED) {
4353 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4354 } else if (code == SYNC_BAD_COMMAND) {
4355 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4357 #ifdef DEMAND_ATTACH_ENABLE
4358 Log("AskOnline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
4360 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
/* this request is FSYNC_VOL_ON, so report it as an online request */
4365 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
4366 FSYNC_clientFinis();
4373 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4375 /* Volume parameter is passed in case iopen is upgraded in future to
4376 * require a volume Id to be passed
4379 IHandle_t *srcH, *destH;
4380 FdHandle_t *srcFdP, *destFdP;
4383 IH_INIT(srcH, device, rwvolume, inode1);
4384 srcFdP = IH_OPEN(srcH);
4385 assert(srcFdP != NULL);
4386 IH_INIT(destH, device, rwvolume, inode2);
4387 destFdP = IH_OPEN(destH);
4388 while ((nBytes = FDH_READ(srcFdP, buf, sizeof(buf))) > 0)
4389 assert(FDH_WRITE(destFdP, buf, nBytes) == nBytes);
4390 assert(nBytes == 0);
4391 FDH_REALLYCLOSE(srcFdP);
4392 FDH_REALLYCLOSE(destFdP);
/*
 * PrintInodeList: debugging aid that logs every record of the inode file.
 *
 * Sizes a buffer from afs_fstat() on salvinfo->inodeFd, reads the whole
 * file into it in one read() and logs one line per struct ViceInodeInfo
 * (inode number, link count, byte count and the four params).
 *
 * NOTE(review): a release of buf is not visible in this listing (short
 * lines are omitted) -- confirm the buffer is freed.
 * NOTE(review): afs_fstat() and read() are called inside assert(); confirm
 * that the assert macro used by this file is never compiled out.
 */
4399 PrintInodeList(struct SalvInfo *salvinfo)
4401 struct ViceInodeInfo *ip;
4402 struct ViceInodeInfo *buf;
4403 struct afs_stat status;
4406 assert(afs_fstat(salvinfo->inodeFd, &status) == 0);
4407 buf = (struct ViceInodeInfo *)malloc(status.st_size);
4408 assert(buf != NULL);
4409 nInodes = status.st_size / sizeof(struct ViceInodeInfo);
4410 assert(read(salvinfo->inodeFd, buf, status.st_size) == status.st_size);
4411 for (ip = buf; nInodes--; ip++) {
4412 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
4413 PrintInode(NULL, ip->inodeNumber), ip->linkCount,
4414 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
4415 ip->u.param[2], ip->u.param[3]);
/*
 * PrintInodeSummary: debugging aid that logs one line for each of the
 * salvinfo->nVolumesInInodeFile entries of salvinfo->inodeSummary (volume
 * ids, index, inode counts and max uniquifier).
 */
4421 PrintInodeSummary(struct SalvInfo *salvinfo)
4424 struct InodeSummary *isp;
4426 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4427 isp = &salvinfo->inodeSummary[i];
4428 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/*
 * PrintVolumeSummary: debugging aid that logs the header file name of each
 * of the salvinfo->nVolumes entries starting at salvinfo->volumeSummaryp.
 */
4433 PrintVolumeSummary(struct SalvInfo *salvinfo)
4436 struct VolumeSummary *vsp;
4438 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; vsp++, i++) {
4439 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
/*
 * Fragment of the routine that forks a salvager child; its signature and
 * the fork call itself are not part of this listing (presumably Fork() --
 * confirm against the full file).
 *
 * The NT code path is never expected to get here (assert(0)).  In the
 * demand-attach build, a child process (f == 0) running as salvageServer
 * re-establishes its fileserver connection with VChildProcReconnectFS_r()
 * when FSSYNC_BUILD_CLIENT is defined.
 */
4449 assert(0); /* Fork is never executed in the NT code path */
4453 #ifdef AFS_DEMAND_ATTACH_FS
4454 if ((f == 0) && (programType == salvageServer)) {
4455 /* we are a salvageserver child */
4456 #ifdef FSSYNC_BUILD_CLIENT
4457 VChildProcReconnectFS_r();
4459 #ifdef SALVSYNC_BUILD_CLIENT
4463 #endif /* AFS_DEMAND_ATTACH_FS */
4464 #endif /* !AFS_NT40_ENV */
/*
 * Fragment of an exit path; its signature is not part of this listing
 * (presumably the salvager's Exit(code) wrapper -- confirm against the full
 * file).
 *
 * In the demand-attach build a salvageServer process does SALVSYNC / FSSYNC
 * specific work here (the calls themselves are not visible).  A thread
 * other than main_thread ends with pthread_exit(code) instead of exiting
 * the whole process.
 */
4474 #ifdef AFS_DEMAND_ATTACH_FS
4475 if (programType == salvageServer) {
4476 #ifdef SALVSYNC_BUILD_CLIENT
4479 #ifdef FSSYNC_BUILD_CLIENT
4483 #endif /* AFS_DEMAND_ATTACH_FS */
4486 if (main_thread != pthread_self())
4487 pthread_exit((void *)code);
/*
 * Fragment of the routine that reaps a child process; its signature is not
 * part of this listing (presumably Wait(prog) -- confirm against the full
 * file).
 *
 * Waits for any child, logs when the child dumped core, and treats
 * termination by signal or a non-zero exit status as a failure (the action
 * taken on failure is not visible here).
 */
4500 pid = wait(&status);
4502 if (WCOREDUMP(status))
4503 Log("\"%s\" core dumped!\n", prog);
4504 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp: format clock as local time, "MM/DD/YYYY HH:MM:SS" or
 * "MM/DD/YYYY HH:MM".
 *
 * The text is written into a function-static 20-byte buffer, which is
 * exactly large enough for the longer form plus the terminating NUL when
 * the year has four digits.  Because the buffer is static, each call
 * overwrites the previous result and the function is not reentrant.
 *
 * NOTE(review): the test on precision that chooses between the two formats
 * is not visible in this listing; presumably non-zero selects the form with
 * seconds -- confirm.
 * NOTE(review): the localtime() result is used without a NULL check in the
 * visible lines.
 */
4510 TimeStamp(time_t clock, int precision)
4513 static char timestamp[20];
4514 lt = localtime(&clock);
4516 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M:%S", lt);
4518 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M", lt);
/*
 * CheckLogFile: rotate the salvager log and open a fresh one.
 *
 * Builds "<log_path>.old", renames the current log to that name, and opens
 * log_path for appending as logFile.  If the open fails, the comment below
 * indicates stdout is used instead.
 *
 * NOTE(review): this listing omits some short source lines (the conditions
 * around the non-NT section and the stdout fallback are not visible).
 */
4523 CheckLogFile(char * log_path)
4525 char oldSlvgLog[AFSDIR_PATH_MAX];
4527 #ifndef AFS_NT40_ENV
/* bounded formatting: a long log_path must not overflow oldSlvgLog */
4534 (void)afs_snprintf(oldSlvgLog, sizeof oldSlvgLog, "%s.old", log_path);
4537 renamefile(log_path, oldSlvgLog);
4538 logFile = afs_fopen(log_path, "a");
4540 if (!logFile) { /* still nothing, use stdout */
4544 #ifndef AFS_NAMEI_ENV
4545 AFS_DEBUG_IOPS_LOG(logFile);
4550 #ifndef AFS_NT40_ENV
/*
 * TimeStampLogFile: give the current log an additional, timestamped name.
 *
 * Formats "<log_path>.YYYY-MM-DD.HH:MM:SS" from the local time of now into
 * a bounded buffer and hard-links log_path to that name.  A failing link()
 * is deliberately ignored, as the comment below explains.
 *
 * NOTE(review): the assignment of now and the last format argument are on
 * lines that this listing omits.
 */
4552 TimeStampLogFile(char * log_path)
4554 char stampSlvgLog[AFSDIR_PATH_MAX];
4559 lt = localtime(&now);
4560 (void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
4561 "%s.%04d-%02d-%02d.%02d:%02d:%02d",
4562 log_path, lt->tm_year + 1900,
4563 lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
4566 /* try to link the logfile to a timestamped filename */
4567 /* if it fails, oh well, nothing we can do */
4568 link(log_path, stampSlvgLog);
/*
 * Fragment of the routine that prints the salvager log; its signature is
 * not part of this listing (presumably showlog() -- confirm against the
 * full file).
 *
 * Outside NT it reports that the log cannot be shown when syslog is in use
 * (the test itself is not visible).  Otherwise it opens
 * AFSDIR_SERVER_SLVGLOG_FILEPATH for reading, reports a failure to read it,
 * and processes the file line by line with fgets().
 */
4577 #ifndef AFS_NT40_ENV
4579 printf("Can't show log since using syslog.\n");
4590 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
4593 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
4596 while (fgets(line, sizeof(line), logFile))
/*
 * Log: printf-style logging for the salvager.
 *
 * Formats the message into the local buffer tmp with a bounded
 * afs_vsnprintf.  Outside NT the text can go to syslog at LOG_INFO;
 * otherwise it is written to logFile prefixed with
 * TimeStamp(now.tv_sec, 1).
 *
 * NOTE(review): the condition choosing between syslog and logFile, and the
 * va_end / flush calls, are on lines that this listing omits.
 */
4603 Log(const char *format, ...)
4609 va_start(args, format);
4610 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4612 #ifndef AFS_NT40_ENV
4614 syslog(LOG_INFO, "%s", tmp);
4618 gettimeofday(&now, 0);
4619 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort: printf-style fatal-error report.
 *
 * Formats the message into the local buffer tmp with a bounded
 * afs_vsnprintf.  Outside NT the text can go to syslog at LOG_INFO;
 * otherwise it is written to logFile without a timestamp.
 *
 * NOTE(review): the statements that actually terminate the salvager after
 * the message is written are on lines that this listing omits.
 */
4625 Abort(const char *format, ...)
4630 va_start(args, format);
4631 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4633 #ifndef AFS_NT40_ENV
4635 syslog(LOG_INFO, "%s", tmp);
4639 fprintf(logFile, "%s", tmp);
/*
 * ToString: allocates strlen(s) + 1 bytes for s.
 *
 * NOTE(review): the rest of the body is not part of this listing;
 * presumably it copies s into the new buffer and returns it, leaving the
 * caller to free it -- confirm against the full file.
 */
4651 ToString(const char *s)
4654 p = (char *)malloc(strlen(s) + 1);
/*
 * RemoveTheForce: builds "<path>/FORCESALVAGE" in target and, when not
 * Testing and ForceSalvage is set, unlinks that file if afs_stat() finds
 * it.
 *
 * NOTE(review): strcpy/strcat into target are unbounded, and the
 * declaration of target is not visible in this listing -- confirm the
 * buffer can hold path plus "/FORCESALVAGE".
 */
4660 /* Remove the FORCESALVAGE file */
4662 RemoveTheForce(char *path)
4665 struct afs_stat force; /* so we can use afs_stat to find it */
4666 strcpy(target,path);
4667 strcat(target,"/FORCESALVAGE");
4668 if (!Testing && ForceSalvage) {
4669 if (afs_stat(target,&force) == 0) unlink(target);
4673 #ifndef AFS_AIX32_ENV
4675 * UseTheForceLuke - see if we can use the force
/*
 * UseTheForceLuke (non-AIX build): returns non-zero when
 * "<path>/FORCESALVAGE" exists, i.e. when afs_stat() on it succeeds.
 *
 * NOTE(review): strcpy/strcat into target are unbounded, and the
 * declaration of target is not visible in this listing -- confirm the
 * buffer can hold path plus "/FORCESALVAGE".
 */
4678 UseTheForceLuke(char *path)
4680 struct afs_stat force;
4682 strcpy(target,path);
4683 strcat(target,"/FORCESALVAGE");
4685 return (afs_stat(target, &force) == 0);
4689 * UseTheForceLuke - see if we can use the force
4692 * The VRMIX fsck will not muck with the filesystem it is supposedly
4693 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4694 * muck directly with the root inode, which is within the normal
4696 * ListViceInodes() has a side effect of setting ForceSalvage if
4697 * it detects a need, based on root inode examination.
/*
 * UseTheForceLuke (AIX build): always reports that no FORCESALVAGE file is
 * present; per the comment above this variant, ForceSalvage is set as a
 * side effect of ListViceInodes() instead.
 */
4700 UseTheForceLuke(char *path)
4703 return 0; /* sorry OB1 */
4708 /* NT support routines */
/*
 * nt_SalvagePartition: start a child salvager process for one partition
 * (NT only).
 *
 * On first use, caches the path of the running executable in execpathname.
 * Then fills in a child job record (magic, job number, partition name) and
 * spawns the executable with the saved arguments, passing the job record
 * as the child's data block.
 *
 * NOTE(review): this listing omits some short source lines (declarations
 * and the return statements).
 * NOTE(review): strcpy into job.cj_part is unbounded; confirm partName
 * always fits.
 */
4710 static char execpathname[MAX_PATH];
4712 nt_SalvagePartition(char *partName, int jobn)
4717 if (!*execpathname) {
4718 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
/* a return value equal to the size passed in means the path was truncated */
4719 if (!n || n == MAX_PATH - 1)
4722 job.cj_magic = SALVAGER_MAGIC;
4723 job.cj_number = jobn;
4724 (void)strcpy(job.cj_part, partName);
4725 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage: child-side setup for a partition salvage
 * (NT only).
 *
 * Treats datap as a childJob_t, rejects it when len is not
 * sizeof(childJob_t) or the magic is not SALVAGER_MAGIC, then opens
 * "<AFSDIR_SERVER_SLVGLOG_FILEPATH>.<n>" for writing as logFile.
 *
 * NOTE(review): the return values and the last sprintf argument are on
 * lines that this listing omits.
 * NOTE(review): the log name is built with an unbounded sprintf into
 * logname[AFSDIR_PATH_MAX].
 */
4730 nt_SetupPartitionSalvage(void *datap, int len)
4732 childJob_t *jobp = (childJob_t *) datap;
4733 char logname[AFSDIR_PATH_MAX];
4735 if (len != sizeof(childJob_t))
4737 if (jobp->cj_magic != SALVAGER_MAGIC)
4742 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
4744 logFile = afs_fopen(logname, "w");
4752 #endif /* AFS_NT40_ENV */