2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
91 #include <sys/param.h>
95 #endif /* ITIMER_REAL */
101 #include <sys/stat.h>
106 #include <WINNT/afsevent.h>
109 #define WCOREDUMP(x) ((x) & 0200)
112 #include <afs/afsint.h>
113 #include <afs/afs_assert.h>
114 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
115 #if defined(AFS_VFSINCL_ENV)
116 #include <sys/vnode.h>
118 #include <sys/fs/ufs_inode.h>
120 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
121 #include <ufs/ufs/dinode.h>
122 #include <ufs/ffs/fs.h>
124 #include <ufs/inode.h>
127 #else /* AFS_VFSINCL_ENV */
129 #include <ufs/inode.h>
130 #else /* AFS_OSF_ENV */
131 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
132 #include <sys/inode.h>
135 #endif /* AFS_VFSINCL_ENV */
136 #endif /* AFS_SGI_ENV */
139 #include <sys/lockf.h>
143 #include <checklist.h>
145 #if defined(AFS_SGI_ENV)
150 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
153 #include <sys/mnttab.h>
154 #include <sys/mntent.h>
159 #endif /* AFS_SGI_ENV */
160 #endif /* AFS_HPUX_ENV */
165 #include <afs/osi_inode.h>
169 #include <afs/afsutil.h>
170 #include <afs/fileutil.h>
171 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
179 #include <afs/afssyscalls.h>
183 #include "partition.h"
184 #include "daemon_com.h"
186 #include "volume_inline.h"
187 #include "salvsync.h"
188 #include "viceinode.h"
190 #include "volinodes.h" /* header magic number, etc. stuff */
191 #include "vol-salvage.h"
193 #include "vol_internal.h"
195 #include <afs/prs_fs.h>
197 #ifdef FSSYNC_BUILD_CLIENT
198 #include "vg_cache.h"
205 /*@+fcnmacros +macrofcndecl@*/
208 extern off64_t afs_lseek(int FD, off64_t O, int F);
209 #endif /*S_SPLINT_S */
210 #define afs_lseek(FD, O, F) lseek64(FD, (off64_t) (O), F)
211 #define afs_stat stat64
212 #define afs_fstat fstat64
213 #define afs_open open64
214 #define afs_fopen fopen64
215 #else /* !O_LARGEFILE */
217 extern off_t afs_lseek(int FD, off_t O, int F);
218 #endif /*S_SPLINT_S */
219 #define afs_lseek(FD, O, F) lseek(FD, (off_t) (O), F)
220 #define afs_stat stat
221 #define afs_fstat fstat
222 #define afs_open open
223 #define afs_fopen fopen
224 #endif /* !O_LARGEFILE */
225 /*@=fcnmacros =macrofcndecl@*/
228 extern void *calloc();
230 static char *TimeStamp(time_t clock, int precision);
233 int debug; /* -d flag */
234 extern int Testing; /* -n flag */
235 int ListInodeOption; /* -i flag */
236 int ShowRootFiles; /* -r flag */
237 int RebuildDirs; /* -sal flag */
238 int Parallel = 4; /* -para X flag */
239 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
240 int forceR = 0; /* -b flag */
241 int ShowLog = 0; /* -showlog flag */
242 int ShowSuid = 0; /* -showsuid flag */
243 int ShowMounts = 0; /* -showmounts flag */
244 int orphans = ORPH_IGNORE; /* -orphans option */
249 int useSyslog = 0; /* -syslog flag */
250 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
259 #define MAXPARALLEL 32
261 int OKToZap; /* -o flag */
262 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
263 * in the volume header */
265 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
267 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
270 * information that is 'global' to a particular salvage job.
273 Device fileSysDevice; /**< The device number of the current partition
275 char fileSysPath[8]; /**< The path of the mounted partition currently
276 * being salvaged, i.e. the directory containing
277 * the volume headers */
278 char *fileSysPathName; /**< NT needs this to make name pretty log. */
279 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
280 int VGLinkH_cnt; /**< # of references to lnk handle. */
281 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
284 char *fileSysDeviceName; /**< The block device where the file system being
285 * salvaged was mounted */
286 char *filesysfulldev;
288 int VolumeChanged; /**< Set by any routine which would change the
289 * volume in a way which would require callbacks
290 * to be broken if the volume was put back on
291 * on line by an active file server */
293 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
294 * header dealt with */
296 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
297 int inodeFd; /**< File descriptor for inode file */
299 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
300 int nVolumes; /**< Number of volumes (read-write and read-only)
301 * in volume summary */
302 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
305 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
306 * vnodes in the volume that
307 * we are currently looking
315 /* Forward declarations */
316 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
317 static int AskVolumeSummary(struct SalvInfo *salvinfo,
318 VolumeId singleVolumeNumber);
320 #ifdef AFS_DEMAND_ATTACH_FS
321 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
322 #endif /* AFS_DEMAND_ATTACH_FS */
324 /* Uniquifier stored in the Inode */
329 return (u & 0x3fffff);
331 #if defined(AFS_SGI_EXMAG)
332 return (u & SGI_UNIQMASK);
335 #endif /* AFS_SGI_EXMAG */
342 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
344 return 0; /* otherwise may be transient, e.g. EMFILE */
349 char *save_args[MAX_ARGS];
351 extern pthread_t main_thread;
352 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
356 * Get the salvage lock if not already held. Hold until process exits.
358 * @param[in] locktype READ_LOCK or WRITE_LOCK
// Takes the salvager lock file with the requested lock type.
// A VLockFile is initialised for AFSDIR_SERVER_SLVGLOCK_FILEPATH and
// VLockFileLock() is called on it with locktype; the result is kept in code.
// The two message fragments below show two distinct failure reports: another
// salvager appearing to be running, and a numeric error while acquiring the
// lock.
// NOTE(review): confirm which values of code select each message, and what
// offset and nonblock are set to before the VLockFileLock() call.
361 _ObtainSalvageLock(int locktype)
363 struct VLockFile salvageLock;
368 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
370 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
373 "salvager: There appears to be another salvager running! "
378 "salvager: Error %d trying to acquire salvage lock! "
// Requests the salvage lock in write mode by delegating to
// _ObtainSalvageLock(WRITE_LOCK).
384 ObtainSalvageLock(void)
386 _ObtainSalvageLock(WRITE_LOCK);
// Requests the salvage lock in read mode by delegating to
// _ObtainSalvageLock(READ_LOCK).
389 ObtainSharedSalvageLock(void)
391 _ObtainSalvageLock(READ_LOCK);
395 #ifdef AFS_SGI_XFS_IOPS_ENV
396 /* Check if the given partition is mounted. For XFS, the root inode is not a
397 * constant. So we check the hard way.
// Walks the mount table opened with setmntent(MOUNTED, "r") and compares
// part with the mnt_dir of each entry returned by getmntent().
400 IsPartitionMounted(char *part)
403 struct mntent *mntent;
// NOTE(review): the table is opened inside the osi_Assert argument; verify
// that osi_Assert always evaluates its argument in every build.
405 osi_Assert(mntfp = setmntent(MOUNTED, "r"));
406 while (mntent = getmntent(mntfp)) {
407 if (!strcmp(part, mntent->mnt_dir))
// NOTE(review): both arms of this conditional are 1, so the value returned
// does not depend on whether a matching mount entry was found. Presumably the
// second arm was meant to be 0 -- confirm against the callers before changing.
412 return mntent ? 1 : 1;
415 /* Check if the given inode is the root of the filesystem. */
416 #ifndef AFS_SGI_XFS_IOPS_ENV
// Reports whether status describes the root of a filesystem by comparing its
// st_ino with the fixed ROOTINODE value. This version is only compiled when
// AFS_SGI_XFS_IOPS_ENV is not defined.
418 IsRootInode(struct afs_stat *status)
421 * The root inode is not a fixed value in XFS partitions. So we need to
422 * see if the partition is in the list of mounted partitions. This only
423 * affects the SalvageFileSys path, so we check there.
425 return (status->st_ino == ROOTINODE);
430 #ifndef AFS_NAMEI_ENV
431 /* We don't want to salvage big files filesystems, since we can't put volumes on
// Checks the superblock of the device behind a partition.
// The device path is built in name: devName is used as is when it already
// starts with "/dev/", otherwise "/dev/" is prepended. The superblock is then
// read with ReadSuper(); a read failure and a positive IsBigFilesFileSystem()
// result are each logged as a reason for not salvaging the partition.
// NOTE(review): the values returned on each path should be confirmed.
435 CheckIfBigFilesFS(char *mountPoint, char *devName)
437 struct superblock fs;
// NOTE(review): sprintf and strcpy into name are unbounded; check the size of
// name against the longest devName that can reach this function.
440 if (strncmp(devName, "/dev/", 5)) {
441 (void)sprintf(name, "/dev/%s", devName);
443 (void)strcpy(name, devName);
446 if (ReadSuper(&fs, name) < 0) {
447 Log("Unable to read superblock. Not salvaging partition %s.\n",
451 if (IsBigFilesFileSystem(&fs)) {
452 Log("Partition %s is a big files filesystem, not salvaging.\n",
462 #define HDSTR "\\Device\\Harddisk"
463 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
// Function form of SameDisk, built on QueryDosDevice().
// For each of p1 and p2 the device name is resolved into res, the result is
// checked for the HDSTR prefix, and the number following that prefix is
// parsed with atoi() into d1 and d2 respectively. A result that does not start
// with HDSTR produces the WARNING log below.
// NOTE(review): dowarn is a function-static flag, presumably used to limit the
// warning to one occurrence -- confirm.
// NOTE(review): d1 and d2 are presumably compared to form the return value,
// and the value returned when QueryDosDevice() fails should be confirmed.
465 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
470 static int dowarn = 1;
472 if (!QueryDosDevice(p1->devName, res, RES_LEN - 1))
474 if (strncmp(res, HDSTR, HDLEN)) {
477 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
478 res, HDSTR, p1->devName);
482 d1 = atoi(&res[HDLEN]);
484 if (!QueryDosDevice(p2->devName, res, RES_LEN - 1))
486 if (strncmp(res, HDSTR, HDLEN)) {
489 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
490 res, HDSTR, p2->devName);
494 d2 = atoi(&res[HDLEN]);
499 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
502 /* This assumes that two partitions with the same device number divided by
503 * PartsPerDisk are on the same disk.
// Schedules the salvage of partP next to salvages that are already running.
//
// A non-NULL partP is wrapped in a heap-allocated struct job numbered with
// jobcount. A NULL partP means: wait until every outstanding job has finished
// (see the loop condition below).
//
// When debug is set or Parallel is 1 the partition is handed straight to
// SalvageFileSys(). Otherwise the function-static jobs[] array holds one chain
// of jobs per disk (as decided by SameDisk), numjobs counts the chains in use,
// and a finished child is reaped with wait() once numjobs reaches Parallel or
// when draining. Each started job either logs through openlog() or writes to a
// per-job file whose name is derived from AFSDIR_SERVER_SLVGLOG_FILEPATH and
// the job number, then runs SalvageFileSys1().
//
// After the final drain, and only when useSyslog is 0, the per-job log files
// are opened for reading, consumed line by line with fgets(), and unlinked.
// NOTE(review): jobs[], numjobs and jobcount are static, so state persists
// across calls; confirm the function is only ever called from one thread.
506 SalvageFileSysParallel(struct DiskPartition64 *partP)
509 struct DiskPartition64 *partP;
510 int pid; /* Pid for this job */
511 int jobnumb; /* Log file job number */
512 struct job *nextjob; /* Next partition on disk to salvage */
514 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
515 struct job *thisjob = 0;
516 static int numjobs = 0;
517 static int jobcount = 0;
523 char logFileName[256];
527 /* We have a partition to salvage. Copy it into thisjob */
528 thisjob = (struct job *)malloc(sizeof(struct job));
530 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
533 memset(thisjob, 0, sizeof(struct job));
534 thisjob->partP = partP;
535 thisjob->jobnumb = jobcount;
537 } else if (jobcount == 0) {
538 /* We are asking to wait for all jobs (partp == 0), yet we never
541 Log("No file system partitions named %s* found; not salvaged\n",
542 VICE_PARTITION_PREFIX);
546 if (debug || Parallel == 1) {
548 SalvageFileSys(thisjob->partP, 0);
555 /* Check to see if thisjob is for a disk that we are already
556 * salvaging. If it is, link it in as the next job to do. The
557 * jobs array has 1 entry per disk being salvages. numjobs is
558 * the total number of disks currently being salvaged. In
559 * order to keep thejobs array compact, when a disk is
560 * completed, the hightest element in the jobs array is moved
561 * down to now open slot.
563 for (j = 0; j < numjobs; j++) {
564 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
565 /* On same disk, add it to this list and return */
// The new job is inserted directly behind the head of the chain for its disk.
566 thisjob->nextjob = jobs[j]->nextjob;
567 jobs[j]->nextjob = thisjob;
574 /* Loop until we start thisjob or until all existing jobs are finished */
575 while (thisjob || (!partP && (numjobs > 0))) {
576 startjob = -1; /* No new job to start */
578 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
579 /* Either the max jobs are running or we have to wait for all
580 * the jobs to finish. In either case, we wait for at least one
581 * job to finish. When it's done, clean up after it.
583 pid = wait(&wstatus);
584 osi_Assert(pid != -1);
585 for (j = 0; j < numjobs; j++) { /* Find which job it is */
586 if (pid == jobs[j]->pid)
589 osi_Assert(j < numjobs);
590 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
591 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
594 numjobs--; /* job no longer running */
595 oldjob = jobs[j]; /* remember */
596 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
597 free(oldjob); /* free the old job */
599 /* If there is another partition on the disk to salvage, then
600 * say we will start it (startjob). If not, then put thisjob there
601 * and say we will start it.
603 if (jobs[j]) { /* Another partitions to salvage */
604 startjob = j; /* Will start it */
605 } else { /* There is not another partition to salvage */
607 jobs[j] = thisjob; /* Add thisjob */
609 startjob = j; /* Will start it */
611 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
612 startjob = -1; /* Don't start it - already running */
616 /* We don't have to wait for a job to complete */
618 jobs[numjobs] = thisjob; /* Add this job */
620 startjob = numjobs; /* Will start it */
624 /* Start up a new salvage job on a partition in job slot "startjob" */
625 if (startjob != -1) {
627 Log("Starting salvage of file system partition %s\n",
628 jobs[startjob]->partP->name);
630 /* For NT, we not only fork, but re-exec the salvager. Pass in the
631 * commands and pass the child job number via the data path.
634 nt_SalvagePartition(jobs[startjob]->partP->name,
635 jobs[startjob]->jobnumb);
636 jobs[startjob]->pid = pid;
641 jobs[startjob]->pid = pid;
// NOTE(review): this loop covers descriptors 0 through 15; its body presumably
// closes them before logging is re-established -- confirm.
647 for (fd = 0; fd < 16; fd++)
654 openlog("salvager", LOG_PID, useSyslogFacility);
658 (void)afs_snprintf(logFileName, sizeof logFileName,
660 AFSDIR_SERVER_SLVGLOG_FILEPATH,
661 jobs[startjob]->jobnumb);
662 logFile = afs_fopen(logFileName, "w");
667 SalvageFileSys1(jobs[startjob]->partP, 0);
672 } /* while ( thisjob || (!partP && numjobs > 0) ) */
674 /* If waited for all jobs to complete, now collect log files and return */
676 if (!useSyslog) /* if syslogging - no need to collect */
679 for (i = 0; i < jobcount; i++) {
680 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
681 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
682 if ((passLog = afs_fopen(logFileName, "r"))) {
683 while (fgets(buf, sizeof(buf), passLog)) {
688 (void)unlink(logFileName);
// Salvages partP, or only singleVolumeNumber when it is nonzero, by calling
// SalvageFileSys1() with the same arguments.
// The call is made in the current process when canfork is 0 or debug is set,
// and otherwise in the process for which Fork() returns 0.
// NOTE(review): the Wait("SalvageFileSys") call is presumably the parent side
// of that fork, and the inner canfork test presumably ends the child -- confirm.
697 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
699 if (!canfork || debug || Fork() == 0) {
700 SalvageFileSys1(partP, singleVolumeNumber);
701 if (canfork && !debug) {
706 Wait("SalvageFileSys");
// Reduces the device path in pbuffer to the text after its last slash.
// pbuffer is first copied into the 128-byte scratch buffer pbuf and the last
// slash of that copy is located; the last slash of pbuffer itself is then
// located and the text following it is copied to the start of pbuffer.
// NOTE(review): what is written to wpath and what is returned should be
// confirmed; callers in this file use the return value as the device name.
710 get_DevName(char *pbuffer, char *wpath)
712 char pbuf[128], *ptr;
// NOTE(review): unbounded copy; pbuffer must hold fewer than 128 bytes.
713 strcpy(pbuf, pbuffer);
714 ptr = (char *)strrchr(pbuf, '/');
720 ptr = (char *)strrchr(pbuffer, '/');
// NOTE(review): ptr points into pbuffer, so source and destination of this
// strcpy overlap. The C standard leaves that case undefined; memmove with
// strlen(ptr + 1) + 1 bytes would be the well-defined form.
722 strcpy(pbuffer, ptr + 1);
// Salvages one partition, or one volume group when singleVolumeNumber is
// nonzero. All per-run state lives in a local SalvInfo that is zeroed first.
//
// Steps that can be read from the code below:
//  1. Abort once tries exceeds VOL_MAX_CHECKOUT_RETRIES; under
//     AFS_DEMAND_ATTACH_FS the partition volume lock file is reinitialised.
//  2. Fill in fileSysPartition, fileSysDevice, fileSysPathName, fileSysPath
//     and the device name fields of salvinfo.
//  3. Single volume: connect to the fileserver unless running as the
//     salvageserver, take the volume offline with AskOffline(), and lock it
//     (LockVolume under DAFS, VLockPartition otherwise). The second
//     VLockPartition call is presumably the whole-partition branch.
//  4. ForceSalvage is recomputed from UseTheForceLuke(fileSysPath).
//  5. Directory entries whose names start with salvage.inodes. or
//     salvage.temp. are treated as stale temp files.
//  6. The inode list temp file is created with fopen "w+b" and unlinked right
//     away, then filled by GetInodeSummary().
//  7. GetVolumeSummary() loads the volume headers; the nested loops group
//     inodeSummary entries by RWvolumeId, call DeleteExtraVolumeHeaderFile()
//     for headers without inode data, attach the matching VolumeSummary to
//     each inode summary, and call SalvageVolumeGroup() once per group.
//  8. Volumes are brought back with AskOnline() and inodeFile is closed.
729 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
732 char inodeListPath[256];
733 FILE *inodeFile = NULL;
734 static char tmpDevName[100];
735 static char wpath[100];
736 struct VolumeSummary *vsp, *esp;
740 struct SalvInfo l_salvinfo;
741 struct SalvInfo *salvinfo = &l_salvinfo;
744 memset(salvinfo, 0, sizeof(*salvinfo));
751 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
752 Abort("Raced too many times with fileserver restarts while trying to "
753 "checkout/lock volumes; Aborted\n");
755 #ifdef AFS_DEMAND_ATTACH_FS
757 /* unlock all previous volume locks, since we're about to lock them
759 VLockFileReinit(&partP->volLockFile);
761 #endif /* AFS_DEMAND_ATTACH_FS */
763 salvinfo->fileSysPartition = partP;
764 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
765 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
768 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
// NOTE(review): fileSysPath is declared as an 8-byte array and this sprintf is
// unbounded, unlike the strlcpy used a few lines further down.
769 (void)sprintf(salvinfo->fileSysPath, "%s\\", salvinfo->fileSysPathName);
770 name = partP->devName;
772 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
// NOTE(review): unbounded copy into the 100-byte static tmpDevName. The static
// buffers also make fileSysDeviceName and filesysfulldev shared across calls.
773 strcpy(tmpDevName, partP->devName);
774 name = get_DevName(tmpDevName, wpath);
775 salvinfo->fileSysDeviceName = name;
776 salvinfo->filesysfulldev = wpath;
779 if (singleVolumeNumber) {
780 #ifndef AFS_DEMAND_ATTACH_FS
781 /* only non-DAFS locks the partition when salvaging a single volume;
782 * DAFS will lock the individual volumes in the VG */
783 VLockPartition(partP->name);
784 #endif /* !AFS_DEMAND_ATTACH_FS */
788 /* salvageserver already setup fssync conn for us */
789 if ((programType != salvageServer) && !VConnectFS()) {
790 Abort("Couldn't connect to file server\n");
793 AskOffline(salvinfo, singleVolumeNumber);
794 #ifdef AFS_DEMAND_ATTACH_FS
795 if (LockVolume(salvinfo, singleVolumeNumber)) {
798 #endif /* AFS_DEMAND_ATTACH_FS */
801 VLockPartition(partP->name);
805 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
808 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
809 partP->name, name, (Testing ? "(READONLY mode)" : ""));
811 Log("***Forced salvage of all volumes on this partition***\n");
816 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
// NOTE(review): opendir is called inside the osi_Assert argument; verify that
// osi_Assert always evaluates its argument.
823 osi_Assert((dirp = opendir(salvinfo->fileSysPath)) != NULL);
824 while ((dp = readdir(dirp))) {
825 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
826 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
828 Log("Removing old salvager temp files %s\n", dp->d_name);
// NOTE(review): strcpy and strcat into npath are unbounded; check the size of
// npath against fileSysPath plus the longest d_name.
829 strcpy(npath, salvinfo->fileSysPath);
831 strcat(npath, dp->d_name);
837 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
839 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
// NOTE(review): strncpy does not terminate the destination when the source is
// 255 bytes or longer, and the _tempnam result is not checked for NULL here.
840 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
842 snprintf(inodeListPath, 255, "%s/salvage.inodes.%s.%d", tdir, name,
846 inodeFile = fopen(inodeListPath, "w+b");
848 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
851 /* Using nt_unlink here since we're really using the delete on close
852 * semantics of unlink. In most places in the salvager, we really do
853 * mean to unlink the file at that point. Those places have been
854 * modified to actually do that so that the NT crt can be used there.
856 code = nt_unlink(inodeListPath);
858 code = unlink(inodeListPath);
861 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
864 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
868 salvinfo->inodeFd = fileno(inodeFile);
869 if (salvinfo->inodeFd == -1)
870 Abort("Temporary file %s is missing...\n", inodeListPath);
871 afs_lseek(salvinfo->inodeFd, 0L, SEEK_SET);
872 if (ListInodeOption) {
873 PrintInodeList(salvinfo);
876 /* enumerate volumes in the partition.
877 * figure out sets of read-only + rw volumes.
878 * salvage each set, read-only volumes first, then read-write.
879 * Fix up inodes on last volume in set (whether it is read-write
882 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
// i marks the first inode summary of a volume group and j scans forward to
// one past its last member; the outer loop then continues from j.
886 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
887 i < salvinfo->nVolumesInInodeFile; i = j) {
888 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
890 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
892 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
893 struct VolumeSummary *tsp;
894 /* Scan volume list (from partition root directory) looking for the
895 * current rw volume number in the volume list from the inode scan.
896 * If there is one here that is not in the inode volume list,
898 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
900 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
902 /* Now match up the volume summary info from the root directory with the
903 * entry in the volume list obtained from scanning inodes */
904 salvinfo->inodeSummary[j].volSummary = NULL;
905 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
906 if (tsp->header.id == vid) {
907 salvinfo->inodeSummary[j].volSummary = tsp;
913 /* Salvage the group of volumes (several read-only + 1 read/write)
914 * starting with the current read-only volume we're looking at.
916 SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
919 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
920 for (; vsp < esp; vsp++) {
922 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
925 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
926 RemoveTheForce(salvinfo->fileSysPath);
928 if (!Testing && singleVolumeNumber) {
929 #ifdef AFS_DEMAND_ATTACH_FS
930 /* unlock vol headers so the fs can attach them when we AskOnline */
931 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
932 #endif /* AFS_DEMAND_ATTACH_FS */
934 AskOnline(salvinfo, singleVolumeNumber);
936 /* Step through the volumeSummary list and set all volumes on-line.
937 * The volumes were taken off-line in GetVolumeSummary.
939 for (j = 0; j < salvinfo->nVolumes; j++) {
940 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
944 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
945 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
948 fclose(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
// Handles a volume header file that has no matching inode data.
// The full name is formatted into path from salvinfo->fileSysPath and
// vsp->fileName and the situation is logged, with the wording switching to
// "would have been deleted" when Testing is set. The header is then destroyed
// through VDestroyVolumeDiskHeader(), whose failure code is logged, and path
// is unlinked with ENOENT tolerated.
// NOTE(review): the destroy and unlink steps are presumably skipped when
// Testing is set -- confirm.
// NOTE(review): sprintf into path is unbounded; check the size of path.
952 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
955 sprintf(path, "%s/%s", salvinfo->fileSysPath, vsp->fileName);
958 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
961 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
963 Log("Error %ld destroying volume disk header for volume %lu\n",
964 afs_printable_int32_ld(code),
965 afs_printable_uint32_lu(vsp->header.id));
968 /* make sure we actually delete the fileName file; ENOENT
969 * is fine, since VDestroyVolumeDiskHeader probably already
971 if (unlink(path) && errno != ENOENT) {
972 Log("Unable to unlink %s (errno = %d)\n", path, errno);
// qsort comparator over struct ViceInodeInfo records (GetInodeSummary passes
// it to qsort for the inode table).
//
// When either record is a special inode (vnodeNumber == INODESPECIAL), each
// side gets a read-write id: parentId for a special inode, volumeId otherwise.
// Two special inodes of the same volume are ordered by special.type; two
// special inodes of different volumes fall back to ascending volumeId unless
// one of them belongs to the read-write volume itself. A special inode paired
// with an ordinary one is placed according to whether the special inode
// belongs to the read-write volume.
//
// Two ordinary inodes are compared by volumeId, then vnodeNumber, then
// vnodeUniquifier, then inodeDataVersion. The AFS_3DISPARES and AFS_SGI_EXMAG
// sections add a test for one value above 90 percent and the other below
// 10 percent of the counter range.
// NOTE(review): those range tests presumably treat a wrapped counter as the
// newer value, and the exact return value of each branch should be confirmed.
979 CompareInodes(const void *_p1, const void *_p2)
981 const struct ViceInodeInfo *p1 = _p1;
982 const struct ViceInodeInfo *p2 = _p2;
983 if (p1->u.vnode.vnodeNumber == INODESPECIAL
984 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
985 VolumeId p1rwid, p2rwid;
987 (p1->u.vnode.vnodeNumber ==
988 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
990 (p2->u.vnode.vnodeNumber ==
991 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
996 if (p1->u.vnode.vnodeNumber == INODESPECIAL
997 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
998 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
999 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1000 if (p1->u.vnode.volumeId == p1rwid)
1002 if (p2->u.vnode.volumeId == p2rwid)
1004 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1006 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1007 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1008 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1010 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1012 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1014 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1016 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1018 /* The following tests are reversed, so that the most desirable
1019 * of several similar inodes comes first */
1020 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1021 #ifdef AFS_3DISPARES
1022 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1023 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1026 #ifdef AFS_SGI_EXMAG
1027 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1028 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1033 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1034 #ifdef AFS_3DISPARES
1035 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1036 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1039 #ifdef AFS_SGI_EXMAG
1040 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1041 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1046 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1047 #ifdef AFS_3DISPARES
1048 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1049 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1052 #ifdef AFS_SGI_EXMAG
1053 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1054 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1059 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1060 #ifdef AFS_3DISPARES
1061 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1062 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1065 #ifdef AFS_SGI_EXMAG
1066 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1067 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
// Summarises the run of inode records at ip that share one volumeId.
// The scan stops after maxInodes records or at the first record with a
// different volumeId. For special inodes the parentId becomes the read-write
// volume id; the largest vnodeUniquifier seen is tracked in maxunique.
// On exit summary receives volumeId, RWvolumeId, nInodes, nSpecialInodes and
// maxUniquifier. Other summary fields are not written by the lines shown; the
// caller in GetInodeSummary advances summary.index itself.
// NOTE(review): ip is dereferenced before maxInodes is tested, so callers
// must pass at least one record.
1076 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1077 struct InodeSummary *summary)
1079 VolumeId volume = ip->u.vnode.volumeId;
1080 VolumeId rwvolume = volume;
1085 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1087 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1089 rwvolume = ip->u.special.parentId;
1090 /* This isn't quite right, as there could (in error) be different
1091 * parent inodes in different special vnodes */
1093 if (maxunique < ip->u.vnode.vnodeUniquifier)
1094 maxunique = ip->u.vnode.vnodeUniquifier;
1098 summary->volumeId = volume;
1099 summary->RWvolumeId = rwvolume;
1100 summary->nInodes = n;
1101 summary->nSpecialInodes = nSpecial;
1102 summary->maxUniquifier = maxunique;
// Inode filter handed to ListViceInodes() when a single volume is salvaged.
// Returns nonzero when inodeinfo belongs to singleVolumeNumber: special inodes
// are matched on special.parentId, all others on vnode.volumeId.
// rock is not referenced by the lines shown.
1106 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1108 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1109 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1110 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1115 * Collect list of inodes in file named by path. If a truly fatal error,
1116 * unlink the file and abort. For lesser errors, return -1. The file will
1117 * be unlinked by the caller.
// Builds the per-volume inode summary for the partition described by salvinfo.
//
// Steps that can be read from the code below:
//  1. ListViceInodes() writes the inode records into inodeFile, filtered by
//     OnlyOneVolume when singleVolumeNumber is nonzero. A negative result
//     leads to the tmp-file I/O error log or to the Abort that follows it.
//  2. inodeFile is rewound and its descriptor stored in salvinfo->inodeFd;
//     a missing descriptor or failing fstat aborts.
//  3. A summary temp file is named from tdir and the pid (or via _tempnam),
//     opened in "a+" mode and unlinked right away.
//  4. In the current process, or in the Fork() child when canfork is set and
//     debug is not: the record count is derived from the file size, the
//     table is read into memory, sorted with CompareInodes and written back,
//     and one InodeSummary per volume from CountVolumeInodes() is appended to
//     the summary file, which is then flushed and fsynced.
//  5. Wait("Inode summary") returning -1 ends the process through Exit(1).
//  6. The summary file is read back into salvinfo->inodeSummary,
//     nVolumesInInodeFile is derived from its size, and every volSummary
//     pointer is reset to NULL.
// NOTE(review): the value returned on each path should be confirmed; the
// caller in this file treats a negative result as failure.
1120 GetInodeSummary(struct SalvInfo *salvinfo, FILE *inodeFile, VolumeId singleVolumeNumber)
1122 struct afs_stat status;
1125 struct ViceInodeInfo *ip, *ip_save;
1126 struct InodeSummary summary;
1127 char summaryFileName[50];
1130 char *dev = salvinfo->fileSysPath;
1131 char *wpath = salvinfo->fileSysPath;
1133 char *dev = salvinfo->fileSysDeviceName;
1134 char *wpath = salvinfo->filesysfulldev;
1136 char *part = salvinfo->fileSysPath;
1140 /* This file used to come from vfsck; cobble it up ourselves now... */
1142 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1143 singleVolumeNumber ? OnlyOneVolume : 0,
1144 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1146 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1149 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1151 if (forceSal && !ForceSalvage) {
1152 Log("***Forced salvage of all volumes on this partition***\n");
1155 fseek(inodeFile, 0L, SEEK_SET);
1156 salvinfo->inodeFd = fileno(inodeFile);
1157 if (salvinfo->inodeFd == -1 || afs_fstat(salvinfo->inodeFd, &status) == -1) {
1158 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1160 tdir = (tmpdir ? tmpdir : part);
1162 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
// NOTE(review): unbounded copy into the 50-byte summaryFileName, and the
// _tempnam result is not checked for NULL here.
1163 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp"));
1165 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1166 "%s/salvage.temp.%d", tdir, getpid());
1168 summaryFile = afs_fopen(summaryFileName, "a+");
1169 if (summaryFile == NULL) {
1170 Abort("Unable to create inode summary file\n");
1174 /* Using nt_unlink here since we're really using the delete on close
1175 * semantics of unlink. In most places in the salvager, we really do
1176 * mean to unlink the file at that point. Those places have been
1177 * modified to actually do that so that the NT crt can be used there.
1179 code = nt_unlink(summaryFileName);
1181 code = unlink(summaryFileName);
1184 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1187 if (!canfork || debug || Fork() == 0) {
// The inode file size is narrowed to unsigned long before it is used for the
// record count and for the read and write lengths below.
1189 unsigned long st_size=(unsigned long) status.st_size;
1190 nInodes = st_size / sizeof(struct ViceInodeInfo);
1192 fclose(summaryFile);
1193 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1194 RemoveTheForce(salvinfo->fileSysPath);
1196 struct VolumeSummary *vsp;
1199 GetVolumeSummary(salvinfo, singleVolumeNumber);
1201 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1203 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1206 Log("%s vice inodes on %s; not salvaged\n",
1207 singleVolumeNumber ? "No applicable" : "No", dev);
1210 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1212 fclose(summaryFile);
1214 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1217 if (read(salvinfo->inodeFd, ip, st_size) != st_size) {
1218 fclose(summaryFile);
1219 Abort("Unable to read inode table; %s not salvaged\n", dev);
1221 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1222 if (afs_lseek(salvinfo->inodeFd, 0, SEEK_SET) == -1
1223 || write(salvinfo->inodeFd, ip, st_size) != st_size) {
1224 fclose(summaryFile);
1225 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
// Each pass summarises one volume and then steps ip, nInodes and
// summary.index past the records that were just counted.
1230 CountVolumeInodes(ip, nInodes, &summary);
1231 if (fwrite(&summary, sizeof(summary), 1, summaryFile) != 1) {
1232 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1233 fclose(summaryFile);
1236 summary.index += (summary.nInodes);
1237 nInodes -= summary.nInodes;
1238 ip += summary.nInodes;
1241 ip = ip_save = NULL;
1242 /* Following fflush is not fclose, because if it was debug mode would not work */
1243 if (fflush(summaryFile) == EOF || fsync(fileno(summaryFile)) == -1) {
1244 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1245 fclose(summaryFile);
1248 if (canfork && !debug) {
1253 if (Wait("Inode summary") == -1) {
1254 fclose(summaryFile);
1255 Exit(1); /* salvage of this partition aborted */
// NOTE(review): the fstat and lseek below are performed inside osi_Assert
// arguments; verify that osi_Assert always evaluates its argument.
1258 osi_Assert(afs_fstat(fileno(summaryFile), &status) != -1);
1259 if (status.st_size != 0) {
1261 unsigned long st_status=(unsigned long)status.st_size;
1262 salvinfo->inodeSummary = (struct InodeSummary *)malloc(st_status);
1263 osi_Assert(salvinfo->inodeSummary != NULL);
1264 /* For GNU we need to do lseek to get the file pointer moved. */
1265 osi_Assert(afs_lseek(fileno(summaryFile), 0, SEEK_SET) == 0);
1266 ret = read(fileno(summaryFile), salvinfo->inodeSummary, st_status);
1267 osi_Assert(ret == st_status);
1269 salvinfo->nVolumesInInodeFile =(unsigned long)(status.st_size) / sizeof(struct InodeSummary);
1270 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1271 salvinfo->inodeSummary[i].volSummary = NULL;
1273 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)(status.st_size));
1274 fclose(summaryFile);
1278 /* Comparison routine for volume sort.
1279 This is set up so that a read-write volume comes immediately before
1280 any read-only clones of that volume.

   qsort-style comparator; both arguments point to struct VolumeSummary.
   Entries are ordered first by header.parent (ascending), which keeps the
   members of one volume group adjacent.  Within a group, an entry whose
   header.id equals header.parent is the read-write volume; two read-only
   entries are ordered by header.id (ascending).  The final comparison
   yields -1 or 1 only, never 0, even when the ids are equal. */
1282 CompareVolumes(const void *_p1, const void *_p2)
1284 const struct VolumeSummary *p1 = _p1;
1285 const struct VolumeSummary *p2 = _p2;
1286 if (p1->header.parent != p2->header.parent)
1287 return p1->header.parent < p2->header.parent ? -1 : 1;
1288 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1290 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1292 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
/*
 * AskVolumeSummary: build salvinfo->volumeSummaryp from the fileserver's
 * volume-group (VG) information instead of scanning the partition.
 *
 * The fileserver path is only compiled when both FSSYNC_BUILD_CLIENT and
 * AFS_DEMAND_ATTACH_FS are defined, and is only taken when programType is
 * salvageServer and singleVolumeNumber is non-zero.
 *
 * Visible steps:
 *  - FSYNC_VGCQuery() is issued for singleVolumeNumber.  While it fails
 *    with reason FSYNC_PART_SCANNING the query is repeated, sleeping
 *    between attempts for an interval that grows by one second per try
 *    and is capped at 10 seconds.
 *  - A failure with reason FSYNC_UNKNOWN_VOLID is treated as an empty VG:
 *    q_res is zeroed and q_res.rw is set to singleVolumeNumber.
 *  - Any other refusal is logged with the code and reason.
 *  - If q_res.rw differs from singleVolumeNumber (a clone was requested),
 *    a salvage of the whole volume group is scheduled through
 *    SALVSYNC_LinkVolume() when SALVSYNC_BUILD_CLIENT is defined, and the
 *    process exits with SALSRV_EXIT_VOLGROUP_LINK.
 *  - Otherwise space for VOL_VG_MAX_VOLS summaries is allocated.  For each
 *    entry of q_res.children, members other than singleVolumeNumber are
 *    taken offline (AskOffline) and locked (LockVolume); the disk header
 *    is read and converted into vsp->header, the external file name is
 *    stored, and nVolumes is incremented.  The result is sorted with
 *    qsort().
 *
 * NOTE(review): short lines (braces, return statements, some declarations
 * and the handling of zero entries in q_res.children) are not visible in
 * this view; confirm the exact return paths against the retval list
 * below.
 */
1296 * Gleans volumeSummary information by asking the fileserver
1298 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1299 * salvaging a whole partition
1301 * @return whether we obtained the volume summary information or not
1302 * @retval 0 success; we obtained the volume summary information
1303 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1305 * @retval 1 we did not get the volume summary information; either the
1306 * fileserver responded with an error, or we are not supposed to
1307 * ask the fileserver for the information (e.g. we are salvaging
1308 * the entire partition or we are not the salvageserver)
1310 * @note for non-DAFS, always returns 1
1313 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1316 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1317 if (programType == salvageServer) {
1318 if (singleVolumeNumber) {
1319 FSSYNC_VGQry_response_t q_res;
1321 struct VolumeSummary *vsp;
1323 struct VolumeDiskHeader diskHdr;
1325 memset(&res, 0, sizeof(res));
1327 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1330 * We must wait for the partition to finish scanning before
1331 * can continue, since we will not know if we got the entire
1332 * VG membership unless the partition is fully scanned.
1333 * We could, in theory, just scan the partition ourselves if
1334 * the VG cache is not ready, but we would be doing the exact
1335 * same scan the fileserver is doing; it will almost always
1336 * be faster to wait for the fileserver. The only exceptions
1337 * are if the partition does not take very long to scan, and
1338 * in that case it's fast either way, so who cares?
1340 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1341 Log("waiting for fileserver to finish scanning partition %s...\n",
1342 salvinfo->fileSysPartition->name);
1344 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1345 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1346 * just so small partitions don't need to wait over 10
1347 * seconds every time, and large partitions are generally
1348 * polled only once every ten seconds. */
1349 sleep((i > 10) ? (i = 10) : i);
1351 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1355 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1356 /* This can happen if there's no header for the volume
1357 * we're salvaging, or no headers exist for the VG (if
1358 * we're salvaging an RW). Act as if we got a response
1359 * with no VG members. The headers may be created during
1360 * salvaging, if there are inodes in this VG. */
1362 memset(&q_res, 0, sizeof(q_res));
1363 q_res.rw = singleVolumeNumber;
1367 Log("fileserver refused VGCQuery request for volume %lu on "
1368 "partition %s, code %ld reason %ld\n",
1369 afs_printable_uint32_lu(singleVolumeNumber),
1370 salvinfo->fileSysPartition->name,
1371 afs_printable_int32_ld(code),
1372 afs_printable_int32_ld(res.hdr.reason));
1376 if (q_res.rw != singleVolumeNumber) {
1377 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1378 afs_printable_uint32_lu(singleVolumeNumber),
1379 afs_printable_uint32_lu(q_res.rw));
1380 #ifdef SALVSYNC_BUILD_CLIENT
1381 if (SALVSYNC_LinkVolume(q_res.rw,
1383 salvinfo->fileSysPartition->name,
1385 Log("schedule request failed\n");
1387 #endif /* SALVSYNC_BUILD_CLIENT */
1388 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1391 salvinfo->volumeSummaryp = malloc(VOL_VG_MAX_VOLS * sizeof(struct VolumeSummary));
1392 osi_Assert(salvinfo->volumeSummaryp != NULL);
1394 salvinfo->nVolumes = 0;
1395 vsp = salvinfo->volumeSummaryp;
1397 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1398 char name[VMAXPATHLEN];
1400 if (!q_res.children[i]) {
1404 /* AskOffline for singleVolumeNumber was called much earlier */
1405 if (q_res.children[i] != singleVolumeNumber) {
1406 AskOffline(salvinfo, q_res.children[i]);
1407 if (LockVolume(salvinfo, q_res.children[i])) {
1413 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1415 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1416 afs_printable_uint32_lu(q_res.children[i]));
1421 DiskToVolumeHeader(&vsp->header, &diskHdr);
1422 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1423 vsp->fileName = ToString(name);
1424 salvinfo->nVolumes++;
1428 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1433 Log("Cannot get volume summary from fileserver; falling back to scanning "
1434 "entire partition\n");
1437 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
/*
 * CountHeader: VWalkVolumeHeaders callback used for the counting pass.
 * The only argument it uses is rock, which it treats as an afs_int32 *
 * holding the running number of volume headers seen so far.
 * NOTE(review): the statement that updates the counter and the return
 * value are not visible in this view -- presumably the counter is
 * incremented once per call; confirm.
 */
1442 * count how many volume headers are found by VWalkVolumeHeaders.
1444 * @param[in] dp the disk partition (unused)
1445 * @param[in] name full path to the .vol header (unused)
1446 * @param[in] hdr the header data (unused)
1447 * @param[in] last whether this is the last try or not (unused)
1448 * @param[in] rock actually an afs_int32*; the running count of how many
1449 * volumes we have found
1454 CountHeader(struct DiskPartition64 *dp, const char *name,
1455 struct VolumeDiskHeader *hdr, int last, void *rock)
1457 afs_int32 *nvols = (afs_int32 *)rock;
/*
 * SalvageScanParams: state shared between GetVolumeSummary and the
 * RecordHeader / UnlinkHeader callbacks during a VWalkVolumeHeaders scan.
 * GetVolumeSummary fills it in and the callbacks receive it through their
 * void *rock argument.  RecordHeader copies each recorded summary through
 * vsp and aborts if nVolumes exceeds totalVolumes.
 */
1463 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
1466 struct SalvageScanParams {
1467 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1468 * vol id of the VG we're salvaging */
1469 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1470 * we're filling in */
1471 afs_int32 nVolumes; /**< # of vols we've encountered */
1472 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1473 * # of vols we've alloc'd memory for) */
1474 int retry; /**< do we need to retry vol lock/checkout? */
1475 struct SalvInfo *salvinfo; /**< salvage job info */
/*
 * RecordHeader: VWalkVolumeHeaders callback that converts one on-disk
 * volume header (hdr) into a struct VolumeSummary and copies it to
 * params->vsp, where params is the struct SalvageScanParams passed in
 * through rock.
 *
 * Visible behaviour:
 *  - If a single volume was requested and this header shows that the
 *    requested volume is a clone (header id equals singleVolumeNumber but
 *    its parent does not), the salvageserver schedules a salvage of the
 *    parent volume group with SALVSYNC_LinkVolume() (when
 *    SALVSYNC_BUILD_CLIENT is defined) and exits with
 *    SALSRV_EXIT_VOLGROUP_LINK; other program types log that the volume
 *    is read-only and is not salvaged.
 *  - A header is processed further only for a partition salvage
 *    (singleVolumeNumber == 0) or when its id or parent equals
 *    singleVolumeNumber.
 *  - The file's base name (text after the last '/') is compared with the
 *    name built from VFORMAT and the header id; a mismatch means the
 *    header file is mis-named.
 *  - When the name is good, or this is the last try, volumes other than
 *    singleVolumeNumber are taken offline with AskOffline(); under
 *    AFS_DEMAND_ATTACH_FS, LockVolume() is also called (per the inline
 *    comment, not for badly named headers).
 *  - On the last try (and unless Showmode is set) a mis-named header is
 *    reported, with wording that depends on Testing.
 *  - The function aborts if params->nVolumes exceeds
 *    params->totalVolumes; otherwise the summary is copied into
 *    params->vsp.
 *
 * NOTE(review): the lines that set badname, advance params->vsp and
 * params->nVolumes, set params->retry and return are not visible in this
 * view; confirm them against the retval list below.
 */
1479 * records volume summary info found from VWalkVolumeHeaders.
1481 * Found volumes are also taken offline if they are in the specific volume
1482 * group we are looking for.
1484 * @param[in] dp the disk partition
1485 * @param[in] name full path to the .vol header
1486 * @param[in] hdr the header data
1487 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1488 * @param[in] rock actually a struct SalvageScanParams*, containing the
1489 * information needed to record the volume summary data
1491 * @return operation status
1493 * @retval -1 volume locking raced with fileserver restart; checking out
1494 * and locking volumes needs to be retried
1495 * @retval 1 volume header is mis-named and should be deleted
1498 RecordHeader(struct DiskPartition64 *dp, const char *name,
1499 struct VolumeDiskHeader *hdr, int last, void *rock)
1501 char nameShouldBe[64];
1502 struct SalvageScanParams *params;
1503 struct VolumeSummary summary;
1504 VolumeId singleVolumeNumber;
1505 struct SalvInfo *salvinfo;
1507 params = (struct SalvageScanParams *)rock;
1509 singleVolumeNumber = params->singleVolumeNumber;
1510 salvinfo = params->salvinfo;
1512 DiskToVolumeHeader(&summary.header, hdr);
1514 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1515 && summary.header.parent != singleVolumeNumber) {
1517 if (programType == salvageServer) {
1518 #ifdef SALVSYNC_BUILD_CLIENT
1519 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1520 summary.header.id, summary.header.parent);
1521 if (SALVSYNC_LinkVolume(summary.header.parent,
1525 Log("schedule request failed\n");
1528 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1531 Log("%u is a read-only volume; not salvaged\n",
1532 singleVolumeNumber);
1537 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1538 || summary.header.parent == singleVolumeNumber) {
1540 /* check if the header file is incorrectly named */
1542 const char *base = strrchr(name, '/');
1549 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1550 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1553 if (strcmp(nameShouldBe, base)) {
1554 /* .vol file has wrong name; retry/delete */
1558 if (!badname || last) {
1559 /* only offline the volume if the header is good, or if this is
1560 * the last try looking at it; avoid AskOffline'ing the same vol
1563 if (singleVolumeNumber
1564 && summary.header.id != singleVolumeNumber) {
1565 /* don't offline singleVolumeNumber; we already did that
1568 AskOffline(salvinfo, summary.header.id);
1570 #ifdef AFS_DEMAND_ATTACH_FS
1572 /* don't lock the volume if the header is bad, since we're
1573 * about to delete it anyway. */
1574 if (LockVolume(salvinfo, summary.header.id)) {
1579 #endif /* AFS_DEMAND_ATTACH_FS */
1583 if (last && !Showmode) {
1584 Log("Volume header file %s is incorrectly named (should be %s "
1585 "not %s); %sdeleted (it will be recreated later, if "
1586 "necessary)\n", name, nameShouldBe, base,
1587 (Testing ? "it would have been " : ""));
1592 summary.fileName = ToString(base);
1595 if (params->nVolumes > params->totalVolumes) {
1596 /* We found more volumes than we found on the first partition walk;
1597 * apparently something created a volume while we were
1598 * partition-salvaging, or we found more than 20 vols when salvaging a
1599 * particular volume. Abort if we detect this, since other programs
1600 * supposed to not touch the partition while it is partition-salvaging,
1601 * and we shouldn't find more than 20 vols in a VG.
1603 Abort("Found %ld vol headers, but should have found at most %ld! "
1604 "Make sure the volserver/fileserver are not running at the "
1605 "same time as a partition salvage\n",
1606 afs_printable_int32_ld(params->nVolumes),
1607 afs_printable_int32_ld(params->totalVolumes));
1610 memcpy(params->vsp, &summary, sizeof(summary));
/*
 * UnlinkHeader: VWalkVolumeHeaders callback that decides whether a bad
 * volume header file should be removed and, if so, removes it with
 * unlink(name).
 *
 * Visible behaviour:
 *  - A header that could not be read at all is reported as "not a
 *    legitimate volume header file" (wording depends on Testing).
 *  - A header that was read but rejected is only considered for removal
 *    when params->singleVolumeNumber is 0, i.e. during a partition
 *    salvage.
 *  - A failing unlink() is logged together with errno; it is not treated
 *    as fatal in the visible code.
 *
 * NOTE(review): the lines that test hdr and set dounlink are not visible
 * in this view; presumably dounlink stays 0 when Testing is set, matching
 * the "it would have been deleted" wording -- confirm.
 */
1618 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1620 * If the header could not be read in at all, the header is always unlinked.
1621 * If instead RecordHeader said the header was bad (that is, the header file
1622 * is mis-named), we only unlink if we are doing a partition salvage, as
1623 * opposed to salvaging a specific volume group.
1625 * @param[in] dp the disk partition
1626 * @param[in] name full path to the .vol header
1627 * @param[in] hdr header data, or NULL if the header could not be read
1628 * @param[in] rock actually a struct SalvageScanParams*, with some information
1632 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1633 struct VolumeDiskHeader *hdr, void *rock)
1635 struct SalvageScanParams *params;
1638 params = (struct SalvageScanParams *)rock;
1641 /* no header; header is too bogus to read in at all */
1643 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1649 } else if (!params->singleVolumeNumber) {
1650 /* We were able to read in a header, but RecordHeader said something
1651 * was wrong with it. We only unlink those if we are doing a partition
1658 if (dounlink && unlink(name)) {
1659 Log("Error %d while trying to unlink %s\n", errno, name);
/*
 * GetVolumeSummary: fill salvinfo->volumeSummaryp / salvinfo->nVolumes.
 *
 * Visible steps:
 *  - AskVolumeSummary() is tried first; its result decides whether the
 *    partition has to be scanned at all.
 *  - For a partition salvage (singleVolumeNumber == 0) a first
 *    VWalkVolumeHeaders() pass with CountHeader counts the headers so the
 *    summary array can be sized; for a single volume group the array is
 *    sized for VOL_VG_MAX_VOLS entries.
 *  - A second VWalkVolumeHeaders() pass with RecordHeader / UnlinkHeader
 *    records the summaries and removes invalid headers.  The callbacks
 *    receive the address of the local SalvageScanParams as their rock.
 *  - The recorded summaries are sorted with qsort().
 *
 * NOTE(review): short lines (braces, returns, the test of params.retry)
 * are not visible in this view; confirm return paths against the retval
 * list below.
 */
1664 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1665 * the fileserver for VG information, or by scanning the /vicepX partition.
1667 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1668 * are salvaging, or 0 if this is a partition
1671 * @return operation status
1673 * @retval -1 we raced with a fileserver restart; checking out and locking
1674 * volumes must be retried
1677 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1679 afs_int32 nvols = 0;
1680 struct SalvageScanParams params;
1683 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1685 /* we successfully got the vol information from the fileserver; no
1686 * need to scan the partition */
1690 /* we need to retry volume checkout */
1694 if (!singleVolumeNumber) {
1695 /* Count how many volumes we have in /vicepX */
1696 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1699 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1704 nvols = VOL_VG_MAX_VOLS;
1707 salvinfo->volumeSummaryp = malloc(nvols * sizeof(struct VolumeSummary));
1708 osi_Assert(salvinfo->volumeSummaryp != NULL);
1710 params.singleVolumeNumber = singleVolumeNumber;
1711 params.vsp = salvinfo->volumeSummaryp;
1712 params.nVolumes = 0;
1713 params.totalVolumes = nvols;
1715 params.salvinfo = salvinfo;
1717 /* walk the partition directory of volume headers and record the info
1718 * about them; unlinking invalid headers */
1719 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1720 UnlinkHeader, &params);
1722 /* we apparently need to retry checking-out/locking volumes */
1726 Abort("Failed to get volume header summary\n");
1728 salvinfo->nVolumes = params.nVolumes;
1730 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
/*
 * FindLinkHandle: search the special inodes of every volume in the group
 * for one whose special type is VI_LINKTABLE and return its inode number.
 *
 * isp points to nVols consecutive InodeSummary entries; allInodes is
 * indexed by isp[i].index, and only the first isp[i].nSpecialInodes
 * entries of each volume are examined.  The first match wins.
 * NOTE(review): the value returned when no link table inode is found is
 * not visible in this view; the caller tests the result with VALID_INO().
 */
1736 /* Find the link table. This should be associated with the RW volume or, if
1737 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
1740 FindLinkHandle(struct InodeSummary *isp, int nVols,
1741 struct ViceInodeInfo *allInodes)
1744 struct ViceInodeInfo *ip;
1746 for (i = 0; i < nVols; i++) {
1747 ip = allInodes + isp[i].index;
1748 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1749 if (ip[j].u.special.type == VI_LINKTABLE)
1750 return ip[j].inodeNumber;
/*
 * CreateLinkTable: make sure the volume group has a link table inode and
 * initialise it.
 *
 * If ino is not a valid inode, a new special inode of type VI_LINKTABLE is
 * created with IH_CREATE().  salvinfo->VGLinkH is then initialised for the
 * inode and opened, the file is truncated to sizeof(struct versionStamp)
 * + sizeof(short), and a stamp carrying LINKTABLEMAGIC / LINKTABLEVERSION
 * is written at offset 0.  Each failing step calls Abort() with a message
 * naming that step.  Finally, if a volume summary exists for isp, its
 * header.linkTable is set to the inode.
 *
 * The message for a failed FDH_PWRITE() used to repeat the "truncate"
 * wording of the preceding check; it now reports a write failure.
 */
1757 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1759 struct versionStamp version;
1762 if (!VALID_INO(ino))
1764 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
1765 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1766 if (!VALID_INO(ino))
1768 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1769 isp->RWvolumeId, errno);
1770 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1771 fdP = IH_OPEN(salvinfo->VGLinkH);
1773 Abort("Can't open link table for volume %u (error = %d)\n",
1774 isp->RWvolumeId, errno);
1776 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1777 Abort("Can't truncate link table for volume %u (error = %d)\n",
1778 isp->RWvolumeId, errno);
1780 version.magic = LINKTABLEMAGIC;
1781 version.version = LINKTABLEVERSION;
1783 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1785 Abort("Can't write link table for volume %u (error = %d)\n",
1786 isp->RWvolumeId, errno);
1788 FDH_REALLYCLOSE(fdP);
1790 /* If the volume summary exists (i.e., the V*.vol header file exists),
1791 * then set this inode there as well.
1793 if (isp->volSummary)
1794 isp->volSummary->header.linkTable = ino;
/*
 * Thread entry body: unpacks the SVGParms_t passed through arg and runs
 * DoSalvageVolumeGroup() with the salvinfo, inode summary pointer and
 * volume count it carries.
 * NOTE(review): the function header is not visible in this view; this is
 * presumably nt_SVG, the routine SalvageVolumeGroup hands to
 * pthread_create() -- confirm.
 */
1803 SVGParms_t *parms = (SVGParms_t *) arg;
1804 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * SalvageVolumeGroup: entry point for salvaging one volume group.
 *
 * Resets the per-volume-group fields of salvinfo (VolumeChanged, VGLinkH,
 * VGLinkH_cnt, VolInfo), packs its arguments into an SVGParms_t, and runs
 * nt_SVG in a thread created as PTHREAD_CREATE_JOINABLE, then joins it.
 * A failure of pthread_attr_init(), pthread_attr_setdetachstate() or
 * pthread_create() is logged with the RW volume id.
 *
 * NOTE(review): the threaded path appears to be the AFS_NT40_ENV variant
 * (see the closing #endif); the opening #ifdef, the alternative path and
 * the return statements are not visible in this view.
 */
1809 SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1812 pthread_attr_t tattr;
1816 /* Initialize per volume global variables, even if later code does so */
1817 salvinfo->VolumeChanged = 0;
1818 salvinfo->VGLinkH = NULL;
1819 salvinfo->VGLinkH_cnt = 0;
1820 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1822 parms.svgp_inodeSummaryp = isp;
1823 parms.svgp_count = nVols;
1824 parms.svgp_salvinfo = salvinfo;
1825 code = pthread_attr_init(&tattr);
1827 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1831 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1833 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1836 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1838 Log("Failed to create thread to salvage volume group %u\n",
1842 (void)pthread_join(tid, NULL);
1844 #endif /* AFS_NT40_ENV */
/*
 * DoSalvageVolumeGroup: salvage the nVols volumes described by isp[0..nVols-1],
 * which together form one volume group.
 *
 * Visible steps:
 *  - haveRWvolume is true when the first summary is the RW volume
 *    (volumeId == RWvolumeId) and has at least one special inode.
 *  - Unless ForceSalvage is set, QuickCheck() is consulted first (when
 *    ShowMounts is off, or when it is on and there is no RW volume).
 *  - When canfork is set and debug is off, the work is done in a forked
 *    child while the parent waits for it.
 *  - The ViceInodeInfo records of the whole group are read from
 *    salvinfo->inodeFd, starting at record isp->index, into one buffer.
 *    allInodes is that buffer rebased so it can be indexed with the
 *    partition-wide isp[i].index values.
 *  - Under AFS_NAMEI_ENV the link table inode is located with
 *    FindLinkHandle() and opened.  If it is missing or cannot be opened
 *    it is recreated with CreateLinkTable(), and every non-special inode
 *    of the group gets a link count of 1 in the new table.
 *  - Volumes are processed from the last entry down to salvageTo (0 when
 *    an RW volume is present, otherwise 1).  Each one is handled in two
 *    passes, check = 1 and then check = 0, calling
 *    SalvageVolumeHeaderFile() and SalvageVnodes(); failures go through
 *    MaybeZapVolume().
 *  - Afterwards each inode's remaining linkCount is driven to zero with
 *    IH_DEC / IH_INC; under AFS_NAMEI_ENV the link table inode itself is
 *    adjusted last, using dec_VGLinkH.
 *  - SalvageVolume() runs the directory consistency checks on the RW
 *    volume, after which VGLinkH is released.
 *
 * NOTE(review): the result of malloc(size) is not visibly checked before
 * the buffer is used.
 * NOTE(review): "inodes - isp->index" forms a pointer before the start of
 * the allocation whenever isp->index is non-zero -- confirm this is
 * acceptable on all supported platforms.
 * NOTE(review): many short lines (braces, returns, loop exits) are not
 * visible in this view.
 */
1847 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1849 struct ViceInodeInfo *inodes, *allInodes, *ip;
1850 int i, totalInodes, size, salvageTo;
1854 int dec_VGLinkH = 0;
1856 FdHandle_t *fdP = NULL;
1858 salvinfo->VGLinkH_cnt = 0;
1859 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1860 && isp->nSpecialInodes > 0);
1861 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1862 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1865 if (ShowMounts && !haveRWvolume)
1867 if (canfork && !debug && Fork() != 0) {
1868 (void)Wait("Salvage volume group");
1871 for (i = 0, totalInodes = 0; i < nVols; i++)
1872 totalInodes += isp[i].nInodes;
1873 size = totalInodes * sizeof(struct ViceInodeInfo);
1874 inodes = (struct ViceInodeInfo *)malloc(size);
1875 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1876 * for the partition, if all the inodes
1877 * had been read into memory */
1878 osi_Assert(afs_lseek
1879 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1881 osi_Assert(read(salvinfo->inodeFd, inodes, size) == size);
1883 /* Don't try to salvage a read write volume if there isn't one on this
1885 salvageTo = haveRWvolume ? 0 : 1;
1887 #ifdef AFS_NAMEI_ENV
1888 ino = FindLinkHandle(isp, nVols, allInodes);
1889 if (VALID_INO(ino)) {
1890 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1891 fdP = IH_OPEN(salvinfo->VGLinkH);
1893 if (!VALID_INO(ino) || fdP == NULL) {
1894 Log("%s link table for volume %u.\n",
1895 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1897 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1900 struct ViceInodeInfo *ip;
1901 CreateLinkTable(salvinfo, isp, ino);
1902 fdP = IH_OPEN(salvinfo->VGLinkH);
1903 /* Sync fake 1 link counts to the link table, now that it exists */
1905 for (i = 0; i < nVols; i++) {
1906 ip = allInodes + isp[i].index;
1907 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1909 nt_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1911 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1919 FDH_REALLYCLOSE(fdP);
1921 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1924 /* Salvage in reverse order--read/write volume last; this way any
1925 * Inodes not referenced by the time we salvage the read/write volume
1926 * can be picked up by the read/write volume */
1927 /* ACTUALLY, that's not done right now--the inodes just vanish */
1928 for (i = nVols - 1; i >= salvageTo; i--) {
1930 struct InodeSummary *lisp = &isp[i];
1931 #ifdef AFS_NAMEI_ENV
1932 /* If only the RO is present on this partition, the link table
1933 * shows up as a RW volume special file. Need to make sure the
1934 * salvager doesn't try to salvage the non-existent RW.
1936 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1937 /* If this only special inode is the link table, continue */
1938 if (inodes->u.special.type == VI_LINKTABLE) {
1945 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1946 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1947 /* Check inodes twice. The second time do things seriously. This
1948 * way the whole RO volume can be deleted, below, if anything goes wrong */
1949 for (check = 1; check >= 0; check--) {
1951 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
1953 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
1954 if (rw && deleteMe) {
1955 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1956 * volume won't be called */
1962 if (rw && check == 1)
1964 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
1965 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
1971 /* Fix actual inode counts */
1974 Log("totalInodes %d\n",totalInodes);
1975 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1976 static int TraceBadLinkCounts = 0;
1977 #ifdef AFS_NAMEI_ENV
1978 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
1979 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
1980 VGLinkH_p1 = ip->u.param[0];
1981 continue; /* Deal with this last. */
1984 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1985 TraceBadLinkCounts--; /* Limit reports, per volume */
1986 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
1988 while (ip->linkCount > 0) {
1989 /* below used to assert, not break */
1991 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1992 Log("idec failed. inode %s errno %d\n",
1993 PrintInode(stmp, ip->inodeNumber), errno);
1999 while (ip->linkCount < 0) {
2000 /* these used to be asserts */
2002 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2003 Log("iinc failed. inode %s errno %d\n",
2004 PrintInode(stmp, ip->inodeNumber), errno);
2011 #ifdef AFS_NAMEI_ENV
2012 while (dec_VGLinkH > 0) {
2013 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2014 Log("idec failed on link table, errno = %d\n", errno);
2018 while (dec_VGLinkH < 0) {
2019 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2020 Log("iinc failed on link table, errno = %d\n", errno);
2027 /* Directory consistency checks on the rw volume */
2029 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2030 IH_RELEASE(salvinfo->VGLinkH);
2032 if (canfork && !debug) {
/*
 * QuickCheck: inspect the volume info header of each volume in the group
 * to decide whether a full salvage can be avoided.
 *
 * For each of the nVols summaries, the VolumeDiskData is read from the
 * inode named by the volume summary's header.volumeInfo.  The header is
 * acceptable when the read returns the full structure, the stamp magic is
 * VOLUMEINFOMAGIC, dontSalvage equals DONT_SALVAGE, and both
 * needsSalvaged and destroyMe are 0.  For an acceptable header that still
 * has inUse set, inUse is cleared, inService is set to 1 and the header
 * is written back.
 *
 * The first entry is special-cased when it has no special inodes and the
 * group has more than one volume (a "phantom" RW volume).
 *
 * NOTE(review): the return statements, the handling of a NULL volume
 * summary and the release of the inode handle are not visible in this
 * view; the caller skips the salvage when this returns non-zero.
 */
2039 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2041 /* Check headers BEFORE forking */
2045 for (i = 0; i < nVols; i++) {
2046 struct VolumeSummary *vs = isp[i].volSummary;
2047 VolumeDiskData volHeader;
2049 /* Don't salvage just because phantom rw volume is there... */
2050 /* (If a read-only volume exists, read/write inodes must also exist) */
2051 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2055 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2056 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2057 == sizeof(volHeader)
2058 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2059 && volHeader.dontSalvage == DONT_SALVAGE
2060 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2061 if (volHeader.inUse != 0) {
2062 volHeader.inUse = 0;
2063 volHeader.inService = 1;
2065 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2066 != sizeof(volHeader)) {
/*
 * SalvageVolumeHeaderFile: rebuild and verify the top-level volume header
 * (the V*.vol file) for the volume described by isp.
 *
 * Parameters as used in the visible code:
 *   inodes   - inode array indexed with isp->index + i
 *   RW       - non-zero for the read-write volume
 *   check    - non-zero for the non-destructive first pass
 *   deleteMe - passed through to SalvageHeader()
 *
 * Visible steps:
 *  - A skip[] array with one entry per special inode is allocated.  If
 *    that fails the salvage continues, but duplicate special inode
 *    recovery is disabled.
 *  - If a volume summary exists, special inodes referenced by the current
 *    header are remembered in goodspecial[], indexed by type - 1.
 *  - tempHeader is reset and given the magic, version, id and parent.
 *  - Adjacent special inodes of the same type are reported as duplicates.
 *    When goodspecial[] identifies the referenced inode, the other one is
 *    marked to be skipped; otherwise the salvage of the volume is
 *    reported as aborted.
 *  - Each special inode is examined in turn.  Types outside
 *    1..MAXINODETYPE are reported as rubbish.  For non-obsolete types,
 *    skipped inodes are removed or ignored according to the orphans
 *    setting.  The remaining inodes are recorded in tempHeader through
 *    stuff[].inode, and when check is 0 their linkCount is decremented
 *    (the link table is exempt from the decrement).
 *  - salvinfo->VGLinkH_cnt is incremented once per header; for a
 *    read-only volume on the real pass, ClearROInUseBit() is called.
 *  - SalvageHeader() is called for every inode type; the link table entry
 *    is first pointed at salvinfo->VGLinkH when that inode is valid.
 *  - If there is no volume summary, one is allocated and the header is
 *    written with VCreateVolumeDiskHeader.  If the existing header differs
 *    from tempHeader it is rewritten with VWriteVolumeDiskHeader.
 *  - Finally volumeInfoHandle is initialised from header.volumeInfo.
 *
 * NOTE(review): the result of malloc(sizeof(struct VolumeSummary)) is
 * dereferenced on the next visible line without a NULL check.
 * NOTE(review): strcpy() into the 64-byte headerName is unbounded --
 * confirm fileName can never exceed that size.
 * NOTE(review): many short lines (braces, returns, the Testing checks and
 * the freeing of skip) are not visible in this view.
 */
2082 /* SalvageVolumeHeaderFile
2084 * Salvage the top level V*.vol header file. Make sure the special files
2085 * exist and that there are no duplicates.
2087 * Calls SalvageHeader for each possible type of volume special file.
2091 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2092 struct ViceInodeInfo *inodes, int RW,
2093 int check, int *deleteMe)
2096 struct ViceInodeInfo *ip;
2097 int allinodesobsolete = 1;
2098 struct VolumeDiskHeader diskHeader;
2099 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2101 struct VolumeHeader tempHeader;
2102 struct afs_inode_info stuff[MAXINODETYPE];
2104 /* keeps track of special inodes that are probably 'good'; they are
2105 * referenced in the vol header, and are included in the given inodes
2110 } goodspecial[MAXINODETYPE];
2115 memset(goodspecial, 0, sizeof(goodspecial));
2117 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2119 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2121 Log("cannot allocate memory for inode skip array when salvaging "
2122 "volume %lu; not performing duplicate special inode recovery\n",
2123 afs_printable_uint32_lu(isp->volumeId));
2124 /* still try to perform the salvage; the skip array only does anything
2125 * if we detect duplicate special inodes */
2128 init_inode_info(&tempHeader, stuff);
2131 * First, look at the special inodes and see if any are referenced by
2132 * the existing volume header. If we find duplicate special inodes, we
2133 * can use this information to use the referenced inode (it's more
2134 * likely to be the 'good' one), and throw away the duplicates.
2136 if (isp->volSummary && skip) {
2137 /* use tempHeader, so we can use the stuff[] array to easily index
2138 * into the isp->volSummary special inodes */
2139 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2141 for (i = 0; i < isp->nSpecialInodes; i++) {
2142 ip = &inodes[isp->index + i];
2143 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2144 /* will get taken care of in a later loop */
2147 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2148 goodspecial[ip->u.special.type-1].valid = 1;
2149 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2154 memset(&tempHeader, 0, sizeof(tempHeader));
2155 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2156 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2157 tempHeader.id = isp->volumeId;
2158 tempHeader.parent = isp->RWvolumeId;
2160 /* Check for duplicates (inodes are sorted by type field) */
2161 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2162 ip = &inodes[isp->index + i];
2163 if (ip->u.special.type == (ip + 1)->u.special.type) {
2164 afs_ino_str_t stmp1, stmp2;
2166 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2167 /* Will be caught in the loop below */
2171 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2172 ip->u.special.type, isp->volumeId,
2173 PrintInode(stmp1, ip->inodeNumber),
2174 PrintInode(stmp2, (ip+1)->inodeNumber));
2176 if (skip && goodspecial[ip->u.special.type-1].valid) {
2177 Inode gi = goodspecial[ip->u.special.type-1].inode;
2180 Log("using special inode referenced by vol header (%s)\n",
2181 PrintInode(stmp1, gi));
2184 /* the volume header references some special inode of
2185 * this type in the inodes array; are we it? */
2186 if (ip->inodeNumber != gi) {
2188 } else if ((ip+1)->inodeNumber != gi) {
2189 /* in case this is the last iteration; we need to
2190 * make sure we check ip+1, too */
2195 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2203 for (i = 0; i < isp->nSpecialInodes; i++) {
2205 ip = &inodes[isp->index + i];
2206 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2208 Log("Rubbish header inode %s of type %d\n",
2209 PrintInode(stmp, ip->inodeNumber),
2210 ip->u.special.type);
2216 Log("Rubbish header inode %s of type %d; deleted\n",
2217 PrintInode(stmp, ip->inodeNumber),
2218 ip->u.special.type);
2219 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2220 if (skip && skip[i]) {
2221 if (orphans == ORPH_REMOVE) {
2222 Log("Removing orphan special inode %s of type %d\n",
2223 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2226 Log("Ignoring orphan special inode %s of type %d\n",
2227 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2228 /* fall through to the ip->linkCount--; line below */
2231 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2232 allinodesobsolete = 0;
2234 if (!check && ip->u.special.type != VI_LINKTABLE)
2235 ip->linkCount--; /* Keep the inode around */
2243 if (allinodesobsolete) {
2250 salvinfo->VGLinkH_cnt++; /* one for every header. */
2252 if (!RW && !check && isp->volSummary) {
2253 ClearROInUseBit(isp->volSummary);
2257 for (i = 0; i < MAXINODETYPE; i++) {
2258 if (stuff[i].inodeType == VI_LINKTABLE) {
2259 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2260 * And we may have recreated the link table earlier, so set the
2261 * RW header as well.
2263 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2264 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2268 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2272 if (isp->volSummary == NULL) {
2274 char headerName[64];
2275 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2276 (void)afs_snprintf(path, sizeof path, "%s/%s", salvinfo->fileSysPath, headerName);
2278 Log("No header file for volume %u\n", isp->volumeId);
2282 Log("No header file for volume %u; %screating %s\n",
2283 isp->volumeId, (Testing ? "it would have been " : ""),
2285 isp->volSummary = (struct VolumeSummary *)
2286 malloc(sizeof(struct VolumeSummary));
2287 isp->volSummary->fileName = ToString(headerName);
2289 writefunc = VCreateVolumeDiskHeader;
2292 char headerName[64];
2293 /* hack: these two fields are obsolete... */
2294 isp->volSummary->header.volumeAcl = 0;
2295 isp->volSummary->header.volumeMountTable = 0;
2298 (&isp->volSummary->header, &tempHeader,
2299 sizeof(struct VolumeHeader))) {
2300 /* We often remove the name before calling us, so we make a fake one up */
2301 if (isp->volSummary->fileName) {
2302 strcpy(headerName, isp->volSummary->fileName);
2304 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2305 isp->volSummary->fileName = ToString(headerName);
2307 (void)afs_snprintf(path, sizeof path, "%s/%s", salvinfo->fileSysPath, headerName);
2309 Log("Header file %s is damaged or no longer valid%s\n", path,
2310 (check ? "" : "; repairing"));
2314 writefunc = VWriteVolumeDiskHeader;
2318 memcpy(&isp->volSummary->header, &tempHeader,
2319 sizeof(struct VolumeHeader));
2322 Log("It would have written a new header file for volume %u\n",
2326 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2327 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2329 Log("Error %ld writing volume header file for volume %lu\n",
2330 afs_printable_int32_ld(code),
2331 afs_printable_uint32_lu(diskHeader.id));
2336 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2337 isp->volSummary->header.volumeInfo);
/*
 * Validate one special (header) inode of a volume and, outside of Testing
 * mode, recreate it when it is missing or corrupted.
 *
 * salvinfo - salvage state; supplies fileSysDevice/fileSysPath and receives
 *            a copy of the volume info header in salvinfo->VolInfo.
 * sp       - describes the special inode: inodeType, expected size and
 *            stamp, a description string used in log messages, and a
 *            pointer to the inode number kept in the volume header.
 * isp      - inode summary of the volume (volumeId, RWvolumeId,
 *            maxUniquifier are read here).
 * check    - check-only flag; the "recreating volume header ... in check
 *            mode" message below treats recreation in this mode as an
 *            internal error.
 * deleteMe - NOTE(review): presumably set when the volume should be
 *            destroyed (the OKToZap and destroyMe paths); confirm against
 *            the complete function.
 *
 * A recreated VI_VOLINFO header is named "bogus.<volumeId>", is marked out
 * of service and unblessed, and gets a uniquifier beyond
 * isp->maxUniquifier.  Any other special inode is recreated with only its
 * version stamp.
 */
2342 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2343 struct InodeSummary *isp, int check, int *deleteMe)
2346 VolumeDiskData volumeInfo;
2347 struct versionStamp fileHeader;
2356 #ifndef AFS_NAMEI_ENV
2357 if (sp->inodeType == VI_LINKTABLE)
/* A zero inode number means the header has no inode of this type yet. */
2360 if (*(sp->inode) == 0) {
2362 Log("Missing inode in volume header (%s)\n", sp->description);
2366 Log("Missing inode in volume header (%s); %s\n", sp->description,
2367 (Testing ? "it would have recreated it" : "recreating"));
2370 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2371 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2372 if (!VALID_INO(*(sp->inode)))
2374 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2375 sp->description, errno);
/* Open the special inode so that its contents can be verified. */
2380 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2381 fdP = IH_OPEN(specH);
2382 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2383 /* bail out early and destroy the volume */
2385 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2392 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2393 sp->description, errno);
2396 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2397 || header.fileHeader.magic != sp->stamp.magic)) {
2399 Log("Part of the header (%s) is corrupted\n", sp->description);
2400 FDH_REALLYCLOSE(fdP);
2404 Log("Part of the header (%s) is corrupted; recreating\n",
2407 /* header can be garbage; make sure we don't read garbage data from
2409 memset(&header, 0, sizeof(header));
2411 if (sp->inodeType == VI_VOLINFO
2412 && header.volumeInfo.destroyMe == DESTROY_ME) {
2415 FDH_REALLYCLOSE(fdP);
2419 if (recreate && !Testing) {
2422 ("Internal error: recreating volume header (%s) in check mode\n",
2424 nBytes = FDH_TRUNC(fdP, 0);
2426 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2427 sp->description, errno);
2429 /* The following code should be moved into vutil.c */
2430 if (sp->inodeType == VI_VOLINFO) {
2432 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2433 header.volumeInfo.stamp = sp->stamp;
2434 header.volumeInfo.id = isp->volumeId;
2435 header.volumeInfo.parentId = isp->RWvolumeId;
2436 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2437 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2438 isp->volumeId, isp->volumeId);
2439 header.volumeInfo.inService = 0;
2440 header.volumeInfo.blessed = 0;
2441 /* The + 1000 is a hack in case there are any files out in venus caches */
2442 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2443 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2444 header.volumeInfo.needsCallback = 0;
2445 gettimeofday(&tp, 0);
2446 header.volumeInfo.creationDate = tp.tv_sec;
2448 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2449 sizeof(header.volumeInfo), 0);
2450 if (nBytes != sizeof(header.volumeInfo)) {
2453 ("Unable to write volume header file (%s) (errno = %d)\n",
2454 sp->description, errno);
2455 Abort("Unable to write entire volume header file (%s)\n",
2459 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2460 if (nBytes != sizeof(sp->stamp)) {
2463 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2464 sp->description, errno);
2466 ("Unable to write entire version stamp in volume header file (%s)\n",
2471 FDH_REALLYCLOSE(fdP);
/* Keep a copy of the volume info header and log when the volume was last
 * updated, or when it was created if it has never been updated. */
2473 if (sp->inodeType == VI_VOLINFO) {
2474 salvinfo->VolInfo = header.volumeInfo;
2478 if (salvinfo->VolInfo.updateDate) {
2479 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2481 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2482 salvinfo->VolInfo.id,
2483 (Testing ? "it would have been " : ""), update);
2485 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2487 Log("%s (%u) not updated (created %s)\n",
2488 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
/*
 * Salvage both vnode indexes (small, then large) of one volume.
 *
 * salvinfo - salvage state handed through to SalvageIndex.
 * rwIsp    - inode summary of the read/write volume; it determines which
 *            slice of `inodes` is used (the special inodes are skipped).
 * thisIsp  - inode summary of the volume whose indexes are salvaged; RW is
 *            true when it is the same summary as rwIsp.
 * inodes   - inode array indexed via rwIsp->index.
 * check    - check-only flag, passed on to SalvageIndex.
 *
 * Returns 0 when both SalvageIndex calls returned 0, otherwise -1.
 */
2498 SalvageVnodes(struct SalvInfo *salvinfo,
2499 struct InodeSummary *rwIsp,
2500 struct InodeSummary *thisIsp,
2501 struct ViceInodeInfo *inodes, int check)
2503 int ilarge, ismall, ioffset, RW, nInodes;
2504 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2507 RW = (rwIsp == thisIsp);
2508 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2510 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2511 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
/* In check mode a failure on the small index is acted on before the large
 * index is examined. */
2512 if (check && ismall == -1)
2515 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2516 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2517 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * Reconcile one vnode index file of a volume with the inodes found on disk.
 *
 * salvinfo   - salvage state; VolumeChanged is set when a vnode is rewritten.
 * ino        - inode number of the index file to examine.
 * class      - vnode class of that index (selects the VnodeClassInfo entry).
 * RW         - non-zero when the volume being salvaged is the read/write
 *              volume itself (SalvageVnodes passes rwIsp == thisIsp).
 * ip,nInodes - the inodes to match against.  NOTE(review): the matching
 *              loops compare vnode numbers in ascending order, so the array
 *              is presumably sorted by vnode number; confirm at the caller.
 * volSummary - supplies header.parent for the index file's inode handle.
 * check      - check-only flag; it is asserted below that no vnode was
 *              changed while this is set.
 *
 * For every non-null vnode the function zeroes entries that have inode
 * number 0 or a bad magic number, then looks for an inode carrying the same
 * vnode number and brings the vnode's uniquifier, data version, inode
 * number and length in line with it.  A vnode that names an inode with no
 * match, or a directory vnode without an inode, is zeroed.  Changed vnodes
 * are written back to the index unless Testing is set.
 */
2521 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2522 struct ViceInodeInfo *ip, int nInodes,
2523 struct VolumeSummary *volSummary, int check)
2525 char buf[SIZEOF_LARGEDISKVNODE];
2526 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2528 StreamHandle_t *file;
2529 struct VnodeClassInfo *vcp;
2531 afs_sfsize_t nVnodes;
2532 afs_fsize_t vnodeLength;
2534 afs_ino_str_t stmp1, stmp2;
2538 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2539 fdP = IH_OPEN(handle);
2540 osi_Assert(fdP != NULL);
2541 file = FDH_FDOPEN(fdP, "r+");
2542 osi_Assert(file != NULL);
2543 vcp = &VnodeClassInfo[class];
2544 size = OS_SIZE(fdP->fd_fd);
2545 osi_Assert(size != -1);
2546 nVnodes = (size / vcp->diskSize) - 1;
2548 osi_Assert((nVnodes + 1) * vcp->diskSize == size);
2549 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
/* The seek above skips the first diskSize bytes of the index; every
 * remaining slot is read and examined in turn. */
2553 for (vnodeIndex = 0;
2554 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2555 nVnodes--, vnodeIndex++) {
2556 if (vnode->type != vNull) {
2557 int vnodeChanged = 0;
2558 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2559 if (VNDISK_GET_INO(vnode) == 0) {
2561 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2562 memset(vnode, 0, vcp->diskSize);
2566 if (vcp->magic != vnode->vnodeMagic) {
2567 /* bad magic #, probably partially created vnode */
2568 Log("Partially allocated vnode %d deleted.\n",
2570 memset(vnode, 0, vcp->diskSize);
2574 /* ****** Should do a bit more salvage here: e.g. make sure
2575 * vnode type matches what it should be given the index */
2576 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2577 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2578 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2579 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2586 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2587 /* The following doesn't work, because the version number
2588 * is not maintained correctly by the file server */
2589 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2590 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2592 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2598 /* For RW volume, look for vnode with matching inode number;
2599 * if no such match, take the first determined by our sort
2601 struct ViceInodeInfo *lip = ip;
2602 int lnInodes = nInodes;
2604 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2605 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2614 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2615 /* "Matching" inode */
2619 vu = vnode->uniquifier;
2620 iu = ip->u.vnode.vnodeUniquifier;
2621 vd = vnode->dataVersion;
2622 id = ip->u.vnode.inodeDataVersion;
2624 * Because of the possibility of the uniquifier overflows (> 4M)
2625 * we compare them modulo the low 22-bits; we shouldn't worry
2626 * about mismatching since they shouldn't to many old
2627 * uniquifiers of the same vnode...
2629 if (IUnique(vu) != IUnique(iu)) {
2631 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2634 vnode->uniquifier = iu;
2635 #ifdef AFS_3DISPARES
2636 vnode->dataVersion = (id >= vd ?
2639 1887437 ? vd : id) :
2642 1887437 ? id : vd));
2644 #if defined(AFS_SGI_EXMAG)
2645 vnode->dataVersion = (id >= vd ?
2648 15099494 ? vd : id) :
2651 15099494 ? id : vd));
2653 vnode->dataVersion = (id > vd ? id : vd);
2654 #endif /* AFS_SGI_EXMAG */
2655 #endif /* AFS_3DISPARES */
2658 /* don't bother checking for vd > id any more, since
2659 * partial file transfers always result in this state,
2660 * and you can't do much else anyway (you've already
2661 * found the best data you can) */
2662 #ifdef AFS_3DISPARES
2663 if (!vnodeIsDirectory(vnodeNumber)
2664 && ((vd < id && (id - vd) < 1887437)
2665 || ((vd > id && (vd - id) > 1887437)))) {
2667 #if defined(AFS_SGI_EXMAG)
2668 if (!vnodeIsDirectory(vnodeNumber)
2669 && ((vd < id && (id - vd) < 15099494)
2670 || ((vd > id && (vd - id) > 15099494)))) {
2672 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2673 #endif /* AFS_SGI_EXMAG */
2676 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2677 vnode->dataVersion = id;
2682 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2685 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2687 VNDISK_SET_INO(vnode, ip->inodeNumber);
2692 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2694 VNDISK_SET_INO(vnode, ip->inodeNumber);
/* The inode's byte count is authoritative for the vnode's length. */
2697 VNDISK_GET_LEN(vnodeLength, vnode);
2698 if (ip->byteCount != vnodeLength) {
2701 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2706 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2707 VNDISK_SET_LEN(vnode, ip->byteCount);
2711 ip->linkCount--; /* Keep the inode around */
2714 } else { /* no matching inode */
2716 if (VNDISK_GET_INO(vnode) != 0
2717 || vnode->type == vDirectory) {
2718 /* No matching inode--get rid of the vnode */
2720 if (VNDISK_GET_INO(vnode)) {
2722 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2726 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2731 if (VNDISK_GET_INO(vnode)) {
2733 time_t serverModifyTime = vnode->serverModifyTime;
2734 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2738 time_t serverModifyTime = vnode->serverModifyTime;
2739 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2742 memset(vnode, 0, vcp->diskSize);
2745 /* Should not reach here because we checked for
2746 * (inodeNumber == 0) above. And where we zero the vnode,
2747 * we also goto vnodeDone.
2751 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2755 } /* VNDISK_GET_INO(vnode) != 0 */
/* A check-only pass must never have modified a vnode. */
2757 osi_Assert(!(vnodeChanged && check));
2758 if (vnodeChanged && !Testing) {
2759 osi_Assert(IH_IWRITE
2760 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2761 (char *)vnode, vcp->diskSize)
2763 salvinfo->VolumeChanged = 1; /* For break call back */
/*
 * Look up the in-memory VnodeEssence record for a vnode number.
 *
 * The vnode number selects the class table (vnodeIdToClass) and the slot
 * within it (vnodeIdToBitNumber).  Returns a pointer into
 * salvinfo->vnodeInfo[class].vnodes, or NULL when the slot is at or beyond
 * nVnodes for that class.
 */
2774 struct VnodeEssence *
2775 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2778 struct VnodeInfo *vip;
2781 class = vnodeIdToClass(vnodeNumber);
2782 vip = &salvinfo->vnodeInfo[class];
2783 offset = vnodeIdToBitNumber(vnodeNumber);
2784 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * Give a directory a private copy of its data before the salvager edits it.
 *
 * Nothing is done when dir->copied is already set or when Testing is set.
 * Otherwise the directory buffers are flushed (DFlush), the directory's
 * vnode is read from the large vnode index, a new inode is created with the
 * data version bumped, the old inode's contents are copied into it
 * (CopyInode), the vnode is rewritten to point at the new inode, and
 * dir->dirHandle is re-pointed at the new inode.  The original inode is
 * deliberately left in place here because the directory is still being
 * scanned; SalvageDir drops it afterwards.
 */
2788 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2790 /* Copy the directory unconditionally if we are going to change it:
2791 * not just if was cloned.
2793 struct VnodeDiskObject vnode;
2794 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2795 Inode oldinode, newinode;
2798 if (dir->copied || Testing)
2800 DFlush(); /* Well justified paranoia... */
2803 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2804 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2806 osi_Assert(code == sizeof(vnode));
2807 oldinode = VNDISK_GET_INO(&vnode);
2808 /* Increment the version number by a whole lot to avoid problems with
2809 * clients that were promised new version numbers--but the file server
2810 * crashed before the versions were written to disk.
2813 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2814 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2816 osi_Assert(VALID_INO(newinode));
2817 osi_Assert(CopyInode(salvinfo->fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2819 VNDISK_SET_INO(&vnode, newinode);
2821 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2822 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2824 osi_Assert(code == sizeof(vnode));
2826 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2827 salvinfo->fileSysDevice, newinode,
2828 &salvinfo->VolumeChanged);
2829 /* Don't delete the original inode right away, because the directory is
2830 * still being scanned.
2836 * This function should either successfully create a new dir, or give up
2837 * and leave things the way they were. In particular, if it fails to write
2838 * the new dir properly, it should return w/o changing the reference to the
/*
 * Rebuild a damaged directory into a freshly created inode.
 *
 * salvinfo - salvage state (partition device/path, large vnode index
 *            handle, VolumeChanged flag).
 * dir      - the directory being salvaged; on success dir->dirHandle is
 *            replaced by the handle of the rebuilt directory.
 *
 * The directory's vnode is read from the large vnode index, a new inode is
 * created with the data version bumped, and DirSalvage copies the entries
 * that can be recovered, using dir->vnodeNumber for "." and vnode.parent
 * (or the directory itself when there is no parent) for "..".  If the
 * salvage fails, or the result does not pass DirOK, the new inode is
 * released and the old directory stays in use.  Otherwise the vnode is
 * rewritten with the new inode and length, the change is synced to disk,
 * and the old inode is released.
 */
2842 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2844 struct VnodeDiskObject vnode;
2845 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2846 Inode oldinode, newinode;
2851 afs_int32 parentUnique = 1;
2852 struct VnodeEssence *vnodeEssence;
2857 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2859 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2860 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2862 osi_Assert(lcode == sizeof(vnode));
2863 oldinode = VNDISK_GET_INO(&vnode);
2864 /* Increment the version number by a whole lot to avoid problems with
2865 * clients that were promised new version numbers--but the file server
2866 * crashed before the versions were written to disk.
2869 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2870 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2872 osi_Assert(VALID_INO(newinode));
2873 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2874 &salvinfo->VolumeChanged);
2876 /* Assign . and .. vnode numbers from dir and vnode.parent.
2877 * The uniquifier for . is in the vnode.
2878 * The uniquifier for .. might be set to a bogus value of 1 and
2879 * the salvager will later clean it up.
2881 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2882 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2885 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2887 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2892 /* didn't really build the new directory properly, let's just give up. */
/* Report the directory-salvage result before `code` is reused for the
 * IH_DEC return value; logging afterwards would print the wrong code. */
2894 Log("Directory salvage returned code %d, continuing.\n", code);
2893 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2896 Log("also failed to decrement link count on new inode\n");
2900 Log("Checking the results of the directory salvage...\n");
2901 if (!DirOK(&newdir)) {
2902 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2903 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2904 osi_Assert(code == 0);
2908 VNDISK_SET_INO(&vnode, newinode);
2909 length = Length(&newdir);
2910 VNDISK_SET_LEN(&vnode, length);
2912 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2913 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2915 osi_Assert(lcode == sizeof(vnode));
2918 nt_sync(salvinfo->fileSysDevice);
2920 sync(); /* this is slow, but hopefully rarely called. We don't have
2921 * an open FD on the file itself to fsync.
2925 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
2927 /* make sure old directory file is really closed */
2928 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2929 FDH_REALLYCLOSE(fdP);
2931 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2932 osi_Assert(code == 0);
2933 dir->dirHandle = newdir;
2937 * arguments for JudgeEntry.
/* Bundle handed to JudgeEntry through EnumerateDir's opaque argument;
 * SalvageDir fills in both members before enumerating a directory. */
2939 struct judgeEntry_params {
2940 struct DirSummary *dir; /**< directory we're examining entries in */
2941 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/*
 * Directory-enumeration callback (handed to EnumerateDir by SalvageDir)
 * that validates a single directory entry and repairs or removes it.
 *
 * arock       - struct judgeEntry_params naming the directory and the
 *               salvage job.
 * name        - name of the entry.
 * vnodeNumber - vnode number stored in the entry.
 * unique      - uniquifier stored in the entry (used throughout the body).
 *
 * In order, the entry is:
 *   - deleted when its vnode number has no VnodeEssence record, when (in
 *     non-NAMEI builds) the vnode has inode number 0, or when it names a
 *     small vnode whose count, unique and modeBits are all zero;
 *   - re-created with the vnode's uniquifier, or deleted, when the
 *     uniquifiers disagree;
 *   - rewritten when it is a wrong "." or ".." entry;
 *   - deleted, and its vnode marked todelete, when the name starts with
 *     ".__afs";
 *   - otherwise inspected (suid/sgid, mount-point and root-owned reports)
 *     and claimed for this directory, or deleted when the vnode is already
 *     claimed elsewhere.
 * Every modification goes through CopyOnWrite first.  A claimed vnode has
 * its `claimed` flag set and its remaining link count decremented.
 */
2945 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
2948 struct judgeEntry_params *params = arock;
2949 struct DirSummary *dir = params->dir;
2950 struct SalvInfo *salvinfo = params->salvinfo;
2951 struct VnodeEssence *vnodeEssence;
2952 afs_int32 dirOrphaned, todelete;
2954 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
/* An entry whose vnode number has no record in the vnode tables is
 * invalid. */
2956 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
2957 if (vnodeEssence == NULL) {
2959 Log("dir vnode %u: invalid entry deleted: %s/%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2962 CopyOnWrite(salvinfo, dir);
2963 osi_Assert(Delete(&dir->dirHandle, name) == 0);
2968 #ifndef AFS_NAMEI_ENV
2969 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2970 * mount inode for the partition. If this inode were deleted, it would crash
2973 if (vnodeEssence->InodeNumber == 0) {
2974 Log("dir vnode %d: invalid entry: %s/%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2976 CopyOnWrite(salvinfo, dir);
2977 osi_Assert(Delete(&dir->dirHandle, name) == 0);
2984 if (!(vnodeNumber & 1) && !Showmode
2985 && !(vnodeEssence->count || vnodeEssence->unique
2986 || vnodeEssence->modeBits)) {
2987 Log("dir vnode %u: invalid entry: %s/%s (vnode %u, unique %u)%s\n",
2988 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
2989 vnodeNumber, unique,
2990 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
2994 CopyOnWrite(salvinfo, dir);
2995 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3001 /* Check if the Uniquifiers match. If not, change the directory entry
3002 * so its unique matches the vnode unique. Delete if the unique is zero
3003 * or if the directory is orphaned.
3005 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3006 if (!vnodeEssence->unique
3007 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3008 /* This is an orphaned directory. Don't delete the . or ..
3009 * entry. Otherwise, it will get created in the next
3010 * salvage and deleted again here. So Just skip it.
3015 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3018 Log("dir vnode %u: %s/%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3022 fid.Vnode = vnodeNumber;
3023 fid.Unique = vnodeEssence->unique;
3024 CopyOnWrite(salvinfo, dir);
3025 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3027 osi_Assert(Create(&dir->dirHandle, name, &fid) == 0);
3030 return 0; /* no need to continue */
/* "." must name the directory itself. */
3033 if (strcmp(name, ".") == 0) {
3034 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3037 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3039 CopyOnWrite(salvinfo, dir);
3040 osi_Assert(Delete(&dir->dirHandle, ".") == 0);
3041 fid.Vnode = dir->vnodeNumber;
3042 fid.Unique = dir->unique;
3043 osi_Assert(Create(&dir->dirHandle, ".", &fid) == 0);
3046 vnodeNumber = fid.Vnode; /* Get the new Essence */
3047 unique = fid.Unique;
3048 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3051 } else if (strcmp(name, "..") == 0) {
3054 struct VnodeEssence *dotdot;
3055 pa.Vnode = dir->parent;
3056 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3057 osi_Assert(dotdot != NULL); /* XXX Should not be assert */
3058 pa.Unique = dotdot->unique;
3060 pa.Vnode = dir->vnodeNumber;
3061 pa.Unique = dir->unique;
3063 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3065 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3067 CopyOnWrite(salvinfo, dir);
3068 osi_Assert(Delete(&dir->dirHandle, "..") == 0);
3069 osi_Assert(Create(&dir->dirHandle, "..", &pa) == 0);
3072 vnodeNumber = pa.Vnode; /* Get the new Essence */
3074 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3076 dir->haveDotDot = 1;
3077 } else if (strncmp(name, ".__afs", 6) == 0) {
3079 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3082 CopyOnWrite(salvinfo, dir);
3083 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3085 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3086 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3089 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3090 Log("FOUND suid/sgid file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/* A symlink with no execute bits is treated as a mount point: its contents
 * are read and checked for the "#..." or "%..." form ending in '.'. */
3091 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3092 && !(vnodeEssence->modeBits & 0111)) {
3099 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3100 vnodeEssence->InodeNumber);
3103 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3107 size = FDH_SIZE(fdP);
3109 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3110 FDH_REALLYCLOSE(fdP);
3117 nBytes = FDH_PREAD(fdP, buf, size, 0);
3118 if (nBytes == size) {
3120 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3121 Log("Volume %u (%s) mount point %s/%s to '%s' invalid, %s to symbolic link\n",
3122 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3123 Testing ? "would convert" : "converted");
3124 vnodeEssence->modeBits |= 0111;
3125 vnodeEssence->changed = 1;
3126 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s/%s to '%s'\n",
3127 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3128 dir->name ? dir->name : "??", name, buf);
3130 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3131 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3133 FDH_REALLYCLOSE(fdP);
3136 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3137 Log("FOUND root file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/* Remember the entry name of a directory vnode the first time it is seen;
 * GetDirName later builds paths from these names. */
3138 if (vnodeIdToClass(vnodeNumber) == vLarge
3139 && vnodeEssence->name == NULL) {
3141 if ((n = (char *)malloc(strlen(name) + 1)))
3143 vnodeEssence->name = n;
3146 /* The directory entry points to the vnode. Check to see if the
3147 * vnode points back to the directory. If not, then let the
3148 * directory claim it (else it might end up orphaned). Vnodes
3149 * already claimed by another directory are deleted from this
3150 * directory: hardlinks to the same vnode are not allowed
3151 * from different directories.
3153 if (vnodeEssence->parent != dir->vnodeNumber) {
3154 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3155 /* Vnode does not point back to this directory.
3156 * Orphaned dirs cannot claim a file (it may belong to
3157 * another non-orphaned dir).
3160 Log("dir vnode %u: %s/%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3162 vnodeEssence->parent = dir->vnodeNumber;
3163 vnodeEssence->changed = 1;
3165 /* Vnode was claimed by another directory */
3168 Log("dir vnode %u: %s/%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3169 } else if (vnodeNumber == 1) {
3170 Log("dir vnode %d: %s/%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3172 Log("dir vnode %u: %s/%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3176 CopyOnWrite(salvinfo, dir);
3177 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3182 /* This directory claims the vnode */
3183 vnodeEssence->claimed = 1;
3185 vnodeEssence->count--;
/*
 * Load one vnode index file into the in-memory VnodeEssence table.
 *
 * salvinfo - salvage state; vnodeInfo[class] is (re)initialised here and
 *            VolumeChanged is set when the index is modified.
 * rwVId    - volume id used to build the inode handle for the index.
 * class    - vnode class being read (vLarge or vSmall).
 * ino      - inode number of the index file.
 * maxu     - in/out; raised to the largest uniquifier seen in this index.
 *
 * For every non-null vnode the link count, block count, parent,
 * uniquifier, mode bits, inode number, type, author, owner and group are
 * recorded, and volumeBlockCount / nAllocatedVnodes are accumulated.
 * Directory vnodes belong in the large index only: one found in another
 * index is zeroed on disk, while one in the large index has its inode
 * number remembered in vip->inodes[].
 */
3190 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3191 VnodeClass class, Inode ino, Unique * maxu)
3193 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3194 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3195 char buf[SIZEOF_LARGEDISKVNODE];
3196 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3198 StreamHandle_t *file;
3203 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3204 fdP = IH_OPEN(vip->handle);
3205 osi_Assert(fdP != NULL);
3206 file = FDH_FDOPEN(fdP, "r+");
3207 osi_Assert(file != NULL);
3208 size = OS_SIZE(fdP->fd_fd);
3209 osi_Assert(size != -1);
/* The first diskSize bytes of the index are not a vnode slot, hence the
 * "- 1" here and the seek past them below. */
3210 vip->nVnodes = (size / vcp->diskSize) - 1;
3211 if (vip->nVnodes > 0) {
3212 osi_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3213 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
3214 osi_Assert((vip->vnodes = (struct VnodeEssence *)
3215 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3216 if (class == vLarge) {
3217 osi_Assert((vip->inodes = (Inode *)
3218 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3227 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3228 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3229 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3230 nVnodes--, vnodeIndex++) {
3231 if (vnode->type != vNull) {
3232 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3233 afs_fsize_t vnodeLength;
3234 vip->nAllocatedVnodes++;
3235 vep->count = vnode->linkCount;
3236 VNDISK_GET_LEN(vnodeLength, vnode);
3237 vep->blockCount = nBlocks(vnodeLength);
3238 vip->volumeBlockCount += vep->blockCount;
3239 vep->parent = vnode->parent;
3240 vep->unique = vnode->uniquifier;
3241 if (*maxu < vnode->uniquifier)
3242 *maxu = vnode->uniquifier;
3243 vep->modeBits = vnode->modeBits;
3244 vep->InodeNumber = VNDISK_GET_INO(vnode);
3245 vep->type = vnode->type;
3246 vep->author = vnode->author;
3247 vep->owner = vnode->owner;
3248 vep->group = vnode->group;
3249 if (vnode->type == vDirectory) {
3250 if (class != vLarge) {
3251 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3252 vip->nAllocatedVnodes--;
/* `vnode` is a pointer into buf, so sizeof(vnode) is only the size of a
 * pointer.  Clear the whole on-disk record and write that record (not the
 * pointer variable) back, using the class's disk size as SalvageIndex
 * does. */
3253 memset(vnode, 0, vcp->diskSize);
3254 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3255 vnodeIndexOffset(vcp, vnodeNumber),
3256 (char *)vnode, vcp->diskSize);
3257 salvinfo->VolumeChanged = 1;
3259 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * Build the path of a directory vnode into `path` by recursing through its
 * parents: when the vnode has a parent and a recorded name, and the
 * parent's path could be built, the vnode's own name is appended.
 *
 * NOTE(review): the name is appended with strcat and no length check is
 * visible here; SalvageDir passes a MAXPATHLEN buffer -- confirm deep or
 * long-named trees cannot overflow it.
 */
3268 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3271 struct VnodeEssence *parentvp;
3277 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3278 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3280 strcat(path, vp->name);
3286 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3287 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
/*
 * Report whether a vnode is orphaned.
 *
 * Returns 1 (orphaned) when the vnode has no VnodeEssence record or is not
 * claimed, 0 for the root directory vnode, and otherwise the answer for the
 * vnode's parent, so a vnode counts as attached only when every ancestor up
 * to the root is claimed.
 *
 * NOTE(review): the walk up the parent chain is recursive with no visible
 * cycle guard; confirm parent loops cannot occur at this point.
 */
3290 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3292 struct VnodeEssence *vep;
3295 return (1); /* Vnode zero does not exist */
3297 return (0); /* The root dir vnode is always claimed */
3298 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3299 if (!vep || !vep->claimed)
3300 return (1); /* Vnode is not claimed - it is orphaned */
3302 return (IsVnodeOrphaned(salvinfo, vep->parent));
/*
 * Salvage the directory stored in slot i of the large vnode table.
 *
 * salvinfo     - salvage state.
 * name         - passed through unchanged to the recursive call.
 * rwVid        - id of the read/write volume; used when releasing the old
 *                directory inode.
 * dirVnodeInfo - large-vnode table (vnodes[] and inodes[]).
 * alinkH       - link handle stored in the DirSummary (ds_linkH).
 * i            - slot (bit number) of the directory to salvage.
 * rootdir      - receives a copy of the DirSummary when the directory is
 *                vnode 1 with unique 1.
 * rootdirfound - NOTE(review): presumably set together with rootdir; the
 *                assignment is not visible here.
 *
 * Each slot is processed once (the `salvaged` flag); slots with no inode
 * are skipped.  Vnode 1 must have no parent; for any other directory the
 * parent is salvaged first.  The directory is then checked with DirOK (or
 * treated as bad when RebuildDirs is set outside Testing), rebuilt through
 * CopyAndSalvage if needed, and every entry is judged by JudgeEntry.  If
 * the directory was copied while being judged, the old inode is released
 * afterwards.
 *
 * Note that the DirSummary, DirHandle and path buffer are static locals.
 */
3306 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3307 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3308 struct DirSummary *rootdir, int *rootdirfound)
3310 static struct DirSummary dir;
3311 static struct DirHandle dirHandle;
3312 struct VnodeEssence *parent;
3313 static char path[MAXPATHLEN];
3316 if (dirVnodeInfo->vnodes[i].salvaged)
3317 return; /* already salvaged */
3320 dirVnodeInfo->vnodes[i].salvaged = 1;
3322 if (dirVnodeInfo->inodes[i] == 0)
3323 return; /* Not allocated to a directory */
3325 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3326 if (dirVnodeInfo->vnodes[i].parent) {
3327 Log("Bad parent, vnode 1; %s...\n",
3328 (Testing ? "skipping" : "salvaging"));
3329 dirVnodeInfo->vnodes[i].parent = 0;
3330 dirVnodeInfo->vnodes[i].changed = 1;
/* Salvage the parent directory first if that has not happened yet. */
3333 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3334 if (parent && parent->salvaged == 0)
3335 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3336 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3337 rootdir, rootdirfound);
3340 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3341 dir.unique = dirVnodeInfo->vnodes[i].unique;
3344 dir.parent = dirVnodeInfo->vnodes[i].parent;
3345 dir.haveDot = dir.haveDotDot = 0;
3346 dir.ds_linkH = alinkH;
3347 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3348 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
3350 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3353 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3354 (Testing ? "skipping" : "salvaging"));
3357 CopyAndSalvage(salvinfo, &dir);
3359 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
/* Keep the handle the enumeration starts from: JudgeEntry may re-point
 * dir.dirHandle through CopyOnWrite while entries are being judged. */
3362 dirHandle = dir.dirHandle;
3365 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3366 &dirVnodeInfo->vnodes[i], path);
3369 /* If enumeration failed for random reasons, we will probably delete
3370 * too much stuff, so we guard against this instead.
3372 struct judgeEntry_params judge_params;
3373 judge_params.salvinfo = salvinfo;
3374 judge_params.dir = &dir;
3376 osi_Assert(EnumerateDir(&dirHandle, JudgeEntry, &judge_params) == 0);
3379 /* Delete the old directory if it was copied in order to salvage.
3380 * CopyOnWrite has written the new inode # to the disk, but we still
3381 * have the old one in our local structure here. Thus, we idec the
3385 if (dir.copied && !Testing) {
3386 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3387 osi_Assert(code == 0);
3388 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3391 /* Remember rootdir DirSummary _after_ it has been judged */
3392 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3393 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3401 * Get a new FID that can be used to create a new file.
3403 * @param[in] volHeader vol header for the volume
3404 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3405 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3406 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3407 * updated to the new max unique if we create a new
3411 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3412 VnodeClass class, AFSFid *afid, Unique *maxunique)
/* Look for an unused (vNull) slot in the in-memory vnode table of this
 * class. */
3415 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3416 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
3420 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3421 /* no free vnodes; make a new one */
3422 salvinfo->vnodeInfo[class].nVnodes++;
/* NOTE(review): the realloc result replaces the only pointer to the table;
 * make sure a failed allocation is caught before the new slot is used. */
3423 salvinfo->vnodeInfo[class].vnodes =
3424 realloc(salvinfo->vnodeInfo[class].vnodes,
3425 sizeof(struct VnodeEssence) * (i+1));
3427 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
/* The slot index determines the vnode number of the new FID. */
3430 afid->Vnode = bitNumberToVnodeNumber(i, class);
/* Pick the uniquifier: use the header's counter when it is already ahead
 * of every uniquifier seen in the volume, otherwise go past the maximum. */
3432 if (volHeader->uniquifier < (*maxunique + 1)) {
3433 /* header uniq is bad; it will get bumped by 2000 later */
3434 afid->Unique = *maxunique + 1 + 2000;
3437 /* header uniq seems okay; just use that */
3438 afid->Unique = *maxunique = volHeader->uniquifier++;
3443 * Create a vnode for a README file explaining not to use a recreated-root vol.
3445 * @param[in] volHeader vol header for the volume
3446 * @param[in] alinkH ihandle for i/o for the volume
3447 * @param[in] vid volume id
3448 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3449 * updated to the new max unique if we create a new
3451 * @param[out] afid FID for the new readme vnode
3452 * @param[out] ainode the inode for the new readme file
3454 * @return operation status
/*
 * CreateReadme: allocate a new small-vnode FID with GetNewFID(), create an
 * inode holding a fixed explanatory note, write a matching vFile disk vnode
 * into the small vnode index, and copy its fields into the in-memory
 * VnodeEssence slot for that vnode. On the visible success path *ainode
 * receives the inode that was created.
 * NOTE(review): local declarations, braces, error jumps and return
 * statements of this function are not visible in this excerpt.
 */
3459 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3460 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3464 struct VnodeDiskObject *rvnode = NULL;
3466 IHandle_t *readmeH = NULL;
3467 struct VnodeEssence *vep;
3469 time_t now = time(NULL);
3471 /* Try to make the note brief, but informative. Only administrators should
3472 * be able to read this file at first, so we can hopefully assume they
3473 * know what AFS is, what a volume is, etc. */
3475 "This volume has been salvaged, but has lost its original root directory.\n"
3476 "The root directory that exists now has been recreated from orphan files\n"
3477 "from the rest of the volume. This recreated root directory may interfere\n"
3478 "with old cached data on clients, and there is no way the salvager can\n"
3479 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3480 "use this volume, but only copy the salvaged data to a new volume.\n"
3481 "Continuing to use this volume as it exists now may cause some clients to\n"
3482 "behave oddly when accessing this volume.\n"
3483 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3484 /* ^ the person reading this probably just lost some data, so they could
3485 * use some cheering up. */
3487 /* -1 for the trailing NUL */
3488 length = sizeof(readme) - 1;
/* GetNewFID fills in afid (vnode number and uniquifier) for a small vnode */
3490 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
/* in-memory summary slot that corresponds to the FID just handed out */
3492 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3494 /* create the inode and write the contents */
3495 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3496 salvinfo->fileSysPath, 0, vid,
3497 afid->Vnode, afid->Unique, 1);
3498 if (!VALID_INO(readmeinode)) {
3499 Log("CreateReadme: readme IH_CREATE failed\n");
3503 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3504 bytes = IH_IWRITE(readmeH, 0, readme, length);
3505 IH_RELEASE(readmeH);
/* A short write is a failure. NOTE(review): the message prints
 * sizeof(readme), which is length + 1, not the length that was requested. */
3507 if (bytes != length) {
3508 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3509 (int)sizeof(readme));
3513 /* create the vnode and write it out */
3514 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
/* NOTE(review): the message below names CreateRootDir although it is
 * emitted from CreateReadme. */
3516 Log("CreateRootDir: error alloc'ing memory\n");
3520 rvnode->type = vFile;
3522 rvnode->modeBits = 0777;
3523 rvnode->linkCount = 1;
3524 VNDISK_SET_LEN(rvnode, length);
3525 rvnode->uniquifier = afid->Unique;
3526 rvnode->dataVersion = 1;
3527 VNDISK_SET_INO(rvnode, readmeinode);
3528 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3533 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
/* store the disk vnode at its slot in the small vnode index */
3535 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3536 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3537 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3539 if (bytes != SIZEOF_SMALLDISKVNODE) {
3540 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3541 (int)SIZEOF_SMALLDISKVNODE);
3545 /* update VnodeEssence for new readme vnode */
3546 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
3548 vep->blockCount = nBlocks(length);
3549 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3550 vep->parent = rvnode->parent;
3551 vep->unique = rvnode->uniquifier;
3552 vep->modeBits = rvnode->modeBits;
3553 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3554 vep->type = rvnode->type;
3555 vep->author = rvnode->author;
3556 vep->owner = rvnode->owner;
3557 vep->group = rvnode->group;
/* hand the new inode back to the caller */
3567 *ainode = readmeinode;
/* recovery: drop the link on the readme inode again */
3572 if (IH_DEC(alinkH, readmeinode, vid)) {
3573 Log("CreateReadme (recovery): IH_DEC failed\n");
3585 * create a root dir for a volume that lacks one.
3587 * @param[in] volHeader vol header for the volume
3588 * @param[in] alinkH ihandle for disk access for this volume group
3589 * @param[in] vid volume id we're dealing with
3590 * @param[out] rootdir populated with info about the new root dir
3591 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3592 * updated to the new max unique if we create a new
3595 * @return operation status
/*
 * CreateRootDir: build a replacement root directory (vnode 1) for a volume
 * that has none. It creates the README vnode via CreateReadme(), creates a
 * directory inode containing a "README.ROOTDIR" entry, writes a large disk
 * vnode for vnode 1 whose ACL grants read/lookup to id -204 only, and then
 * fills in the VnodeEssence slot for vnode 1 and the caller's *rootdir.
 * NOTE(review): some declarations, braces, error jumps and return statements
 * are not visible in this excerpt; decroot/decreadme are only seen being
 * initialised and tested here.
 */
3600 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3601 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
3605 int decroot = 0, decreadme = 0;
3606 AFSFid did, readmeid;
3609 struct VnodeDiskObject *rootvnode = NULL;
3610 struct acl_accessList *ACL;
3613 struct VnodeEssence *vep;
3615 time_t now = time(NULL);
/* neither the large nor the small vnode table exists: log and give up */
3617 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3618 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3622 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3623 /* We don't have any large vnodes in the volume; allocate room
3624 * for one so we can recreate the root dir */
3625 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3626 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3627 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3629 osi_Assert(salvinfo->vnodeInfo[vLarge].vnodes);
3630 osi_Assert(salvinfo->vnodeInfo[vLarge].inodes);
/* slots for vnode number 1 in the large-vnode tables */
3633 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3634 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3635 if (vep->type != vNull) {
3636 Log("Not creating new root dir; existing vnode 1 is non-null\n");
/* the README vnode is created first; its FID is entered into the dir below */
3640 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3641 &readmeinode) != 0) {
3646 /* set the DV to a very high number, so it is unlikely that we collide
3647 * with a cached DV */
3650 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3652 if (!VALID_INO(rootinode)) {
3653 Log("CreateRootDir: IH_CREATE failed\n");
3658 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3659 rootinode, &salvinfo->VolumeChanged);
/* the same FID (did) is passed for both FID arguments of MakeDir */
3663 if (MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3664 Log("CreateRootDir: MakeDir failed\n");
3667 if (Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3668 Log("CreateRootDir: Create failed\n");
/* remember the directory length for the vnode before zapping the handle */
3672 length = Length(&rootdir->dirHandle);
3673 DZap((void *)&rootdir->dirHandle);
3675 /* create the new root dir vnode */
3676 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
3678 Log("CreateRootDir: malloc failed\n");
3682 /* only give 'rl' permissions to 'system:administrators'. We do this to
3683 * try to catch the attention of an administrator, that they should not
3684 * be writing to this directory or continue to use it. */
3685 ACL = VVnodeDiskACL(rootvnode);
3686 ACL->size = sizeof(struct acl_accessList);
3687 ACL->version = ACL_ACLVERSION;
3691 ACL->entries[0].id = -204; /* system:administrators */
3692 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
3694 rootvnode->type = vDirectory;
3695 rootvnode->cloned = 0;
3696 rootvnode->modeBits = 0777;
3697 rootvnode->linkCount = 2;
3698 VNDISK_SET_LEN(rootvnode, length);
3699 rootvnode->uniquifier = 1;
3700 rootvnode->dataVersion = dv;
3701 VNDISK_SET_INO(rootvnode, rootinode);
3702 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3703 rootvnode->author = 0;
3704 rootvnode->owner = 0;
3705 rootvnode->parent = 0;
3706 rootvnode->group = 0;
3707 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3709 /* write it out to disk */
3710 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3711 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3712 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3714 if (bytes != SIZEOF_LARGEDISKVNODE) {
3715 /* just cast to int and don't worry about printing real 64-bit ints;
3716 * a large disk vnode isn't anywhere near the 32-bit limit */
3717 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3718 (int)SIZEOF_LARGEDISKVNODE);
3722 /* update VnodeEssence for the new root vnode */
3723 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3725 vep->blockCount = nBlocks(length);
3726 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3727 vep->parent = rootvnode->parent;
3728 vep->unique = rootvnode->uniquifier;
3729 vep->modeBits = rootvnode->modeBits;
3730 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3731 vep->type = rootvnode->type;
3732 vep->author = rootvnode->author;
3733 vep->owner = rootvnode->owner;
3734 vep->group = rootvnode->group;
3744 /* update DirSummary for the new root vnode */
3745 rootdir->vnodeNumber = 1;
3746 rootdir->unique = 1;
3747 rootdir->haveDot = 1;
3748 rootdir->haveDotDot = 1;
3749 rootdir->rwVid = vid;
3750 rootdir->copied = 0;
3751 rootdir->parent = 0;
/* name is a heap copy from strdup(); vname aliases the header's name field */
3752 rootdir->name = strdup(".");
3753 rootdir->vname = volHeader->name;
3754 rootdir->ds_linkH = alinkH;
/* recovery: undo only the inode creations flagged by decroot / decreadme */
3761 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3762 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3764 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3765 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3775 * salvage a volume group.
3777 * @param[in] salvinfo information for the current salvage job
3778 * @param[in] rwIsp inode summary for rw volume
3779 * @param[in] alinkH link table inode handle
3781 * @return operation status
/*
 * SalvageVolume: salvage one volume described by rwIsp. Visible steps, in
 * order: read and assert-check the volume header; distill the large and
 * small vnode indexes into in-memory VnodeEssence tables; run SalvageDir
 * over every large-vnode slot; if no root directory was found (and orphans
 * are to be attached, and not in Testing mode) try CreateRootDir; walk all
 * vnodes to attach orphans under the root directory; write back every vnode
 * that changed or whose link-count adjustment is non-zero (deleting orphans
 * when requested); refresh the header's file/block counts and uniquifier;
 * ask the fileserver to break callbacks when the volume changed; write the
 * header back and release the two vnode index handles.
 * NOTE(review): many lines (declarations, braces, conditions, returns) are
 * not visible in this excerpt, so control flow between the visible
 * statements is only partly shown.
 */
3785 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3787 /* This routine, for now, will only be called for read-write volumes */
3789 int BlocksInVolume = 0, FilesInVolume = 0;
3791 struct DirSummary rootdir, oldrootdir;
3792 struct VnodeInfo *dirVnodeInfo;
3793 struct VnodeDiskObject vnode;
3794 VolumeDiskData volHeader;
3796 int orphaned, rootdirfound = 0;
3797 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3798 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3799 struct VnodeEssence *vep;
3802 afs_sfsize_t nBytes;
3804 VnodeId LFVnode, ThisVnode;
3805 Unique LFUnique, ThisUnique;
/* read the volume header from the volumeInfo inode; any problem asserts */
3809 vid = rwIsp->volSummary->header.id;
3810 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3811 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3812 osi_Assert(nBytes == sizeof(volHeader));
3813 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3814 osi_Assert(volHeader.destroyMe != DESTROY_ME);
3815 /* (should not have gotten this far with DESTROY_ME flag still set!) */
/* both calls receive &maxunique, so it accumulates across vnode classes */
3817 DistilVnodeEssence(salvinfo, vid, vLarge,
3818 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
3819 DistilVnodeEssence(salvinfo, vid, vSmall,
3820 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
/* salvage every slot of the large (directory) vnode table */
3822 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
3823 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3824 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
3825 &rootdir, &rootdirfound);
3828 nt_sync(salvinfo->fileSysDevice);
3830 sync(); /* This used to be done lower level, for every dir */
/* no root directory: only attempt to recreate one in attach mode and when
 * not merely testing */
3837 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
3839 Log("Cannot find root directory for volume %lu; attempting to create "
3840 "a new one\n", afs_printable_uint32_lu(vid));
3842 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
3847 salvinfo->VolumeChanged = 1;
3851 /* Parse each vnode looking for orphaned vnodes and
3852 * connect them to the tree as orphaned (if requested).
3854 oldrootdir = rootdir;
3855 for (class = 0; class < nVNODECLASSES; class++) {
3856 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
3857 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
3858 ThisVnode = bitNumberToVnodeNumber(v, class);
3859 ThisUnique = vep->unique;
3861 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3862 continue; /* Ignore unused, claimed, and root vnodes */
3864 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3865 * entry in this vnode had incremented the parent link count (In
3866 * JudgeEntry()). We need to go to the parent and decrement that
3867 * link count. But if the parent's unique is zero, then the parent
3868 * link count was not incremented in JudgeEntry().
3870 if (class == vLarge) { /* directory vnode */
3871 pv = vnodeIdToBitNumber(vep->parent);
3872 if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
3873 if (vep->parent == 1 && newrootdir) {
3874 /* this vnode's parent was the volume root, and
3875 * we just created the volume root. So, the parent
3876 * dir didn't exist during JudgeEntry, so the link
3877 * count was not inc'd there, so don't dec it here.
3883 salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
3889 continue; /* If no rootdir, can't attach orphaned files */
3891 /* Here we attach orphaned files and directories into the
3892 * root directory, LVVnode, making sure link counts stay correct.
3894 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3895 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3896 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3898 /* Update this orphaned vnode's info. Its parent info and
3899 * link count (do for orphaned directories and files).
3901 vep->parent = LFVnode; /* Parent is the root dir */
3902 vep->unique = LFUnique;
3905 vep->count--; /* Inc link count (root dir will pt to it); count is subtracted from linkCount at write-out */
3907 /* If this orphaned vnode is a directory, change '..'.
3908 * The name of the orphaned dir/file is unknown, so we
3909 * build a unique name. No need to CopyOnWrite the directory
3910 * since it is not connected to tree in BK or RO volume and
3911 * won't be visible there.
3913 if (class == vLarge) {
3917 /* Remove and recreate the ".." entry in this orphaned directory */
3918 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
3919 salvinfo->vnodeInfo[class].inodes[v],
3920 &salvinfo->VolumeChanged);
3922 pa.Unique = LFUnique;
3923 osi_Assert(Delete(&dh, "..") == 0);
3924 osi_Assert(Create(&dh, "..", &pa) == 0);
3926 /* The original parent's link count was decremented above.
3927 * Here we increment the new parent's link count.
3929 pv = vnodeIdToBitNumber(LFVnode);
3930 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
3934 /* Go to the root dir and add this entry. The link count of the
3935 * root dir was incremented when ".." was created. Try 10 times.
3937 for (j = 0; j < 10; j++) {
3938 pa.Vnode = ThisVnode;
3939 pa.Unique = ThisUnique;
3941 (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
3943 vLarge) ? "__ORPHANDIR__" :
3944 "__ORPHANFILE__"), ThisVnode,
3947 CopyOnWrite(salvinfo, &rootdir);
3948 code = Create(&rootdir.dirHandle, npath, &pa);
3952 ThisUnique += 50; /* Try creating a different file */
3954 osi_Assert(code == 0);
3955 Log("Attaching orphaned %s to volume's root dir as %s\n",
3956 ((class == vLarge) ? "directory" : "file"), npath);
3958 } /* for each vnode in the class */
3959 } /* for each class of vnode */
3961 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
3963 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
3965 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
3967 osi_Assert(code == 0);
3968 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
3971 DFlush(); /* Flush the changes */
3972 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
3973 Log("Cannot attach orphaned files and directories: Root directory not found\n");
3974 orphans = ORPH_IGNORE;
3977 /* Write out all changed vnodes. Orphaned files and directories
3978 * will get removed here also (if requested).
3980 for (class = 0; class < nVNODECLASSES; class++) {
3981 int nVnodes = salvinfo->vnodeInfo[class].nVnodes;
3982 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3983 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
3984 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
3985 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
3986 for (i = 0; i < nVnodes; i++) {
3987 struct VnodeEssence *vnp = &vnodes[i];
3988 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
3990 /* If the vnode is good but is unclaimed (not listed in
3991 * any directory entries), then it is orphaned.
3994 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
3995 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
/* the disk vnode is re-read and rewritten only when flagged changed or when
 * its link-count adjustment is non-zero */
3999 if (vnp->changed || vnp->count) {
4002 IH_IREAD(salvinfo->vnodeInfo[class].handle,
4003 vnodeIndexOffset(vcp, vnodeNumber),
4004 (char *)&vnode, sizeof(vnode));
4005 osi_Assert(nBytes == sizeof(vnode));
4007 vnode.parent = vnp->parent;
4008 oldCount = vnode.linkCount;
/* count is an adjustment that is subtracted from the stored link count */
4009 vnode.linkCount = vnode.linkCount - vnp->count;
4012 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4014 if (!vnp->todelete) {
4015 /* Orphans should have already been attached (if requested) */
4016 osi_Assert(orphans != ORPH_ATTACH);
4017 oblocks += vnp->blockCount;
4020 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4022 BlocksInVolume -= vnp->blockCount;
4024 if (VNDISK_GET_INO(&vnode)) {
4026 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4027 osi_Assert(code == 0);
/* a removed vnode is written back as all zeroes */
4029 memset(&vnode, 0, sizeof(vnode));
4031 } else if (vnp->count) {
4033 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4036 vnode.modeBits = vnp->modeBits;
4039 vnode.dataVersion++;
4042 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4043 vnodeIndexOffset(vcp, vnodeNumber),
4044 (char *)&vnode, sizeof(vnode));
4045 osi_Assert(nBytes == sizeof(vnode));
4047 salvinfo->VolumeChanged = 1;
/* summary line about orphans, suppressed in Showmode */
4051 if (!Showmode && ofiles) {
4052 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4054 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
/* release the per-vnode name strings held in the essence tables */
4058 for (class = 0; class < nVNODECLASSES; class++) {
4059 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4060 for (i = 0; i < vip->nVnodes; i++)
4061 if (vip->vnodes[i].name)
4062 free(vip->vnodes[i].name);
4069 /* Set correct resource utilization statistics */
4070 volHeader.filecount = FilesInVolume;
4071 volHeader.diskused = BlocksInVolume;
4073 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
4074 if (volHeader.uniquifier < (maxunique + 1)) {
4076 Log("Volume uniquifier is too low; fixed\n");
4077 /* Plus 2,000 in case there are workstations out there with
4078 * cached vnodes that have since been deleted
4080 volHeader.uniquifier = (maxunique + 1 + 2000);
4084 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4085 "Only use this salvaged volume to copy data to another volume; "
4086 "do not continue to use this volume (%lu) as-is.\n",
4087 afs_printable_uint32_lu(vid));
4090 #ifdef FSSYNC_BUILD_CLIENT
4091 if (!Testing && salvinfo->VolumeChanged) {
4092 afs_int32 fsync_code;
4094 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4096 Log("Error trying to tell the fileserver to break callbacks for "
4097 "changed volume %lu; error code %ld\n",
4098 afs_printable_uint32_lu(vid),
4099 afs_printable_int32_ld(fsync_code));
4101 salvinfo->VolumeChanged = 0;
4104 #endif /* FSSYNC_BUILD_CLIENT */
4106 /* Turn off the inUse bit; the volume's been salvaged! */
4107 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4108 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4109 volHeader.inService = 1; /* allow service again */
4110 volHeader.needsCallback = (salvinfo->VolumeChanged != 0);
4111 volHeader.dontSalvage = DONT_SALVAGE;
4112 salvinfo->VolumeChanged = 0;
/* persist the updated header; a short write asserts */
4114 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4115 osi_Assert(nBytes == sizeof(volHeader));
4118 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
4119 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
4120 FilesInVolume, BlocksInVolume);
/* done with both vnode index handles */
4123 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4124 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
/*
 * ClearROInUseBit: read the volume header through summary->volumeInfoHandle,
 * clear inUse and needsSalvaged, set inService and dontSalvage, and write
 * the header back. A short read/write or a bad magic number trips
 * osi_Assert.
 */
4130 ClearROInUseBit(struct VolumeSummary *summary)
4132 IHandle_t *h = summary->volumeInfoHandle;
4133 afs_sfsize_t nBytes;
4135 VolumeDiskData volHeader;
4137 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4138 osi_Assert(nBytes == sizeof(volHeader));
4139 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
/* mark the volume as cleanly salvaged and serviceable */
4140 volHeader.inUse = 0;
4141 volHeader.needsSalvaged = 0;
4142 volHeader.inService = 1;
4143 volHeader.dontSalvage = DONT_SALVAGE;
4145 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4146 osi_Assert(nBytes == sizeof(volHeader));
4151 * Possibly delete the volume.
4153 * deleteMe - Always do so, only a partial volume.
/*
 * MaybeZapVolume: when the volume is read-only or deleteMe is set, and a
 * header file name is known, log why the volume is going away, destroy its
 * volume disk header and unlink the header file. Otherwise, when check is
 * zero, log the failed salvage of the read-write volume and Abort().
 * NOTE(review): the conditions that choose between the two explanatory
 * messages, and the declaration of path, are not visible in this excerpt.
 */
4156 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4157 char *message, int deleteMe, int check)
4159 if (readOnly(isp) || deleteMe) {
4160 if (isp->volSummary && isp->volSummary->fileName) {
4163 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
4165 Log("It will be deleted on this server (you may find it elsewhere)\n");
4168 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
4170 Log("it will be deleted instead. It should be recloned.\n");
/* NOTE(review): sprintf is unbounded; the size of path is not visible here */
4175 sprintf(path, "%s/%s", salvinfo->fileSysPath, isp->volSummary->fileName);
4177 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4179 Log("Error %ld destroying volume disk header for volume %lu\n",
4180 afs_printable_int32_ld(code),
4181 afs_printable_uint32_lu(isp->volumeId));
4184 /* make sure we actually delete the fileName file; ENOENT
4185 * is fine, since VDestroyVolumeDiskHeader probably already
4187 if (unlink(path) && errno != ENOENT) {
4188 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4192 } else if (!check) {
4193 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
4195 Abort("Salvage of volume %u aborted\n", isp->volumeId);
4199 #ifdef AFS_DEMAND_ATTACH_FS
4201 * Locks a volume on disk for salvaging.
4203 * @param[in] volumeId volume ID to lock
4205 * @return operation status
4207 * @retval -1 volume lock raced with a fileserver restart; all volumes must be
4208 * checked out and locked again
/*
 * LockVolume: take the on-disk volume lock without blocking, verify with
 * the fileserver that the volume is still checked out for salvage, then
 * stamp inUse = programType into the volume header. Lock errors and
 * unexpected FSYNC results end in Abort(); SYNC_DENIED is the case in which
 * volumes need to be checked out again.
 * NOTE(review): the return statements and some conditions are not visible
 * in this excerpt.
 */
4213 LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
4218 /* should always be WRITE_LOCK, but keep the lock-type logic all
4219 * in one place, in VVolLockType. Params will be ignored, but
4220 * try to provide what we're logically doing. */
4221 locktype = VVolLockType(V_VOLUPD, 1);
/* the lock attempt is non-blocking; EBUSY gets its own Abort message */
4223 code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
4225 if (code == EBUSY) {
4226 Abort("Someone else appears to be using volume %lu; Aborted\n",
4227 afs_printable_uint32_lu(volumeId));
4229 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4230 afs_printable_int32_ld(code),
4231 afs_printable_uint32_lu(volumeId));
/* confirm with the fileserver that the checkout is still in place */
4234 code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPathName, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4235 if (code == SYNC_DENIED) {
4236 /* need to retry checking out volumes */
4239 if (code != SYNC_OK) {
4240 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4241 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4244 /* set inUse = programType in the volume header to ensure that nobody
4245 * tries to use this volume again without salvaging, if we somehow crash
4246 * or otherwise exit before finishing the salvage.
4250 struct VolumeHeader header;
4251 struct VolumeDiskHeader diskHeader;
4252 struct VolumeDiskData volHeader;
4254 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
4259 DiskToVolumeHeader(&header, &diskHeader);
4261 IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
4262 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4263 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4269 volHeader.inUse = programType;
4271 /* If we can't re-write the header, bail out and error. We don't
4272 * assert when reading the header, since it's possible the
4273 * header isn't really there (when there's no data associated
4274 * with the volume; we just delete the vol header file in that
4275 * case). But if it's there enough that we can read it, but
4276 * somehow we cannot write to it to signify we're salvaging it,
4277 * we've got a big problem and we cannot continue. */
4278 osi_Assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
4285 #endif /* AFS_DEMAND_ATTACH_FS */
/*
 * AskOffline: ask the fileserver, via FSYNC_VolOp(FSYNC_VOL_OFF), to take
 * volumeId offline for salvaging. Up to three attempts are made. A denial
 * or a protocol mismatch aborts the salvage at once; any other failure logs
 * a retry message and calls FSYNC_clientFinis() before the next attempt. If
 * the final code is not SYNC_OK the salvage is aborted.
 * NOTE(review): the statement executed on SYNC_OK, and anything following
 * FSYNC_clientFinis() inside the loop, are not visible in this excerpt.
 */
4288 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
4293 memset(&res, 0, sizeof(res));
4295 for (i = 0; i < 3; i++) {
4296 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4297 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4299 if (code == SYNC_OK) {
4301 } else if (code == SYNC_DENIED) {
4302 #ifdef DEMAND_ATTACH_ENABLE
4303 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4305 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4307 Abort("Salvage aborted\n");
4308 } else if (code == SYNC_BAD_COMMAND) {
4309 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
4311 #ifdef DEMAND_ATTACH_ENABLE
4312 Log("AskOffline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
4314 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4316 Abort("Salvage aborted\n");
/* any other failure: log, tear down the FSYNC client and go round again */
4319 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4320 FSYNC_clientFinis();
/* all attempts used up without success */
4324 if (code != SYNC_OK) {
4325 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4326 Abort("Salvage aborted\n");
/*
 * AskOnline: ask the fileserver, via FSYNC_VolOp(FSYNC_VOL_ON), to bring
 * volumeId back online, trying up to three times. The visible failure
 * branches only log; no Abort() appears in this function's visible lines.
 * NOTE(review): the statements executed on SYNC_OK and after a protocol
 * mismatch are not visible in this excerpt.
 */
4331 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4335 for (i = 0; i < 3; i++) {
4336 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4337 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4339 if (code == SYNC_OK) {
4341 } else if (code == SYNC_DENIED) {
4342 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4343 } else if (code == SYNC_BAD_COMMAND) {
4344 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4346 #ifdef DEMAND_ATTACH_ENABLE
4347 Log("AskOnline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
4349 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
/* NOTE(review): the message below says "offline" although this is the
 * online request path. */
4354 Log("AskOnline: request for fileserver to take volume offline failed; trying again...\n");
4355 FSYNC_clientFinis();
/*
 * CopyInode: copy the contents of inode1 to inode2 on the given device.
 * Both inodes are opened through inode handles initialised with rwvolume;
 * data is moved in buf-sized pieces with FDH_PREAD / FDH_PWRITE at offset
 * size, and every write is asserted to be complete. The loop must end with
 * a read of 0 bytes (asserted), after which both descriptors are closed.
 * NOTE(review): srcFdP is asserted non-NULL but destFdP is used without a
 * visible check. The declaration of buf, the update of size inside the loop
 * and the return statement are not visible in this excerpt.
 */
4362 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4364 /* Volume parameter is passed in case iopen is upgraded in future to
4365 * require a volume Id to be passed
4368 IHandle_t *srcH, *destH;
4369 FdHandle_t *srcFdP, *destFdP;
4371 afs_foff_t size = 0;
4373 IH_INIT(srcH, device, rwvolume, inode1);
4374 srcFdP = IH_OPEN(srcH);
4375 osi_Assert(srcFdP != NULL);
4376 IH_INIT(destH, device, rwvolume, inode2);
4377 destFdP = IH_OPEN(destH);
4378 while ((nBytes = FDH_PREAD(srcFdP, buf, sizeof(buf), size)) > 0) {
4379 osi_Assert(FDH_PWRITE(destFdP, buf, nBytes, size) == nBytes);
4382 osi_Assert(nBytes == 0);
4383 FDH_REALLYCLOSE(srcFdP);
4384 FDH_REALLYCLOSE(destFdP);
/*
 * PrintInodeList: debugging aid. Reads the whole file behind
 * salvinfo->inodeFd into a heap buffer sized from afs_fstat() and logs one
 * line per struct ViceInodeInfo record (inode number, link count, byte
 * count and the four params). Every I/O step is guarded by osi_Assert.
 * NOTE(review): release of buf is not visible in this excerpt.
 */
4391 PrintInodeList(struct SalvInfo *salvinfo)
4393 struct ViceInodeInfo *ip;
4394 struct ViceInodeInfo *buf;
4395 struct afs_stat status;
4399 osi_Assert(afs_fstat(salvinfo->inodeFd, &status) == 0);
4400 buf = (struct ViceInodeInfo *)malloc(status.st_size);
4401 osi_Assert(buf != NULL);
/* record count is the file size divided by the record size */
4402 nInodes = status.st_size / sizeof(struct ViceInodeInfo);
4403 osi_Assert(read(salvinfo->inodeFd, buf, status.st_size) == status.st_size);
4404 for (ip = buf; nInodes--; ip++) {
4405 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
4406 PrintInode(stmp, ip->inodeNumber), ip->linkCount,
4407 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
4408 ip->u.param[2], ip->u.param[3]);
/*
 * PrintInodeSummary: debugging aid. Logs one line for each of the
 * salvinfo->nVolumesInInodeFile entries of salvinfo->inodeSummary.
 */
4414 PrintInodeSummary(struct SalvInfo *salvinfo)
4417 struct InodeSummary *isp;
4419 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4420 isp = &salvinfo->inodeSummary[i];
4421 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/*
 * PrintVolumeSummary: debugging aid. Walks the salvinfo->nVolumes entries
 * starting at salvinfo->volumeSummaryp and logs each entry's fileName.
 */
4426 PrintVolumeSummary(struct SalvInfo *salvinfo)
4429 struct VolumeSummary *vsp;
4431 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; vsp++, i++) {
4432 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
4442 osi_Assert(0); /* Fork is never executed in the NT code path */
4446 #ifdef AFS_DEMAND_ATTACH_FS
4447 if ((f == 0) && (programType == salvageServer)) {
4448 /* we are a salvageserver child */
4449 #ifdef FSSYNC_BUILD_CLIENT
4450 VChildProcReconnectFS_r();
4452 #ifdef SALVSYNC_BUILD_CLIENT
4456 #endif /* AFS_DEMAND_ATTACH_FS */
4457 #endif /* !AFS_NT40_ENV */
4467 #ifdef AFS_DEMAND_ATTACH_FS
4468 if (programType == salvageServer) {
4469 #ifdef SALVSYNC_BUILD_CLIENT
4472 #ifdef FSSYNC_BUILD_CLIENT
4476 #endif /* AFS_DEMAND_ATTACH_FS */
4479 if (main_thread != pthread_self())
4480 pthread_exit((void *)code);
4493 pid = wait(&status);
4494 osi_Assert(pid != -1);
4495 if (WCOREDUMP(status))
4496 Log("\"%s\" core dumped!\n", prog);
4497 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp: format clock as local time into a function-static 20-byte
 * buffer, either as "MM/DD/YYYY HH:MM:SS" or as "MM/DD/YYYY HH:MM".
 * Because the buffer is static, each call overwrites the previous result.
 * NOTE(review): the test that picks the format (presumably on precision)
 * and the return statement are not visible in this excerpt; the result of
 * localtime() is used without a visible NULL check.
 */
4503 TimeStamp(time_t clock, int precision)
4506 static char timestamp[20];
4507 lt = localtime(&clock);
4509 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M:%S", lt);
4511 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M", lt);
/*
 * CheckLogFile: rotate the salvager log. The current file at log_path is
 * renamed to log_path + ".old" and a fresh log is opened in append mode
 * into the global logFile; per the existing comment, stdout is the fallback
 * when the open fails.
 * NOTE(review): oldSlvgLog is filled with strcpy/strcat without a visible
 * length check against AFSDIR_PATH_MAX. The code under the first #ifndef
 * and the fallback assignment are not visible in this excerpt.
 */
4516 CheckLogFile(char * log_path)
4518 char oldSlvgLog[AFSDIR_PATH_MAX];
4520 #ifndef AFS_NT40_ENV
4527 strcpy(oldSlvgLog, log_path);
4528 strcat(oldSlvgLog, ".old");
4530 renamefile(log_path, oldSlvgLog);
4531 logFile = afs_fopen(log_path, "a");
4533 if (!logFile) { /* still nothing, use stdout */
4537 #ifndef AFS_NAMEI_ENV
4538 AFS_DEBUG_IOPS_LOG(logFile);
4543 #ifndef AFS_NT40_ENV
/*
 * TimeStampLogFile: give the log an additional, timestamped name. Builds
 * "<log_path>.YYYY-MM-DD.HH:MM:SS" from the local time and hard-links
 * log_path to it. The result of link() is deliberately ignored.
 * NOTE(review): how now is obtained is not visible in this excerpt.
 */
4545 TimeStampLogFile(char * log_path)
4547 char stampSlvgLog[AFSDIR_PATH_MAX];
4552 lt = localtime(&now);
/* tm_year counts from 1900 and tm_mon from 0, hence the offsets */
4553 (void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
4554 "%s.%04d-%02d-%02d.%02d:%02d:%02d",
4555 log_path, lt->tm_year + 1900,
4556 lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
4559 /* try to link the logfile to a timestamped filename */
4560 /* if it fails, oh well, nothing we can do */
4561 link(log_path, stampSlvgLog);
4570 #ifndef AFS_NT40_ENV
4572 printf("Can't show log since using syslog.\n");
4583 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
4586 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
4589 while (fgets(line, sizeof(line), logFile))
/*
 * Log: printf-style logging. The message is formatted into tmp with
 * afs_vsnprintf (bounded by sizeof tmp) and then emitted either through
 * syslog(LOG_INFO) or to the global logFile prefixed with a TimeStamp().
 * NOTE(review): the declaration of tmp, va_end, and the condition that
 * chooses between syslog and logFile are not visible in this excerpt.
 */
4596 Log(const char *format, ...)
4602 va_start(args, format);
4603 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4605 #ifndef AFS_NT40_ENV
/* tmp is passed as an argument to "%s", never as the format itself */
4607 syslog(LOG_INFO, "%s", tmp);
4611 gettimeofday(&now, 0);
4612 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort: printf-style message emitted through syslog(LOG_INFO) or written
 * to logFile. Unlike Log(), the logFile line carries no timestamp.
 * NOTE(review): what happens after the message is written (the name
 * suggests the process is terminated) is not visible in this excerpt.
 */
4618 Abort(const char *format, ...)
4623 va_start(args, format);
4624 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4626 #ifndef AFS_NT40_ENV
4628 syslog(LOG_INFO, "%s", tmp);
4632 fprintf(logFile, "%s", tmp);
/*
 * ToString: allocate strlen(s) + 1 bytes with malloc for a copy of s;
 * allocation failure trips osi_Assert.
 * NOTE(review): the copy itself and the return statement are not visible in
 * this excerpt; presumably the caller owns and frees the result.
 */
4644 ToString(const char *s)
4647 p = (char *)malloc(strlen(s) + 1);
4648 osi_Assert(p != NULL);
4653 /* Remove the FORCESALVAGE file */
/*
 * RemoveTheForce: build "<path>/FORCESALVAGE" and, when not in Testing mode
 * and ForceSalvage is set, unlink that file if afs_stat() finds it.
 * NOTE(review): target is filled with strcpy/strcat; its declaration and
 * size are not visible in this excerpt, so the bound is unverified.
 */
4655 RemoveTheForce(char *path)
4658 struct afs_stat force; /* so we can use afs_stat to find it */
4659 strcpy(target,path);
4660 strcat(target,"/FORCESALVAGE");
4661 if (!Testing && ForceSalvage) {
4662 if (afs_stat(target,&force) == 0) unlink(target);
4666 #ifndef AFS_AIX32_ENV
4668 * UseTheForceLuke - see if we can use the force
/*
 * UseTheForceLuke (variant compiled when AFS_AIX32_ENV is not defined):
 * returns non-zero when "<path>/FORCESALVAGE" exists according to
 * afs_stat(), zero otherwise.
 * NOTE(review): the declaration and size of target are not visible in this
 * excerpt; it is filled with unbounded strcpy/strcat.
 */
4671 UseTheForceLuke(char *path)
4673 struct afs_stat force;
4675 strcpy(target,path);
4676 strcat(target,"/FORCESALVAGE");
4678 return (afs_stat(target, &force) == 0);
4682 * UseTheForceLuke - see if we can use the force
4685 * The VRMIX fsck will not muck with the filesystem it is supposedly
4686 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4687 * muck directly with the root inode, which is within the normal
4689 * ListViceInodes() has a side effect of setting ForceSalvage if
4690 * it detects a need, based on root inode examination.
/*
 * UseTheForceLuke (alternate variant): the visible body only returns 0, so
 * path is not consulted by any visible statement.
 * NOTE(review): presumably this is the AFS_AIX32_ENV branch; the
 * preprocessor lines selecting it are not visible in this excerpt.
 */
4693 UseTheForceLuke(char *path)
4696 return 0; /* sorry OB1 */
4701 /* NT support routines */
4703 static char execpathname[MAX_PATH];
/*
 * nt_SalvagePartition: NT-only. Caches this executable's path in
 * execpathname on first use, fills a child-job record (magic, job number,
 * partition name) and spawns a copy of the salvager with that record
 * attached via spawnprocveb(); pid receives the spawn result.
 * NOTE(review): the failure check compares n with the literal 1023 while
 * the size passed to GetModuleFileName is MAX_PATH - 1 -- confirm these are
 * meant to match. strcpy into job.cj_part is unbounded. The declaration of
 * job and the return statements are not visible in this excerpt.
 */
4705 nt_SalvagePartition(char *partName, int jobn)
4710 if (!*execpathname) {
4711 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
4712 if (!n || n == 1023)
4715 job.cj_magic = SALVAGER_MAGIC;
4716 job.cj_number = jobn;
4717 (void)strcpy(job.cj_part, partName);
4718 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage: NT-only. Validates the data block handed to a
 * spawned salvager (size must equal sizeof(childJob_t) and the magic must
 * be SALVAGER_MAGIC), then opens a per-job log file named
 * "<AFSDIR_SERVER_SLVGLOG_FILEPATH>.<n>" for writing into the global
 * logFile.
 * NOTE(review): the statements taken when validation fails, the second
 * argument of the sprintf and the return value are not visible in this
 * excerpt.
 */
4723 nt_SetupPartitionSalvage(void *datap, int len)
4725 childJob_t *jobp = (childJob_t *) datap;
4726 char logname[AFSDIR_PATH_MAX];
4728 if (len != sizeof(childJob_t))
4730 if (jobp->cj_magic != SALVAGER_MAGIC)
4735 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
4737 logFile = afs_fopen(logname, "w");
4745 #endif /* AFS_NT40_ENV */