2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
89 #include <afs/procmgmt.h>
93 #include <sys/param.h>
97 #endif /* ITIMER_REAL */
103 #include <sys/stat.h>
108 #include <WINNT/afsevent.h>
111 #define WCOREDUMP(x) ((x) & 0200)
114 #include <afs/afsint.h>
115 #include <afs/afs_assert.h>
116 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
117 #if defined(AFS_VFSINCL_ENV)
118 #include <sys/vnode.h>
120 #include <sys/fs/ufs_inode.h>
122 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
123 #include <ufs/ufs/dinode.h>
124 #include <ufs/ffs/fs.h>
126 #include <ufs/inode.h>
129 #else /* AFS_VFSINCL_ENV */
131 #include <ufs/inode.h>
132 #else /* AFS_OSF_ENV */
133 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_ARM_DARWIN_ENV)
134 #include <sys/inode.h>
137 #endif /* AFS_VFSINCL_ENV */
138 #endif /* AFS_SGI_ENV */
141 #include <sys/lockf.h>
145 #include <checklist.h>
147 #if defined(AFS_SGI_ENV)
152 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
155 #include <sys/mnttab.h>
156 #include <sys/mntent.h>
161 #endif /* AFS_SGI_ENV */
162 #endif /* AFS_HPUX_ENV */
167 #include <afs/osi_inode.h>
171 #include <afs/afsutil.h>
172 #include <afs/fileutil.h>
173 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
181 #include <afs/afssyscalls.h>
185 #include "partition.h"
186 #include "daemon_com.h"
188 #include "volume_inline.h"
189 #include "salvsync.h"
190 #include "viceinode.h"
192 #include "volinodes.h" /* header magic number, etc. stuff */
193 #include "vol-salvage.h"
195 #include "vol_internal.h"
197 #include <afs/prs_fs.h>
199 #ifdef FSSYNC_BUILD_CLIENT
200 #include "vg_cache.h"
207 /*@+fcnmacros +macrofcndecl@*/
210 extern off64_t afs_lseek(int FD, off64_t O, int F);
211 #endif /*S_SPLINT_S */
212 #define afs_lseek(FD, O, F) lseek64(FD, (off64_t) (O), F)
213 #define afs_stat stat64
214 #define afs_fstat fstat64
215 #define afs_open open64
216 #define afs_fopen fopen64
217 #else /* !O_LARGEFILE */
219 extern off_t afs_lseek(int FD, off_t O, int F);
220 #endif /*S_SPLINT_S */
221 #define afs_lseek(FD, O, F) lseek(FD, (off_t) (O), F)
222 #define afs_stat stat
223 #define afs_fstat fstat
224 #define afs_open open
225 #define afs_fopen fopen
226 #endif /* !O_LARGEFILE */
227 /*@=fcnmacros =macrofcndecl@*/
230 extern void *calloc();
232 static char *TimeStamp(time_t clock, int precision);
/*
 * Salvager-wide option flags and state. The trailing comment on each line
 * names the command-line switch the variable corresponds to. Parallel is the
 * limit compared against the running job count in SalvageFileSysParallel, and
 * MAXPARALLEL sizes the static jobs table declared there.
 * NOTE(review): where these flags are parsed and range-checked is not visible
 * in this part of the file.
 */
235 int debug; /* -d flag */
236 extern int Testing; /* -n flag */
237 int ListInodeOption; /* -i flag */
238 int ShowRootFiles; /* -r flag */
239 int RebuildDirs; /* -sal flag */
240 int Parallel = 4; /* -para X flag */
241 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
242 int forceR = 0; /* -b flag */
243 int ShowLog = 0; /* -showlog flag */
244 int ShowSuid = 0; /* -showsuid flag */
245 int ShowMounts = 0; /* -showmounts flag */
246 int orphans = ORPH_IGNORE; /* -orphans option */
251 int useSyslog = 0; /* -syslog flag */
252 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
261 #define MAXPARALLEL 32
263 int OKToZap; /* -o flag */
264 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
265 * in the volume header */
267 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
269 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
272 * information that is 'global' to a particular salvage job.
/*
 * Per-salvage-job state. These members are the ones reached through the
 * salvinfo pointer in the routines below (for example fileSysPartition,
 * fileSysPath, inodeFd, inodeSummary, volumeSummaryp, nVolumes, useFSYNC).
 * NOTE(review): the enclosing struct header is not visible here; presumably
 * this is the body of struct SalvInfo - confirm.
 * NOTE(review): fileSysPath is only 8 bytes long; every writer must bound its
 * copy accordingly.
 */
275 Device fileSysDevice; /**< The device number of the current partition
277 char fileSysPath[8]; /**< The path of the mounted partition currently
278 * being salvaged, i.e. the directory containing
279 * the volume headers */
280 char *fileSysPathName; /**< NT needs this to make name pretty log. */
281 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
282 int VGLinkH_cnt; /**< # of references to link handle. */
283 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
286 char *fileSysDeviceName; /**< The block device where the file system being
287 * salvaged was mounted */
288 char *filesysfulldev;
290 int VolumeChanged; /**< Set by any routine which would change the
291 * volume in a way which would require callbacks
292 * to be broken if the volume was put back on
293 * line by an active file server */
295 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
296 * header dealt with */
298 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
299 int inodeFd; /**< File descriptor for inode file */
301 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
302 int nVolumes; /**< Number of volumes (read-write and read-only)
303 * in volume summary */
304 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
307 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
308 * vnodes in the volume that
309 * we are currently looking
311 int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
312 * to contact the fileserver over FSYNC */
319 /* Forward declarations */
320 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
321 static int AskVolumeSummary(struct SalvInfo *salvinfo,
322 VolumeId singleVolumeNumber);
324 #ifdef AFS_DEMAND_ATTACH_FS
325 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
326 #endif /* AFS_DEMAND_ATTACH_FS */
328 /* Uniquifier stored in the Inode */
333 return (u & 0x3fffff);
335 #if defined(AFS_SGI_EXMAG)
336 return (u & SGI_UNIQMASK);
339 #endif /* AFS_SGI_EXMAG */
346 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
348 return 0; /* otherwise may be transient, e.g. EMFILE */
353 char *save_args[MAX_ARGS];
355 extern pthread_t main_thread;
356 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
360 * Get the salvage lock if not already held. Hold until process exits.
362 * @param[in] locktype READ_LOCK or WRITE_LOCK
/*
 * Initialise a VLockFile on AFSDIR_SERVER_SLVGLOCK_FILEPATH and try to take it
 * with the requested lock type (READ_LOCK or WRITE_LOCK, as passed by the two
 * wrappers below). The two message fragments belong to the failure paths: one
 * reports that another salvager appears to be running, the other reports the
 * error code from the lock attempt.
 * NOTE(review): the values of offset and nonblock, and what happens after the
 * messages are printed, are not visible in this part of the file.
 */
365 _ObtainSalvageLock(int locktype)
367 struct VLockFile salvageLock;
372 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
374 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
377 "salvager: There appears to be another salvager running! "
382 "salvager: Error %d trying to acquire salvage lock! "
/* Take the salvage lock in WRITE_LOCK mode via _ObtainSalvageLock. */
388 ObtainSalvageLock(void)
390 _ObtainSalvageLock(WRITE_LOCK);
/* Take the salvage lock in READ_LOCK mode via _ObtainSalvageLock. */
393 ObtainSharedSalvageLock(void)
395 _ObtainSalvageLock(READ_LOCK);
399 #ifdef AFS_SGI_XFS_IOPS_ENV
400 /* Check if the given partition is mounted. For XFS, the root inode is not a
401 * constant. So we check the hard way.
/*
 * Walk the mount table opened with setmntent(MOUNTED, "r") and compare each
 * entry's mnt_dir with part.
 *
 * NOTE(review): the final return yields 1 on both arms of the ternary, so the
 * result does not depend on whether a matching mount entry was found. If the
 * intent is to report "not mounted", the second arm should presumably be 0 -
 * confirm against the caller before changing it.
 * NOTE(review): setmntent is called inside osi_Assert; confirm that macro
 * always evaluates its argument.
 */
404 IsPartitionMounted(char *part)
407 struct mntent *mntent;
409 osi_Assert(mntfp = setmntent(MOUNTED, "r"));
410 while (mntent = getmntent(mntfp)) {
411 if (!strcmp(part, mntent->mnt_dir))
416 return mntent ? 1 : 1;
419 /* Check if the given inode is the root of the filesystem. */
420 #ifndef AFS_SGI_XFS_IOPS_ENV
/*
 * Report whether the inode number in status equals ROOTINODE.
 * The text below explains that XFS has no fixed root inode number and that the
 * mounted-partition check is done on the SalvageFileSys path instead.
 */
422 IsRootInode(struct afs_stat *status)
425 * The root inode is not a fixed value in XFS partitions. So we need to
426 * see if the partition is in the list of mounted partitions. This only
427 * affects the SalvageFileSys path, so we check there.
429 return (status->st_ino == ROOTINODE);
434 #ifndef AFS_NAMEI_ENV
435 /* We don't want to salvage big files filesystems, since we can't put volumes on
/*
 * Build a device path in name (prefixing /dev/ when devName does not already
 * start with it), read the superblock with ReadSuper, and log a message when
 * the superblock cannot be read or when IsBigFilesFileSystem reports a big
 * files filesystem.
 * NOTE(review): name is filled with sprintf/strcpy and no length bound is
 * visible; its declaration and the return values are not visible here.
 */
439 CheckIfBigFilesFS(char *mountPoint, char *devName)
441 struct superblock fs;
444 if (strncmp(devName, "/dev/", 5)) {
445 (void)sprintf(name, "/dev/%s", devName);
447 (void)strcpy(name, devName);
450 if (ReadSuper(&fs, name) < 0) {
451 Log("Unable to read superblock. Not salvaging partition %s.\n",
455 if (IsBigFilesFileSystem(&fs)) {
456 Log("Partition %s is a big files filesystem, not salvaging.\n",
466 #define HDSTR "\\Device\\Harddisk"
467 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
/*
 * QueryDosDevice-based variant of SameDisk. Resolves the device name of each
 * partition into res1 and res2, logs a warning when a resolved name does not
 * begin with HDSTR, and returns the result of a case-insensitive comparison of
 * the two resolved names (non-zero when they match).
 * NOTE(review): dowarn presumably limits the warning to its first occurrence,
 * and the early exits after a failed QueryDosDevice are not visible here.
 */
469 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
475 static int dowarn = 1;
477 if (!QueryDosDevice(p1->devName, res1, RES_LEN - 1))
479 if (strncmp(res1, HDSTR, HDLEN)) {
482 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
483 res1, HDSTR, p1->devName);
486 if (!QueryDosDevice(p2->devName, res2, RES_LEN - 1))
488 if (strncmp(res2, HDSTR, HDLEN)) {
491 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
492 res2, HDSTR, p2->devName);
496 return (0 == _strnicmp(res1, res2, RES_LEN - 1));
499 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
502 /* This assumes that two partitions with the same device number divided by
503 * PartsPerDisk are on the same disk.
/*
 * Schedule the salvage of one partition, or wait for all outstanding salvage
 * jobs when partP is NULL.
 *
 *  - A non-NULL partP is wrapped in a freshly allocated, zeroed struct job
 *    that records the partition and the current value of jobcount.
 *  - With debug set or Parallel == 1 the partition goes through
 *    SalvageFileSys instead of the job table.
 *  - Otherwise jobs[] holds one chain of jobs per disk (as decided by
 *    SameDisk); a job for a disk already in the table is linked behind the
 *    existing entry.
 *  - The main loop calls wait() when Parallel jobs are already running or
 *    when draining, frees the finished job, advances to the next job on that
 *    disk, and starts whichever job landed in slot startjob.
 *  - After draining, unless useSyslog is set, the per-job files named
 *    AFSDIR_SERVER_SLVGLOG_FILEPATH.<jobnumber> are read line by line and
 *    then unlinked.
 *
 * NOTE(review): jobs[] has MAXPARALLEL slots and new entries are stored at
 * index numjobs while numjobs < Parallel; this assumes Parallel never exceeds
 * MAXPARALLEL - TODO confirm where the option is parsed.
 */
506 SalvageFileSysParallel(struct DiskPartition64 *partP)
509 struct DiskPartition64 *partP;
510 int pid; /* Pid for this job */
511 int jobnumb; /* Log file job number */
512 struct job *nextjob; /* Next partition on disk to salvage */
514 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
515 struct job *thisjob = 0;
516 static int numjobs = 0;
517 static int jobcount = 0;
523 char logFileName[256];
527 /* We have a partition to salvage. Copy it into thisjob */
528 thisjob = (struct job *)malloc(sizeof(struct job));
530 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
533 memset(thisjob, 0, sizeof(struct job));
534 thisjob->partP = partP;
535 thisjob->jobnumb = jobcount;
537 } else if (jobcount == 0) {
538 /* We are asking to wait for all jobs (partp == 0), yet we never
541 Log("No file system partitions named %s* found; not salvaged\n",
542 VICE_PARTITION_PREFIX);
546 if (debug || Parallel == 1) {
548 SalvageFileSys(thisjob->partP, 0);
555 /* Check to see if thisjob is for a disk that we are already
556 * salvaging. If it is, link it in as the next job to do. The
557 * jobs array has 1 entry per disk being salvaged. numjobs is
558 * the total number of disks currently being salvaged. In
559 * order to keep the jobs array compact, when a disk is
560 * completed, the highest element in the jobs array is moved
561 * down to the now open slot.
563 for (j = 0; j < numjobs; j++) {
564 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
565 /* On same disk, add it to this list and return */
566 thisjob->nextjob = jobs[j]->nextjob;
567 jobs[j]->nextjob = thisjob;
574 /* Loop until we start thisjob or until all existing jobs are finished */
575 while (thisjob || (!partP && (numjobs > 0))) {
576 startjob = -1; /* No new job to start */
578 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
579 /* Either the max jobs are running or we have to wait for all
580 * the jobs to finish. In either case, we wait for at least one
581 * job to finish. When it's done, clean up after it.
583 pid = wait(&wstatus);
584 osi_Assert(pid != -1);
585 for (j = 0; j < numjobs; j++) { /* Find which job it is */
586 if (pid == jobs[j]->pid)
589 osi_Assert(j < numjobs);
590 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
591 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
594 numjobs--; /* job no longer running */
595 oldjob = jobs[j]; /* remember */
596 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
597 free(oldjob); /* free the old job */
599 /* If there is another partition on the disk to salvage, then
600 * say we will start it (startjob). If not, then put thisjob there
601 * and say we will start it.
603 if (jobs[j]) { /* Another partition to salvage */
604 startjob = j; /* Will start it */
605 } else { /* There is not another partition to salvage */
607 jobs[j] = thisjob; /* Add thisjob */
609 startjob = j; /* Will start it */
611 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
612 startjob = -1; /* Don't start it - already running */
616 /* We don't have to wait for a job to complete */
618 jobs[numjobs] = thisjob; /* Add this job */
620 startjob = numjobs; /* Will start it */
624 /* Start up a new salvage job on a partition in job slot "startjob" */
625 if (startjob != -1) {
627 Log("Starting salvage of file system partition %s\n",
628 jobs[startjob]->partP->name);
630 /* For NT, we not only fork, but re-exec the salvager. Pass in the
631 * commands and pass the child job number via the data path.
634 nt_SalvagePartition(jobs[startjob]->partP->name,
635 jobs[startjob]->jobnumb);
636 jobs[startjob]->pid = pid;
641 jobs[startjob]->pid = pid;
647 for (fd = 0; fd < 16; fd++)
654 openlog("salvager", LOG_PID, useSyslogFacility);
658 (void)afs_snprintf(logFileName, sizeof logFileName,
660 AFSDIR_SERVER_SLVGLOG_FILEPATH,
661 jobs[startjob]->jobnumb);
662 logFile = afs_fopen(logFileName, "w");
667 SalvageFileSys1(jobs[startjob]->partP, 0);
672 } /* while ( thisjob || (!partP && numjobs > 0) ) */
674 /* If waited for all jobs to complete, now collect log files and return */
676 if (!useSyslog) /* if syslogging - no need to collect */
679 for (i = 0; i < jobcount; i++) {
680 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
681 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
682 if ((passLog = afs_fopen(logFileName, "r"))) {
683 while (fgets(buf, sizeof(buf), passLog)) {
688 (void)unlink(logFileName);
/*
 * Run SalvageFileSys1 for the partition (and optional single volume). The
 * work runs in the calling process when canfork is false or debug is set;
 * otherwise it runs in the branch where Fork() returned 0, and the other
 * branch waits through Wait("SalvageFileSys").
 * NOTE(review): what the forked branch does after SalvageFileSys1 returns is
 * not visible in this part of the file.
 */
697 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
699 if (!canfork || debug || Fork() == 0) {
700 SalvageFileSys1(partP, singleVolumeNumber);
701 if (canfork && !debug) {
706 Wait("SalvageFileSys");
/*
 * Split a device path at its last OS_DIRSEPC. A copy of pbuffer is made in
 * the local pbuf and searched for the last separator; pbuffer itself is then
 * searched the same way and overwritten with the text after that separator.
 * NOTE(review): how wpath is filled and what is returned are not visible here.
 * NOTE(review): pbuf is 128 bytes and is filled with an unbounded strcpy; the
 * visible caller passes a 100-byte buffer, but nothing here enforces a limit.
 * NOTE(review): the final strcpy copies within pbuffer itself (source is
 * ptr + 1 inside the destination), i.e. the regions overlap.
 */
710 get_DevName(char *pbuffer, char *wpath)
712 char pbuf[128], *ptr;
713 strcpy(pbuf, pbuffer);
714 ptr = (char *)strrchr(pbuf, OS_DIRSEPC);
720 ptr = (char *)strrchr(pbuffer, OS_DIRSEPC);
722 strcpy(pbuffer, ptr + 1);
/*
 * Salvage one partition, or only the volume group of singleVolumeNumber when
 * that argument is non-zero.
 *
 * Visible steps, in order:
 *  - zero a local struct SalvInfo and fill in its partition, device and path
 *    fields; abort when tries exceeds VOL_MAX_CHECKOUT_RETRIES;
 *  - single volume: connect to the fileserver with VConnectFS unless running
 *    as salvageServer, set useFSYNC, AskOffline the volume and, under
 *    AFS_DEMAND_ATTACH_FS, LockVolume it (without that define the partition
 *    is locked with VLockPartition); whole partition: clear useFSYNC and
 *    lock the partition;
 *  - scan the partition directory for leftover salvage.inodes.* and
 *    salvage.temp.* files;
 *  - create the temporary inode list file, unlink it right away, and fill it
 *    through GetInodeSummary;
 *  - call GetVolumeSummary, then walk inodeSummary in groups sharing one
 *    RWvolumeId, calling DeleteExtraVolumeHeaderFile for header files with a
 *    smaller parent id and SalvageVolumeGroup for each group;
 *  - call DeleteExtraVolumeHeaderFile for trailing header files, and
 *    RemoveTheForce when the whole partition was salvaged;
 *  - when not Testing and a single volume was requested, AskOnline that
 *    volume first and then every other volume not marked deleted;
 *  - close the inode list file.
 *
 * NOTE(review): fileSysPath is declared as an 8-byte array and one branch
 * fills it with an unbounded sprintf; npath is built with strcpy/strcat and
 * its declaration is not visible.
 * NOTE(review): the result of _tempnam is passed straight to strncpy, so it
 * cannot be checked for NULL or released, and strncpy with a 255 limit does
 * not itself terminate the 256-byte inodeListPath.
 * NOTE(review): opendir is called inside osi_Assert; confirm that macro always
 * evaluates its argument.
 */
729 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
732 char inodeListPath[256];
733 FILE *inodeFile = NULL;
734 static char tmpDevName[100];
735 static char wpath[100];
736 struct VolumeSummary *vsp, *esp;
740 struct SalvInfo l_salvinfo;
741 struct SalvInfo *salvinfo = &l_salvinfo;
744 memset(salvinfo, 0, sizeof(*salvinfo));
751 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
752 Abort("Raced too many times with fileserver restarts while trying to "
753 "checkout/lock volumes; Aborted\n");
755 #ifdef AFS_DEMAND_ATTACH_FS
757 /* unlock all previous volume locks, since we're about to lock them
759 VLockFileReinit(&partP->volLockFile);
761 #endif /* AFS_DEMAND_ATTACH_FS */
763 salvinfo->fileSysPartition = partP;
764 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
765 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
768 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
769 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
770 name = partP->devName;
772 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
773 strcpy(tmpDevName, partP->devName);
774 name = get_DevName(tmpDevName, wpath);
775 salvinfo->fileSysDeviceName = name;
776 salvinfo->filesysfulldev = wpath;
779 if (singleVolumeNumber) {
780 #ifndef AFS_DEMAND_ATTACH_FS
781 /* only non-DAFS locks the partition when salvaging a single volume;
782 * DAFS will lock the individual volumes in the VG */
783 VLockPartition(partP->name);
784 #endif /* !AFS_DEMAND_ATTACH_FS */
788 /* salvageserver already setup fssync conn for us */
789 if ((programType != salvageServer) && !VConnectFS()) {
790 Abort("Couldn't connect to file server\n");
793 salvinfo->useFSYNC = 1;
794 AskOffline(salvinfo, singleVolumeNumber);
795 #ifdef AFS_DEMAND_ATTACH_FS
796 if (LockVolume(salvinfo, singleVolumeNumber)) {
799 #endif /* AFS_DEMAND_ATTACH_FS */
802 salvinfo->useFSYNC = 0;
803 VLockPartition(partP->name);
807 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
810 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
811 partP->name, name, (Testing ? "(READONLY mode)" : ""));
813 Log("***Forced salvage of all volumes on this partition***\n");
818 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
825 osi_Assert((dirp = opendir(salvinfo->fileSysPath)) != NULL);
826 while ((dp = readdir(dirp))) {
827 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
828 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
830 Log("Removing old salvager temp files %s\n", dp->d_name);
831 strcpy(npath, salvinfo->fileSysPath);
832 strcat(npath, OS_DIRSEP);
833 strcat(npath, dp->d_name);
839 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
841 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
842 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
844 snprintf(inodeListPath, 255, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
848 inodeFile = fopen(inodeListPath, "w+b");
850 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
853 /* Using nt_unlink here since we're really using the delete on close
854 * semantics of unlink. In most places in the salvager, we really do
855 * mean to unlink the file at that point. Those places have been
856 * modified to actually do that so that the NT crt can be used there.
858 * jaltman - On NT delete on close cannot be applied to a file while the
859 * process has an open file handle that does not have DELETE file
860 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
861 * delete privileges. As a result the nt_unlink() call will always
864 code = nt_unlink(inodeListPath);
866 code = unlink(inodeListPath);
869 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
872 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
876 salvinfo->inodeFd = fileno(inodeFile);
877 if (salvinfo->inodeFd == -1)
878 Abort("Temporary file %s is missing...\n", inodeListPath);
879 afs_lseek(salvinfo->inodeFd, 0L, SEEK_SET);
880 if (ListInodeOption) {
881 PrintInodeList(salvinfo);
884 /* enumerate volumes in the partition.
885 * figure out sets of read-only + rw volumes.
886 * salvage each set, read-only volumes first, then read-write.
887 * Fix up inodes on last volume in set (whether it is read-write
890 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
894 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
895 i < salvinfo->nVolumesInInodeFile; i = j) {
896 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
898 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
900 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
901 struct VolumeSummary *tsp;
902 /* Scan volume list (from partition root directory) looking for the
903 * current rw volume number in the volume list from the inode scan.
904 * If there is one here that is not in the inode volume list,
906 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
908 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
910 /* Now match up the volume summary info from the root directory with the
911 * entry in the volume list obtained from scanning inodes */
912 salvinfo->inodeSummary[j].volSummary = NULL;
913 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
914 if (tsp->header.id == vid) {
915 salvinfo->inodeSummary[j].volSummary = tsp;
921 /* Salvage the group of volumes (several read-only + 1 read/write)
922 * starting with the current read-only volume we're looking at.
924 SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
927 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
928 for (; vsp < esp; vsp++) {
930 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
933 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
934 RemoveTheForce(salvinfo->fileSysPath);
936 if (!Testing && singleVolumeNumber) {
938 #ifdef AFS_DEMAND_ATTACH_FS
939 /* unlock vol headers so the fs can attach them when we AskOnline */
940 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
941 #endif /* AFS_DEMAND_ATTACH_FS */
943 /* Step through the volumeSummary list and set all volumes on-line.
944 * Most volumes were taken off-line in GetVolumeSummary.
945 * If a volume was deleted, don't tell the fileserver anything, since
946 * we already told the fileserver the volume was deleted back when we
947 * destroyed the volume header.
948 * Also, make sure we bring the singleVolumeNumber back online first.
951 for (j = 0; j < salvinfo->nVolumes; j++) {
952 if (salvinfo->volumeSummaryp[j].header.id == singleVolumeNumber) {
954 if (!salvinfo->volumeSummaryp[j].deleted) {
955 AskOnline(salvinfo, singleVolumeNumber);
961 /* singleVolumeNumber generally should always be in the constructed
962 * volumeSummary, but just in case it's not... */
963 AskOnline(salvinfo, singleVolumeNumber);
966 for (j = 0; j < salvinfo->nVolumes; j++) {
967 if (salvinfo->volumeSummaryp[j].header.id != singleVolumeNumber) {
968 if (!salvinfo->volumeSummaryp[j].deleted) {
969 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
975 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
976 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
979 fclose(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
/*
 * Handle a volume header file that has no matching inode data: build its path
 * under fileSysPath, log that it is not associated with any data, destroy the
 * disk header with VDestroyVolumeDiskHeader (logging any error code), unlink
 * the file itself (ENOENT is tolerated), and tell the fileserver through
 * AskDelete when useFSYNC is set.
 * NOTE(review): the log text indicates that nothing is removed when Testing
 * is set, but the guarding conditions are not visible here.
 * NOTE(review): path is filled with an unbounded sprintf; its declaration is
 * not visible.
 */
983 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
986 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, vsp->fileName);
989 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
992 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
994 Log("Error %ld destroying volume disk header for volume %lu\n",
995 afs_printable_int32_ld(code),
996 afs_printable_uint32_lu(vsp->header.id));
999 /* make sure we actually delete the fileName file; ENOENT
1000 * is fine, since VDestroyVolumeDiskHeader probably already
1002 if (unlink(path) && errno != ENOENT) {
1003 Log("Unable to unlink %s (errno = %d)\n", path, errno);
1005 if (salvinfo->useFSYNC) {
1006 AskDelete(salvinfo, vsp->header.id);
/*
 * Comparator over struct ViceInodeInfo, passed to qsort by GetInodeSummary.
 *
 * When either entry is a special inode (vnodeNumber == INODESPECIAL) the
 * entries are first ordered by a read-write volume id, taken from
 * u.special.parentId for special inodes and from u.vnode.volumeId otherwise;
 * ties are then broken using the volume id and, for two special inodes of the
 * same volume, the special type. Ordinary inodes are ordered by volumeId and
 * then vnodeNumber; after that the uniquifier and data version comparisons
 * are reversed so that the most desirable inode sorts first, with extra
 * range checks compiled in under AFS_3DISPARES and AFS_SGI_EXMAG (presumably
 * to cope with counter wraparound - the values returned are not visible).
 *
 * NOTE(review): the arm comparing u.special.type returns 1 when the two types
 * are equal, so two identical special inodes compare as greater in both
 * directions; qsort comparators are expected to be consistent.
 * NOTE(review): most return statements are not visible in this part of the
 * file.
 */
1014 CompareInodes(const void *_p1, const void *_p2)
1016 const struct ViceInodeInfo *p1 = _p1;
1017 const struct ViceInodeInfo *p2 = _p2;
1018 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1019 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
1020 VolumeId p1rwid, p2rwid;
1022 (p1->u.vnode.vnodeNumber ==
1023 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
1025 (p2->u.vnode.vnodeNumber ==
1026 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1027 if (p1rwid < p2rwid)
1029 if (p1rwid > p2rwid)
1031 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1032 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
1033 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1034 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1035 if (p1->u.vnode.volumeId == p1rwid)
1037 if (p2->u.vnode.volumeId == p2rwid)
1039 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1041 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1042 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1043 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1045 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1047 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1049 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1051 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1053 /* The following tests are reversed, so that the most desirable
1054 * of several similar inodes comes first */
1055 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1056 #ifdef AFS_3DISPARES
1057 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1058 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1061 #ifdef AFS_SGI_EXMAG
1062 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1063 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1068 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1069 #ifdef AFS_3DISPARES
1070 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1071 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1074 #ifdef AFS_SGI_EXMAG
1075 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1076 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1081 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1082 #ifdef AFS_3DISPARES
1083 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1084 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1087 #ifdef AFS_SGI_EXMAG
1088 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1089 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1094 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1095 #ifdef AFS_3DISPARES
1096 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1097 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1100 #ifdef AFS_SGI_EXMAG
1101 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1102 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/*
 * Summarise the run of consecutive entries at ip that share the first entry's
 * volumeId, looking at no more than maxInodes entries. A special inode
 * (vnodeNumber == INODESPECIAL) supplies the read-write volume id through its
 * parentId; the largest vnodeUniquifier seen is tracked in maxunique. The
 * results are stored in summary (volumeId, RWvolumeId, nInodes,
 * nSpecialInodes, maxUniquifier).
 * NOTE(review): the statements that advance ip and update n and nSpecial are
 * not visible here; presumably they count all and special inodes
 * respectively.
 */
1111 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1112 struct InodeSummary *summary)
1114 VolumeId volume = ip->u.vnode.volumeId;
1115 VolumeId rwvolume = volume;
1120 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1122 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1124 rwvolume = ip->u.special.parentId;
1125 /* This isn't quite right, as there could (in error) be different
1126 * parent inodes in different special vnodes */
1128 if (maxunique < ip->u.vnode.vnodeUniquifier)
1129 maxunique = ip->u.vnode.vnodeUniquifier;
1133 summary->volumeId = volume;
1134 summary->RWvolumeId = rwvolume;
1135 summary->nInodes = n;
1136 summary->nSpecialInodes = nSpecial;
1137 summary->maxUniquifier = maxunique;
/*
 * Inode filter handed to ListViceInodes when a single volume is salvaged.
 * Returns non-zero when the inode belongs to singleVolumeNumber: special
 * inodes are matched on u.special.parentId, all others on u.vnode.volumeId.
 * The rock argument is not used in the visible code.
 */
1141 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1143 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1144 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1145 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1150 * Collect list of inodes in file named by path. If a truly fatal error,
1151 * unlink the file and abort. For lesser errors, return -1. The file will
1152 * be unlinked by the caller.
/*
 * Fill inodeFile with the partition's vice inodes and build the per-volume
 * inode summary in salvinfo.
 *
 * Visible steps, in order:
 *  - ListViceInodes writes the inode records into inodeFile, filtered by
 *    OnlyOneVolume when singleVolumeNumber is non-zero; an I/O error is
 *    logged and other failures abort;
 *  - a temporary summary file (salvage.temp.*) is created in tmpdir or the
 *    partition directory and unlinked right away;
 *  - in the calling process, or in the branch where Fork() returned 0 when
 *    forking is allowed, the inode records are read into memory, sorted with
 *    CompareInodes, written back, and one struct InodeSummary per volume is
 *    appended to the summary file through CountVolumeInodes;
 *  - when there are no inodes, the summary file is closed, RemoveTheForce is
 *    called for whole-partition runs, GetVolumeSummary and
 *    DeleteExtraVolumeHeaderFile are called, and a message is logged;
 *  - after Wait("Inode summary") the summary file is read back into
 *    salvinfo->inodeSummary, nVolumesInInodeFile is derived from its size,
 *    and every volSummary pointer is reset to NULL.
 *
 * NOTE(review): summaryFileName is 50 bytes and one branch fills it with an
 * unbounded strcpy of the _tempnam result, which also cannot be checked for
 * NULL or released.
 * NOTE(review): file sizes are narrowed to unsigned long before being used
 * for malloc, read and write; confirm this is wide enough on every supported
 * platform.
 * NOTE(review): afs_fstat and afs_lseek are called inside osi_Assert; confirm
 * that macro always evaluates its argument.
 */
1155 GetInodeSummary(struct SalvInfo *salvinfo, FILE *inodeFile, VolumeId singleVolumeNumber)
1157 struct afs_stat status;
1160 struct ViceInodeInfo *ip, *ip_save;
1161 struct InodeSummary summary;
1162 char summaryFileName[50];
1165 char *dev = salvinfo->fileSysPath;
1166 char *wpath = salvinfo->fileSysPath;
1168 char *dev = salvinfo->fileSysDeviceName;
1169 char *wpath = salvinfo->filesysfulldev;
1171 char *part = salvinfo->fileSysPath;
1175 /* This file used to come from vfsck; cobble it up ourselves now... */
1177 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1178 singleVolumeNumber ? OnlyOneVolume : 0,
1179 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1181 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1184 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1186 if (forceSal && !ForceSalvage) {
1187 Log("***Forced salvage of all volumes on this partition***\n");
1190 fseek(inodeFile, 0L, SEEK_SET);
1191 salvinfo->inodeFd = fileno(inodeFile);
1192 if (salvinfo->inodeFd == -1 || afs_fstat(salvinfo->inodeFd, &status) == -1) {
1193 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1195 tdir = (tmpdir ? tmpdir : part);
1197 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1198 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp."));
1200 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1201 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1203 summaryFile = afs_fopen(summaryFileName, "a+");
1204 if (summaryFile == NULL) {
1205 Abort("Unable to create inode summary file\n");
1209 /* Using nt_unlink here since we're really using the delete on close
1210 * semantics of unlink. In most places in the salvager, we really do
1211 * mean to unlink the file at that point. Those places have been
1212 * modified to actually do that so that the NT crt can be used there.
1214 * jaltman - As commented elsewhere, this cannot work because fopen()
1215 * does not open files with DELETE and FILE_SHARE_DELETE.
1217 code = nt_unlink(summaryFileName);
1219 code = unlink(summaryFileName);
1222 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1225 if (!canfork || debug || Fork() == 0) {
1227 unsigned long st_size=(unsigned long) status.st_size;
1228 nInodes = st_size / sizeof(struct ViceInodeInfo);
1230 fclose(summaryFile);
1231 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1232 RemoveTheForce(salvinfo->fileSysPath);
1234 struct VolumeSummary *vsp;
1237 GetVolumeSummary(salvinfo, singleVolumeNumber);
1239 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1241 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1244 Log("%s vice inodes on %s; not salvaged\n",
1245 singleVolumeNumber ? "No applicable" : "No", dev);
1248 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1250 fclose(summaryFile);
1252 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1255 if (read(salvinfo->inodeFd, ip, st_size) != st_size) {
1256 fclose(summaryFile);
1257 Abort("Unable to read inode table; %s not salvaged\n", dev);
1259 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1260 if (afs_lseek(salvinfo->inodeFd, 0, SEEK_SET) == -1
1261 || write(salvinfo->inodeFd, ip, st_size) != st_size) {
1262 fclose(summaryFile);
1263 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1268 CountVolumeInodes(ip, nInodes, &summary);
1269 if (fwrite(&summary, sizeof(summary), 1, summaryFile) != 1) {
1270 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1271 fclose(summaryFile);
1274 summary.index += (summary.nInodes);
1275 nInodes -= summary.nInodes;
1276 ip += summary.nInodes;
1279 ip = ip_save = NULL;
1280 /* Following fflush is not fclose, because if it was debug mode would not work */
1281 if (fflush(summaryFile) == EOF || fsync(fileno(summaryFile)) == -1) {
1282 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1283 fclose(summaryFile);
1286 if (canfork && !debug) {
1291 if (Wait("Inode summary") == -1) {
1292 fclose(summaryFile);
1293 Exit(1); /* salvage of this partition aborted */
1296 osi_Assert(afs_fstat(fileno(summaryFile), &status) != -1);
1297 if (status.st_size != 0) {
1299 unsigned long st_status=(unsigned long)status.st_size;
1300 salvinfo->inodeSummary = (struct InodeSummary *)malloc(st_status);
1301 osi_Assert(salvinfo->inodeSummary != NULL);
1302 /* For GNU we need to do lseek to get the file pointer moved. */
1303 osi_Assert(afs_lseek(fileno(summaryFile), 0, SEEK_SET) == 0);
1304 ret = read(fileno(summaryFile), salvinfo->inodeSummary, st_status);
1305 osi_Assert(ret == st_status);
1307 salvinfo->nVolumesInInodeFile =(unsigned long)(status.st_size) / sizeof(struct InodeSummary);
1308 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1309 salvinfo->inodeSummary[i].volSummary = NULL;
1311 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)(status.st_size));
1312 fclose(summaryFile);
1316 /* Comparison routine for volume sort.
1317 This is setup so that a read-write volume comes immediately before
1318 any read-only clones of that volume */
1320 CompareVolumes(const void *_p1, const void *_p2)
1322 const struct VolumeSummary *p1 = _p1;
1323 const struct VolumeSummary *p2 = _p2;
1324 if (p1->header.parent != p2->header.parent)
1325 return p1->header.parent < p2->header.parent ? -1 : 1;
1326 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1328 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1330 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1334 * Gleans volumeSummary information by asking the fileserver
1336 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1337 * salvaging a whole partition
1339 * @return whether we obtained the volume summary information or not
1340 * @retval 0 success; we obtained the volume summary information
1341 * @retval -1 we raced with a fileserver restart; volume locks and checkout must be retried
1343 * @retval 1 we did not get the volume summary information; either the
1344 * fileserver responded with an error, or we are not supposed to
1345 * ask the fileserver for the information (e.g. we are salvaging
1346 * the entire partition or we are not the salvageserver)
1348 * @note for non-DAFS, always returns 1
/*
 * AskVolumeSummary: ask the fileserver which volumes belong to the volume
 * group of singleVolumeNumber and build salvinfo->volumeSummaryp from the
 * answer.
 *
 * The query path is compiled only when FSSYNC_BUILD_CLIENT and
 * AFS_DEMAND_ATTACH_FS are both defined, and is taken only when programType
 * is salvageServer and singleVolumeNumber is non-zero.
 *
 * Steps visible in this excerpt:
 *  - FSYNC_VGCQuery is issued; while it fails with reason
 *    FSYNC_PART_SCANNING the query is repeated after a sleep that ramps
 *    from 1 up to 10 seconds.
 *  - A failure with reason FSYNC_UNKNOWN_VOLID is treated as an empty volume
 *    group: q_res is zeroed and q_res.rw is set to singleVolumeNumber.
 *  - Other refusals are logged together with the code and the reason.
 *  - If q_res.rw differs from singleVolumeNumber (a clone was requested),
 *    the volume group is scheduled through SALVSYNC_LinkVolume and the
 *    process exits with SALSRV_EXIT_VOLGROUP_LINK.
 *  - Otherwise an array of VOL_VG_MAX_VOLS summaries is obtained from
 *    calloc, and for the children reported in q_res the volume is taken
 *    offline and locked (except singleVolumeNumber itself), its disk header
 *    is read and converted with DiskToVolumeHeader, and its external name is
 *    stored in fileName. nVolumes counts the recorded entries and the array
 *    is sorted with qsort.
 *  - When no summary could be obtained a fallback message about scanning
 *    the entire partition is logged.
 *
 * NOTE(review): several short lines (return statements, braces, else
 * branches, the handling of a zero child id and of a failed header read)
 * are not visible in this excerpt; confirm the exact control flow against
 * the full file.
 */
1351 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1354 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1355 if (programType == salvageServer) {
1356 if (singleVolumeNumber) {
1357 FSSYNC_VGQry_response_t q_res;
1359 struct VolumeSummary *vsp;
1361 struct VolumeDiskHeader diskHdr;
1363 memset(&res, 0, sizeof(res));
1365 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1368 * We must wait for the partition to finish scanning before
1369 * can continue, since we will not know if we got the entire
1370 * VG membership unless the partition is fully scanned.
1371 * We could, in theory, just scan the partition ourselves if
1372 * the VG cache is not ready, but we would be doing the exact
1373 * same scan the fileserver is doing; it will almost always
1374 * be faster to wait for the fileserver. The only exceptions
1375 * are if the partition does not take very long to scan, and
1376 * in that case it's fast either way, so who cares?
1378 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1379 Log("waiting for fileserver to finish scanning partition %s...\n",
1380 salvinfo->fileSysPartition->name);
1382 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1383 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1384 * just so small partitions don't need to wait over 10
1385 * seconds every time, and large partitions are generally
1386 * polled only once every ten seconds. */
1387 sleep((i > 10) ? (i = 10) : i);
1389 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1393 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1394 /* This can happen if there's no header for the volume
1395 * we're salvaging, or no headers exist for the VG (if
1396 * we're salvaging an RW). Act as if we got a response
1397 * with no VG members. The headers may be created during
1398 * salvaging, if there are inodes in this VG. */
1400 memset(&q_res, 0, sizeof(q_res));
1401 q_res.rw = singleVolumeNumber;
1405 Log("fileserver refused VGCQuery request for volume %lu on "
1406 "partition %s, code %ld reason %ld\n",
1407 afs_printable_uint32_lu(singleVolumeNumber),
1408 salvinfo->fileSysPartition->name,
1409 afs_printable_int32_ld(code),
1410 afs_printable_int32_ld(res.hdr.reason));
1414 if (q_res.rw != singleVolumeNumber) {
1415 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1416 afs_printable_uint32_lu(singleVolumeNumber),
1417 afs_printable_uint32_lu(q_res.rw));
1418 #ifdef SALVSYNC_BUILD_CLIENT
1419 if (SALVSYNC_LinkVolume(q_res.rw,
1421 salvinfo->fileSysPartition->name,
1423 Log("schedule request failed\n");
1425 #endif /* SALVSYNC_BUILD_CLIENT */
1426 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1429 salvinfo->volumeSummaryp = calloc(VOL_VG_MAX_VOLS, sizeof(struct VolumeSummary));
1430 osi_Assert(salvinfo->volumeSummaryp != NULL);
1432 salvinfo->nVolumes = 0;
1433 vsp = salvinfo->volumeSummaryp;
1435 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1436 char name[VMAXPATHLEN];
1438 if (!q_res.children[i]) {
1442 /* AskOffline for singleVolumeNumber was called much earlier */
1443 if (q_res.children[i] != singleVolumeNumber) {
1444 AskOffline(salvinfo, q_res.children[i]);
1445 if (LockVolume(salvinfo, q_res.children[i])) {
1451 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1453 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1454 afs_printable_uint32_lu(q_res.children[i]));
1459 DiskToVolumeHeader(&vsp->header, &diskHdr);
1460 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1461 vsp->fileName = ToString(name);
1462 salvinfo->nVolumes++;
1466 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1471 Log("Cannot get volume summary from fileserver; falling back to scanning "
1472 "entire partition\n");
1475 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
1480 * count how many volume headers are found by VWalkVolumeHeaders.
1482 * @param[in] dp the disk partition (unused)
1483 * @param[in] name full path to the .vol header (unused)
1484 * @param[in] hdr the header data (unused)
1485 * @param[in] last whether this is the last try or not (unused)
1486 * @param[in] rock actually an afs_int32*; the running count of how many
1487 * volumes we have found
/*
 * CountHeader: header-walk callback used for counting volume headers.
 * The rock argument is viewed as a pointer to an afs_int32 running count;
 * GetVolumeSummary passes this function to VWalkVolumeHeaders for its first
 * partition walk.
 *
 * NOTE(review): the statements that update the count and produce the return
 * value are not visible in this excerpt.
 */
1492 CountHeader(struct DiskPartition64 *dp, const char *name,
1493 struct VolumeDiskHeader *hdr, int last, void *rock)
1495 afs_int32 *nvols = (afs_int32 *)rock;
1501 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
1504 struct SalvageScanParams {
1505 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1506 * vol id of the VG we're salvaging */
1507 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1508 * we're filling in */
1509 afs_int32 nVolumes; /**< # of vols we've encountered */
1510 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1511 * # of vols we've alloc'd memory for) */
1512 int retry; /**< do we need to retry vol lock/checkout? */
1513 struct SalvInfo *salvinfo; /**< salvage job info */
1517 * records volume summary info found from VWalkVolumeHeaders.
1519 * Found volumes are also taken offline if they are in the specific volume
1520 * group we are looking for.
1522 * @param[in] dp the disk partition
1523 * @param[in] name full path to the .vol header
1524 * @param[in] hdr the header data
1525 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1526 * @param[in] rock actually a struct SalvageScanParams*, containing the
1527 * information needed to record the volume summary data
1529 * @return operation status
1531 * @retval -1 volume locking raced with fileserver restart; checking out
1532 * and locking volumes needs to be retried
1533 * @retval 1 volume header is mis-named and should be deleted
/*
 * RecordHeader: header-walk callback that records one volume header into the
 * summary array described by the SalvageScanParams passed through rock.
 *
 * Steps visible in this excerpt:
 *  - The disk header is converted into summary.header with
 *    DiskToVolumeHeader.
 *  - When a single volume is being salvaged and this header is that volume
 *    but its parent is a different id (the requested volume is a clone):
 *    under salvageServer the parent volume group is scheduled through
 *    SALVSYNC_LinkVolume and the process exits with
 *    SALSRV_EXIT_VOLGROUP_LINK; otherwise a message saying the read-only
 *    volume is not salvaged is logged.
 *  - For headers in scope (partition salvage, or id or parent equal to
 *    singleVolumeNumber) the expected file name is formatted with VFORMAT
 *    and compared with the base name of the path.
 *  - When the name is good, or this is the last try, volumes other than
 *    singleVolumeNumber are taken offline with AskOffline; under
 *    AFS_DEMAND_ATTACH_FS the volume is also locked with LockVolume.
 *  - A mis-named header is reported on the last try unless Showmode is set.
 *  - The base name is stored in summary.fileName, the function aborts when
 *    nVolumes exceeds totalVolumes, and the summary is copied to params->vsp.
 *
 * NOTE(review): the lines that set badname, advance params->vsp, increment
 * params->nVolumes and return are not visible in this excerpt, so the bound
 * check against totalVolumes cannot be fully verified from here.
 */
1536 RecordHeader(struct DiskPartition64 *dp, const char *name,
1537 struct VolumeDiskHeader *hdr, int last, void *rock)
1539 char nameShouldBe[64];
1540 struct SalvageScanParams *params;
1541 struct VolumeSummary summary;
1542 VolumeId singleVolumeNumber;
1543 struct SalvInfo *salvinfo;
1545 params = (struct SalvageScanParams *)rock;
1547 singleVolumeNumber = params->singleVolumeNumber;
1548 salvinfo = params->salvinfo;
1550 DiskToVolumeHeader(&summary.header, hdr);
1552 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1553 && summary.header.parent != singleVolumeNumber) {
1555 if (programType == salvageServer) {
1556 #ifdef SALVSYNC_BUILD_CLIENT
1557 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1558 summary.header.id, summary.header.parent);
1559 if (SALVSYNC_LinkVolume(summary.header.parent,
1563 Log("schedule request failed\n");
1566 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1569 Log("%u is a read-only volume; not salvaged\n",
1570 singleVolumeNumber);
1575 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1576 || summary.header.parent == singleVolumeNumber) {
1578 /* check if the header file is incorrectly named */
1580 const char *base = strrchr(name, OS_DIRSEPC);
1587 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1588 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1591 if (strcmp(nameShouldBe, base)) {
1592 /* .vol file has wrong name; retry/delete */
1596 if (!badname || last) {
1597 /* only offline the volume if the header is good, or if this is
1598 * the last try looking at it; avoid AskOffline'ing the same vol
1601 if (singleVolumeNumber
1602 && summary.header.id != singleVolumeNumber) {
1603 /* don't offline singleVolumeNumber; we already did that
1606 AskOffline(salvinfo, summary.header.id);
1608 #ifdef AFS_DEMAND_ATTACH_FS
1610 /* don't lock the volume if the header is bad, since we're
1611 * about to delete it anyway. */
1612 if (LockVolume(salvinfo, summary.header.id)) {
1617 #endif /* AFS_DEMAND_ATTACH_FS */
1621 if (last && !Showmode) {
1622 Log("Volume header file %s is incorrectly named (should be %s "
1623 "not %s); %sdeleted (it will be recreated later, if "
1624 "necessary)\n", name, nameShouldBe, base,
1625 (Testing ? "it would have been " : ""));
1630 summary.fileName = ToString(base);
1633 if (params->nVolumes > params->totalVolumes) {
1634 /* We found more volumes than we found on the first partition walk;
1635 * apparently something created a volume while we were
1636 * partition-salvaging, or we found more than 20 vols when salvaging a
1637 * particular volume. Abort if we detect this, since other programs
1638 * supposed to not touch the partition while it is partition-salvaging,
1639 * and we shouldn't find more than 20 vols in a VG.
1641 Abort("Found %ld vol headers, but should have found at most %ld! "
1642 "Make sure the volserver/fileserver are not running at the "
1643 "same time as a partition salvage\n",
1644 afs_printable_int32_ld(params->nVolumes),
1645 afs_printable_int32_ld(params->totalVolumes));
1648 memcpy(params->vsp, &summary, sizeof(summary));
1656 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1658 * If the header could not be read in at all, the header is always unlinked.
1659 * If instead RecordHeader said the header was bad (that is, the header file
1660 * is mis-named), we only unlink if we are doing a partition salvage, as
1661 * opposed to salvaging a specific volume group.
1663 * @param[in] dp the disk partition
1664 * @param[in] name full path to the .vol header
1665 * @param[in] hdr header data, or NULL if the header could not be read
1666 * @param[in] rock actually a struct SalvageScanParams*, with some information
/*
 * UnlinkHeader: header-walk callback that may remove a bad volume header
 * file.
 *
 * Two cases are distinguished in the visible code: the header could not be
 * read at all (a message is logged saying the file is not a legitimate
 * volume header), or a header was read but the walk is a partition salvage
 * (params->singleVolumeNumber is zero). The file is removed with unlink only
 * when dounlink is set, and an unlink failure is logged with errno.
 *
 * NOTE(review): the declaration of dounlink and the conditions under which
 * it is set (for example any Testing or Showmode guards) are not visible in
 * this excerpt.
 */
1670 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1671 struct VolumeDiskHeader *hdr, void *rock)
1673 struct SalvageScanParams *params;
1676 params = (struct SalvageScanParams *)rock;
1679 /* no header; header is too bogus to read in at all */
1681 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1687 } else if (!params->singleVolumeNumber) {
1688 /* We were able to read in a header, but RecordHeader said something
1689 * was wrong with it. We only unlink those if we are doing a partition
1696 if (dounlink && unlink(name)) {
1697 Log("Error %d while trying to unlink %s\n", errno, name);
1702 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1703 * the fileserver for VG information, or by scanning the /vicepX partition.
1705 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1706 * are salvaging, or 0 if this is a partition
1709 * @return operation status
1711 * @retval -1 we raced with a fileserver restart; checking out and locking
1712 * volumes must be retried
1715 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1717 afs_int32 nvols = 0;
1718 struct SalvageScanParams params;
1721 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1723 /* we successfully got the vol information from the fileserver; no
1724 * need to scan the partition */
1728 /* we need to retry volume checkout */
1732 if (!singleVolumeNumber) {
1733 /* Count how many volumes we have in /vicepX */
1734 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1737 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1742 nvols = VOL_VG_MAX_VOLS;
1745 salvinfo->volumeSummaryp = calloc(nvols, sizeof(struct VolumeSummary));
1746 osi_Assert(salvinfo->volumeSummaryp != NULL);
1748 params.singleVolumeNumber = singleVolumeNumber;
1749 params.vsp = salvinfo->volumeSummaryp;
1750 params.nVolumes = 0;
1751 params.totalVolumes = nvols;
1753 params.salvinfo = salvinfo;
1755 /* walk the partition directory of volume headers and record the info
1756 * about them; unlinking invalid headers */
1757 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1758 UnlinkHeader, ¶ms);
1760 /* we apparently need to retry checking-out/locking volumes */
1764 Abort("Failed to get volume header summary\n");
1766 salvinfo->nVolumes = params.nVolumes;
1768 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1774 /* Find the link table. This should be associated with the RW volume or, if
1775 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
/*
 * FindLinkHandle: search the special inodes of each of the nVols inode
 * summaries for one whose special type is VI_LINKTABLE and return its inode
 * number. allInodes is indexed with each summary index field, so it must be
 * the base that those index values are relative to.
 *
 * NOTE(review): the value returned when no link table inode is found is not
 * visible in this excerpt; callers test the result with VALID_INO.
 */
1778 FindLinkHandle(struct InodeSummary *isp, int nVols,
1779 struct ViceInodeInfo *allInodes)
1782 struct ViceInodeInfo *ip;
1784 for (i = 0; i < nVols; i++) {
1785 ip = allInodes + isp[i].index;
1786 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1787 if (ip[j].u.special.type == VI_LINKTABLE)
1788 return ip[j].inodeNumber;
1795 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1797 struct versionStamp version;
1800 if (!VALID_INO(ino))
1802 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
1803 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1804 if (!VALID_INO(ino))
1806 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1807 isp->RWvolumeId, errno);
1808 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1809 fdP = IH_OPEN(salvinfo->VGLinkH);
1811 Abort("Can't open link table for volume %u (error = %d)\n",
1812 isp->RWvolumeId, errno);
1814 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1815 Abort("Can't truncate link table for volume %u (error = %d)\n",
1816 isp->RWvolumeId, errno);
1818 version.magic = LINKTABLEMAGIC;
1819 version.version = LINKTABLEVERSION;
1821 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1823 Abort("Can't truncate link table for volume %u (error = %d)\n",
1824 isp->RWvolumeId, errno);
1826 FDH_REALLYCLOSE(fdP);
1828 /* If the volume summary exits (i.e., the V*.vol header file exists),
1829 * then set this inode there as well.
1831 if (isp->volSummary)
1832 isp->volSummary->header.linkTable = ino;
/*
 * Thread entry body: unpack the SVGParms_t passed through arg and run
 * DoSalvageVolumeGroup with the salvage info, inode summary pointer and
 * volume count it carries. SalvageVolumeGroup passes nt_SVG to
 * pthread_create.
 * NOTE(review): the signature and return statement of this function are not
 * visible in this excerpt.
 */
1841 SVGParms_t *parms = (SVGParms_t *) arg;
1842 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * SalvageVolumeGroup: entry point for salvaging one volume group.
 *
 * Resets the per-volume state kept in salvinfo (VolumeChanged, VGLinkH,
 * VGLinkH_cnt and VolInfo), then packs its arguments into parms and runs
 * nt_SVG on a joinable pthread, waiting for it with pthread_join. Failures
 * of pthread_attr_init, pthread_attr_setdetachstate and pthread_create are
 * logged with the RW volume id.
 *
 * NOTE(review): the closing endif names AFS_NT40_ENV, so the thread path is
 * presumably the NT build only and other builds presumably call
 * DoSalvageVolumeGroup directly; the preprocessor lines and the non-thread
 * branch are not visible in this excerpt, so confirm against the full file.
 */
1847 SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1850 pthread_attr_t tattr;
1854 /* Initialize per volume global variables, even if later code does so */
1855 salvinfo->VolumeChanged = 0;
1856 salvinfo->VGLinkH = NULL;
1857 salvinfo->VGLinkH_cnt = 0;
1858 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1860 parms.svgp_inodeSummaryp = isp;
1861 parms.svgp_count = nVols;
1862 parms.svgp_salvinfo = salvinfo;
1863 code = pthread_attr_init(&tattr);
1865 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1869 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1871 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1874 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1876 Log("Failed to create thread to salvage volume group %u\n",
1880 (void)pthread_join(tid, NULL);
1882 #endif /* AFS_NT40_ENV */
/*
 * DoSalvageVolumeGroup: salvage the nVols volumes of one volume group, whose
 * inode summaries start at isp.
 *
 * Steps visible in this excerpt:
 *  - haveRWvolume is true when the first summary is the RW volume
 *    (volumeId equals RWvolumeId) and it has special inodes.
 *  - Unless ForceSalvage is set, QuickCheck is consulted first (not when
 *    ShowMounts is set and an RW volume is present).
 *  - When canfork is set and debug is not, the work is done in a forked
 *    child and the parent waits for it.
 *  - The inode records of all volumes in the group are read from
 *    salvinfo->inodeFd into one buffer. allInodes is that buffer minus
 *    isp->index, so the partition-wide index fields of the summaries can be
 *    applied to it directly.
 *  - salvageTo is 0 when an RW volume is present and 1 otherwise, which
 *    keeps the reverse loop from salvaging a missing RW volume.
 *  - Under AFS_NAMEI_ENV the link table inode is located with
 *    FindLinkHandle; when it is missing or cannot be opened it is recreated
 *    with CreateLinkTable and every non-special inode gets a link count of 1
 *    through namei_SetLinkCount.
 *  - Volumes are processed from last to first. Each one is checked twice
 *    (check = 1, then check = 0) with SalvageVolumeHeaderFile and
 *    SalvageVnodes; failures go to MaybeZapVolume.
 *  - Afterwards the remaining linkCount of every inode is driven to zero
 *    with IH_DEC or IH_INC; the link table inode itself is adjusted last
 *    using dec_VGLinkH.
 *  - SalvageVolume then runs the directory checks on the RW volume and the
 *    link table handle is released.
 *
 * NOTE(review): no NULL check of the malloc result is visible before the
 * buffer is used; size is an int product of totalInodes and the record size;
 * and allInodes points outside the allocated buffer whenever isp->index is
 * non-zero. Many short lines are not visible in this excerpt, so confirm
 * these points against the full file.
 */
1885 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1887 struct ViceInodeInfo *inodes, *allInodes, *ip;
1888 int i, totalInodes, size, salvageTo;
1892 int dec_VGLinkH = 0;
1894 FdHandle_t *fdP = NULL;
1896 salvinfo->VGLinkH_cnt = 0;
1897 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1898 && isp->nSpecialInodes > 0);
1899 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1900 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1903 if (ShowMounts && !haveRWvolume)
1905 if (canfork && !debug && Fork() != 0) {
1906 (void)Wait("Salvage volume group");
1909 for (i = 0, totalInodes = 0; i < nVols; i++)
1910 totalInodes += isp[i].nInodes;
1911 size = totalInodes * sizeof(struct ViceInodeInfo);
1912 inodes = (struct ViceInodeInfo *)malloc(size);
1913 allInodes = inodes - isp->index; /* this would the base of all the inodes
1914 * for the partition, if all the inodes
1915 * had been read into memory */
1916 osi_Assert(afs_lseek
1917 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1919 osi_Assert(read(salvinfo->inodeFd, inodes, size) == size);
1921 /* Don't try to salvage a read write volume if there isn't one on this
1923 salvageTo = haveRWvolume ? 0 : 1;
1925 #ifdef AFS_NAMEI_ENV
1926 ino = FindLinkHandle(isp, nVols, allInodes);
1927 if (VALID_INO(ino)) {
1928 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1929 fdP = IH_OPEN(salvinfo->VGLinkH);
1931 if (!VALID_INO(ino) || fdP == NULL) {
1932 Log("%s link table for volume %u.\n",
1933 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1935 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1938 struct ViceInodeInfo *ip;
1939 CreateLinkTable(salvinfo, isp, ino);
1940 fdP = IH_OPEN(salvinfo->VGLinkH);
1941 /* Sync fake 1 link counts to the link table, now that it exists */
1943 for (i = 0; i < nVols; i++) {
1944 ip = allInodes + isp[i].index;
1945 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1946 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1953 FDH_REALLYCLOSE(fdP);
1955 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1958 /* Salvage in reverse order--read/write volume last; this way any
1959 * Inodes not referenced by the time we salvage the read/write volume
1960 * can be picked up by the read/write volume */
1961 /* ACTUALLY, that's not done right now--the inodes just vanish */
1962 for (i = nVols - 1; i >= salvageTo; i--) {
1964 struct InodeSummary *lisp = &isp[i];
1965 #ifdef AFS_NAMEI_ENV
1966 /* If only the RO is present on this partition, the link table
1967 * shows up as a RW volume special file. Need to make sure the
1968 * salvager doesn't try to salvage the non-existent RW.
1970 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1971 /* If this only special inode is the link table, continue */
1972 if (inodes->u.special.type == VI_LINKTABLE) {
1979 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1980 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1981 /* Check inodes twice. The second time do things seriously. This
1982 * way the whole RO volume can be deleted, below, if anything goes wrong */
1983 for (check = 1; check >= 0; check--) {
1985 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
1987 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
1988 if (rw && deleteMe) {
1989 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1990 * volume won't be called */
1996 if (rw && check == 1)
1998 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
1999 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
2005 /* Fix actual inode counts */
2008 Log("totalInodes %d\n",totalInodes);
2009 for (ip = inodes; totalInodes; ip++, totalInodes--) {
2010 static int TraceBadLinkCounts = 0;
2011 #ifdef AFS_NAMEI_ENV
2012 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
2013 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
2014 VGLinkH_p1 = ip->u.param[0];
2015 continue; /* Deal with this last. */
2018 if (ip->linkCount != 0 && TraceBadLinkCounts) {
2019 TraceBadLinkCounts--; /* Limit reports, per volume */
2020 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
2022 while (ip->linkCount > 0) {
2023 /* below used to assert, not break */
2025 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2026 Log("idec failed. inode %s errno %d\n",
2027 PrintInode(stmp, ip->inodeNumber), errno);
2033 while (ip->linkCount < 0) {
2034 /* these used to be asserts */
2036 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2037 Log("iinc failed. inode %s errno %d\n",
2038 PrintInode(stmp, ip->inodeNumber), errno);
2045 #ifdef AFS_NAMEI_ENV
2046 while (dec_VGLinkH > 0) {
2047 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2048 Log("idec failed on link table, errno = %d\n", errno);
2052 while (dec_VGLinkH < 0) {
2053 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2054 Log("iinc failed on link table, errno = %d\n", errno);
2061 /* Directory consistency checks on the rw volume */
2063 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2064 IH_RELEASE(salvinfo->VGLinkH);
2066 if (canfork && !debug) {
/*
 * QuickCheck: inspect the volume info header of each of the nVols volumes to
 * decide whether the group can go without a full salvage.
 *
 * For each volume the volume info file is read through an inode handle built
 * from the header parent and volumeInfo fields. The header passes when the
 * read returns a full VolumeDiskData, the magic is VOLUMEINFOMAGIC,
 * dontSalvage equals DONT_SALVAGE, and needsSalvaged and destroyMe are both
 * zero. For a header that passes with inUse set, inUse is cleared, inService
 * is set to 1 and the header is written back with IH_IWRITE.
 *
 * NOTE(review): the return statements and the handling of a phantom RW
 * volume or a failed write are not visible in this excerpt. The caller skips
 * the salvage when the result is non-zero and ForceSalvage is off, so a
 * non-zero result presumably means all headers passed; confirm against the
 * full file.
 */
2073 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2075 /* Check headers BEFORE forking */
2079 for (i = 0; i < nVols; i++) {
2080 struct VolumeSummary *vs = isp[i].volSummary;
2081 VolumeDiskData volHeader;
2083 /* Don't salvage just because phantom rw volume is there... */
2084 /* (If a read-only volume exists, read/write inodes must also exist) */
2085 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2089 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2090 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2091 == sizeof(volHeader)
2092 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2093 && volHeader.dontSalvage == DONT_SALVAGE
2094 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2095 if (volHeader.inUse != 0) {
2096 volHeader.inUse = 0;
2097 volHeader.inService = 1;
2099 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2100 != sizeof(volHeader)) {
2116 /* SalvageVolumeHeaderFile
2118 * Salvage the top level V*.vol header file. Make sure the special files
2119 * exist and that there are no duplicates.
2121 * Calls SalvageHeader for each possible type of volume special file.
/*
 * SalvageVolumeHeaderFile: rebuild the contents of the V*.vol header for one
 * volume from its special inodes and write the header when it is missing or
 * differs.
 *
 * Steps visible in this excerpt:
 *  - A skip array with one entry per special inode is allocated and zeroed;
 *    when the allocation fails a message is logged and the salvage goes on
 *    without duplicate special inode recovery.
 *  - init_inode_info ties the stuff[] table to the fields of tempHeader.
 *  - When a volume summary exists (and skip was allocated), its header is
 *    copied into tempHeader and every special inode whose number matches the
 *    header field for its type is remembered in goodspecial[].
 *  - tempHeader is then reset to a fresh header carrying the magic, version,
 *    volume id and RW volume id.
 *  - Adjacent special inodes of the same type are reported as duplicates.
 *    When goodspecial[] names the inode referenced by the old header, that
 *    one is used; otherwise the salvage of the volume is reported as
 *    aborted.
 *  - Each special inode is then examined: types outside 1..MAXINODETYPE are
 *    reported as rubbish; inodes flagged in skip[] are reported as orphans
 *    (removed when orphans is ORPH_REMOVE, otherwise ignored); the rest are
 *    stored into tempHeader through stuff[]. Outside check mode linkCount is
 *    decremented for every type except VI_LINKTABLE so the inode is kept.
 *  - VGLinkH_cnt is incremented once per header, and ClearROInUseBit is
 *    called for read-only volumes outside check mode.
 *  - For every stuff[] entry SalvageHeader is called; the link table entry
 *    is first pointed at the inode of salvinfo->VGLinkH when that is valid.
 *  - With no volume summary a new one is allocated and the header is written
 *    with VCreateVolumeDiskHeader; with an existing summary whose header
 *    differs from tempHeader it is written with VWriteVolumeDiskHeader.
 *  - Finally volumeInfoHandle is initialized from the header.
 *
 * NOTE(review): in the lines visible here the calloc result for volSummary
 * is dereferenced without a NULL check, fileName is copied into the 64 byte
 * headerName with strcpy and no length check, and no free of the skip array
 * appears. Many short lines are not visible in this excerpt, so confirm
 * these points against the full file.
 */
2125 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2126 struct ViceInodeInfo *inodes, int RW,
2127 int check, int *deleteMe)
2130 struct ViceInodeInfo *ip;
2131 int allinodesobsolete = 1;
2132 struct VolumeDiskHeader diskHeader;
2133 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2135 struct VolumeHeader tempHeader;
2136 struct afs_inode_info stuff[MAXINODETYPE];
2138 /* keeps track of special inodes that are probably 'good'; they are
2139 * referenced in the vol header, and are included in the given inodes
2144 } goodspecial[MAXINODETYPE];
2149 memset(goodspecial, 0, sizeof(goodspecial));
2151 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2153 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2155 Log("cannot allocate memory for inode skip array when salvaging "
2156 "volume %lu; not performing duplicate special inode recovery\n",
2157 afs_printable_uint32_lu(isp->volumeId));
2158 /* still try to perform the salvage; the skip array only does anything
2159 * if we detect duplicate special inodes */
2162 init_inode_info(&tempHeader, stuff);
2165 * First, look at the special inodes and see if any are referenced by
2166 * the existing volume header. If we find duplicate special inodes, we
2167 * can use this information to use the referenced inode (it's more
2168 * likely to be the 'good' one), and throw away the duplicates.
2170 if (isp->volSummary && skip) {
2171 /* use tempHeader, so we can use the stuff[] array to easily index
2172 * into the isp->volSummary special inodes */
2173 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2175 for (i = 0; i < isp->nSpecialInodes; i++) {
2176 ip = &inodes[isp->index + i];
2177 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2178 /* will get taken care of in a later loop */
2181 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2182 goodspecial[ip->u.special.type-1].valid = 1;
2183 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2188 memset(&tempHeader, 0, sizeof(tempHeader));
2189 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2190 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2191 tempHeader.id = isp->volumeId;
2192 tempHeader.parent = isp->RWvolumeId;
2194 /* Check for duplicates (inodes are sorted by type field) */
2195 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2196 ip = &inodes[isp->index + i];
2197 if (ip->u.special.type == (ip + 1)->u.special.type) {
2198 afs_ino_str_t stmp1, stmp2;
2200 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2201 /* Will be caught in the loop below */
2205 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2206 ip->u.special.type, isp->volumeId,
2207 PrintInode(stmp1, ip->inodeNumber),
2208 PrintInode(stmp2, (ip+1)->inodeNumber));
2210 if (skip && goodspecial[ip->u.special.type-1].valid) {
2211 Inode gi = goodspecial[ip->u.special.type-1].inode;
2214 Log("using special inode referenced by vol header (%s)\n",
2215 PrintInode(stmp1, gi));
2218 /* the volume header references some special inode of
2219 * this type in the inodes array; are we it? */
2220 if (ip->inodeNumber != gi) {
2222 } else if ((ip+1)->inodeNumber != gi) {
2223 /* in case this is the last iteration; we need to
2224 * make sure we check ip+1, too */
2229 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2237 for (i = 0; i < isp->nSpecialInodes; i++) {
2239 ip = &inodes[isp->index + i];
2240 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2242 Log("Rubbish header inode %s of type %d\n",
2243 PrintInode(stmp, ip->inodeNumber),
2244 ip->u.special.type);
2250 Log("Rubbish header inode %s of type %d; deleted\n",
2251 PrintInode(stmp, ip->inodeNumber),
2252 ip->u.special.type);
2253 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2254 if (skip && skip[i]) {
2255 if (orphans == ORPH_REMOVE) {
2256 Log("Removing orphan special inode %s of type %d\n",
2257 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2260 Log("Ignoring orphan special inode %s of type %d\n",
2261 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2262 /* fall through to the ip->linkCount--; line below */
2265 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2266 allinodesobsolete = 0;
2268 if (!check && ip->u.special.type != VI_LINKTABLE)
2269 ip->linkCount--; /* Keep the inode around */
2277 if (allinodesobsolete) {
2284 salvinfo->VGLinkH_cnt++; /* one for every header. */
2286 if (!RW && !check && isp->volSummary) {
2287 ClearROInUseBit(isp->volSummary);
2291 for (i = 0; i < MAXINODETYPE; i++) {
2292 if (stuff[i].inodeType == VI_LINKTABLE) {
2293 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2294 * And we may have recreated the link table earlier, so set the
2295 * RW header as well.
2297 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2298 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2302 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2306 if (isp->volSummary == NULL) {
2308 char headerName[64];
2309 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2310 (void)afs_snprintf(path, sizeof path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, headerName);
2312 Log("No header file for volume %u\n", isp->volumeId);
2316 Log("No header file for volume %u; %screating %s\n",
2317 isp->volumeId, (Testing ? "it would have been " : ""),
2319 isp->volSummary = calloc(1, sizeof(struct VolumeSummary));
2320 isp->volSummary->fileName = ToString(headerName);
2322 writefunc = VCreateVolumeDiskHeader;
2325 char headerName[64];
2326 /* hack: these two fields are obsolete... */
2327 isp->volSummary->header.volumeAcl = 0;
2328 isp->volSummary->header.volumeMountTable = 0;
2331 (&isp->volSummary->header, &tempHeader,
2332 sizeof(struct VolumeHeader))) {
2333 /* We often remove the name before calling us, so we make a fake one up */
2334 if (isp->volSummary->fileName) {
2335 strcpy(headerName, isp->volSummary->fileName);
2337 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2338 isp->volSummary->fileName = ToString(headerName);
2340 (void)afs_snprintf(path, sizeof path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, headerName);
2342 Log("Header file %s is damaged or no longer valid%s\n", path,
2343 (check ? "" : "; repairing"));
2347 writefunc = VWriteVolumeDiskHeader;
2351 memcpy(&isp->volSummary->header, &tempHeader,
2352 sizeof(struct VolumeHeader));
2355 Log("It would have written a new header file for volume %u\n",
2359 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2360 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2362 Log("Error %ld writing volume header file for volume %lu\n",
2363 afs_printable_int32_ld(code),
2364 afs_printable_uint32_lu(diskHeader.id));
2369 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2370 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader: check, and where allowed repair, one special inode that
 * belongs to a volume header.
 *
 * salvinfo - state of the current salvage job (device, path, VolInfo)
 * sp       - descriptor of the special inode: type, expected stamp, size,
 *            description text and a pointer to the inode number
 * isp      - summary of the volume being salvaged
 * check    - non-zero when the caller only wants problems reported
 * deleteMe - out parameter; its use is not visible in this excerpt
 *
 * Steps visible in this excerpt:
 *   - a zero inode number is reported and the inode recreated with IH_CREATE;
 *   - the inode is opened and sp->size bytes are read; the magic number read
 *     is compared with sp->stamp.magic;
 *   - when recreating outside Testing mode the file is truncated and either
 *     a fresh VolumeDiskData (VI_VOLINFO) or only the version stamp is
 *     written back;
 *   - for VI_VOLINFO the header is copied to salvinfo->VolInfo and the
 *     update or creation time is logged.
 *
 * NOTE(review): the return statements are not visible here; presumably -1 is
 * returned on failure, since a caller in this file compares the result with
 * -1 -- confirm.
 */
2375 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2376 struct InodeSummary *isp, int check, int *deleteMe)
2379 VolumeDiskData volumeInfo;
2380 struct versionStamp fileHeader;
2389 #ifndef AFS_NAMEI_ENV
2390 if (sp->inodeType == VI_LINKTABLE)
2393 if (*(sp->inode) == 0) {
2395 Log("Missing inode in volume header (%s)\n", sp->description);
2399 Log("Missing inode in volume header (%s); %s\n", sp->description,
2400 (Testing ? "it would have recreated it" : "recreating"));
2403 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2404 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2405 if (!VALID_INO(*(sp->inode)))
2407 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2408 sp->description, errno);
2413 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2414 fdP = IH_OPEN(specH);
2415 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2416 /* bail out early and destroy the volume */
2418 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2425 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2426 sp->description, errno);
2429 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2430 || header.fileHeader.magic != sp->stamp.magic)) {
2432 Log("Part of the header (%s) is corrupted\n", sp->description);
2433 FDH_REALLYCLOSE(fdP);
2437 Log("Part of the header (%s) is corrupted; recreating\n",
2440 /* header can be garbage; make sure we don't read garbage data from
2442 memset(&header, 0, sizeof(header));
2444 if (sp->inodeType == VI_VOLINFO
2445 && header.volumeInfo.destroyMe == DESTROY_ME) {
2448 FDH_REALLYCLOSE(fdP);
2452 if (recreate && !Testing) {
2455 ("Internal error: recreating volume header (%s) in check mode\n",
2457 nBytes = FDH_TRUNC(fdP, 0);
2459 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2460 sp->description, errno);
2462 /* The following code should be moved into vutil.c */
2463 if (sp->inodeType == VI_VOLINFO) {
2465 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2466 header.volumeInfo.stamp = sp->stamp;
2467 header.volumeInfo.id = isp->volumeId;
2468 header.volumeInfo.parentId = isp->RWvolumeId;
/* NOTE(review): unbounded sprintf; the text is the 6-byte prefix plus at
 * most 10 digits for a 32-bit %u value plus the terminator (17 bytes) --
 * confirm header.volumeInfo.name is at least that large. */
2469 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2470 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2471 isp->volumeId, isp->volumeId);
2472 header.volumeInfo.inService = 0;
2473 header.volumeInfo.blessed = 0;
2474 /* The + 1000 is a hack in case there are any files out in venus caches */
2475 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2476 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2477 header.volumeInfo.needsCallback = 0;
2478 gettimeofday(&tp, 0);
2479 header.volumeInfo.creationDate = tp.tv_sec;
2481 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2482 sizeof(header.volumeInfo), 0);
2483 if (nBytes != sizeof(header.volumeInfo)) {
2486 ("Unable to write volume header file (%s) (errno = %d)\n",
2487 sp->description, errno);
2488 Abort("Unable to write entire volume header file (%s)\n",
2492 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2493 if (nBytes != sizeof(sp->stamp)) {
2496 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2497 sp->description, errno);
2499 ("Unable to write entire version stamp in volume header file (%s)\n",
2504 FDH_REALLYCLOSE(fdP);
/* For the volume info inode, publish the header to salvinfo->VolInfo and
 * log when the volume was last updated (or created, if never updated). */
2506 if (sp->inodeType == VI_VOLINFO) {
2507 salvinfo->VolInfo = header.volumeInfo;
2511 if (salvinfo->VolInfo.updateDate) {
2512 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2514 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2515 salvinfo->VolInfo.id,
2516 (Testing ? "it would have been " : ""), update);
2518 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2520 Log("%s (%u) not updated (created %s)\n",
2521 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
/*
 * SalvageVnodes: run SalvageIndex over the small and then the large vnode
 * index of thisIsp.
 *
 * rwIsp   - summary of the read-write volume; its non-special inodes (those
 *           after the first nSpecialInodes entries starting at rwIsp->index)
 *           are the ones handed to SalvageIndex
 * thisIsp - summary of the volume whose indexes are salvaged; RW is set when
 *           it is the same object as rwIsp
 * inodes  - inode array that rwIsp->index refers into
 * check   - passed through to SalvageIndex; when set, a -1 result from the
 *           small index pass is tested before the large pass runs
 *
 * Returns 0 only when both ismall and ilarge are 0, otherwise -1.
 */
2531 SalvageVnodes(struct SalvInfo *salvinfo,
2532 struct InodeSummary *rwIsp,
2533 struct InodeSummary *thisIsp,
2534 struct ViceInodeInfo *inodes, int check)
2536 int ilarge, ismall, ioffset, RW, nInodes;
2537 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2540 RW = (rwIsp == thisIsp);
2541 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2543 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2544 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2545 if (check && ismall == -1)
2548 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2549 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2550 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex: walk one vnode index file and reconcile every allocated
 * vnode with the inode list of the volume.
 *
 * ino        - inode of the vnode index file to scan
 * class      - vnode class of that index; selects the VnodeClassInfo entry
 * RW         - non-zero when the read-write volume itself is being salvaged
 * ip/nInodes - inode records that are consumed as the scan advances; the
 *              loops below compare ip->u.vnode.vnodeNumber with the current
 *              vnode number
 * volSummary - supplies header.parent for the index handle
 * check      - report-only mode; the assertion near the end requires that no
 *              vnode was changed while check is set
 *
 * The index size must be a whole number of vcp->diskSize records; the first
 * record is skipped and the remaining nVnodes records are read in order.
 * For each vnode whose type is not vNull the visible code:
 *   - clears vnodes that carry inode number 0 or a wrong magic number;
 *   - looks for the inode record with the same vnode number;
 *   - with a matching inode, repairs the uniquifier (compared through
 *     IUnique), the data version, the inode number and the length, and
 *     decrements ip->linkCount so the inode is kept;
 *   - without a matching inode, logs and clears the vnode;
 *   - writes a changed vnode back with IH_IWRITE unless Testing is set and
 *     flags salvinfo->VolumeChanged.
 *
 * NOTE(review): the return statements are not visible in this excerpt; the
 * caller treats 0 as success and tests for -1 in check mode -- confirm.
 */
2554 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2555 struct ViceInodeInfo *ip, int nInodes,
2556 struct VolumeSummary *volSummary, int check)
2558 char buf[SIZEOF_LARGEDISKVNODE];
2559 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2561 StreamHandle_t *file;
2562 struct VnodeClassInfo *vcp;
2564 afs_sfsize_t nVnodes;
2565 afs_fsize_t vnodeLength;
2567 afs_ino_str_t stmp1, stmp2;
2571 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2572 fdP = IH_OPEN(handle);
2573 osi_Assert(fdP != NULL);
2574 file = FDH_FDOPEN(fdP, "r+");
2575 osi_Assert(file != NULL);
2576 vcp = &VnodeClassInfo[class];
2577 size = OS_SIZE(fdP->fd_fd);
2578 osi_Assert(size != -1);
2579 nVnodes = (size / vcp->diskSize) - 1;
2581 osi_Assert((nVnodes + 1) * vcp->diskSize == size);
2582 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
2586 for (vnodeIndex = 0;
2587 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2588 nVnodes--, vnodeIndex++) {
2589 if (vnode->type != vNull) {
2590 int vnodeChanged = 0;
2591 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2592 if (VNDISK_GET_INO(vnode) == 0) {
2594 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2595 memset(vnode, 0, vcp->diskSize);
2599 if (vcp->magic != vnode->vnodeMagic) {
2600 /* bad magic #, probably partially created vnode */
2602 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2603 vnodeNumber, afs_printable_uint32_lu(vnode->vnodeMagic),
2604 afs_printable_uint32_lu(vcp->magic));
2605 memset(vnode, 0, vcp->diskSize);
2609 Log("Partially allocated vnode %d deleted.\n",
2611 memset(vnode, 0, vcp->diskSize);
2615 /* ****** Should do a bit more salvage here: e.g. make sure
2616 * vnode type matches what it should be given the index */
2617 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2618 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2619 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2620 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2627 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2628 /* The following doesn't work, because the version number
2629 * is not maintained correctly by the file server */
2630 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2631 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2633 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2639 /* For RW volume, look for vnode with matching inode number;
2640 * if no such match, take the first determined by our sort
2642 struct ViceInodeInfo *lip = ip;
2643 int lnInodes = nInodes;
2645 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2646 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2655 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2656 /* "Matching" inode */
2660 vu = vnode->uniquifier;
2661 iu = ip->u.vnode.vnodeUniquifier;
2662 vd = vnode->dataVersion;
2663 id = ip->u.vnode.inodeDataVersion;
2665 * Because of the possibility of the uniquifier overflows (> 4M)
2666 * we compare them modulo the low 22-bits; we shouldn't worry
2667 * about mismatching since they shouldn't to many old
2668 * uniquifiers of the same vnode...
2670 if (IUnique(vu) != IUnique(iu)) {
2672 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2675 vnode->uniquifier = iu;
2676 #ifdef AFS_3DISPARES
2677 vnode->dataVersion = (id >= vd ?
2680 1887437 ? vd : id) :
2683 1887437 ? id : vd));
2685 #if defined(AFS_SGI_EXMAG)
2686 vnode->dataVersion = (id >= vd ?
2689 15099494 ? vd : id) :
2692 15099494 ? id : vd));
2694 vnode->dataVersion = (id > vd ? id : vd);
2695 #endif /* AFS_SGI_EXMAG */
2696 #endif /* AFS_3DISPARES */
2699 /* don't bother checking for vd > id any more, since
2700 * partial file transfers always result in this state,
2701 * and you can't do much else anyway (you've already
2702 * found the best data you can) */
2703 #ifdef AFS_3DISPARES
2704 if (!vnodeIsDirectory(vnodeNumber)
2705 && ((vd < id && (id - vd) < 1887437)
2706 || ((vd > id && (vd - id) > 1887437)))) {
2708 #if defined(AFS_SGI_EXMAG)
2709 if (!vnodeIsDirectory(vnodeNumber)
2710 && ((vd < id && (id - vd) < 15099494)
2711 || ((vd > id && (vd - id) > 15099494)))) {
2713 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2714 #endif /* AFS_SGI_EXMAG */
2717 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2718 vnode->dataVersion = id;
2723 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2726 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2728 VNDISK_SET_INO(vnode, ip->inodeNumber);
2733 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2735 VNDISK_SET_INO(vnode, ip->inodeNumber);
2738 VNDISK_GET_LEN(vnodeLength, vnode);
2739 if (ip->byteCount != vnodeLength) {
2742 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2747 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2748 VNDISK_SET_LEN(vnode, ip->byteCount);
2752 ip->linkCount--; /* Keep the inode around */
2755 } else { /* no matching inode */
2757 if (VNDISK_GET_INO(vnode) != 0
2758 || vnode->type == vDirectory) {
2759 /* No matching inode--get rid of the vnode */
2761 if (VNDISK_GET_INO(vnode)) {
2763 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2767 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2772 if (VNDISK_GET_INO(vnode)) {
2774 time_t serverModifyTime = vnode->serverModifyTime;
2775 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2779 time_t serverModifyTime = vnode->serverModifyTime;
2780 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2783 memset(vnode, 0, vcp->diskSize);
2786 /* Should not reach here becuase we checked for
2787 * (inodeNumber == 0) above. And where we zero the vnode,
2788 * we also goto vnodeDone.
2792 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2796 } /* VNDISK_GET_INO(vnode) != 0 */
/* A check-only pass must never have modified a vnode; modified vnodes are
 * written back to the index only when Testing is off. */
2798 osi_Assert(!(vnodeChanged && check));
2799 if (vnodeChanged && !Testing) {
2800 osi_Assert(IH_IWRITE
2801 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2802 (char *)vnode, vcp->diskSize)
2804 salvinfo->VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber: look up the VnodeEssence record for vnodeNumber.
 *
 * The vnode class (vnodeIdToClass) selects the salvinfo->vnodeInfo entry and
 * the bit number (vnodeIdToBitNumber) is the index into its vnodes array.
 * Returns NULL when that index is not below vip->nVnodes, otherwise a
 * pointer to the array element.
 */
2815 struct VnodeEssence *
2816 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2819 struct VnodeInfo *vip;
2822 class = vnodeIdToClass(vnodeNumber);
2823 vip = &salvinfo->vnodeInfo[class];
2824 offset = vnodeIdToBitNumber(vnodeNumber);
2825 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite: move directory dir onto a freshly created inode before the
 * salvager changes its contents.
 *
 * The test on dir->copied and Testing comes first; presumably the function
 * returns at once when either is set (the return is not visible here).
 * Otherwise the directory vnode is read from the large vnode index, a new
 * inode is created with IH_CREATE while vnode.dataVersion is increased, the
 * old inode is copied into it with CopyInode, the vnode is rewritten with
 * the new inode number, and dir->dirHandle is redirected to the new inode.
 * The old inode is deliberately not released in this function.
 *
 * Each I/O step is checked with osi_Assert rather than reported through a
 * return value.
 */
2829 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2831 /* Copy the directory unconditionally if we are going to change it:
2832 * not just if was cloned.
2834 struct VnodeDiskObject vnode;
2835 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2836 Inode oldinode, newinode;
2839 if (dir->copied || Testing)
2841 DFlush(); /* Well justified paranoia... */
2844 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2845 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2847 osi_Assert(code == sizeof(vnode));
2848 oldinode = VNDISK_GET_INO(&vnode);
2849 /* Increment the version number by a whole lot to avoid problems with
2850 * clients that were promised new version numbers--but the file server
2851 * crashed before the versions were written to disk.
2854 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2855 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2857 osi_Assert(VALID_INO(newinode));
2858 osi_Assert(CopyInode(salvinfo->fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2860 VNDISK_SET_INO(&vnode, newinode);
2862 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2863 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2865 osi_Assert(code == sizeof(vnode));
2867 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2868 salvinfo->fileSysDevice, newinode,
2869 &salvinfo->VolumeChanged);
2870 /* Don't delete the original inode right away, because the directory is
2871 * still being scanned.
2877 * This function should either successfully create a new dir, or give up
2878 * and leave things the way they were. In particular, if it fails to write
2879 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage: rebuild directory dir into a newly created inode.
 *
 * Visible steps: read the directory vnode from the large vnode index, create
 * a new inode (increasing vnode.dataVersion), run DirSalvage from the old
 * directory handle into the new one, then verify the result with DirOK.
 * When DirSalvage or DirOK reports a problem the new inode is released with
 * IH_DEC and the old directory stays in place.  On success the vnode is
 * rewritten with the new inode number and length, the change is synced, the
 * old directory file is closed, the old inode is released, and
 * dir->dirHandle is switched to the new directory.
 *
 * The uniquifier for the parent entry is taken from the parent vnode essence
 * when CheckVnodeNumber finds one with a non-zero unique; otherwise it stays
 * at the initial value 1.
 */
2883 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2885 struct VnodeDiskObject vnode;
2886 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2887 Inode oldinode, newinode;
2892 afs_int32 parentUnique = 1;
2893 struct VnodeEssence *vnodeEssence;
2898 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2900 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2901 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2903 osi_Assert(lcode == sizeof(vnode));
2904 oldinode = VNDISK_GET_INO(&vnode);
2905 /* Increment the version number by a whole lot to avoid problems with
2906 * clients that were promised new version numbers--but the file server
2907 * crashed before the versions were written to disk.
2910 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2911 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2913 osi_Assert(VALID_INO(newinode));
2914 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2915 &salvinfo->VolumeChanged);
2917 /* Assign . and .. vnode numbers from dir and vnode.parent.
2918 * The uniquifier for . is in the vnode.
2919 * The uniquifier for .. might be set to a bogus value of 1 and
2920 * the salvager will later clean it up.
2922 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2923 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2926 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2928 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2933 /* didn't really build the new directory properly, let's just give up. */
2934 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2935 Log("Directory salvage returned code %d, continuing.\n", code);
2937 Log("also failed to decrement link count on new inode");
2941 Log("Checking the results of the directory salvage...\n");
2942 if (!DirOK(&newdir)) {
2943 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2944 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2945 osi_Assert(code == 0);
2949 VNDISK_SET_INO(&vnode, newinode);
2950 length = Length(&newdir);
2951 VNDISK_SET_LEN(&vnode, length);
2953 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2954 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2956 osi_Assert(lcode == sizeof(vnode));
2959 nt_sync(salvinfo->fileSysDevice);
2961 sync(); /* this is slow, but hopefully rarely called. We don't have
2962 * an open FD on the file itself to fsync.
2966 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
2968 /* make sure old directory file is really closed */
2969 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2970 FDH_REALLYCLOSE(fdP);
2972 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2973 osi_Assert(code == 0);
2974 dir->dirHandle = newdir;
2978 * arguments for JudgeEntry.
/* Bundle handed to JudgeEntry as its opaque first argument (arock);
 * SalvageDir fills one in and passes its address to EnumerateDir. */
2980 struct judgeEntry_params {
2981 struct DirSummary *dir; /**< directory we're examining entries in */
2982 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/*
 * JudgeEntry: examine one directory entry and repair or delete it.
 *
 * arock       - pointer to a struct judgeEntry_params (directory summary and
 *               salvage state)
 * name        - name of the entry being examined
 * vnodeNumber - vnode number stored in the entry
 * unique      - uniquifier stored in the entry (its declaration is on a line
 *               not visible in this excerpt)
 *
 * Checks visible in this excerpt, in order:
 *   - an entry whose vnode number has no VnodeEssence is deleted;
 *   - on non-NAMEI builds an entry whose vnode has inode number 0 is deleted;
 *   - an entry for an even-numbered vnode whose count, unique and modeBits
 *     are all zero is reported as invalid and deleted;
 *   - a uniquifier mismatch is repaired by recreating the entry with the
 *     unique of the vnode, or the entry is deleted when the vnode unique is
 *     zero or the directory is orphaned;
 *   - wrong dot and dot-dot entries are recreated with the expected fid;
 *   - entries whose name starts with .__afs are deleted and their vnode is
 *     marked todelete;
 *   - suid/sgid files, root-owned files and mount points are reported, and a
 *     malformed mount point is converted to a symbolic link by setting mode
 *     bits 0111;
 *   - the entry name is remembered for large (directory class) vnodes;
 *   - a vnode whose parent is another directory is either claimed by this
 *     directory or the entry is deleted.
 * An entry that survives marks its vnode claimed and decrements the expected
 * link count of the vnode.
 *
 * Every modification goes through CopyOnWrite first and is checked with
 * osi_Assert.  The one visible return statement returns 0.
 */
2986 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
2989 struct judgeEntry_params *params = arock;
2990 struct DirSummary *dir = params->dir;
2991 struct SalvInfo *salvinfo = params->salvinfo;
2992 struct VnodeEssence *vnodeEssence;
2993 afs_int32 dirOrphaned, todelete;
2995 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
2997 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
2998 if (vnodeEssence == NULL) {
3000 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
3003 CopyOnWrite(salvinfo, dir);
3004 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3009 #ifndef AFS_NAMEI_ENV
3010 /* On AIX machines, don't allow entries to point to inode 0. That is a special
3011 * mount inode for the partition. If this inode were deleted, it would crash
3014 if (vnodeEssence->InodeNumber == 0) {
3015 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
3017 CopyOnWrite(salvinfo, dir);
3018 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3025 if (!(vnodeNumber & 1) && !Showmode
3026 && !(vnodeEssence->count || vnodeEssence->unique
3027 || vnodeEssence->modeBits)) {
3028 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
3029 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3030 vnodeNumber, unique,
3031 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3035 CopyOnWrite(salvinfo, dir);
3036 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3042 /* Check if the Uniquifiers match. If not, change the directory entry
3043 * so its unique matches the vnode unique. Delete if the unique is zero
3044 * or if the directory is orphaned.
3046 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3047 if (!vnodeEssence->unique
3048 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3049 /* This is an orphaned directory. Don't delete the . or ..
3050 * entry. Otherwise, it will get created in the next
3051 * salvage and deleted again here. So Just skip it.
3056 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3059 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3063 fid.Vnode = vnodeNumber;
3064 fid.Unique = vnodeEssence->unique;
3065 CopyOnWrite(salvinfo, dir);
3066 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3068 osi_Assert(Create(&dir->dirHandle, name, &fid) == 0);
3071 return 0; /* no need to continue */
3074 if (strcmp(name, ".") == 0) {
3075 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3078 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3080 CopyOnWrite(salvinfo, dir);
3081 osi_Assert(Delete(&dir->dirHandle, ".") == 0);
3082 fid.Vnode = dir->vnodeNumber;
3083 fid.Unique = dir->unique;
3084 osi_Assert(Create(&dir->dirHandle, ".", &fid) == 0);
3087 vnodeNumber = fid.Vnode; /* Get the new Essence */
3088 unique = fid.Unique;
3089 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3092 } else if (strcmp(name, "..") == 0) {
3095 struct VnodeEssence *dotdot;
3096 pa.Vnode = dir->parent;
3097 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3098 osi_Assert(dotdot != NULL); /* XXX Should not be assert */
3099 pa.Unique = dotdot->unique;
3101 pa.Vnode = dir->vnodeNumber;
3102 pa.Unique = dir->unique;
3104 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3106 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3108 CopyOnWrite(salvinfo, dir);
3109 osi_Assert(Delete(&dir->dirHandle, "..") == 0);
3110 osi_Assert(Create(&dir->dirHandle, "..", &pa) == 0);
3113 vnodeNumber = pa.Vnode; /* Get the new Essence */
3115 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3117 dir->haveDotDot = 1;
3118 } else if (strncmp(name, ".__afs", 6) == 0) {
3120 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3123 CopyOnWrite(salvinfo, dir);
3124 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3126 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3127 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3130 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3131 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3132 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3133 && !(vnodeEssence->modeBits & 0111)) {
3134 afs_sfsize_t nBytes;
3140 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3141 vnodeEssence->InodeNumber);
3144 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3148 size = FDH_SIZE(fdP);
3150 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3151 FDH_REALLYCLOSE(fdP);
3158 nBytes = FDH_PREAD(fdP, buf, size, 0);
3159 if (nBytes == size) {
/* NOTE(review): the test below needs buf to be NUL-terminated and
 * non-empty; with an empty string strlen(buf)-1 wraps around and indexes
 * out of bounds.  Presumably the size check and a terminator store on lines
 * not visible here guarantee both -- confirm. */
3161 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3162 Log("Volume %u (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3163 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3164 Testing ? "would convert" : "converted");
3165 vnodeEssence->modeBits |= 0111;
3166 vnodeEssence->changed = 1;
3167 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3168 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3169 dir->name ? dir->name : "??", name, buf);
/* NOTE(review): the log text below misspells could as cound; it is left
 * untouched here because it is runtime output. */
3171 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3172 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3174 FDH_REALLYCLOSE(fdP);
3177 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3178 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3179 if (vnodeIdToClass(vnodeNumber) == vLarge
3180 && vnodeEssence->name == NULL) {
3182 if ((n = (char *)malloc(strlen(name) + 1)))
3184 vnodeEssence->name = n;
3187 /* The directory entry points to the vnode. Check to see if the
3188 * vnode points back to the directory. If not, then let the
3189 * directory claim it (else it might end up orphaned). Vnodes
3190 * already claimed by another directory are deleted from this
3191 * directory: hardlinks to the same vnode are not allowed
3192 * from different directories.
3194 if (vnodeEssence->parent != dir->vnodeNumber) {
3195 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3196 /* Vnode does not point back to this directory.
3197 * Orphaned dirs cannot claim a file (it may belong to
3198 * another non-orphaned dir).
3201 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3203 vnodeEssence->parent = dir->vnodeNumber;
3204 vnodeEssence->changed = 1;
3206 /* Vnode was claimed by another directory */
3209 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3210 } else if (vnodeNumber == 1) {
3211 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3213 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3217 CopyOnWrite(salvinfo, dir);
3218 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3223 /* This directory claims the vnode */
3224 vnodeEssence->claimed = 1;
3226 vnodeEssence->count--;
/*
 * DistilVnodeEssence: read one vnode index file and build the in-memory
 * VnodeEssence table for that vnode class.
 *
 * rwVId - volume id used for the index handle
 * class - vnode class; selects salvinfo->vnodeInfo[class] and the matching
 *         VnodeClassInfo entry
 * ino   - inode of the vnode index file
 * maxu  - in/out: raised to the largest uniquifier seen in the index
 *
 * The number of vnodes is the file size divided by vcp->diskSize, minus the
 * first record, which is skipped.  When that number is positive a zeroed
 * vnodes array is allocated, plus an inodes array for the large class.
 * For every vnode whose type is not vNull the link count, block count,
 * parent, uniquifier, mode bits, inode number, type, author, owner and group
 * are copied into the table, and the per-class allocated-vnode and block
 * counters are updated.  A directory vnode found outside the large class is
 * uncounted, cleared and written back; one in the large class has its inode
 * number recorded in vip->inodes.
 */
3231 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3232 VnodeClass class, Inode ino, Unique * maxu)
3234 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3235 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3236 char buf[SIZEOF_LARGEDISKVNODE];
3237 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3239 StreamHandle_t *file;
3244 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3245 fdP = IH_OPEN(vip->handle);
3246 osi_Assert(fdP != NULL);
3247 file = FDH_FDOPEN(fdP, "r+");
3248 osi_Assert(file != NULL);
3249 size = OS_SIZE(fdP->fd_fd);
3250 osi_Assert(size != -1);
3251 vip->nVnodes = (size / vcp->diskSize) - 1;
3252 if (vip->nVnodes > 0) {
3253 osi_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3254 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
3255 osi_Assert((vip->vnodes = (struct VnodeEssence *)
3256 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3257 if (class == vLarge) {
3258 osi_Assert((vip->inodes = (Inode *)
3259 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3268 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3269 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3270 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3271 nVnodes--, vnodeIndex++) {
3272 if (vnode->type != vNull) {
3273 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3274 afs_fsize_t vnodeLength;
3275 vip->nAllocatedVnodes++;
3276 vep->count = vnode->linkCount;
3277 VNDISK_GET_LEN(vnodeLength, vnode);
3278 vep->blockCount = nBlocks(vnodeLength);
3279 vip->volumeBlockCount += vep->blockCount;
3280 vep->parent = vnode->parent;
3281 vep->unique = vnode->uniquifier;
3282 if (*maxu < vnode->uniquifier)
3283 *maxu = vnode->uniquifier;
3284 vep->modeBits = vnode->modeBits;
3285 vep->InodeNumber = VNDISK_GET_INO(vnode);
3286 vep->type = vnode->type;
3287 vep->author = vnode->author;
3288 vep->owner = vnode->owner;
3289 vep->group = vnode->group;
3290 if (vnode->type == vDirectory) {
3291 if (class != vLarge) {
3292 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3293 vip->nAllocatedVnodes--;
/* NOTE(review): vnode is a pointer (declared above as
 * struct VnodeDiskObject *), so sizeof(vnode) is the size of a pointer:
 * the memset clears only that many bytes of the vnode, and the IH_IWRITE
 * below writes the bytes of the pointer variable itself ((char *)&vnode)
 * instead of the cleared vnode.  The intent looks like clearing and
 * writing vcp->diskSize bytes from vnode -- confirm and fix.  The result
 * of IH_IWRITE is also not checked here. */
3294 memset(vnode, 0, sizeof(vnode));
3295 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3296 vnodeIndexOffset(vcp, vnodeNumber),
3297 (char *)&vnode, sizeof(vnode));
3298 salvinfo->VolumeChanged = 1;
3300 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName: build the path of a directory vnode by recursion.
 *
 * When vp has a parent and a recorded name, the parent is looked up with
 * CheckVnodeNumber and its path is produced first by a recursive call; on
 * success OS_DIRSEP and vp->name are appended to path.  The handling of the
 * remaining cases and the return values are not visible in this excerpt.
 *
 * NOTE(review): the strcat calls are unbounded; the caller visible in this
 * file passes a static buffer of MAXPATHLEN bytes -- confirm that long names
 * or deep nesting cannot overflow it.
 */
3309 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3312 struct VnodeEssence *parentvp;
3318 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3319 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3320 strcat(path, OS_DIRSEP);
3321 strcat(path, vp->name);
3327 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3328 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
/*
 * Returns 1 when the vnode is orphaned and 0 when it is not.
 * Per the comments on the return statements, vnode zero is always reported
 * as orphaned and the root directory vnode never is.  A vnode with no
 * VnodeEssence, or whose claimed flag is clear, is orphaned; otherwise the
 * answer is that of its parent, found by recursing on vep->parent.
 */
3331 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3333 struct VnodeEssence *vep;
3336 return (1); /* Vnode zero does not exist */
3338 return (0); /* The root dir vnode is always claimed */
3339 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3340 if (!vep || !vep->claimed)
3341 return (1); /* Vnode is not claimed - it is orphaned */
3343 return (IsVnodeOrphaned(salvinfo, vep->parent));
/*
 * SalvageDir: salvage the directory stored at slot i of the large vnode
 * table, after first salvaging its parent directory.
 *
 * name         - passed through unchanged to the recursive call
 * rwVid        - volume id used when releasing the old directory inode
 * dirVnodeInfo - large-vnode table (vnodes and inodes arrays)
 * alinkH       - link handle stored in the directory summary
 * i            - index of the directory in dirVnodeInfo
 * rootdir      - receives a copy of the directory summary when the
 *                directory is vnode 1 with unique 1
 * rootdirfound - passed through to the recursive call; any other use is not
 *                visible in this excerpt
 *
 * A slot that is already marked salvaged, or whose inode number is 0, is
 * skipped.  For vnode 1 a non-zero parent is reset to 0; for any other
 * vnode a parent that has not been salvaged yet is salvaged first.  The
 * directory is then checked with DirOK (treated as bad without checking
 * when RebuildDirs is set and Testing is not), rebuilt with CopyAndSalvage
 * when bad, and every entry is judged through EnumerateDir and JudgeEntry.
 * If the directory was copied during judging, the old inode is released and
 * the inode table is updated.
 *
 * The working state (dir, dirHandle, path) is function-static, so the
 * function is not reentrant; the recursive call happens before that state
 * is filled in for the current directory.
 */
3347 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3348 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3349 struct DirSummary *rootdir, int *rootdirfound)
3351 static struct DirSummary dir;
3352 static struct DirHandle dirHandle;
3353 struct VnodeEssence *parent;
3354 static char path[MAXPATHLEN];
3357 if (dirVnodeInfo->vnodes[i].salvaged)
3358 return; /* already salvaged */
3361 dirVnodeInfo->vnodes[i].salvaged = 1;
3363 if (dirVnodeInfo->inodes[i] == 0)
3364 return; /* Not allocated to a directory */
3366 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3367 if (dirVnodeInfo->vnodes[i].parent) {
3368 Log("Bad parent, vnode 1; %s...\n",
3369 (Testing ? "skipping" : "salvaging"));
3370 dirVnodeInfo->vnodes[i].parent = 0;
3371 dirVnodeInfo->vnodes[i].changed = 1;
3374 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3375 if (parent && parent->salvaged == 0)
3376 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3377 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3378 rootdir, rootdirfound);
3381 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3382 dir.unique = dirVnodeInfo->vnodes[i].unique;
3385 dir.parent = dirVnodeInfo->vnodes[i].parent;
3386 dir.haveDot = dir.haveDotDot = 0;
3387 dir.ds_linkH = alinkH;
3388 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3389 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
3391 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3394 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3395 (Testing ? "skipping" : "salvaging"));
3398 CopyAndSalvage(salvinfo, &dir);
3400 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3403 dirHandle = dir.dirHandle;
3406 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3407 &dirVnodeInfo->vnodes[i], path);
3410 /* If enumeration failed for random reasons, we will probably delete
3411 * too much stuff, so we guard against this instead.
3413 struct judgeEntry_params judge_params;
3414 judge_params.salvinfo = salvinfo;
3415 judge_params.dir = &dir;
3417 osi_Assert(EnumerateDir(&dirHandle, JudgeEntry, &judge_params) == 0);
3420 /* Delete the old directory if it was copied in order to salvage.
3421 * CopyOnWrite has written the new inode # to the disk, but we still
3422 * have the old one in our local structure here. Thus, we idec the
3426 if (dir.copied && !Testing) {
3427 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3428 osi_Assert(code == 0);
3429 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3432 /* Remember rootdir DirSummary _after_ it has been judged */
3433 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3434 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3442 * Get a new FID that can be used to create a new file.
3444 * @param[in] volHeader vol header for the volume
3445 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3446 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3447 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3448 * updated to the new max unique if we create a new
/*
 * Implementation notes: the vnode table of the requested class is scanned
 * for the first slot whose type is vNull; when none is found the table is
 * grown by one element and the new slot is used.  afid->Vnode is derived
 * from the slot index with bitNumberToVnodeNumber.  The uniquifier comes
 * from the volume header (which is post-incremented) when the header value
 * is not below *maxunique + 1; otherwise *maxunique + 1 + 2000 is used.
 */
3452 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3453 VnodeClass class, AFSFid *afid, Unique *maxunique)
3456 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3457 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
3461 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3462 /* no free vnodes; make a new one */
3463 salvinfo->vnodeInfo[class].nVnodes++;
/* NOTE(review): the realloc result overwrites the only pointer to the
 * array, so a failed realloc would lose the old array; unless a check on a
 * line not visible here follows, the store to vnodes[i] below would then
 * dereference NULL.  realloc also does not zero the added element, and only
 * its type field is set in the visible code -- confirm. */
3464 salvinfo->vnodeInfo[class].vnodes =
3465 realloc(salvinfo->vnodeInfo[class].vnodes,
3466 sizeof(struct VnodeEssence) * (i+1));
3468 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
3471 afid->Vnode = bitNumberToVnodeNumber(i, class);
3473 if (volHeader->uniquifier < (*maxunique + 1)) {
3474 /* header uniq is bad; it will get bumped by 2000 later */
3475 afid->Unique = *maxunique + 1 + 2000;
3478 /* header uniq seems okay; just use that */
3479 afid->Unique = *maxunique = volHeader->uniquifier++;
3484 * Create a vnode for a README file explaining not to use a recreated-root vol.
3486 * @param[in] volHeader vol header for the volume
3487 * @param[in] alinkH ihandle for i/o for the volume
3488 * @param[in] vid volume id
3489 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3490 * updated to the new max unique if we create a new
3492 * @param[out] afid FID for the new readme vnode
3493 * @param[out] ainode the inode for the new readme file
3495 * @return operation status
/* Create the README file that is placed in a recreated root directory.
 * Visible steps: pick a small-vnode FID with GetNewFID, create an inode for
 * it and write the note text into it, build a vFile disk vnode describing
 * that inode and write it into the small vnode index, then mirror the disk
 * vnode's fields into the in-memory VnodeEssence and hand the inode number
 * back through *ainode.  The lines after that belong to a recovery path that
 * IH_DECs the readme inode.
 * NOTE(review): several statements of this function are not visible in this
 * listing; the return values and the exact jumps into the recovery path
 * could not be confirmed from here. */
3500 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3501 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3505 struct VnodeDiskObject *rvnode = NULL;
3507 IHandle_t *readmeH = NULL;
3508 struct VnodeEssence *vep;
3510 time_t now = time(NULL);
3512 /* Try to make the note brief, but informative. Only administrators should
3513 * be able to read this file at first, so we can hopefully assume they
3514 * know what AFS is, what a volume is, etc. */
3516 "This volume has been salvaged, but has lost its original root directory.\n"
3517 "The root directory that exists now has been recreated from orphan files\n"
3518 "from the rest of the volume. This recreated root directory may interfere\n"
3519 "with old cached data on clients, and there is no way the salvager can\n"
3520 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3521 "use this volume, but only copy the salvaged data to a new volume.\n"
3522 "Continuing to use this volume as it exists now may cause some clients to\n"
3523 "behave oddly when accessing this volume.\n"
3524 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3525 /* ^ the person reading this probably just lost some data, so they could
3526 * use some cheering up. */
3528 /* -1 for the trailing NUL */
3529 length = sizeof(readme) - 1;
3531 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
/* vep points into the small-vnode table at the slot GetNewFID selected */
3533 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3535 /* create the inode and write the contents */
3536 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3537 salvinfo->fileSysPath, 0, vid,
3538 afid->Vnode, afid->Unique, 1);
3539 if (!VALID_INO(readmeinode)) {
3540 Log("CreateReadme: readme IH_CREATE failed\n");
3544 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3545 bytes = IH_IWRITE(readmeH, 0, readme, length);
3546 IH_RELEASE(readmeH);
3548 if (bytes != length) {
/* NOTE(review): the expected count logged here is sizeof(readme), which is
 * one more than the `length` the write was checked against. */
3549 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3550 (int)sizeof(readme));
3554 /* create the vnode and write it out */
3555 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
/* NOTE(review): this message names CreateRootDir although it is emitted
 * from CreateReadme. */
3557 Log("CreateRootDir: error alloc'ing memory\n");
3561 rvnode->type = vFile;
3563 rvnode->modeBits = 0777;
3564 rvnode->linkCount = 1;
3565 VNDISK_SET_LEN(rvnode, length);
3566 rvnode->uniquifier = afid->Unique;
3567 rvnode->dataVersion = 1;
3568 VNDISK_SET_INO(rvnode, readmeinode);
3569 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3574 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
3576 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3577 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3578 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3580 if (bytes != SIZEOF_SMALLDISKVNODE) {
3581 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3582 (int)SIZEOF_SMALLDISKVNODE);
3586 /* update VnodeEssence for new readme vnode */
3587 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
3589 vep->blockCount = nBlocks(length);
3590 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3591 vep->parent = rvnode->parent;
3592 vep->unique = rvnode->uniquifier;
3593 vep->modeBits = rvnode->modeBits;
3594 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3595 vep->type = rvnode->type;
3596 vep->author = rvnode->author;
3597 vep->owner = rvnode->owner;
3598 vep->group = rvnode->group;
3608 *ainode = readmeinode;
3613 if (IH_DEC(alinkH, readmeinode, vid)) {
3614 Log("CreateReadme (recovery): IH_DEC failed\n");
3626 * create a root dir for a volume that lacks one.
3628 * @param[in] volHeader vol header for the volume
3629 * @param[in] alinkH ihandle for disk access for this volume group
3630 * @param[in] vid volume id we're dealing with
3631 * @param[out] rootdir populated with info about the new root dir
3632 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3633 * updated to the new max unique if we create a new
3636 * @return operation status
3641 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3642 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
3646 int decroot = 0, decreadme = 0;
3647 AFSFid did, readmeid;
3650 struct VnodeDiskObject *rootvnode = NULL;
3651 struct acl_accessList *ACL;
3654 struct VnodeEssence *vep;
3656 time_t now = time(NULL);
3658 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3659 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3663 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3664 /* We don't have any large vnodes in the volume; allocate room
3665 * for one so we can recreate the root dir */
3666 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3667 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3668 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3670 osi_Assert(salvinfo->vnodeInfo[vLarge].vnodes);
3671 osi_Assert(salvinfo->vnodeInfo[vLarge].inodes);
3674 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3675 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3676 if (vep->type != vNull) {
3677 Log("Not creating new root dir; existing vnode 1 is non-null\n");
3681 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3682 &readmeinode) != 0) {
3687 /* set the DV to a very high number, so it is unlikely that we collide
3688 * with a cached DV */
3691 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3693 if (!VALID_INO(rootinode)) {
3694 Log("CreateRootDir: IH_CREATE failed\n");
3699 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3700 rootinode, &salvinfo->VolumeChanged);
3704 if (MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3705 Log("CreateRootDir: MakeDir failed\n");
3708 if (Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3709 Log("CreateRootDir: Create failed\n");
3713 length = Length(&rootdir->dirHandle);
3714 DZap((void *)&rootdir->dirHandle);
3716 /* create the new root dir vnode */
3717 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
3719 Log("CreateRootDir: malloc failed\n");
3723 /* only give 'rl' permissions to 'system:administrators'. We do this to
3724 * try to catch the attention of an administrator, that they should not
3725 * be writing to this directory or continue to use it. */
3726 ACL = VVnodeDiskACL(rootvnode);
3727 ACL->size = sizeof(struct acl_accessList);
3728 ACL->version = ACL_ACLVERSION;
3732 ACL->entries[0].id = -204; /* system:administrators */
3733 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
3735 rootvnode->type = vDirectory;
3736 rootvnode->cloned = 0;
3737 rootvnode->modeBits = 0777;
3738 rootvnode->linkCount = 2;
3739 VNDISK_SET_LEN(rootvnode, length);
3740 rootvnode->uniquifier = 1;
3741 rootvnode->dataVersion = dv;
3742 VNDISK_SET_INO(rootvnode, rootinode);
3743 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3744 rootvnode->author = 0;
3745 rootvnode->owner = 0;
3746 rootvnode->parent = 0;
3747 rootvnode->group = 0;
3748 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3750 /* write it out to disk */
3751 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3752 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3753 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3755 if (bytes != SIZEOF_LARGEDISKVNODE) {
3756 /* just cast to int and don't worry about printing real 64-bit ints;
3757 * a large disk vnode isn't anywhere near the 32-bit limit */
3758 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3759 (int)SIZEOF_LARGEDISKVNODE);
3763 /* update VnodeEssence for the new root vnode */
3764 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3766 vep->blockCount = nBlocks(length);
3767 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3768 vep->parent = rootvnode->parent;
3769 vep->unique = rootvnode->uniquifier;
3770 vep->modeBits = rootvnode->modeBits;
3771 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3772 vep->type = rootvnode->type;
3773 vep->author = rootvnode->author;
3774 vep->owner = rootvnode->owner;
3775 vep->group = rootvnode->group;
3785 /* update DirSummary for the new root vnode */
3786 rootdir->vnodeNumber = 1;
3787 rootdir->unique = 1;
3788 rootdir->haveDot = 1;
3789 rootdir->haveDotDot = 1;
3790 rootdir->rwVid = vid;
3791 rootdir->copied = 0;
3792 rootdir->parent = 0;
3793 rootdir->name = strdup(".");
3794 rootdir->vname = volHeader->name;
3795 rootdir->ds_linkH = alinkH;
3802 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3803 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3805 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3806 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3816 * salvage a volume group.
3818 * @param[in] salvinfo information for the current salvage job
3819 * @param[in] rwIsp inode summary for rw volume
3820 * @param[in] alinkH link table inode handle
3822 * @return operation status
3826 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3828 /* This routine, for now, will only be called for read-write volumes */
3830 int BlocksInVolume = 0, FilesInVolume = 0;
3832 struct DirSummary rootdir, oldrootdir;
3833 struct VnodeInfo *dirVnodeInfo;
3834 struct VnodeDiskObject vnode;
3835 VolumeDiskData volHeader;
3837 int orphaned, rootdirfound = 0;
3838 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3839 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3840 struct VnodeEssence *vep;
3843 afs_sfsize_t nBytes;
3845 VnodeId LFVnode, ThisVnode;
3846 Unique LFUnique, ThisUnique;
3850 vid = rwIsp->volSummary->header.id;
3851 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3852 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3853 osi_Assert(nBytes == sizeof(volHeader));
3854 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3855 osi_Assert(volHeader.destroyMe != DESTROY_ME);
3856 /* (should not have gotten this far with DESTROY_ME flag still set!) */
3858 DistilVnodeEssence(salvinfo, vid, vLarge,
3859 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
3860 DistilVnodeEssence(salvinfo, vid, vSmall,
3861 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
3863 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
3864 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3865 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
3866 &rootdir, &rootdirfound);
3869 nt_sync(salvinfo->fileSysDevice);
3871 sync(); /* This used to be done lower level, for every dir */
3878 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
3880 Log("Cannot find root directory for volume %lu; attempting to create "
3881 "a new one\n", afs_printable_uint32_lu(vid));
3883 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
3888 salvinfo->VolumeChanged = 1;
3892 /* Parse each vnode looking for orphaned vnodes and
3893 * connect them to the tree as orphaned (if requested).
3895 oldrootdir = rootdir;
3896 for (class = 0; class < nVNODECLASSES; class++) {
3897 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
3898 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
3899 ThisVnode = bitNumberToVnodeNumber(v, class);
3900 ThisUnique = vep->unique;
3902 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3903 continue; /* Ignore unused, claimed, and root vnodes */
3905 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3906 * entry in this vnode had incremented the parent link count (In
3907 * JudgeEntry()). We need to go to the parent and decrement that
3908 * link count. But if the parent's unique is zero, then the parent
3909 * link count was not incremented in JudgeEntry().
3911 if (class == vLarge) { /* directory vnode */
3912 pv = vnodeIdToBitNumber(vep->parent);
3913 if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
3914 if (vep->parent == 1 && newrootdir) {
3915 /* this vnode's parent was the volume root, and
3916 * we just created the volume root. So, the parent
3917 * dir didn't exist during JudgeEntry, so the link
3918 * count was not inc'd there, so don't dec it here.
3924 salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
3930 continue; /* If no rootdir, can't attach orphaned files */
3932 /* Here we attach orphaned files and directories into the
3933 * root directory, LVVnode, making sure link counts stay correct.
3935 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3936 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3937 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3939 /* Update this orphaned vnode's info. Its parent info and
3940 * link count (do for orphaned directories and files).
3942 vep->parent = LFVnode; /* Parent is the root dir */
3943 vep->unique = LFUnique;
3946 vep->count--; /* Inc link count (root dir will pt to it) */
3948 /* If this orphaned vnode is a directory, change '..'.
3949 * The name of the orphaned dir/file is unknown, so we
3950 * build a unique name. No need to CopyOnWrite the directory
3951 * since it is not connected to tree in BK or RO volume and
3952 * won't be visible there.
3954 if (class == vLarge) {
3958 /* Remove and recreate the ".." entry in this orphaned directory */
3959 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
3960 salvinfo->vnodeInfo[class].inodes[v],
3961 &salvinfo->VolumeChanged);
3963 pa.Unique = LFUnique;
3964 osi_Assert(Delete(&dh, "..") == 0);
3965 osi_Assert(Create(&dh, "..", &pa) == 0);
3967 /* The original parent's link count was decremented above.
3968 * Here we increment the new parent's link count.
3970 pv = vnodeIdToBitNumber(LFVnode);
3971 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
3975 /* Go to the root dir and add this entry. The link count of the
3976 * root dir was incremented when ".." was created. Try 10 times.
3978 for (j = 0; j < 10; j++) {
3979 pa.Vnode = ThisVnode;
3980 pa.Unique = ThisUnique;
3982 (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
3984 vLarge) ? "__ORPHANDIR__" :
3985 "__ORPHANFILE__"), ThisVnode,
3988 CopyOnWrite(salvinfo, &rootdir);
3989 code = Create(&rootdir.dirHandle, npath, &pa);
3993 ThisUnique += 50; /* Try creating a different file */
3995 osi_Assert(code == 0);
3996 Log("Attaching orphaned %s to volume's root dir as %s\n",
3997 ((class == vLarge) ? "directory" : "file"), npath);
3999 } /* for each vnode in the class */
4000 } /* for each class of vnode */
4002 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
4004 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
4006 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
4008 osi_Assert(code == 0);
4009 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
4012 DFlush(); /* Flush the changes */
4013 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
4014 Log("Cannot attach orphaned files and directories: Root directory not found\n");
4015 orphans = ORPH_IGNORE;
4018 /* Write out all changed vnodes. Orphaned files and directories
4019 * will get removed here also (if requested).
4021 for (class = 0; class < nVNODECLASSES; class++) {
4022 afs_sfsize_t nVnodes = salvinfo->vnodeInfo[class].nVnodes;
4023 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
4024 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
4025 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
4026 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
4027 for (i = 0; i < nVnodes; i++) {
4028 struct VnodeEssence *vnp = &vnodes[i];
4029 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
4031 /* If the vnode is good but is unclaimed (not listed in
4032 * any directory entries), then it is orphaned.
4035 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
4036 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
4040 if (vnp->changed || vnp->count) {
4043 IH_IREAD(salvinfo->vnodeInfo[class].handle,
4044 vnodeIndexOffset(vcp, vnodeNumber),
4045 (char *)&vnode, sizeof(vnode));
4046 osi_Assert(nBytes == sizeof(vnode));
4048 vnode.parent = vnp->parent;
4049 oldCount = vnode.linkCount;
4050 vnode.linkCount = vnode.linkCount - vnp->count;
4053 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4055 if (!vnp->todelete) {
4056 /* Orphans should have already been attached (if requested) */
4057 osi_Assert(orphans != ORPH_ATTACH);
4058 oblocks += vnp->blockCount;
4061 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4063 BlocksInVolume -= vnp->blockCount;
4065 if (VNDISK_GET_INO(&vnode)) {
4067 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4068 osi_Assert(code == 0);
4070 memset(&vnode, 0, sizeof(vnode));
4072 } else if (vnp->count) {
4074 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4077 vnode.modeBits = vnp->modeBits;
4080 vnode.dataVersion++;
4083 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4084 vnodeIndexOffset(vcp, vnodeNumber),
4085 (char *)&vnode, sizeof(vnode));
4086 osi_Assert(nBytes == sizeof(vnode));
4088 salvinfo->VolumeChanged = 1;
4092 if (!Showmode && ofiles) {
4093 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4095 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
4099 for (class = 0; class < nVNODECLASSES; class++) {
4100 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4101 for (i = 0; i < vip->nVnodes; i++)
4102 if (vip->vnodes[i].name)
4103 free(vip->vnodes[i].name);
4110 /* Set correct resource utilization statistics */
4111 volHeader.filecount = FilesInVolume;
4112 volHeader.diskused = BlocksInVolume;
4114 /* Make sure the uniquifer is big enough: maxunique is the real maxUniquifier */
4115 if (volHeader.uniquifier < (maxunique + 1)) {
4117 Log("Volume uniquifier is too low; fixed\n");
4118 /* Plus 2,000 in case there are workstations out there with
4119 * cached vnodes that have since been deleted
4121 volHeader.uniquifier = (maxunique + 1 + 2000);
4125 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4126 "Only use this salvaged volume to copy data to another volume; "
4127 "do not continue to use this volume (%lu) as-is.\n",
4128 afs_printable_uint32_lu(vid));
4131 #ifdef FSSYNC_BUILD_CLIENT
4132 if (!Testing && salvinfo->VolumeChanged && salvinfo->useFSYNC) {
4133 afs_int32 fsync_code;
4135 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4137 Log("Error trying to tell the fileserver to break callbacks for "
4138 "changed volume %lu; error code %ld\n",
4139 afs_printable_uint32_lu(vid),
4140 afs_printable_int32_ld(fsync_code));
4142 salvinfo->VolumeChanged = 0;
4145 #endif /* FSSYNC_BUILD_CLIENT */
4147 /* Turn off the inUse bit; the volume's been salvaged! */
4148 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4149 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4150 volHeader.inService = 1; /* allow service again */
4151 volHeader.needsCallback = (salvinfo->VolumeChanged != 0);
4152 volHeader.dontSalvage = DONT_SALVAGE;
4153 salvinfo->VolumeChanged = 0;
4155 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4156 osi_Assert(nBytes == sizeof(volHeader));
4159 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
4160 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
4161 FilesInVolume, BlocksInVolume);
4164 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4165 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
/* Mark a volume header as cleanly salvaged.  The header is read through
 * summary->volumeInfoHandle and asserted to be complete and to carry
 * VOLUMEINFOMAGIC; inUse and needsSalvaged are cleared, inService is set,
 * dontSalvage is set to DONT_SALVAGE, and the header is written back with
 * IH_IWRITE.  Short reads or writes trip osi_Assert.
 * NOTE(review): a line directly before the write-back is not visible in
 * this listing, so the write may be conditional -- confirm. */
4171 ClearROInUseBit(struct VolumeSummary *summary)
4173 IHandle_t *h = summary->volumeInfoHandle;
4174 afs_sfsize_t nBytes;
4176 VolumeDiskData volHeader;
4178 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4179 osi_Assert(nBytes == sizeof(volHeader));
4180 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
4181 volHeader.inUse = 0;
4182 volHeader.needsSalvaged = 0;
4183 volHeader.inService = 1;
4184 volHeader.dontSalvage = DONT_SALVAGE;
4186 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4187 osi_Assert(nBytes == sizeof(volHeader));
4192 * Possible delete the volume.
4194 * deleteMe - Always do so, only a partial volume.
4197 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4198 char *message, int deleteMe, int check)
4200 if (readOnly(isp) || deleteMe) {
4201 if (isp->volSummary && isp->volSummary->fileName) {
4204 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
4206 Log("It will be deleted on this server (you may find it elsewhere)\n");
4209 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
4211 Log("it will be deleted instead. It should be recloned.\n");
4216 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, isp->volSummary->fileName);
4218 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4220 Log("Error %ld destroying volume disk header for volume %lu\n",
4221 afs_printable_int32_ld(code),
4222 afs_printable_uint32_lu(isp->volumeId));
4225 /* make sure we actually delete the fileName file; ENOENT
4226 * is fine, since VDestroyVolumeDiskHeader probably already
4228 if (unlink(path) && errno != ENOENT) {
4229 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4231 if (salvinfo->useFSYNC) {
4232 AskDelete(salvinfo, isp->volumeId);
4234 isp->volSummary->deleted = 1;
4237 } else if (!check) {
4238 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
4240 Abort("Salvage of volume %u aborted\n", isp->volumeId);
4244 #ifdef AFS_DEMAND_ATTACH_FS
4246 * Locks a volume on disk for salvaging.
4248 * @param[in] volumeId volume ID to lock
4250 * @return operation status
4252 * @retval -1 volume lock raced with a fileserver restart; all volumes must
4253 * be checked out and locked again
4258 LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
4263 /* should always be WRITE_LOCK, but keep the lock-type logic all
4264 * in one place, in VVolLockType. Params will be ignored, but
4265 * try to provide what we're logically doing. */
4266 locktype = VVolLockType(V_VOLUPD, 1);
4268 code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
4270 if (code == EBUSY) {
4271 Abort("Someone else appears to be using volume %lu; Aborted\n",
4272 afs_printable_uint32_lu(volumeId));
4274 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4275 afs_printable_int32_ld(code),
4276 afs_printable_uint32_lu(volumeId));
4279 code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPathName, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4280 if (code == SYNC_DENIED) {
4281 /* need to retry checking out volumes */
4284 if (code != SYNC_OK) {
4285 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4286 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4289 /* set inUse = programType in the volume header to ensure that nobody
4290 * tries to use this volume again without salvaging, if we somehow crash
4291 * or otherwise exit before finishing the salvage.
4295 struct VolumeHeader header;
4296 struct VolumeDiskHeader diskHeader;
4297 struct VolumeDiskData volHeader;
4299 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
4304 DiskToVolumeHeader(&header, &diskHeader);
4306 IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
4307 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4308 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4314 volHeader.inUse = programType;
4316 /* If we can't re-write the header, bail out and error. We don't
4317 * assert when reading the header, since it's possible the
4318 * header isn't really there (when there's no data associated
4319 * with the volume; we just delete the vol header file in that
4320 * case). But if it's there enough that we can read it, but
4321 * somehow we cannot write to it to signify we're salvaging it,
4322 * we've got a big problem and we cannot continue. */
4323 osi_Assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
4330 #endif /* AFS_DEMAND_ATTACH_FS */
4333 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
4338 memset(&res, 0, sizeof(res));
4340 for (i = 0; i < 3; i++) {
4341 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4342 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4344 if (code == SYNC_OK) {
4346 } else if (code == SYNC_DENIED) {
4347 #ifdef DEMAND_ATTACH_ENABLE
4348 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4350 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4352 Abort("Salvage aborted\n");
4353 } else if (code == SYNC_BAD_COMMAND) {
4354 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
4356 #ifdef DEMAND_ATTACH_ENABLE
4357 Log("AskOffline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
4359 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4361 Abort("Salvage aborted\n");
4364 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4365 FSYNC_clientFinis();
4369 if (code != SYNC_OK) {
4370 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4371 Abort("Salvage aborted\n");
4376 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4380 for (i = 0; i < 3; i++) {
4381 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4382 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4384 if (code == SYNC_OK) {
4386 } else if (code == SYNC_DENIED) {
4387 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4388 } else if (code == SYNC_BAD_COMMAND) {
4389 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4391 #ifdef DEMAND_ATTACH_ENABLE
4392 Log("AskOnline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
4394 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4399 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
4400 FSYNC_clientFinis();
4407 AskDelete(struct SalvInfo *salvinfo, VolumeId volumeId)
4411 for (i = 0; i < 3; i++) {
4412 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4413 FSYNC_VOL_DONE, FSYNC_SALVAGE, NULL);
4415 if (code == SYNC_OK) {
4417 } else if (code == SYNC_DENIED) {
4418 Log("AskOnline: file server denied DONE request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4419 } else if (code == SYNC_BAD_COMMAND) {
4420 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4422 #ifdef DEMAND_ATTACH_ENABLE
4423 Log("AskOnline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
4425 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4430 Log("AskOnline: request for fileserver to delete volume failed; trying again...\n");
4431 FSYNC_clientFinis();
4438 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4440 /* Volume parameter is passed in case iopen is upgraded in future to
4441 * require a volume Id to be passed
4444 IHandle_t *srcH, *destH;
4445 FdHandle_t *srcFdP, *destFdP;
4447 afs_foff_t size = 0;
4449 IH_INIT(srcH, device, rwvolume, inode1);
4450 srcFdP = IH_OPEN(srcH);
4451 osi_Assert(srcFdP != NULL);
4452 IH_INIT(destH, device, rwvolume, inode2);
4453 destFdP = IH_OPEN(destH);
4454 while ((nBytes = FDH_PREAD(srcFdP, buf, sizeof(buf), size)) > 0) {
4455 osi_Assert(FDH_PWRITE(destFdP, buf, nBytes, size) == nBytes);
4458 osi_Assert(nBytes == 0);
4459 FDH_REALLYCLOSE(srcFdP);
4460 FDH_REALLYCLOSE(destFdP);
4467 PrintInodeList(struct SalvInfo *salvinfo)
4469 struct ViceInodeInfo *ip;
4470 struct ViceInodeInfo *buf;
4471 struct afs_stat status;
4475 osi_Assert(afs_fstat(salvinfo->inodeFd, &status) == 0);
4476 buf = (struct ViceInodeInfo *)malloc(status.st_size);
4477 osi_Assert(buf != NULL);
4478 nInodes = status.st_size / sizeof(struct ViceInodeInfo);
4479 osi_Assert(read(salvinfo->inodeFd, buf, status.st_size) == status.st_size);
4480 for (ip = buf; nInodes--; ip++) {
4481 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
4482 PrintInode(stmp, ip->inodeNumber), ip->linkCount,
4483 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
4484 ip->u.param[2], ip->u.param[3]);
4490 PrintInodeSummary(struct SalvInfo *salvinfo)
4493 struct InodeSummary *isp;
4495 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4496 isp = &salvinfo->inodeSummary[i];
4497 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
4502 PrintVolumeSummary(struct SalvInfo *salvinfo)
4505 struct VolumeSummary *vsp;
4507 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; vsp++, i++) {
4508 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
4518 osi_Assert(0); /* Fork is never executed in the NT code path */
4522 #ifdef AFS_DEMAND_ATTACH_FS
4523 if ((f == 0) && (programType == salvageServer)) {
4524 /* we are a salvageserver child */
4525 #ifdef FSSYNC_BUILD_CLIENT
4526 VChildProcReconnectFS_r();
4528 #ifdef SALVSYNC_BUILD_CLIENT
4532 #endif /* AFS_DEMAND_ATTACH_FS */
4533 #endif /* !AFS_NT40_ENV */
4543 #ifdef AFS_DEMAND_ATTACH_FS
4544 if (programType == salvageServer) {
4545 #ifdef SALVSYNC_BUILD_CLIENT
4548 #ifdef FSSYNC_BUILD_CLIENT
4552 #endif /* AFS_DEMAND_ATTACH_FS */
4555 if (main_thread != pthread_self())
4556 pthread_exit((void *)code);
4569 pid = wait(&status);
4570 osi_Assert(pid != -1);
4571 if (WCOREDUMP(status))
4572 Log("\"%s\" core dumped!\n", prog);
4573 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/* Format a time as local "MM/DD/YYYY HH:MM:SS" (precision != 0) or
 * "MM/DD/YYYY HH:MM" (precision == 0).
 *
 * The result lives in a static buffer: it is overwritten by the next call
 * and must not be freed.  If the time cannot be broken down or formatted,
 * an empty string is returned instead of handing a NULL struct tm to
 * strftime. */
char *
TimeStamp(time_t clock, int precision)
{
    static char timestamp[20];
    const char *layout = precision ? "%m/%d/%Y %H:%M:%S" : "%m/%d/%Y %H:%M";
    struct tm *lt = localtime(&clock);

    /* localtime returns NULL for an unrepresentable time, and strftime
     * returns 0 (leaving the buffer unspecified) when the text does not
     * fit; both cases collapse to an empty stamp. */
    if (lt == NULL || strftime(timestamp, sizeof(timestamp), layout, lt) == 0) {
        timestamp[0] = '\0';
    }
    return timestamp;
}
4592 CheckLogFile(char * log_path)
4594 char oldSlvgLog[AFSDIR_PATH_MAX];
4596 #ifndef AFS_NT40_ENV
4603 strcpy(oldSlvgLog, log_path);
4604 strcat(oldSlvgLog, ".old");
4606 renamefile(log_path, oldSlvgLog);
4607 logFile = afs_fopen(log_path, "a");
4609 if (!logFile) { /* still nothing, use stdout */
4613 #ifndef AFS_NAMEI_ENV
4614 AFS_DEBUG_IOPS_LOG(logFile);
4619 #ifndef AFS_NT40_ENV
/* Give the current log file a second, timestamped name.  The name
 * "<log_path>.YYYY-MM-DD.HH:MM:SS" is built from the local broken-down time
 * with afs_snprintf into an AFSDIR_PATH_MAX-sized buffer, and log_path is
 * hard-linked to it.  The result of link() is deliberately not checked. */
4621 TimeStampLogFile(char * log_path)
4623 char stampSlvgLog[AFSDIR_PATH_MAX];
4628 lt = localtime(&now);
4629 (void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
4630 "%s.%04d-%02d-%02d.%02d:%02d:%02d",
4631 log_path, lt->tm_year + 1900,
4632 lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
4635 /* try to link the logfile to a timestamped filename */
4636 /* if it fails, oh well, nothing we can do */
4637 link(log_path, stampSlvgLog);
/*
 * Log display fragment. Off NT, a message explains that the log cannot be
 * shown while syslog is in use (the test guarding it is not visible here).
 */
4646 #ifndef AFS_NT40_ENV
4648 printf("Can't show log since using syslog.\n");
/* Reopen the salvage log read-only through the global logFile. */
4659 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
4662 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
/* Read the log one buffer-sized chunk at a time; the loop body is not visible in this view. */
4665 while (fgets(line, sizeof(line), logFile))
/*
 * Log - printf-style message sink for the salvager.
 *
 * The message is first rendered into tmp, bounded by its size, and then
 * sent either to syslog or to logFile. The test choosing between the two
 * is not visible in this view.
 */
4672 Log(const char *format, ...)
4678 va_start(args, format);
4679 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4681 #ifndef AFS_NT40_ENV
/* The rendered text is passed as data behind a fixed format, so conversions inside the message are not reinterpreted. */
4683 syslog(LOG_INFO, "%s", tmp);
/* File output is prefixed with the current local time, using the seconds layout of TimeStamp. */
4687 gettimeofday(&now, 0);
4688 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort - printf-style reporting of a fatal condition.
 *
 * The message is rendered into tmp, bounded by its size, and written to
 * syslog or to logFile. Unlike Log, the file output carries no timestamp
 * prefix. What happens after the message is written is not visible here.
 */
4694 Abort(const char *format, ...)
4699 va_start(args, format);
4700 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4702 #ifndef AFS_NT40_ENV
4704 syslog(LOG_INFO, "%s", tmp);
4708 fprintf(logFile, "%s", tmp);
/*
 * ToString - allocate heap storage sized for s plus its terminator.
 * Allocation failure is treated as fatal through the assertion.
 * NOTE(review): presumably s is copied into p and p is returned for the
 * caller to free; the copy and return are not visible in this view.
 */
4720 ToString(const char *s)
4723 p = (char *)malloc(strlen(s) + 1);
4724 osi_Assert(p != NULL);
4729 /* Remove the FORCESALVAGE file */
/*
 * The marker lives directly under path. It is removed only when this is
 * not a Testing run, ForceSalvage is set, and the file can be stat-ed.
 */
4731 RemoveTheForce(char *path)
4734 struct afs_stat force; /* so we can use afs_stat to find it */
/*
 * NOTE(review): strcpy/strcat are unbounded; the size of target is not
 * visible here, so confirm path plus the 13-byte suffix always fits.
 */
4735 strcpy(target,path);
4736 strcat(target,"/FORCESALVAGE");
4737 if (!Testing && ForceSalvage) {
/* The unlink result is ignored. NOTE(review): the stat-then-unlink pair is racy; calling unlink alone would behave the same. */
4738 if (afs_stat(target,&force) == 0) unlink(target);
/* Variant compiled when AFS_AIX32_ENV is not defined. */
4742 #ifndef AFS_AIX32_ENV
4744 * UseTheForceLuke - see if we can use the force
4747 UseTheForceLuke(char *path)
4749 struct afs_stat force;
/*
 * NOTE(review): strcpy/strcat are unbounded; the size of target is not
 * visible here, so confirm path plus the 13-byte suffix always fits.
 */
4751 strcpy(target,path);
4752 strcat(target,"/FORCESALVAGE");
/* Non-zero exactly when the FORCESALVAGE marker under path can be stat-ed. */
4754 return (afs_stat(target, &force) == 0);
4758 * UseTheForceLuke - see if we can use the force
4761 * The VRMIX fsck will not muck with the filesystem it is supposedly
4762 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4763 * muck directly with the root inode, which is within the normal
4765 * ListViceInodes() has a side effect of setting ForceSalvage if
4766 * it detects a need, based on root inode examination.
/* This variant never reports a marker file: the visible body only returns 0 and does not use path. */
4769 UseTheForceLuke(char *path)
4772 return 0; /* sorry OB1 */
4777 /* NT support routines */
/* Path of the running executable, filled in on first use and reused by later calls. */
4779 static char execpathname[MAX_PATH];
/*
 * nt_SalvagePartition - start a child salvager for partName as job jobn.
 * A job record (magic, job number, partition name) is filled in and passed
 * to spawnprocveb together with the executable path and save_args.
 */
4781 nt_SalvagePartition(char *partName, int jobn)
4786 if (!*execpathname) {
4787 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
/*
 * NOTE(review): GetModuleFileName reports truncation by returning the
 * size it was given, which is MAX_PATH - 1 here, not 1023. The second
 * test looks like it no longer matches the buffer size - confirm.
 */
4788 if (!n || n == 1023)
4791 job.cj_magic = SALVAGER_MAGIC;
4792 job.cj_number = jobn;
/* NOTE(review): unbounded copy; the capacity of cj_part is not visible here. */
4793 (void)strcpy(job.cj_part, partName);
4794 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage - child-side setup from the job record.
 * datap is interpreted as a childJob_t of exactly len bytes. The record
 * is checked for size and magic before a per-job log file is opened.
 */
4799 nt_SetupPartitionSalvage(void *datap, int len)
4801 childJob_t *jobp = (childJob_t *) datap;
4802 char logname[AFSDIR_PATH_MAX];
/* Sanity checks on the record; the action taken on a mismatch is not visible in this view. */
4804 if (len != sizeof(childJob_t))
4806 if (jobp->cj_magic != SALVAGER_MAGIC)
/*
 * Log name is the salvage log path followed by a dot and a number.
 * NOTE(review): sprintf is unbounded; a size-limited formatter would
 * protect logname. The numeric argument is not visible in this view.
 */
4811 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
/* Opened in write mode, so an existing file of the same name is truncated. */
4813 logFile = afs_fopen(logname, "w");
4821 #endif /* AFS_NT40_ENV */