2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
91 #include <sys/param.h>
95 #endif /* ITIMER_REAL */
101 #include <sys/stat.h>
106 #include <WINNT/afsevent.h>
109 #define WCOREDUMP(x) ((x) & 0200)
112 #include <afs/afsint.h>
113 #include <afs/assert.h>
114 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
115 #if defined(AFS_VFSINCL_ENV)
116 #include <sys/vnode.h>
118 #include <sys/fs/ufs_inode.h>
120 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
121 #include <ufs/ufs/dinode.h>
122 #include <ufs/ffs/fs.h>
124 #include <ufs/inode.h>
127 #else /* AFS_VFSINCL_ENV */
129 #include <ufs/inode.h>
130 #else /* AFS_OSF_ENV */
131 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
132 #include <sys/inode.h>
135 #endif /* AFS_VFSINCL_ENV */
136 #endif /* AFS_SGI_ENV */
139 #include <sys/lockf.h>
143 #include <checklist.h>
145 #if defined(AFS_SGI_ENV)
150 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
153 #include <sys/mnttab.h>
154 #include <sys/mntent.h>
159 #endif /* AFS_SGI_ENV */
160 #endif /* AFS_HPUX_ENV */
165 #include <afs/osi_inode.h>
169 #include <afs/afsutil.h>
170 #include <afs/fileutil.h>
171 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
179 #include <afs/afssyscalls.h>
183 #include "partition.h"
184 #include "daemon_com.h"
186 #include "volume_inline.h"
187 #include "salvsync.h"
188 #include "viceinode.h"
190 #include "volinodes.h" /* header magic number, etc. stuff */
191 #include "vol-salvage.h"
192 #include "vol_internal.h"
194 #ifdef FSSYNC_BUILD_CLIENT
195 #include "vg_cache.h"
202 /*@+fcnmacros +macrofcndecl@*/
205 extern off64_t afs_lseek(int FD, off64_t O, int F);
206 #endif /*S_SPLINT_S */
207 #define afs_lseek(FD, O, F) lseek64(FD, (off64_t) (O), F)
208 #define afs_stat stat64
209 #define afs_fstat fstat64
210 #define afs_open open64
211 #define afs_fopen fopen64
212 #else /* !O_LARGEFILE */
214 extern off_t afs_lseek(int FD, off_t O, int F);
215 #endif /*S_SPLINT_S */
216 #define afs_lseek(FD, O, F) lseek(FD, (off_t) (O), F)
217 #define afs_stat stat
218 #define afs_fstat fstat
219 #define afs_open open
220 #define afs_fopen fopen
221 #endif /* !O_LARGEFILE */
222 /*@=fcnmacros =macrofcndecl@*/
225 extern void *calloc();
227 static char *TimeStamp(time_t clock, int precision);
230 int debug; /* -d flag */
231 extern int Testing; /* -n flag */
232 int ListInodeOption; /* -i flag */
233 int ShowRootFiles; /* -r flag */
234 int RebuildDirs; /* -sal flag */
235 int Parallel = 4; /* -para X flag */
236 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
237 int forceR = 0; /* -b flag */
238 int ShowLog = 0; /* -showlog flag */
239 int ShowSuid = 0; /* -showsuid flag */
240 int ShowMounts = 0; /* -showmounts flag */
241 int orphans = ORPH_IGNORE; /* -orphans option */
246 int useSyslog = 0; /* -syslog flag */
247 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
256 #define MAXPARALLEL 32
258 int OKToZap; /* -o flag */
259 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
260 * in the volume header */
262 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
264 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
266 Device fileSysDevice; /* The device number of the current
267 * partition being salvaged */
271 char *fileSysPath; /* The path of the mounted partition currently
272 * being salvaged, i.e. the directory
273 * containing the volume headers */
275 char *fileSysPathName; /* NT needs this to make name pretty in log. */
276 IHandle_t *VGLinkH; /* Link handle for current volume group. */
277 int VGLinkH_cnt; /* # of references to lnk handle. */
278 struct DiskPartition64 *fileSysPartition; /* Partition being salvaged */
280 char *fileSysDeviceName; /* The block device where the file system
281 * being salvaged was mounted */
282 char *filesysfulldev;
284 int VolumeChanged; /* Set by any routine which would change the volume in
284 * a way which would require callbacks to be broken if the
286 * volume was put back on line by an active file server */
288 VolumeDiskData VolInfo; /* A copy of the last good or salvaged volume header dealt with */
290 int nVolumesInInodeFile; /* Number of read-write volumes summarized */
291 int inodeFd; /* File descriptor for inode file */
294 struct VnodeInfo vnodeInfo[nVNODECLASSES];
297 struct VolumeSummary *volumeSummaryp = NULL; /* Holds all the volumes in a part */
298 int nVolumes; /* Number of volumes (read-write and read-only)
299 * in volume summary */
305 /* Forward declarations */
306 /*@printflike@*/ void Log(const char *format, ...);
307 /*@printflike@*/ void Abort(const char *format, ...);
308 static int IsVnodeOrphaned(VnodeId vnode);
309 static int AskVolumeSummary(VolumeId singleVolumeNumber);
311 #ifdef AFS_DEMAND_ATTACH_FS
312 static int LockVolume(VolumeId volumeId);
313 #endif /* AFS_DEMAND_ATTACH_FS */
315 /* Uniquifier stored in the Inode */
320 return (u & 0x3fffff);
322 #if defined(AFS_SGI_EXMAG)
323 return (u & SGI_UNIQMASK);
326 #endif /* AFS_SGI_EXMAG */
331 BadError(register int aerror)
333 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
335 return 0; /* otherwise may be transient, e.g. EMFILE */
340 char *save_args[MAX_ARGS];
342 extern pthread_t main_thread;
343 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
347 * Get the salvage lock if not already held. Hold until process exits.
349 * @param[in] locktype READ_LOCK or WRITE_LOCK
352 _ObtainSalvageLock(int locktype)
354 struct VLockFile salvageLock;
359 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
361 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
364 "salvager: There appears to be another salvager running! "
369 "salvager: Error %d trying to acquire salvage lock! "
375 ObtainSalvageLock(void)
377 _ObtainSalvageLock(WRITE_LOCK);
380 ObtainSharedSalvageLock(void)
382 _ObtainSalvageLock(READ_LOCK);
386 #ifdef AFS_SGI_XFS_IOPS_ENV
387 /* Check if the given partition is mounted. For XFS, the root inode is not a
388 * constant. So we check the hard way.
391 IsPartitionMounted(char *part)
394 struct mntent *mntent;
396 assert(mntfp = setmntent(MOUNTED, "r"));
397 while (mntent = getmntent(mntfp)) {
398 if (!strcmp(part, mntent->mnt_dir))
403 return mntent ? 1 : 1;
406 /* Check if the given inode is the root of the filesystem. */
407 #ifndef AFS_SGI_XFS_IOPS_ENV
409 IsRootInode(struct afs_stat *status)
412 * The root inode is not a fixed value in XFS partitions. So we need to
413 * see if the partition is in the list of mounted partitions. This only
414 * affects the SalvageFileSys path, so we check there.
416 return (status->st_ino == ROOTINODE);
421 #ifndef AFS_NAMEI_ENV
422 /* We don't want to salvage big files filesystems, since we can't put volumes on
426 CheckIfBigFilesFS(char *mountPoint, char *devName)
428 struct superblock fs;
431 if (strncmp(devName, "/dev/", 5)) {
432 (void)sprintf(name, "/dev/%s", devName);
434 (void)strcpy(name, devName);
437 if (ReadSuper(&fs, name) < 0) {
438 Log("Unable to read superblock. Not salvaging partition %s.\n",
442 if (IsBigFilesFileSystem(&fs)) {
443 Log("Partition %s is a big files filesystem, not salvaging.\n",
453 #define HDSTR "\\Device\\Harddisk"
454 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
456 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
461 static int dowarn = 1;
463 if (!QueryDosDevice(p1->devName, res, RES_LEN - 1))
465 if (strncmp(res, HDSTR, HDLEN)) {
468 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
469 res, HDSTR, p1->devName);
473 d1 = atoi(&res[HDLEN]);
475 if (!QueryDosDevice(p2->devName, res, RES_LEN - 1))
477 if (strncmp(res, HDSTR, HDLEN)) {
480 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
481 res, HDSTR, p2->devName);
485 d2 = atoi(&res[HDLEN]);
490 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
493 /* This assumes that two partitions with the same device number divided by
494 * PartsPerDisk are on the same disk.
497 SalvageFileSysParallel(struct DiskPartition64 *partP)
500 struct DiskPartition64 *partP;
501 int pid; /* Pid for this job */
502 int jobnumb; /* Log file job number */
503 struct job *nextjob; /* Next partition on disk to salvage */
505 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
506 struct job *thisjob = 0;
507 static int numjobs = 0;
508 static int jobcount = 0;
514 char logFileName[256];
518 /* We have a partition to salvage. Copy it into thisjob */
519 thisjob = (struct job *)malloc(sizeof(struct job));
521 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
524 memset(thisjob, 0, sizeof(struct job));
525 thisjob->partP = partP;
526 thisjob->jobnumb = jobcount;
528 } else if (jobcount == 0) {
529 /* We are asking to wait for all jobs (partP == 0), yet we never
532 Log("No file system partitions named %s* found; not salvaged\n",
533 VICE_PARTITION_PREFIX);
537 if (debug || Parallel == 1) {
539 SalvageFileSys(thisjob->partP, 0);
546 /* Check to see if thisjob is for a disk that we are already
547 * salvaging. If it is, link it in as the next job to do. The
548 * jobs array has 1 entry per disk being salvaged. numjobs is
549 * the total number of disks currently being salvaged. In
550 * order to keep the jobs array compact, when a disk is
551 * completed, the highest element in the jobs array is moved
552 * down to the now-open slot.
554 for (j = 0; j < numjobs; j++) {
555 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
556 /* On same disk, add it to this list and return */
557 thisjob->nextjob = jobs[j]->nextjob;
558 jobs[j]->nextjob = thisjob;
565 /* Loop until we start thisjob or until all existing jobs are finished */
566 while (thisjob || (!partP && (numjobs > 0))) {
567 startjob = -1; /* No new job to start */
569 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
570 /* Either the max jobs are running or we have to wait for all
571 * the jobs to finish. In either case, we wait for at least one
572 * job to finish. When it's done, clean up after it.
574 pid = wait(&wstatus);
576 for (j = 0; j < numjobs; j++) { /* Find which job it is */
577 if (pid == jobs[j]->pid)
581 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
582 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
585 numjobs--; /* job no longer running */
586 oldjob = jobs[j]; /* remember */
587 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
588 free(oldjob); /* free the old job */
590 /* If there is another partition on the disk to salvage, then
591 * say we will start it (startjob). If not, then put thisjob there
592 * and say we will start it.
594 if (jobs[j]) { /* Another partitions to salvage */
595 startjob = j; /* Will start it */
596 } else { /* There is not another partition to salvage */
598 jobs[j] = thisjob; /* Add thisjob */
600 startjob = j; /* Will start it */
602 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
603 startjob = -1; /* Don't start it - already running */
607 /* We don't have to wait for a job to complete */
609 jobs[numjobs] = thisjob; /* Add this job */
611 startjob = numjobs; /* Will start it */
615 /* Start up a new salvage job on a partition in job slot "startjob" */
616 if (startjob != -1) {
618 Log("Starting salvage of file system partition %s\n",
619 jobs[startjob]->partP->name);
621 /* For NT, we not only fork, but re-exec the salvager. Pass in the
622 * commands and pass the child job number via the data path.
625 nt_SalvagePartition(jobs[startjob]->partP->name,
626 jobs[startjob]->jobnumb);
627 jobs[startjob]->pid = pid;
632 jobs[startjob]->pid = pid;
638 for (fd = 0; fd < 16; fd++)
645 openlog("salvager", LOG_PID, useSyslogFacility);
649 (void)afs_snprintf(logFileName, sizeof logFileName,
651 AFSDIR_SERVER_SLVGLOG_FILEPATH,
652 jobs[startjob]->jobnumb);
653 logFile = afs_fopen(logFileName, "w");
658 SalvageFileSys1(jobs[startjob]->partP, 0);
663 } /* while ( thisjob || (!partP && numjobs > 0) ) */
665 /* If waited for all jobs to complete, now collect log files and return */
667 if (!useSyslog) /* if syslogging - no need to collect */
670 for (i = 0; i < jobcount; i++) {
671 (void)afs_snprintf(logFileName, sizeof logFileName, "%s.%d",
672 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
673 if ((passLog = afs_fopen(logFileName, "r"))) {
674 while (fgets(buf, sizeof(buf), passLog)) {
679 (void)unlink(logFileName);
688 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
690 if (!canfork || debug || Fork() == 0) {
691 SalvageFileSys1(partP, singleVolumeNumber);
692 if (canfork && !debug) {
697 Wait("SalvageFileSys");
701 get_DevName(char *pbuffer, char *wpath)
703 char pbuf[128], *ptr;
704 strcpy(pbuf, pbuffer);
705 ptr = (char *)strrchr(pbuf, '/');
711 ptr = (char *)strrchr(pbuffer, '/');
713 strcpy(pbuffer, ptr + 1);
720 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
723 char inodeListPath[256];
724 FILE *inodeFile = NULL;
725 static char tmpDevName[100];
726 static char wpath[100];
727 struct VolumeSummary *vsp, *esp;
738 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
739 Abort("Raced too many times with fileserver restarts while trying to "
740 "checkout/lock volumes; Aborted\n");
742 #ifdef AFS_DEMAND_ATTACH_FS
744 /* unlock all previous volume locks, since we're about to lock them
746 VLockFileReinit(&partP->volLockFile);
748 #endif /* AFS_DEMAND_ATTACH_FS */
750 fileSysPartition = partP;
751 fileSysDevice = fileSysPartition->device;
752 fileSysPathName = VPartitionPath(fileSysPartition);
755 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
756 (void)sprintf(fileSysPath, "%s\\", fileSysPathName);
757 name = partP->devName;
759 fileSysPath = fileSysPathName;
760 strcpy(tmpDevName, partP->devName);
761 name = get_DevName(tmpDevName, wpath);
762 fileSysDeviceName = name;
763 filesysfulldev = wpath;
766 if (singleVolumeNumber) {
767 #ifndef AFS_DEMAND_ATTACH_FS
768 /* only non-DAFS locks the partition when salvaging a single volume;
769 * DAFS will lock the individual volumes in the VG */
770 VLockPartition(partP->name);
771 #endif /* !AFS_DEMAND_ATTACH_FS */
775 /* salvageserver already setup fssync conn for us */
776 if ((programType != salvageServer) && !VConnectFS()) {
777 Abort("Couldn't connect to file server\n");
780 AskOffline(singleVolumeNumber, partP->name);
781 #ifdef AFS_DEMAND_ATTACH_FS
782 if (LockVolume(singleVolumeNumber)) {
785 #endif /* AFS_DEMAND_ATTACH_FS */
788 VLockPartition(partP->name);
792 ForceSalvage = UseTheForceLuke(fileSysPath);
795 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
796 partP->name, name, (Testing ? "(READONLY mode)" : ""));
798 Log("***Forced salvage of all volumes on this partition***\n");
803 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
810 assert((dirp = opendir(fileSysPath)) != NULL);
811 while ((dp = readdir(dirp))) {
812 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
813 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
815 Log("Removing old salvager temp files %s\n", dp->d_name);
816 strcpy(npath, fileSysPath);
818 strcat(npath, dp->d_name);
824 tdir = (tmpdir ? tmpdir : fileSysPath);
826 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
827 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
829 snprintf(inodeListPath, 255, "%s/salvage.inodes.%s.%d", tdir, name,
833 inodeFile = fopen(inodeListPath, "w+b");
835 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
838 /* Using nt_unlink here since we're really using the delete on close
839 * semantics of unlink. In most places in the salvager, we really do
840 * mean to unlink the file at that point. Those places have been
841 * modified to actually do that so that the NT crt can be used there.
843 code = nt_unlink(inodeListPath);
845 code = unlink(inodeListPath);
848 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
851 if (GetInodeSummary(inodeFile, singleVolumeNumber) < 0) {
855 inodeFd = fileno(inodeFile);
857 Abort("Temporary file %s is missing...\n", inodeListPath);
858 afs_lseek(inodeFd, 0L, SEEK_SET);
859 if (ListInodeOption) {
863 /* enumerate volumes in the partition.
864 * figure out sets of read-only + rw volumes.
865 * salvage each set, read-only volumes first, then read-write.
866 * Fix up inodes on last volume in set (whether it is read-write
869 if (GetVolumeSummary(singleVolumeNumber)) {
873 for (i = j = 0, vsp = volumeSummaryp, esp = vsp + nVolumes;
874 i < nVolumesInInodeFile; i = j) {
875 VolumeId rwvid = inodeSummary[i].RWvolumeId;
877 j < nVolumesInInodeFile && inodeSummary[j].RWvolumeId == rwvid;
879 VolumeId vid = inodeSummary[j].volumeId;
880 struct VolumeSummary *tsp;
881 /* Scan volume list (from partition root directory) looking for the
882 * current rw volume number in the volume list from the inode scan.
883 * If there is one here that is not in the inode volume list,
885 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
887 DeleteExtraVolumeHeaderFile(vsp);
889 /* Now match up the volume summary info from the root directory with the
890 * entry in the volume list obtained from scanning inodes */
891 inodeSummary[j].volSummary = NULL;
892 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
893 if (tsp->header.id == vid) {
894 inodeSummary[j].volSummary = tsp;
900 /* Salvage the group of volumes (several read-only + 1 read/write)
901 * starting with the current read-only volume we're looking at.
903 SalvageVolumeGroup(&inodeSummary[i], j - i);
906 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
907 for (; vsp < esp; vsp++) {
909 DeleteExtraVolumeHeaderFile(vsp);
912 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
913 RemoveTheForce(fileSysPath);
915 if (!Testing && singleVolumeNumber) {
916 #ifdef AFS_DEMAND_ATTACH_FS
917 /* unlock vol headers so the fs can attach them when we AskOnline */
918 VLockFileReinit(&fileSysPartition->volLockFile);
919 #endif /* AFS_DEMAND_ATTACH_FS */
921 AskOnline(singleVolumeNumber, fileSysPartition->name);
923 /* Step through the volumeSummary list and set all volumes on-line.
924 * The volumes were taken off-line in GetVolumeSummary.
926 for (j = 0; j < nVolumes; j++) {
927 AskOnline(volumeSummaryp[j].header.id, fileSysPartition->name);
931 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
932 fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
935 fclose(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
939 DeleteExtraVolumeHeaderFile(register struct VolumeSummary *vsp)
942 sprintf(path, "%s/%s", fileSysPath, vsp->fileName);
945 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
948 code = VDestroyVolumeDiskHeader(fileSysPartition, vsp->header.id, vsp->header.parent);
950 Log("Error %ld destroying volume disk header for volume %lu\n",
951 afs_printable_int32_ld(code),
952 afs_printable_uint32_lu(vsp->header.id));
955 /* make sure we actually delete the fileName file; ENOENT
956 * is fine, since VDestroyVolumeDiskHeader probably already
958 if (unlink(path) && errno != ENOENT) {
959 Log("Unable to unlink %s (errno = %d)\n", path, errno);
966 CompareInodes(const void *_p1, const void *_p2)
968 register const struct ViceInodeInfo *p1 = _p1;
969 register const struct ViceInodeInfo *p2 = _p2;
970 if (p1->u.vnode.vnodeNumber == INODESPECIAL
971 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
972 VolumeId p1rwid, p2rwid;
974 (p1->u.vnode.vnodeNumber ==
975 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
977 (p2->u.vnode.vnodeNumber ==
978 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
983 if (p1->u.vnode.vnodeNumber == INODESPECIAL
984 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
985 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
986 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
987 if (p1->u.vnode.volumeId == p1rwid)
989 if (p2->u.vnode.volumeId == p2rwid)
991 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
993 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
994 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
995 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
997 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
999 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1001 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1003 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1005 /* The following tests are reversed, so that the most desirable
1006 * of several similar inodes comes first */
1007 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1008 #ifdef AFS_3DISPARES
1009 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1010 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1013 #ifdef AFS_SGI_EXMAG
1014 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1015 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1020 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1021 #ifdef AFS_3DISPARES
1022 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1023 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1026 #ifdef AFS_SGI_EXMAG
1027 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1028 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1033 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1034 #ifdef AFS_3DISPARES
1035 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1036 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1039 #ifdef AFS_SGI_EXMAG
1040 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1041 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1046 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1047 #ifdef AFS_3DISPARES
1048 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1049 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1052 #ifdef AFS_SGI_EXMAG
1053 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1054 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1063 CountVolumeInodes(register struct ViceInodeInfo *ip, int maxInodes,
1064 register struct InodeSummary *summary)
1066 VolumeId volume = ip->u.vnode.volumeId;
1067 VolumeId rwvolume = volume;
1068 register int n, nSpecial;
1069 register Unique maxunique;
1072 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1074 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1076 rwvolume = ip->u.special.parentId;
1077 /* This isn't quite right, as there could (in error) be different
1078 * parent inodes in different special vnodes */
1080 if (maxunique < ip->u.vnode.vnodeUniquifier)
1081 maxunique = ip->u.vnode.vnodeUniquifier;
1085 summary->volumeId = volume;
1086 summary->RWvolumeId = rwvolume;
1087 summary->nInodes = n;
1088 summary->nSpecialInodes = nSpecial;
1089 summary->maxUniquifier = maxunique;
1093 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1095 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1096 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1097 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1102 * Collect list of inodes in file named by path. If a truly fatal error,
1103 * unlink the file and abort. For lesser errors, return -1. The file will
1104 * be unlinked by the caller.
1107 GetInodeSummary(FILE *inodeFile, VolumeId singleVolumeNumber)
1109 struct afs_stat status;
1112 struct ViceInodeInfo *ip;
1113 struct InodeSummary summary;
1114 char summaryFileName[50];
1117 char *dev = fileSysPath;
1118 char *wpath = fileSysPath;
1120 char *dev = fileSysDeviceName;
1121 char *wpath = filesysfulldev;
1123 char *part = fileSysPath;
1126 /* This file used to come from vfsck; cobble it up ourselves now... */
1128 ListViceInodes(dev, fileSysPath, inodeFile,
1129 singleVolumeNumber ? OnlyOneVolume : 0,
1130 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1132 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1135 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1137 if (forceSal && !ForceSalvage) {
1138 Log("***Forced salvage of all volumes on this partition***\n");
1141 fseek(inodeFile, 0L, SEEK_SET);
1142 inodeFd = fileno(inodeFile);
1143 if (inodeFd == -1 || afs_fstat(inodeFd, &status) == -1) {
1144 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1146 tdir = (tmpdir ? tmpdir : part);
1148 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1149 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp"));
1151 (void)afs_snprintf(summaryFileName, sizeof summaryFileName,
1152 "%s/salvage.temp.%d", tdir, getpid());
1154 summaryFile = afs_fopen(summaryFileName, "a+");
1155 if (summaryFile == NULL) {
1156 Abort("Unable to create inode summary file\n");
1160 /* Using nt_unlink here since we're really using the delete on close
1161 * semantics of unlink. In most places in the salvager, we really do
1162 * mean to unlink the file at that point. Those places have been
1163 * modified to actually do that so that the NT crt can be used there.
1165 code = nt_unlink(summaryFileName);
1167 code = unlink(summaryFileName);
1170 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1173 if (!canfork || debug || Fork() == 0) {
1175 unsigned long st_size=(unsigned long) status.st_size;
1176 nInodes = st_size / sizeof(struct ViceInodeInfo);
1178 fclose(summaryFile);
1179 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1180 RemoveTheForce(fileSysPath);
1182 struct VolumeSummary *vsp;
1185 GetVolumeSummary(singleVolumeNumber);
1187 for (i = 0, vsp = volumeSummaryp; i < nVolumes; i++) {
1189 DeleteExtraVolumeHeaderFile(vsp);
1192 Log("%s vice inodes on %s; not salvaged\n",
1193 singleVolumeNumber ? "No applicable" : "No", dev);
1196 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1198 fclose(summaryFile);
1200 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1203 if (read(inodeFd, ip, st_size) != st_size) {
1204 fclose(summaryFile);
1205 Abort("Unable to read inode table; %s not salvaged\n", dev);
1207 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1208 if (afs_lseek(inodeFd, 0, SEEK_SET) == -1
1209 || write(inodeFd, ip, st_size) != st_size) {
1210 fclose(summaryFile);
1211 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1215 CountVolumeInodes(ip, nInodes, &summary);
1216 if (fwrite(&summary, sizeof(summary), 1, summaryFile) != 1) {
1217 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1218 fclose(summaryFile);
1221 summary.index += (summary.nInodes);
1222 nInodes -= summary.nInodes;
1223 ip += summary.nInodes;
1225 /* The following is fflush, not fclose, because if it were fclose, debug mode would not work */
1226 if (fflush(summaryFile) == EOF || fsync(fileno(summaryFile)) == -1) {
1227 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1228 fclose(summaryFile);
1231 if (canfork && !debug) {
1236 if (Wait("Inode summary") == -1) {
1237 fclose(summaryFile);
1238 Exit(1); /* salvage of this partition aborted */
1241 assert(afs_fstat(fileno(summaryFile), &status) != -1);
1242 if (status.st_size != 0) {
1244 unsigned long st_status=(unsigned long)status.st_size;
1245 inodeSummary = (struct InodeSummary *)malloc(st_status);
1246 assert(inodeSummary != NULL);
1247 /* For GNU we need to do lseek to get the file pointer moved. */
1248 assert(afs_lseek(fileno(summaryFile), 0, SEEK_SET) == 0);
1249 ret = read(fileno(summaryFile), inodeSummary, st_status);
1250 assert(ret == st_status);
1252 nVolumesInInodeFile =(unsigned long)(status.st_size) / sizeof(struct InodeSummary);
1253 Log("%d nVolumesInInodeFile %d \n",nVolumesInInodeFile,(unsigned long)(status.st_size));
1254 fclose(summaryFile);
1258 /* Comparison routine for volume sort.
1259 This is set up so that a read-write volume comes immediately before
1260 any read-only clones of that volume */
1262 CompareVolumes(const void *_p1, const void *_p2)
1264 register const struct VolumeSummary *p1 = _p1;
1265 register const struct VolumeSummary *p2 = _p2;
1266 if (p1->header.parent != p2->header.parent)
1267 return p1->header.parent < p2->header.parent ? -1 : 1;
1268 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1270 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1272 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1276 * Gleans volumeSummary information by asking the fileserver
1278 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1279 * salvaging a whole partition
1281 * @return whether we obtained the volume summary information or not
1282 * @retval 0 success; we obtained the volume summary information
1283 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1285 * @retval 1 we did not get the volume summary information; either the
1286 * fileserver responded with an error, or we are not supposed to
1287 * ask the fileserver for the information (e.g. we are salvaging
1288 * the entire partition or we are not the salvageserver)
1290 * @note for non-DAFS, always returns 1
1293 AskVolumeSummary(VolumeId singleVolumeNumber)
1296 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1297 if (programType == salvageServer) {
1298 if (singleVolumeNumber) {
1299 FSSYNC_VGQry_response_t q_res;
1301 struct VolumeSummary *vsp;
1303 struct VolumeDiskHeader diskHdr;
1305 memset(&res, 0, sizeof(res));
1307 code = FSYNC_VGCQuery(fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1310 * We must wait for the partition to finish scanning before we
1311 * can continue, since we will not know if we got the entire
1312 * VG membership unless the partition is fully scanned.
1313 * We could, in theory, just scan the partition ourselves if
1314 * the VG cache is not ready, but we would be doing the exact
1315 * same scan the fileserver is doing; it will almost always
1316 * be faster to wait for the fileserver. The only exceptions
1317 * are if the partition does not take very long to scan, and
1318 * in that case it's fast either way, so who cares?
1320 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1321 Log("waiting for fileserver to finish scanning partition %s...\n",
1322 fileSysPartition->name);
1324 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1325 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1326 * just so small partitions don't need to wait over 10
1327 * seconds every time, and large partitions are generally
1328 * polled only once every ten seconds. */
1329 sleep((i > 10) ? (i = 10) : i);
1331 code = FSYNC_VGCQuery(fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1335 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1336 /* This can happen if there's no header for the volume
1337 * we're salvaging, or no headers exist for the VG (if
1338 * we're salvaging an RW). Act as if we got a response
1339 * with no VG members. The headers may be created during
1340 * salvaging, if there are inodes in this VG. */
1342 memset(&q_res, 0, sizeof(q_res));
1343 q_res.rw = singleVolumeNumber;
1347 Log("fileserver refused VGCQuery request for volume %lu on "
1348 "partition %s, code %ld reason %ld\n",
1349 afs_printable_uint32_lu(singleVolumeNumber),
1350 fileSysPartition->name,
1351 afs_printable_int32_ld(code),
1352 afs_printable_int32_ld(res.hdr.reason));
1356 if (q_res.rw != singleVolumeNumber) {
1357 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1358 afs_printable_uint32_lu(singleVolumeNumber),
1359 afs_printable_uint32_lu(q_res.rw));
1360 #ifdef SALVSYNC_BUILD_CLIENT
1361 if (SALVSYNC_LinkVolume(q_res.rw,
1363 fileSysPartition->name,
1365 Log("schedule request failed\n");
1367 #endif /* SALVSYNC_BUILD_CLIENT */
1368 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1371 volumeSummaryp = malloc(VOL_VG_MAX_VOLS * sizeof(struct VolumeSummary));
1372 assert(volumeSummaryp != NULL);
1375 vsp = volumeSummaryp;
1377 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1378 char name[VMAXPATHLEN];
1380 if (!q_res.children[i]) {
1384 /* AskOffline for singleVolumeNumber was called much earlier */
1385 if (q_res.children[i] != singleVolumeNumber) {
1386 AskOffline(q_res.children[i], fileSysPartition->name);
1387 if (LockVolume(q_res.children[i])) {
1393 code = VReadVolumeDiskHeader(q_res.children[i], fileSysPartition, &diskHdr);
1395 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1396 afs_printable_uint32_lu(q_res.children[i]));
1401 DiskToVolumeHeader(&vsp->header, &diskHdr);
1402 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1403 vsp->fileName = ToString(name);
1408 qsort(volumeSummaryp, nVolumes, sizeof(struct VolumeSummary),
1413 Log("Cannot get volume summary from fileserver; falling back to scanning "
1414 "entire partition\n");
1417 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
1422 * count how many volume headers are found by VWalkVolumeHeaders.
1424 * @param[in] dp the disk partition (unused)
1425 * @param[in] name full path to the .vol header (unused)
1426 * @param[in] hdr the header data (unused)
1427 * @param[in] last whether this is the last try or not (unused)
1428 * @param[in] rock actually an afs_int32*; the running count of how many
1429 * volumes we have found
1434 CountHeader(struct DiskPartition64 *dp, const char *name,
1435 struct VolumeDiskHeader *hdr, int last, void *rock)
1437 afs_int32 *nvols = (afs_int32 *)rock;
1443 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
1446 struct SalvageScanParams {
1447 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1448 * vol id of the VG we're salvaging */
1449 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1450 * we're filling in */
1451 afs_int32 nVolumes; /**< # of vols we've encountered */
1452 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1453 * # of vols we've alloc'd memory for) */
1454 int retry; /**< do we need to retry vol lock/checkout? */
1458 * records volume summary info found from VWalkVolumeHeaders.
1460 * Found volumes are also taken offline if they are in the specific volume
1461 * group we are looking for.
1463 * @param[in] dp the disk partition
1464 * @param[in] name full path to the .vol header
1465 * @param[in] hdr the header data
1466 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1467 * @param[in] rock actually a struct SalvageScanParams*, containing the
1468 * information needed to record the volume summary data
1470 * @return operation status
1472 * @retval -1 volume locking raced with fileserver restart; checking out
1473 * and locking volumes needs to be retried
1474 * @retval 1 volume header is mis-named and should be deleted
1477 RecordHeader(struct DiskPartition64 *dp, const char *name,
1478 struct VolumeDiskHeader *hdr, int last, void *rock)
1480 char nameShouldBe[64];
1481 struct SalvageScanParams *params;
1482 struct VolumeSummary summary;
1483 VolumeId singleVolumeNumber;
1485 params = (struct SalvageScanParams *)rock;
1487 singleVolumeNumber = params->singleVolumeNumber;
1489 DiskToVolumeHeader(&summary.header, hdr);
1491 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1492 && summary.header.parent != singleVolumeNumber) {
1494 if (programType == salvageServer) {
1495 #ifdef SALVSYNC_BUILD_CLIENT
1496 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1497 summary.header.id, summary.header.parent);
1498 if (SALVSYNC_LinkVolume(summary.header.parent,
1502 Log("schedule request failed\n");
1505 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1508 Log("%u is a read-only volume; not salvaged\n",
1509 singleVolumeNumber);
1514 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1515 || summary.header.parent == singleVolumeNumber) {
1517 /* check if the header file is incorrectly named */
1519 const char *base = strrchr(name, '/');
1526 (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe,
1527 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1530 if (strcmp(nameShouldBe, base)) {
1531 /* .vol file has wrong name; retry/delete */
1535 if (!badname || last) {
1536 /* only offline the volume if the header is good, or if this is
1537 * the last try looking at it; avoid AskOffline'ing the same vol
1540 if (singleVolumeNumber
1541 && summary.header.id != singleVolumeNumber) {
1542 /* don't offline singleVolumeNumber; we already did that
1545 AskOffline(summary.header.id, fileSysPartition->name);
1547 #ifdef AFS_DEMAND_ATTACH_FS
1549 /* don't lock the volume if the header is bad, since we're
1550 * about to delete it anyway. */
1551 if (LockVolume(summary.header.id)) {
1556 #endif /* AFS_DEMAND_ATTACH_FS */
1560 if (last && !Showmode) {
1561 Log("Volume header file %s is incorrectly named (should be %s "
1562 "not %s); %sdeleted (it will be recreated later, if "
1563 "necessary)\n", name, nameShouldBe, base,
1564 (Testing ? "it would have been " : ""));
1569 summary.fileName = ToString(base);
1572 if (params->nVolumes > params->totalVolumes) {
1573 /* We found more volumes than we found on the first partition walk;
1574 * apparently something created a volume while we were
1575 * partition-salvaging, or we found more than 20 vols when salvaging a
1576 * particular volume. Abort if we detect this, since other programs are
1577 * supposed to not touch the partition while it is partition-salvaging,
1578 * and we shouldn't find more than 20 vols in a VG.
1580 Abort("Found %ld vol headers, but should have found at most %ld! "
1581 "Make sure the volserver/fileserver are not running at the "
1582 "same time as a partition salvage\n",
1583 afs_printable_int32_ld(params->nVolumes),
1584 afs_printable_int32_ld(params->totalVolumes));
1587 memcpy(params->vsp, &summary, sizeof(summary));
1595 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1597 * If the header could not be read in at all, the header is always unlinked.
1598 * If instead RecordHeader said the header was bad (that is, the header file
1599 * is mis-named), we only unlink if we are doing a partition salvage, as
1600 * opposed to salvaging a specific volume group.
1602 * @param[in] dp the disk partition
1603 * @param[in] name full path to the .vol header
1604 * @param[in] hdr header data, or NULL if the header could not be read
1605 * @param[in] rock actually a struct SalvageScanParams*, with some information
1609 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1610 struct VolumeDiskHeader *hdr, void *rock)
1612 struct SalvageScanParams *params;
1615 params = (struct SalvageScanParams *)rock;
1618 /* no header; header is too bogus to read in at all */
1620 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1626 } else if (!params->singleVolumeNumber) {
1627 /* We were able to read in a header, but RecordHeader said something
1628 * was wrong with it. We only unlink those if we are doing a partition
1635 if (dounlink && unlink(name)) {
1636 Log("Error %d while trying to unlink %s\n", errno, name);
1641 * Populates volumeSummaryp with volume summary information, either by asking
1642 * the fileserver for VG information, or by scanning the /vicepX partition.
1644 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1645 * are salvaging, or 0 if this is a partition
1648 * @return operation status
1650 * @retval -1 we raced with a fileserver restart; checking out and locking
1651 * volumes must be retried
1654 GetVolumeSummary(VolumeId singleVolumeNumber)
1656 afs_int32 nvols = 0;
1657 struct SalvageScanParams params;
1660 code = AskVolumeSummary(singleVolumeNumber);
1662 /* we successfully got the vol information from the fileserver; no
1663 * need to scan the partition */
1667 /* we need to retry volume checkout */
1671 if (!singleVolumeNumber) {
1672 /* Count how many volumes we have in /vicepX */
1673 code = VWalkVolumeHeaders(fileSysPartition, fileSysPath, CountHeader,
1676 Abort("Can't read directory %s; not salvaged\n", fileSysPath);
1681 nvols = VOL_VG_MAX_VOLS;
1684 volumeSummaryp = malloc(nvols * sizeof(struct VolumeSummary));
1685 assert(volumeSummaryp != NULL);
1687 params.singleVolumeNumber = singleVolumeNumber;
1688 params.vsp = volumeSummaryp;
1689 params.nVolumes = 0;
1690 params.totalVolumes = nvols;
1693 /* walk the partition directory of volume headers and record the info
1694 * about them; unlinking invalid headers */
1695 code = VWalkVolumeHeaders(fileSysPartition, fileSysPath, RecordHeader,
1696 UnlinkHeader, ¶ms);
1698 /* we apparently need to retry checking-out/locking volumes */
1702 Abort("Failed to get volume header summary\n");
1704 nVolumes = params.nVolumes;
1706 qsort(volumeSummaryp, nVolumes, sizeof(struct VolumeSummary),
1712 /* Find the link table. This should be associated with the RW volume or, if
1713 * an RO-only site, then the RO volume. For now, be cautious and hunt carefully.
1716 FindLinkHandle(register struct InodeSummary *isp, int nVols,
1717 struct ViceInodeInfo *allInodes)
1720 struct ViceInodeInfo *ip;
1722 for (i = 0; i < nVols; i++) {
1723 ip = allInodes + isp[i].index;
1724 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1725 if (ip[j].u.special.type == VI_LINKTABLE)
1726 return ip[j].inodeNumber;
/*
 * Set up the link table inode for a volume group.
 *
 * isp - inode summary of the volume; its volumeId and RWvolumeId are used
 *       when creating and initializing the inode handle.
 * ino - link table inode to use; when it is not VALID_INO, IH_CREATE is
 *       called with type VI_LINKTABLE to obtain one.
 *
 * VGLinkH is initialized for the inode, the file is truncated to a version
 * stamp plus one short, and the LINKTABLEMAGIC/LINKTABLEVERSION stamp is
 * written.  When isp->volSummary is set, the inode is also recorded in its
 * header.  The visible failure paths (allocate, open, truncate, write) are
 * reported through Abort().
 */
1733 CreateLinkTable(register struct InodeSummary *isp, Inode ino)
1735 struct versionStamp version;
1738 if (!VALID_INO(ino))
1740 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
1741 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1742 if (!VALID_INO(ino))
1744 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1745 isp->RWvolumeId, errno);
1746 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1747 fdP = IH_OPEN(VGLinkH);
1749 Abort("Can't open link table for volume %u (error = %d)\n",
1750 isp->RWvolumeId, errno);
1752 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1753 Abort("Can't truncate link table for volume %u (error = %d)\n",
1754 isp->RWvolumeId, errno);
1756 version.magic = LINKTABLEMAGIC;
1757 version.version = LINKTABLEVERSION;
/* A failed write of the version stamp is reported as a write failure, so it
 * is not mistaken for the truncate failure checked above. */
1759 if (FDH_WRITE(fdP, (char *)&version, sizeof(version))
1761 Abort("Can't write link table for volume %u (error = %d)\n",
1762 isp->RWvolumeId, errno);
1764 FDH_REALLYCLOSE(fdP);
1766 /* If the volume summary exists (i.e., the V*.vol header file exists),
1767 * then set this inode there as well.
1769 if (isp->volSummary)
1770 isp->volSummary->header.linkTable = ino;
1779 SVGParms_t *parms = (SVGParms_t *) arg;
1780 DoSalvageVolumeGroup(parms->svgp_inodeSummaryp, parms->svgp_count);
1785 SalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1788 pthread_attr_t tattr;
1792 /* Initialize per volume global variables, even if later code does so */
1796 memset(&VolInfo, 0, sizeof(VolInfo));
1798 parms.svgp_inodeSummaryp = isp;
1799 parms.svgp_count = nVols;
1800 code = pthread_attr_init(&tattr);
1802 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1806 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1808 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1811 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1813 Log("Failed to create thread to salvage volume group %u\n",
1817 (void)pthread_join(tid, NULL);
1819 #endif /* AFS_NT40_ENV */
1822 DoSalvageVolumeGroup(register struct InodeSummary *isp, int nVols)
1824 struct ViceInodeInfo *inodes, *allInodes, *ip;
1825 int i, totalInodes, size, salvageTo;
1829 int dec_VGLinkH = 0;
1831 FdHandle_t *fdP = NULL;
1834 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1835 && isp->nSpecialInodes > 0);
1836 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1837 if (!ForceSalvage && QuickCheck(isp, nVols))
1840 if (ShowMounts && !haveRWvolume)
1842 if (canfork && !debug && Fork() != 0) {
1843 (void)Wait("Salvage volume group");
1846 for (i = 0, totalInodes = 0; i < nVols; i++)
1847 totalInodes += isp[i].nInodes;
1848 size = totalInodes * sizeof(struct ViceInodeInfo);
1849 inodes = (struct ViceInodeInfo *)malloc(size);
1850 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1851 * for the partition, if all the inodes
1852 * had been read into memory */
1854 (inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1856 assert(read(inodeFd, inodes, size) == size);
1858 /* Don't try to salvage a read write volume if there isn't one on this
1860 salvageTo = haveRWvolume ? 0 : 1;
1862 #ifdef AFS_NAMEI_ENV
1863 ino = FindLinkHandle(isp, nVols, allInodes);
1864 if (VALID_INO(ino)) {
1865 IH_INIT(VGLinkH, fileSysDevice, isp->RWvolumeId, ino);
1866 fdP = IH_OPEN(VGLinkH);
1868 if (!VALID_INO(ino) || fdP == NULL) {
1869 Log("%s link table for volume %u.\n",
1870 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1872 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1875 struct ViceInodeInfo *ip;
1876 CreateLinkTable(isp, ino);
1877 fdP = IH_OPEN(VGLinkH);
1878 /* Sync fake 1 link counts to the link table, now that it exists */
1880 for (i = 0; i < nVols; i++) {
1881 ip = allInodes + isp[i].index;
1882 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1884 nt_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1886 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1894 FDH_REALLYCLOSE(fdP);
1896 IH_INIT(VGLinkH, fileSysDevice, -1, -1);
1899 /* Salvage in reverse order--read/write volume last; this way any
1900 * Inodes not referenced by the time we salvage the read/write volume
1901 * can be picked up by the read/write volume */
1902 /* ACTUALLY, that's not done right now--the inodes just vanish */
1903 for (i = nVols - 1; i >= salvageTo; i--) {
1905 struct InodeSummary *lisp = &isp[i];
1906 #ifdef AFS_NAMEI_ENV
1907 /* If only the RO is present on this partition, the link table
1908 * shows up as a RW volume special file. Need to make sure the
1909 * salvager doesn't try to salvage the non-existent RW.
1911 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1912 /* If this only special inode is the link table, continue */
1913 if (inodes->u.special.type == VI_LINKTABLE) {
1920 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
1921 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
1922 /* Check inodes twice. The second time do things seriously. This
1923 * way the whole RO volume can be deleted, below, if anything goes wrong */
1924 for (check = 1; check >= 0; check--) {
1926 if (SalvageVolumeHeaderFile(lisp, allInodes, rw, check, &deleteMe)
1928 MaybeZapVolume(lisp, "Volume header", deleteMe, check);
1929 if (rw && deleteMe) {
1930 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
1931 * volume won't be called */
1937 if (rw && check == 1)
1939 if (SalvageVnodes(isp, lisp, allInodes, check) == -1) {
1940 MaybeZapVolume(lisp, "Vnode index", 0, check);
1946 /* Fix actual inode counts */
1948 Log("totalInodes %d\n",totalInodes);
1949 for (ip = inodes; totalInodes; ip++, totalInodes--) {
1950 static int TraceBadLinkCounts = 0;
1951 #ifdef AFS_NAMEI_ENV
1952 if (VGLinkH->ih_ino == ip->inodeNumber) {
1953 dec_VGLinkH = ip->linkCount - VGLinkH_cnt;
1954 VGLinkH_p1 = ip->u.param[0];
1955 continue; /* Deal with this last. */
1958 if (ip->linkCount != 0 && TraceBadLinkCounts) {
1959 TraceBadLinkCounts--; /* Limit reports, per volume */
1960 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(NULL, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
1962 while (ip->linkCount > 0) {
1963 /* below used to assert, not break */
1965 if (IH_DEC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1966 Log("idec failed. inode %s errno %d\n",
1967 PrintInode(NULL, ip->inodeNumber), errno);
1973 while (ip->linkCount < 0) {
1974 /* these used to be asserts */
1976 if (IH_INC(VGLinkH, ip->inodeNumber, ip->u.param[0])) {
1977 Log("iinc failed. inode %s errno %d\n",
1978 PrintInode(NULL, ip->inodeNumber), errno);
1985 #ifdef AFS_NAMEI_ENV
1986 while (dec_VGLinkH > 0) {
1987 if (IH_DEC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1988 Log("idec failed on link table, errno = %d\n", errno);
1992 while (dec_VGLinkH < 0) {
1993 if (IH_INC(VGLinkH, VGLinkH->ih_ino, VGLinkH_p1) < 0) {
1994 Log("iinc failed on link table, errno = %d\n", errno);
2001 /* Directory consistency checks on the rw volume */
2003 SalvageVolume(isp, VGLinkH);
2004 IH_RELEASE(VGLinkH);
2006 if (canfork && !debug) {
2013 QuickCheck(register struct InodeSummary *isp, int nVols)
2015 /* Check headers BEFORE forking */
2019 for (i = 0; i < nVols; i++) {
2020 struct VolumeSummary *vs = isp[i].volSummary;
2021 VolumeDiskData volHeader;
2023 /* Don't salvage just because phantom rw volume is there... */
2024 /* (If a read-only volume exists, read/write inodes must also exist) */
2025 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2029 IH_INIT(h, fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2030 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2031 == sizeof(volHeader)
2032 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2033 && volHeader.dontSalvage == DONT_SALVAGE
2034 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2035 if (volHeader.inUse != 0) {
2036 volHeader.inUse = 0;
2037 volHeader.inService = 1;
2039 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2040 != sizeof(volHeader)) {
2056 /* SalvageVolumeHeaderFile
2058 * Salvage the top level V*.vol header file. Make sure the special files
2059 * exist and that there are no duplicates.
2061 * Calls SalvageHeader for each possible type of volume special file.
2065 SalvageVolumeHeaderFile(register struct InodeSummary *isp,
2066 register struct ViceInodeInfo *inodes, int RW,
2067 int check, int *deleteMe)
2070 register struct ViceInodeInfo *ip;
2071 int allinodesobsolete = 1;
2072 struct VolumeDiskHeader diskHeader;
2073 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2076 /* keeps track of special inodes that are probably 'good'; they are
2077 * referenced in the vol header, and are included in the given inodes
2082 } goodspecial[MAXINODETYPE];
2087 memset(goodspecial, 0, sizeof(goodspecial));
2089 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2091 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2093 Log("cannot allocate memory for inode skip array when salvaging "
2094 "volume %lu; not performing duplicate special inode recovery\n",
2095 afs_printable_uint32_lu(isp->volumeId));
2096 /* still try to perform the salvage; the skip array only does anything
2097 * if we detect duplicate special inodes */
2101 * First, look at the special inodes and see if any are referenced by
2102 * the existing volume header. If we find duplicate special inodes, we
2103 * can use this information to use the referenced inode (it's more
2104 * likely to be the 'good' one), and throw away the duplicates.
2106 if (isp->volSummary && skip) {
2107 /* use tempHeader, so we can use the stuff[] array to easily index
2108 * into the isp->volSummary special inodes */
2109 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2111 for (i = 0; i < isp->nSpecialInodes; i++) {
2112 ip = &inodes[isp->index + i];
2113 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2114 /* will get taken care of in a later loop */
2117 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2118 goodspecial[ip->u.special.type-1].valid = 1;
2119 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2124 memset(&tempHeader, 0, sizeof(tempHeader));
2125 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2126 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2127 tempHeader.id = isp->volumeId;
2128 tempHeader.parent = isp->RWvolumeId;
2130 /* Check for duplicates (inodes are sorted by type field) */
2131 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2132 ip = &inodes[isp->index + i];
2133 if (ip->u.special.type == (ip + 1)->u.special.type) {
2134 afs_ino_str_t stmp1, stmp2;
2136 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2137 /* Will be caught in the loop below */
2141 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2142 ip->u.special.type, isp->volumeId,
2143 PrintInode(stmp1, ip->inodeNumber),
2144 PrintInode(stmp2, (ip+1)->inodeNumber));
2146 if (skip && goodspecial[ip->u.special.type-1].valid) {
2147 Inode gi = goodspecial[ip->u.special.type-1].inode;
2150 Log("using special inode referenced by vol header (%s)\n",
2151 PrintInode(stmp1, gi));
2154 /* the volume header references some special inode of
2155 * this type in the inodes array; are we it? */
2156 if (ip->inodeNumber != gi) {
2158 } else if ((ip+1)->inodeNumber != gi) {
2159 /* in case this is the last iteration; we need to
2160 * make sure we check ip+1, too */
2165 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2173 for (i = 0; i < isp->nSpecialInodes; i++) {
2174 ip = &inodes[isp->index + i];
2175 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2177 Log("Rubbish header inode %s of type %d\n",
2178 PrintInode(NULL, ip->inodeNumber),
2179 ip->u.special.type);
2185 Log("Rubbish header inode %s of type %d; deleted\n",
2186 PrintInode(NULL, ip->inodeNumber),
2187 ip->u.special.type);
2188 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2189 if (skip && skip[i]) {
2190 if (orphans == ORPH_REMOVE) {
2191 Log("Removing orphan special inode %s of type %d\n",
2192 PrintInode(NULL, ip->inodeNumber), ip->u.special.type);
2195 Log("Ignoring orphan special inode %s of type %d\n",
2196 PrintInode(NULL, ip->inodeNumber), ip->u.special.type);
2197 /* fall through to the ip->linkCount--; line below */
2200 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2201 allinodesobsolete = 0;
2203 if (!check && ip->u.special.type != VI_LINKTABLE)
2204 ip->linkCount--; /* Keep the inode around */
2212 if (allinodesobsolete) {
2219 VGLinkH_cnt++; /* one for every header. */
2221 if (!RW && !check && isp->volSummary) {
2222 ClearROInUseBit(isp->volSummary);
2226 for (i = 0; i < MAXINODETYPE; i++) {
2227 if (stuff[i].inodeType == VI_LINKTABLE) {
2228 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2229 * And we may have recreated the link table earlier, so set the
2230 * RW header as well.
2232 if (VALID_INO(VGLinkH->ih_ino)) {
2233 *stuff[i].inode = VGLinkH->ih_ino;
2237 if (SalvageHeader(&stuff[i], isp, check, deleteMe) == -1 && check)
2241 if (isp->volSummary == NULL) {
2243 char headerName[64];
2244 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2245 (void)afs_snprintf(path, sizeof path, "%s/%s", fileSysPath, headerName);
2247 Log("No header file for volume %u\n", isp->volumeId);
2251 Log("No header file for volume %u; %screating %s\n",
2252 isp->volumeId, (Testing ? "it would have been " : ""),
2254 isp->volSummary = (struct VolumeSummary *)
2255 malloc(sizeof(struct VolumeSummary));
2256 isp->volSummary->fileName = ToString(headerName);
2258 writefunc = VCreateVolumeDiskHeader;
2261 char headerName[64];
2262 /* hack: these two fields are obsolete... */
2263 isp->volSummary->header.volumeAcl = 0;
2264 isp->volSummary->header.volumeMountTable = 0;
2267 (&isp->volSummary->header, &tempHeader,
2268 sizeof(struct VolumeHeader))) {
2269 /* We often remove the name before calling us, so we make a fake one up */
2270 if (isp->volSummary->fileName) {
2271 strcpy(headerName, isp->volSummary->fileName);
2273 (void)afs_snprintf(headerName, sizeof headerName, VFORMAT, afs_printable_uint32_lu(isp->volumeId));
2274 isp->volSummary->fileName = ToString(headerName);
2276 (void)afs_snprintf(path, sizeof path, "%s/%s", fileSysPath, headerName);
2278 Log("Header file %s is damaged or no longer valid%s\n", path,
2279 (check ? "" : "; repairing"));
2283 writefunc = VWriteVolumeDiskHeader;
2287 memcpy(&isp->volSummary->header, &tempHeader,
2288 sizeof(struct VolumeHeader));
2291 Log("It would have written a new header file for volume %u\n",
2295 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2296 code = (*writefunc)(&diskHeader, fileSysPartition);
2298 Log("Error %ld writing volume header file for volume %lu\n",
2299 afs_printable_int32_ld(code),
2300 afs_printable_uint32_lu(diskHeader.id));
2305 IH_INIT(isp->volSummary->volumeInfoHandle, fileSysDevice, isp->RWvolumeId,
2306 isp->volSummary->header.volumeInfo);
2311 SalvageHeader(register struct stuff *sp, struct InodeSummary *isp, int check,
2315 VolumeDiskData volumeInfo;
2316 struct versionStamp fileHeader;
2325 #ifndef AFS_NAMEI_ENV
2326 if (sp->inodeType == VI_LINKTABLE)
2329 if (*(sp->inode) == 0) {
2331 Log("Missing inode in volume header (%s)\n", sp->description);
2335 Log("Missing inode in volume header (%s); %s\n", sp->description,
2336 (Testing ? "it would have recreated it" : "recreating"));
2339 IH_CREATE(NULL, fileSysDevice, fileSysPath, 0, isp->volumeId,
2340 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2341 if (!VALID_INO(*(sp->inode)))
2343 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2344 sp->description, errno);
2349 IH_INIT(specH, fileSysDevice, isp->RWvolumeId, *(sp->inode));
2350 fdP = IH_OPEN(specH);
2351 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2352 /* bail out early and destroy the volume */
2354 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2361 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2362 sp->description, errno);
2365 && (FDH_READ(fdP, (char *)&header, sp->size) != sp->size
2366 || header.fileHeader.magic != sp->stamp.magic)) {
2368 Log("Part of the header (%s) is corrupted\n", sp->description);
2369 FDH_REALLYCLOSE(fdP);
2373 Log("Part of the header (%s) is corrupted; recreating\n",
2377 if (sp->inodeType == VI_VOLINFO
2378 && header.volumeInfo.destroyMe == DESTROY_ME) {
2381 FDH_REALLYCLOSE(fdP);
2385 if (recreate && !Testing) {
2388 ("Internal error: recreating volume header (%s) in check mode\n",
2390 code = FDH_TRUNC(fdP, 0);
2392 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2393 sp->description, errno);
2395 /* The following code should be moved into vutil.c */
2396 if (sp->inodeType == VI_VOLINFO) {
2398 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2399 header.volumeInfo.stamp = sp->stamp;
2400 header.volumeInfo.id = isp->volumeId;
2401 header.volumeInfo.parentId = isp->RWvolumeId;
2402 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2403 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2404 isp->volumeId, isp->volumeId);
2405 header.volumeInfo.inService = 0;
2406 header.volumeInfo.blessed = 0;
2407 /* The + 1000 is a hack in case there are any files out in venus caches */
2408 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2409 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2410 header.volumeInfo.needsCallback = 0;
2411 gettimeofday(&tp, 0);
2412 header.volumeInfo.creationDate = tp.tv_sec;
2413 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
2415 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
2416 sp->description, errno);
2419 FDH_WRITE(fdP, (char *)&header.volumeInfo,
2420 sizeof(header.volumeInfo));
2421 if (code != sizeof(header.volumeInfo)) {
2424 ("Unable to write volume header file (%s) (errno = %d)\n",
2425 sp->description, errno);
2426 Abort("Unable to write entire volume header file (%s)\n",
2430 if (FDH_SEEK(fdP, 0, SEEK_SET) < 0) {
2432 ("Unable to seek to beginning of volume header file (%s) (errno = %d)\n",
2433 sp->description, errno);
2435 code = FDH_WRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp));
2436 if (code != sizeof(sp->stamp)) {
2439 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2440 sp->description, errno);
2442 ("Unable to write entire version stamp in volume header file (%s)\n",
2447 FDH_REALLYCLOSE(fdP);
2449 if (sp->inodeType == VI_VOLINFO) {
2450 VolInfo = header.volumeInfo;
2453 if (VolInfo.updateDate) {
2454 strcpy(update, TimeStamp(VolInfo.updateDate, 0));
2456 Log("%s (%u) %supdated %s\n", VolInfo.name, VolInfo.id,
2457 (Testing ? "it would have been " : ""), update);
2459 strcpy(update, TimeStamp(VolInfo.creationDate, 0));
2461 Log("%s (%u) not updated (created %s)\n", VolInfo.name,
2462 VolInfo.id, update);
/*
 * SalvageVnodes: run SalvageIndex() over the small and then the large vnode
 * index of the volume described by thisIsp.
 *
 *   rwIsp   - inode summary of the read-write volume; its index and
 *             nSpecialInodes fields locate the first ordinary inode in
 *             inodes[], and nInodes - nSpecialInodes gives how many there are.
 *   thisIsp - inode summary of the volume whose indexes are salvaged.
 *   inodes  - inode table indexed by rwIsp->index.
 *   check   - passed through unchanged to SalvageIndex().
 *
 * Returns 0 when both ismall and ilarge are 0, otherwise -1.
 * NOTE(review): the lines that assign ismall/ilarge, and the statement run
 * when (check && ismall == -1), are not visible in this listing; presumably
 * they hold the SalvageIndex() results and bail out early -- confirm.
 */
2472 SalvageVnodes(register struct InodeSummary *rwIsp,
2473 register struct InodeSummary *thisIsp,
2474 register struct ViceInodeInfo *inodes, int check)
2476 int ilarge, ismall, ioffset, RW, nInodes;
2477 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
/* RW is true only when the volume being salvaged is the read-write volume itself. */
2480 RW = (rwIsp == thisIsp);
/* Only the inodes that follow the special inodes are offered to SalvageIndex(). */
2481 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2483 SalvageIndex(thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2484 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2485 if (check && ismall == -1)
2488 SalvageIndex(thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2489 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2490 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex: walk one vnode index file and reconcile every allocated
 * vnode with the inode list.
 *
 *   ino        - inode of the vnode index file to open.
 *   class      - vnode class of the index (selects VnodeClassInfo[class]).
 *   RW         - non-zero when the index belongs to the read-write volume.
 *   ip/nInodes - cursor into, and remaining length of, the inode list; the
 *                loops below only ever advance past entries whose
 *                vnodeNumber is <= the vnode being examined.
 *                NOTE(review): this presumes the list is ordered by vnode
 *                number ("determined by our sort") -- confirm at the caller.
 *   volSummary - supplies the volume id and parent id for the index handle.
 *   check      - when set, no vnode may have been changed by the time the
 *                write-back point is reached (enforced by an assert).
 *
 * For each non-null vnode the visible code: clears vnodes with inode number 0
 * or a bad magic number, picks the inode that matches the vnode, then brings
 * the vnode's uniquifier, data version, inode number and length into line
 * with that inode.  A vnode with no matching inode is logged and cleared.
 * Changed vnodes are written back unless Testing, and VolumeChanged is set.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * SalvageVnodes() compares the result against 0 and -1.
 */
2494 SalvageIndex(Inode ino, VnodeClass class, int RW,
2495 register struct ViceInodeInfo *ip, int nInodes,
2496 struct VolumeSummary *volSummary, int check)
2498 VolumeId volumeNumber;
2499 char buf[SIZEOF_LARGEDISKVNODE];
2500 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2502 StreamHandle_t *file;
2503 struct VnodeClassInfo *vcp;
2505 afs_fsize_t vnodeLength;
2506 int vnodeIndex, nVnodes;
2507 afs_ino_str_t stmp1, stmp2;
2511 volumeNumber = volSummary->header.id;
/* Open the index file and wrap the descriptor in a read/write stream. */
2512 IH_INIT(handle, fileSysDevice, volSummary->header.parent, ino);
2513 fdP = IH_OPEN(handle);
2514 assert(fdP != NULL);
2515 file = FDH_FDOPEN(fdP, "r+");
2516 assert(file != NULL);
2517 vcp = &VnodeClassInfo[class];
2518 size = OS_SIZE(fdP->fd_fd);
/* One diskSize-sized record at the start of the file is not a vnode, hence the "- 1". */
2520 nVnodes = (size / vcp->diskSize) - 1;
2522 assert((nVnodes + 1) * vcp->diskSize == size);
/* Position the stream on the first vnode record, one record into the file. */
2523 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
2527 for (vnodeIndex = 0;
2528 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2529 nVnodes--, vnodeIndex++) {
2530 if (vnode->type != vNull) {
2531 int vnodeChanged = 0;
2532 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2533 /* Log programs that belong to root (potentially suid root);
2534 * don't bother for read-only or backup volumes */
2535 #ifdef notdef /* This is done elsewhere */
2536 if (ShowRootFiles && RW && vnode->owner == 0 && vnodeNumber != 1)
2537 Log("OWNER IS ROOT %s %u dir %u vnode %u author %u owner %u mode %o\n", VolInfo.name, volumeNumber, vnode->parent, vnodeNumber, vnode->author, vnode->owner, vnode->modeBits);
2539 if (VNDISK_GET_INO(vnode) == 0) {
2541 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2542 memset(vnode, 0, vcp->diskSize);
2546 if (vcp->magic != vnode->vnodeMagic) {
2547 /* bad magic #, probably partially created vnode */
2548 Log("Partially allocated vnode %d deleted.\n",
2550 memset(vnode, 0, vcp->diskSize);
2554 /* ****** Should do a bit more salvage here: e.g. make sure
2555 * vnode type matches what it should be given the index */
/* Step over inode entries that belong to lower-numbered vnodes. */
2556 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2557 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2558 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2559 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2566 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2567 /* The following doesn't work, because the version number
2568 * is not maintained correctly by the file server */
2569 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2570 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2572 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2578 /* For RW volume, look for vnode with matching inode number;
2579 * if no such match, take the first determined by our sort
2581 register struct ViceInodeInfo *lip = ip;
2582 register int lnInodes = nInodes;
2584 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2585 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2594 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2595 /* "Matching" inode */
2599 vu = vnode->uniquifier;
2600 iu = ip->u.vnode.vnodeUniquifier;
2601 vd = vnode->dataVersion;
2602 id = ip->u.vnode.inodeDataVersion;
2604 * Because of the possibility of the uniquifier overflows (> 4M)
2605 * we compare them modulo the low 22-bits; we shouldn't worry
2606 * about mismatching since they shouldn't to many old
2607 * uniquifiers of the same vnode...
2609 if (IUnique(vu) != IUnique(iu)) {
2611 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2614 vnode->uniquifier = iu;
2615 #ifdef AFS_3DISPARES
2616 vnode->dataVersion = (id >= vd ?
2619 1887437 ? vd : id) :
2622 1887437 ? id : vd));
2624 #if defined(AFS_SGI_EXMAG)
2625 vnode->dataVersion = (id >= vd ?
2628 15099494 ? vd : id) :
2631 15099494 ? id : vd));
2633 vnode->dataVersion = (id > vd ? id : vd);
2634 #endif /* AFS_SGI_EXMAG */
2635 #endif /* AFS_3DISPARES */
2638 /* don't bother checking for vd > id any more, since
2639 * partial file transfers always result in this state,
2640 * and you can't do much else anyway (you've already
2641 * found the best data you can) */
2642 #ifdef AFS_3DISPARES
2643 if (!vnodeIsDirectory(vnodeNumber)
2644 && ((vd < id && (id - vd) < 1887437)
2645 || ((vd > id && (vd - id) > 1887437)))) {
2647 #if defined(AFS_SGI_EXMAG)
2648 if (!vnodeIsDirectory(vnodeNumber)
2649 && ((vd < id && (id - vd) < 15099494)
2650 || ((vd > id && (vd - id) > 15099494)))) {
2652 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2653 #endif /* AFS_SGI_EXMAG */
2656 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2657 vnode->dataVersion = id;
2662 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2665 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2667 VNDISK_SET_INO(vnode, ip->inodeNumber);
2672 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2674 VNDISK_SET_INO(vnode, ip->inodeNumber);
2677 VNDISK_GET_LEN(vnodeLength, vnode);
2678 if (ip->byteCount != vnodeLength) {
2681 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2686 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2687 VNDISK_SET_LEN(vnode, ip->byteCount);
2691 ip->linkCount--; /* Keep the inode around */
2694 } else { /* no matching inode */
2695 if (VNDISK_GET_INO(vnode) != 0
2696 || vnode->type == vDirectory) {
2697 /* No matching inode--get rid of the vnode */
2699 if (VNDISK_GET_INO(vnode)) {
2701 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)));
2705 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2710 if (VNDISK_GET_INO(vnode)) {
2712 time_t serverModifyTime = vnode->serverModifyTime;
2713 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(NULL, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2717 time_t serverModifyTime = vnode->serverModifyTime;
2718 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2721 memset(vnode, 0, vcp->diskSize);
2724 /* Should not reach here because we checked for
2725 * (inodeNumber == 0) above. And where we zero the vnode,
2726 * we also goto vnodeDone.
2730 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2734 } /* VNDISK_GET_INO(vnode) != 0 */
/* In check mode no repair may have been made by the time this point is reached. */
2736 assert(!(vnodeChanged && check));
/* A repaired vnode is written back to its slot in the index only when not Testing. */
2737 if (vnodeChanged && !Testing) {
2739 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2740 (char *)vnode, vcp->diskSize)
2742 VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber: look up the in-memory VnodeEssence for a vnode number.
 *
 * The vnode number selects a class (vnodeIdToClass) and a slot within that
 * class (vnodeIdToBitNumber).  Returns a pointer into vnodeInfo[class].vnodes,
 * or NULL when the slot is at or beyond vnodeInfo[class].nVnodes.
 */
2753 struct VnodeEssence *
2754 CheckVnodeNumber(VnodeId vnodeNumber)
2757 struct VnodeInfo *vip;
2760 class = vnodeIdToClass(vnodeNumber);
2761 vip = &vnodeInfo[class];
2762 offset = vnodeIdToBitNumber(vnodeNumber);
/* Out-of-range slots yield NULL rather than a pointer past the array. */
2763 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite: give `dir' a fresh copy of its directory inode before the
 * salvager modifies the directory.
 *
 * Does nothing when the directory has already been copied (dir->copied) or
 * when running in Testing mode.  Otherwise the visible code flushes the
 * directory buffers, reads the directory's vnode from the large vnode index,
 * creates a new inode, copies the old inode's contents into it, stores the
 * new inode number in the vnode, writes the vnode back and re-points
 * dir->dirHandle at the new inode.  The old inode is deliberately left in
 * place because the directory is still being scanned.
 *
 * NOTE(review): the amount added to vnode.dataVersion, the assignments that
 * receive the IH_IREAD/IH_CREATE/IH_IWRITE results, and the final statements
 * (presumably setting dir->copied) are not visible in this listing -- confirm.
 */
2767 CopyOnWrite(register struct DirSummary *dir)
2769 /* Copy the directory unconditionally if we are going to change it:
2770 * not just if was cloned.
2772 struct VnodeDiskObject vnode;
2773 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2774 Inode oldinode, newinode;
2777 if (dir->copied || Testing)
2779 DFlush(); /* Well justified paranoia... */
2782 IH_IREAD(vnodeInfo[vLarge].handle,
2783 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2785 assert(code == sizeof(vnode));
2786 oldinode = VNDISK_GET_INO(&vnode);
2787 /* Increment the version number by a whole lot to avoid problems with
2788 * clients that were promised new version numbers--but the file server
2789 * crashed before the versions were written to disk.
2792 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2793 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2795 assert(VALID_INO(newinode));
2796 assert(CopyInode(fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
/* Point the on-disk vnode at the copy before switching the directory handle over. */
2798 VNDISK_SET_INO(&vnode, newinode);
2800 IH_IWRITE(vnodeInfo[vLarge].handle,
2801 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2803 assert(code == sizeof(vnode));
2805 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2806 fileSysDevice, newinode);
2807 /* Don't delete the original inode right away, because the directory is
2808 * still being scanned.
2814 * This function should either successfully create a new dir, or give up
2815 * and leave things the way they were. In particular, if it fails to write
2816 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage: rebuild the directory `dir' into a newly created inode and
 * switch the directory over to it.
 *
 * Visible steps: read the directory's vnode from the large vnode index,
 * create a new inode, run DirSalvage() from the old directory handle into
 * the new one, and verify the result with DirOK().  If either step fails the
 * new inode's link count is dropped with IH_DEC.  On success the vnode is
 * updated with the new inode number and length and written back, the
 * partition is synced, the old directory file is closed, the old inode's
 * link count is dropped, and dir->dirHandle is replaced by the new handle.
 *
 * parentUnique defaults to 1 and is replaced by the parent's recorded
 * uniquifier when the parent vnode is known and that uniquifier is non-zero.
 *
 * NOTE(review): the return paths after the failure branches are not visible
 * in this listing.
 */
2820 CopyAndSalvage(register struct DirSummary *dir)
2822 struct VnodeDiskObject vnode;
2823 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2824 Inode oldinode, newinode;
2829 afs_int32 parentUnique = 1;
2830 struct VnodeEssence *vnodeEssence;
2835 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2837 IH_IREAD(vnodeInfo[vLarge].handle,
2838 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2840 assert(lcode == sizeof(vnode));
2841 oldinode = VNDISK_GET_INO(&vnode);
2842 /* Increment the version number by a whole lot to avoid problems with
2843 * clients that were promised new version numbers--but the file server
2844 * crashed before the versions were written to disk.
2847 IH_CREATE(dir->ds_linkH, fileSysDevice, fileSysPath, 0, dir->rwVid,
2848 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2850 assert(VALID_INO(newinode));
2851 SetSalvageDirHandle(&newdir, dir->rwVid, fileSysDevice, newinode);
2853 /* Assign . and .. vnode numbers from dir and vnode.parent.
2854 * The uniquifier for . is in the vnode.
2855 * The uniquifier for .. might be set to a bogus value of 1 and
2856 * the salvager will later clean it up.
2858 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(vnode.parent))) {
2859 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2862 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2864 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2869 /* didn't really build the new directory properly, let's just give up. */
2870 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2871 Log("Directory salvage returned code %d, continuing.\n", code);
2873 Log("also failed to decrement link count on new inode");
2877 Log("Checking the results of the directory salvage...\n");
2878 if (!DirOK(&newdir)) {
2879 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2880 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
/* The rebuilt directory passed DirOK(): record its inode and length in the vnode. */
2885 VNDISK_SET_INO(&vnode, newinode);
2886 length = Length(&newdir);
2887 VNDISK_SET_LEN(&vnode, length);
2889 IH_IWRITE(vnodeInfo[vLarge].handle,
2890 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2892 assert(lcode == sizeof(vnode));
2895 nt_sync(fileSysDevice);
2897 sync(); /* this is slow, but hopefully rarely called. We don't have
2898 * an open FD on the file itself to fsync.
2902 vnodeInfo[vLarge].handle->ih_synced = 1;
2904 /* make sure old directory file is really closed */
/* NOTE(review): the IH_OPEN result is handed to FDH_REALLYCLOSE without a visible NULL check -- confirm this is safe. */
2905 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2906 FDH_REALLYCLOSE(fdP);
2908 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2910 dir->dirHandle = newdir;
/*
 * JudgeEntry: per-entry callback handed to EnumerateDir() by SalvageDir().
 *
 *   dirVal      - the struct DirSummary of the directory being scanned.
 *   name        - the entry's name.
 *   vnodeNumber - vnode number stored in the entry.
 *   unique      - uniquifier stored in the entry (the parameter declaration
 *                 line is not visible in this listing, but the body uses it).
 *
 * The visible checks, in order:
 *   - the entry must refer to a known vnode (CheckVnodeNumber), else it is
 *     logged and deleted;
 *   - without AFS_NAMEI_ENV, entries whose vnode has inode number 0 are
 *     logged and deleted;
 *   - even-numbered vnodes with no count, unique or mode bits are reported
 *     as invalid;
 *   - a uniquifier mismatch causes the entry to be deleted and, unless it is
 *     marked for deletion, re-created with the vnode's uniquifier;
 *   - "." and ".." are forced to refer to this directory and its parent;
 *   - names starting with ".__afs" are deleted and their vnode is marked
 *     unclaimed and todelete;
 *   - ordinary entries get suid/sgid, mount-point and root-owner reporting,
 *     have their name remembered for large vnodes, and are then either
 *     claimed by this directory or removed from it.
 * At the end the vnode is marked claimed and its count is decremented.
 *
 * NOTE(review): most "if (!Testing)" guards and several return statements
 * are not visible in this listing; do not infer control flow from
 * indentation-free text alone.
 */
2914 JudgeEntry(void *dirVal, char *name, afs_int32 vnodeNumber,
2917 struct DirSummary *dir = (struct DirSummary *)dirVal;
2918 struct VnodeEssence *vnodeEssence;
2919 afs_int32 dirOrphaned, todelete;
/* Whether the directory itself is orphaned decides below if it may claim or keep entries. */
2921 dirOrphaned = IsVnodeOrphaned(dir->vnodeNumber);
2923 vnodeEssence = CheckVnodeNumber(vnodeNumber);
2924 if (vnodeEssence == NULL) {
2926 Log("dir vnode %u: invalid entry deleted: %s/%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
2930 assert(Delete(&dir->dirHandle, name) == 0);
2935 #ifndef AFS_NAMEI_ENV
2936 /* On AIX machines, don't allow entries to point to inode 0. That is a special
2937 * mount inode for the partition. If this inode were deleted, it would crash
2940 if (vnodeEssence->InodeNumber == 0) {
2941 Log("dir vnode %d: invalid entry: %s/%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
2944 assert(Delete(&dir->dirHandle, name) == 0);
2951 if (!(vnodeNumber & 1) && !Showmode
2952 && !(vnodeEssence->count || vnodeEssence->unique
2953 || vnodeEssence->modeBits)) {
2954 Log("dir vnode %u: invalid entry: %s/%s (vnode %u, unique %u)%s\n",
2955 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
2956 vnodeNumber, unique,
2957 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
2962 assert(Delete(&dir->dirHandle, name) == 0);
2968 /* Check if the Uniquifiers match. If not, change the directory entry
2969 * so its unique matches the vnode unique. Delete if the unique is zero
2970 * or if the directory is orphaned.
2972 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
2973 if (!vnodeEssence->unique
2974 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
2975 /* This is an orphaned directory. Don't delete the . or ..
2976 * entry. Otherwise, it will get created in the next
2977 * salvage and deleted again here. So Just skip it.
2982 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
2985 Log("dir vnode %u: %s/%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
2989 fid.Vnode = vnodeNumber;
2990 fid.Unique = vnodeEssence->unique;
2992 assert(Delete(&dir->dirHandle, name) == 0);
2994 assert(Create(&dir->dirHandle, name, &fid) == 0);
2997 return 0; /* no need to continue */
/* Name-specific handling: ".", "..", leftover ".__afs" files, then ordinary entries. */
3000 if (strcmp(name, ".") == 0) {
3001 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3004 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3007 assert(Delete(&dir->dirHandle, ".") == 0);
3008 fid.Vnode = dir->vnodeNumber;
3009 fid.Unique = dir->unique;
3010 assert(Create(&dir->dirHandle, ".", &fid) == 0);
3013 vnodeNumber = fid.Vnode; /* Get the new Essence */
3014 unique = fid.Unique;
3015 vnodeEssence = CheckVnodeNumber(vnodeNumber);
3018 } else if (strcmp(name, "..") == 0) {
3021 struct VnodeEssence *dotdot;
3022 pa.Vnode = dir->parent;
3023 dotdot = CheckVnodeNumber(pa.Vnode);
3024 assert(dotdot != NULL); /* XXX Should not be assert */
3025 pa.Unique = dotdot->unique;
3027 pa.Vnode = dir->vnodeNumber;
3028 pa.Unique = dir->unique;
3030 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3032 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3035 assert(Delete(&dir->dirHandle, "..") == 0);
3036 assert(Create(&dir->dirHandle, "..", &pa) == 0);
3039 vnodeNumber = pa.Vnode; /* Get the new Essence */
3041 vnodeEssence = CheckVnodeNumber(vnodeNumber);
3043 dir->haveDotDot = 1;
3044 } else if (strncmp(name, ".__afs", 6) == 0) {
3046 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3050 assert(Delete(&dir->dirHandle, name) == 0);
3052 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3053 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3056 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3057 Log("FOUND suid/sgid file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/* Symlinks with none of the 0111 mode bits set are examined as mount points. */
3058 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3059 && !(vnodeEssence->modeBits & 0111)) {
3065 IH_INIT(ihP, fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3066 vnodeEssence->InodeNumber);
3069 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3073 size = FDH_SIZE(fdP);
3075 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, size, vnodeNumber);
3076 FDH_REALLYCLOSE(fdP);
3083 code = FDH_READ(fdP, buf, size);
/* NOTE(review): buf[strlen(buf)-1] needs buf to be NUL-terminated and non-empty; the size check and terminator are not visible in this listing -- confirm. */
3086 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3087 Log("Volume %u (%s) mount point %s/%s to '%s' invalid, %s to symbolic link\n",
3088 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3089 Testing ? "would convert" : "converted");
3090 vnodeEssence->modeBits |= 0111;
3091 vnodeEssence->changed = 1;
3092 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s/%s to '%s'\n",
3093 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3094 dir->name ? dir->name : "??", name, buf);
3096 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3097 dir->vname, vnodeNumber, size, code);
3099 FDH_REALLYCLOSE(fdP);
3102 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3103 Log("FOUND root file: %s/%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/* Remember the entry name for large-class vnodes that have no name recorded yet. */
3104 if (vnodeIdToClass(vnodeNumber) == vLarge
3105 && vnodeEssence->name == NULL) {
3107 if ((n = (char *)malloc(strlen(name) + 1)))
3109 vnodeEssence->name = n;
3112 /* The directory entry points to the vnode. Check to see if the
3113 * vnode points back to the directory. If not, then let the
3114 * directory claim it (else it might end up orphaned). Vnodes
3115 * already claimed by another directory are deleted from this
3116 * directory: hardlinks to the same vnode are not allowed
3117 * from different directories.
3119 if (vnodeEssence->parent != dir->vnodeNumber) {
3120 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3121 /* Vnode does not point back to this directory.
3122 * Orphaned dirs cannot claim a file (it may belong to
3123 * another non-orphaned dir).
3126 Log("dir vnode %u: %s/%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3128 vnodeEssence->parent = dir->vnodeNumber;
3129 vnodeEssence->changed = 1;
3131 /* Vnode was claimed by another directory */
3134 Log("dir vnode %u: %s/%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3135 } else if (vnodeNumber == 1) {
3136 Log("dir vnode %d: %s/%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3138 Log("dir vnode %u: %s/%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3143 assert(Delete(&dir->dirHandle, name) == 0);
3148 /* This directory claims the vnode */
3149 vnodeEssence->claimed = 1;
3151 vnodeEssence->count--;
/*
 * DistilVnodeEssence: read one vnode index file and build the in-memory
 * summary (vnodeInfo[class]) that the rest of the salvager works from.
 *
 *   rwVId - volume id used to initialise the index handle.
 *   class - vnode class of the index; selects vnodeInfo[class] and
 *           VnodeClassInfo[class].
 *   ino   - inode of the vnode index file.
 *   maxu  - in/out: raised to the largest vnode uniquifier seen.
 *
 * For every non-null vnode a struct VnodeEssence is filled in (link count,
 * block count, parent, uniquifier, mode bits, inode number, type, author,
 * owner, group) and the class totals nAllocatedVnodes / volumeBlockCount
 * are accumulated.  For the large class the inode number of each directory
 * is also recorded in vip->inodes[].  A directory-typed vnode found in any
 * other class is cleared on disk and removed from the allocated count.
 *
 * Fix: `vnode' is a pointer into `buf', so the previous
 * memset(vnode, 0, sizeof(vnode)) cleared only pointer-size bytes and the
 * previous IH_IWRITE(..., (char *)&vnode, sizeof(vnode)) wrote the bytes of
 * the pointer variable itself into the index file.  Both now operate on the
 * vnode record, vcp->diskSize bytes long, as SalvageIndex() does.
 */
3156 DistilVnodeEssence(VolumeId rwVId, VnodeClass class, Inode ino, Unique * maxu)
3158 register struct VnodeInfo *vip = &vnodeInfo[class];
3159 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3160 char buf[SIZEOF_LARGEDISKVNODE];
3161 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3163 StreamHandle_t *file;
/* Open the index file and wrap the descriptor in a read/write stream. */
3168 IH_INIT(vip->handle, fileSysDevice, rwVId, ino);
3169 fdP = IH_OPEN(vip->handle);
3170 assert(fdP != NULL);
3171 file = FDH_FDOPEN(fdP, "r+");
3172 assert(file != NULL);
3173 size = OS_SIZE(fdP->fd_fd);
/* One diskSize-sized record at the start of the file is not a vnode, hence the "- 1". */
3175 vip->nVnodes = (size / vcp->diskSize) - 1;
3176 if (vip->nVnodes > 0) {
3177 assert((vip->nVnodes + 1) * vcp->diskSize == size);
3178 assert(STREAM_SEEK(file, vcp->diskSize, 0) == 0);
3179 assert((vip->vnodes = (struct VnodeEssence *)
3180 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3181 if (class == vLarge) {
3182 assert((vip->inodes = (Inode *)
3183 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3192 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3193 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3194 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3195 nVnodes--, vnodeIndex++) {
3196 if (vnode->type != vNull) {
3197 register struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3198 afs_fsize_t vnodeLength;
3199 vip->nAllocatedVnodes++;
3200 vep->count = vnode->linkCount;
3201 VNDISK_GET_LEN(vnodeLength, vnode);
3202 vep->blockCount = nBlocks(vnodeLength);
3203 vip->volumeBlockCount += vep->blockCount;
3204 vep->parent = vnode->parent;
3205 vep->unique = vnode->uniquifier;
3206 if (*maxu < vnode->uniquifier)
3207 *maxu = vnode->uniquifier;
3208 vep->modeBits = vnode->modeBits;
3209 vep->InodeNumber = VNDISK_GET_INO(vnode);
3210 vep->type = vnode->type;
3211 vep->author = vnode->author;
3212 vep->owner = vnode->owner;
3213 vep->group = vnode->group;
3214 if (vnode->type == vDirectory) {
3215 if (class != vLarge) {
3216 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3217 vip->nAllocatedVnodes--;
/* Clear the whole on-disk record and write that record (not the pointer) back to its slot. */
3218 memset(vnode, 0, vcp->diskSize);
3219 IH_IWRITE(vnodeInfo[vSmall].handle,
3220 vnodeIndexOffset(vcp, vnodeNumber),
3221 (char *)vnode, vcp->diskSize);
3224 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName: build the name of a directory vnode into `path'.
 *
 *   vnode - vnode number of the directory.
 *   vp    - its VnodeEssence (supplies parent and name).
 *   path  - caller-supplied output buffer.
 *
 * The visible branch recurses to the parent first, so that the parent's
 * name is already in `path', and then appends vp->name.  It is only taken
 * when the vnode has a parent, has a recorded name, and the parent is a
 * known vnode whose own name could be built.
 *
 * NOTE(review): the handling of the root vnode, the separator between
 * components and the return statements are not visible in this listing.
 * NOTE(review): strcat() is given no length limit here; SalvageDir() passes
 * a buffer of MAXPATHLEN bytes -- confirm deep or long names cannot overflow it.
 */
3233 GetDirName(VnodeId vnode, struct VnodeEssence *vp, char *path)
3235 struct VnodeEssence *parentvp;
3241 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(vp->parent))
3242 && GetDirName(vp->parent, parentvp, path)) {
3244 strcat(path, vp->name);
3250 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3251 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
/*
 * IsVnodeOrphaned: report whether `vnode' is disconnected from the root.
 *
 * Returns 1 (orphaned) when the vnode has no VnodeEssence or is not marked
 * claimed; otherwise the answer is that of its parent, found by recursion
 * on vep->parent.  Per the comments on the two early returns, vnode zero is
 * treated as non-existent (1) and the root directory as always claimed (0).
 *
 * NOTE(review): the conditions guarding those two early returns are not
 * visible in this listing.
 * NOTE(review): nothing visible bounds the recursion if parent links form a
 * cycle among claimed vnodes -- confirm callers cannot produce that state.
 */
3254 IsVnodeOrphaned(VnodeId vnode)
3256 struct VnodeEssence *vep;
3259 return (1); /* Vnode zero does not exist */
3261 return (0); /* The root dir vnode is always claimed */
3262 vep = CheckVnodeNumber(vnode); /* Get the vnode essence */
3263 if (!vep || !vep->claimed)
3264 return (1); /* Vnode is not claimed - it is orphaned */
3266 return (IsVnodeOrphaned(vep->parent));
/*
 * SalvageDir: salvage the directory held in slot `i' of the large vnode
 * index, after first salvaging its parent directory.
 *
 *   name         - passed through unchanged to the recursive call.
 *   rwVid        - read-write volume id, used when dropping the old inode.
 *   dirVnodeInfo - VnodeInfo for the large vnode class.
 *   alinkH       - link handle stored in dir.ds_linkH.
 *   i            - bit number (slot) of the directory vnode.
 *   rootdir      - out: receives a copy of the DirSummary when this is
 *                  vnode 1 with uniquifier 1.
 *   rootdirfound - passed through to the recursive call (its declaration
 *                  line is not visible in this listing).
 *
 * Visible steps: return if the slot is already marked salvaged or has no
 * directory inode; clear a non-zero parent on vnode 1; recurse into an
 * unsalvaged parent; fill in the DirSummary; check the directory with
 * DirOK() (skipped, and treated as bad, when RebuildDirs is set and not
 * Testing); rebuild a bad directory with CopyAndSalvage(); run JudgeEntry()
 * over every entry; and, if the directory was copied along the way, drop
 * the old inode and remember the new inode number.
 *
 * `dir', `dirHandle' and `path' are static.  The recursive call on the
 * parent is made before `dir' is filled in for this directory.
 */
3270 SalvageDir(char *name, VolumeId rwVid, struct VnodeInfo *dirVnodeInfo,
3271 IHandle_t * alinkH, int i, struct DirSummary *rootdir,
3274 static struct DirSummary dir;
3275 static struct DirHandle dirHandle;
3276 struct VnodeEssence *parent;
3277 static char path[MAXPATHLEN];
3280 if (dirVnodeInfo->vnodes[i].salvaged)
3281 return; /* already salvaged */
/* Marked before any recursion, so a later call for the same slot returns at the check above. */
3284 dirVnodeInfo->vnodes[i].salvaged = 1;
3286 if (dirVnodeInfo->inodes[i] == 0)
3287 return; /* Not allocated to a directory */
3289 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3290 if (dirVnodeInfo->vnodes[i].parent) {
3291 Log("Bad parent, vnode 1; %s...\n",
3292 (Testing ? "skipping" : "salvaging"));
3293 dirVnodeInfo->vnodes[i].parent = 0;
3294 dirVnodeInfo->vnodes[i].changed = 1;
3297 parent = CheckVnodeNumber(dirVnodeInfo->vnodes[i].parent);
3298 if (parent && parent->salvaged == 0)
3299 SalvageDir(name, rwVid, dirVnodeInfo, alinkH,
3300 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3301 rootdir, rootdirfound);
3304 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3305 dir.unique = dirVnodeInfo->vnodes[i].unique;
3308 dir.parent = dirVnodeInfo->vnodes[i].parent;
3309 dir.haveDot = dir.haveDotDot = 0;
3310 dir.ds_linkH = alinkH;
3311 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, fileSysDevice,
3312 dirVnodeInfo->inodes[i]);
/* With RebuildDirs set (and not Testing) the directory is treated as bad without calling DirOK(). */
3314 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3317 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3318 (Testing ? "skipping" : "salvaging"));
3321 CopyAndSalvage(&dir);
/* Keep a copy of the handle in use before enumeration; it is the one released below if the directory gets copied. */
3325 dirHandle = dir.dirHandle;
3328 GetDirName(bitNumberToVnodeNumber(i, vLarge),
3329 &dirVnodeInfo->vnodes[i], path);
3332 /* If enumeration failed for random reasons, we will probably delete
3333 * too much stuff, so we guard against this instead.
3335 assert(EnumerateDir(&dirHandle, JudgeEntry, &dir) == 0);
3338 /* Delete the old directory if it was copied in order to salvage.
3339 * CopyOnWrite has written the new inode # to the disk, but we still
3340 * have the old one in our local structure here. Thus, we idec the
3344 if (dir.copied && !Testing) {
3345 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3347 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3350 /* Remember rootdir DirSummary _after_ it has been judged */
3351 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3352 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3360 SalvageVolume(register struct InodeSummary *rwIsp, IHandle_t * alinkH)
3362 /* This routine, for now, will only be called for read-write volumes */
3364 int BlocksInVolume = 0, FilesInVolume = 0;
3365 register VnodeClass class;
3366 struct DirSummary rootdir, oldrootdir;
3367 struct VnodeInfo *dirVnodeInfo;
3368 struct VnodeDiskObject vnode;
3369 VolumeDiskData volHeader;
3371 int orphaned, rootdirfound = 0;
3372 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3373 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3374 struct VnodeEssence *vep;
3377 afs_sfsize_t nBytes;
3379 VnodeId LFVnode, ThisVnode;
3380 Unique LFUnique, ThisUnique;
3383 vid = rwIsp->volSummary->header.id;
3384 IH_INIT(h, fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3385 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3386 assert(nBytes == sizeof(volHeader));
3387 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3388 assert(volHeader.destroyMe != DESTROY_ME);
3389 /* (should not have gotten this far with DESTROY_ME flag still set!) */
3391 DistilVnodeEssence(vid, vLarge, rwIsp->volSummary->header.largeVnodeIndex,
3393 DistilVnodeEssence(vid, vSmall, rwIsp->volSummary->header.smallVnodeIndex,
3396 dirVnodeInfo = &vnodeInfo[vLarge];
3397 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3398 SalvageDir(volHeader.name, vid, dirVnodeInfo, alinkH, i, &rootdir,
3402 nt_sync(fileSysDevice);
3404 sync(); /* This used to be done lower level, for every dir */
3411 /* Parse each vnode looking for orphaned vnodes and
3412 * connect them to the tree as orphaned (if requested).
3414 oldrootdir = rootdir;
3415 for (class = 0; class < nVNODECLASSES; class++) {
3416 for (v = 0; v < vnodeInfo[class].nVnodes; v++) {
3417 vep = &(vnodeInfo[class].vnodes[v]);
3418 ThisVnode = bitNumberToVnodeNumber(v, class);
3419 ThisUnique = vep->unique;
3421 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3422 continue; /* Ignore unused, claimed, and root vnodes */
3424 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3425 * entry in this vnode had incremented the parent link count (In
3426 * JudgeEntry()). We need to go to the parent and decrement that
3427 * link count. But if the parent's unique is zero, then the parent
3428 * link count was not incremented in JudgeEntry().
3430 if (class == vLarge) { /* directory vnode */
3431 pv = vnodeIdToBitNumber(vep->parent);
3432 if (vnodeInfo[vLarge].vnodes[pv].unique != 0)
3433 vnodeInfo[vLarge].vnodes[pv].count++;
3437 continue; /* If no rootdir, can't attach orphaned files */
3439 /* Here we attach orphaned files and directories into the
3440 * root directory, LFVnode, making sure link counts stay correct.
3442 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3443 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3444 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3446 /* Update this orphaned vnode's info. Its parent info and
3447 * link count (do for orphaned directories and files).
3449 vep->parent = LFVnode; /* Parent is the root dir */
3450 vep->unique = LFUnique;
3453 vep->count--; /* Inc link count (root dir will pt to it) */
3455 /* If this orphaned vnode is a directory, change '..'.
3456 * The name of the orphaned dir/file is unknown, so we
3457 * build a unique name. No need to CopyOnWrite the directory
3458 * since it is not connected to tree in BK or RO volume and
3459 * won't be visible there.
3461 if (class == vLarge) {
3465 /* Remove and recreate the ".." entry in this orphaned directory */
3466 SetSalvageDirHandle(&dh, vid, fileSysDevice,
3467 vnodeInfo[class].inodes[v]);
3469 pa.Unique = LFUnique;
3470 assert(Delete(&dh, "..") == 0);
3471 assert(Create(&dh, "..", &pa) == 0);
3473 /* The original parent's link count was decremented above.
3474 * Here we increment the new parent's link count.
3476 pv = vnodeIdToBitNumber(LFVnode);
3477 vnodeInfo[vLarge].vnodes[pv].count--;
3481 /* Go to the root dir and add this entry. The link count of the
3482 * root dir was incremented when ".." was created. Try 10 times.
3484 for (j = 0; j < 10; j++) {
3485 pa.Vnode = ThisVnode;
3486 pa.Unique = ThisUnique;
3488 (void)afs_snprintf(npath, sizeof npath, "%s.%u.%u",
3490 vLarge) ? "__ORPHANDIR__" :
3491 "__ORPHANFILE__"), ThisVnode,
3494 CopyOnWrite(&rootdir);
3495 code = Create(&rootdir.dirHandle, npath, &pa);
3499 ThisUnique += 50; /* Try creating a different file */
3502 Log("Attaching orphaned %s to volume's root dir as %s\n",
3503 ((class == vLarge) ? "directory" : "file"), npath);
3505 } /* for each vnode in the class */
3506 } /* for each class of vnode */
3508 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
3510 if (!oldrootdir.copied && rootdir.copied) {
3512 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
3515 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
3518 DFlush(); /* Flush the changes */
3519 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
3520 Log("Cannot attach orphaned files and directories: Root directory not found\n");
3521 orphans = ORPH_IGNORE;
3524 /* Write out all changed vnodes. Orphaned files and directories
3525 * will get removed here also (if requested).
3527 for (class = 0; class < nVNODECLASSES; class++) {
3528 int nVnodes = vnodeInfo[class].nVnodes;
3529 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3530 struct VnodeEssence *vnodes = vnodeInfo[class].vnodes;
3531 FilesInVolume += vnodeInfo[class].nAllocatedVnodes;
3532 BlocksInVolume += vnodeInfo[class].volumeBlockCount;
3533 for (i = 0; i < nVnodes; i++) {
3534 register struct VnodeEssence *vnp = &vnodes[i];
3535 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
3537 /* If the vnode is good but is unclaimed (not listed in
3538 * any directory entries), then it is orphaned.
3541 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(vnodeNumber))) {
3542 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
3546 if (vnp->changed || vnp->count) {
3550 IH_IREAD(vnodeInfo[class].handle,
3551 vnodeIndexOffset(vcp, vnodeNumber),
3552 (char *)&vnode, sizeof(vnode));
3553 assert(nBytes == sizeof(vnode));
3555 vnode.parent = vnp->parent;
3556 oldCount = vnode.linkCount;
3557 vnode.linkCount = vnode.linkCount - vnp->count;
3560 orphaned = IsVnodeOrphaned(vnodeNumber);
3562 if (!vnp->todelete) {
3563 /* Orphans should have already been attached (if requested) */
3564 assert(orphans != ORPH_ATTACH);
3565 oblocks += vnp->blockCount;
3568 if (((orphans == ORPH_REMOVE) || vnp->todelete)
3570 BlocksInVolume -= vnp->blockCount;
3572 if (VNDISK_GET_INO(&vnode)) {
3574 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
3577 memset(&vnode, 0, sizeof(vnode));
3579 } else if (vnp->count) {
3581 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
3584 vnode.modeBits = vnp->modeBits;
3587 vnode.dataVersion++;
3590 IH_IWRITE(vnodeInfo[class].handle,
3591 vnodeIndexOffset(vcp, vnodeNumber),
3592 (char *)&vnode, sizeof(vnode));
3593 assert(nBytes == sizeof(vnode));
3599 if (!Showmode && ofiles) {
3600 Log("%s %d orphaned files and directories (approx. %u KB)\n",
3602 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
3606 for (class = 0; class < nVNODECLASSES; class++) {
3607 register struct VnodeInfo *vip = &vnodeInfo[class];
3608 for (i = 0; i < vip->nVnodes; i++)
3609 if (vip->vnodes[i].name)
3610 free(vip->vnodes[i].name);
3617 /* Set correct resource utilization statistics */
3618 volHeader.filecount = FilesInVolume;
3619 volHeader.diskused = BlocksInVolume;
3621 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
3622 if (volHeader.uniquifier < (maxunique + 1)) {
3624 Log("Volume uniquifier is too low; fixed\n");
3625 /* Plus 2,000 in case there are workstations out there with
3626 * cached vnodes that have since been deleted
3628 volHeader.uniquifier = (maxunique + 1 + 2000);
3631 /* Turn off the inUse bit; the volume's been salvaged! */
3632 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
3633 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
3634 volHeader.inService = 1; /* allow service again */
3635 volHeader.needsCallback = (VolumeChanged != 0);
3636 volHeader.dontSalvage = DONT_SALVAGE;
3639 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
3640 assert(nBytes == sizeof(volHeader));
3643 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
3644 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
3645 FilesInVolume, BlocksInVolume);
3647 IH_RELEASE(vnodeInfo[vSmall].handle);
3648 IH_RELEASE(vnodeInfo[vLarge].handle);
/*
 * ClearROInUseBit
 *
 * Resets the "in use" state recorded in a volume's on-disk info header.
 *
 * The VolumeDiskData header is read from offset 0 of
 * summary->volumeInfoHandle; the read must return the whole structure and
 * the stamp magic must equal VOLUMEINFOMAGIC (both enforced with assert).
 * The in-memory copy is then updated -- inUse and needsSalvaged cleared,
 * inService set to 1, dontSalvage set to DONT_SALVAGE -- and written back
 * to offset 0, with the number of bytes written asserted as well.
 *
 * NOTE(review): the embedded line numbering skips a line just before the
 * write-back, so the write may be guarded by a condition that is not
 * visible in this extract -- confirm against the complete file.
 */
3654 ClearROInUseBit(struct VolumeSummary *summary)
3656 IHandle_t *h = summary->volumeInfoHandle;
3657 afs_sfsize_t nBytes;
3659 VolumeDiskData volHeader;
3661 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3662 assert(nBytes == sizeof(volHeader));
3663 assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3664 volHeader.inUse = 0;
3665 volHeader.needsSalvaged = 0;
3666 volHeader.inService = 1;
3667 volHeader.dontSalvage = DONT_SALVAGE;
3669 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
3670 assert(nBytes == sizeof(volHeader));
3675 * Possibly delete the volume.
3677 * deleteMe - Always do so, only a partial volume.
/*
 * MaybeZapVolume
 *
 * Decides what to do with a volume whose salvage did not succeed.
 *
 * When the volume is read-only (readOnly(isp)) or deleteMe is set, and the
 * inode summary has a volume summary with a header file name, the reason is
 * logged (either "only a partial volume" or "read-only, will be deleted
 * instead"), the header path "<fileSysPath>/<fileName>" is built, the
 * volume disk header is destroyed with VDestroyVolumeDiskHeader(), and the
 * header file itself is unlinked; an unlink failure other than ENOENT is
 * logged.
 *
 * Otherwise, when check is zero, the failure is logged using the
 * caller-supplied message and the salvage is aborted via Abort().
 *
 * NOTE(review): the rest of the parameter list and the conditions that
 * choose between the two log messages (and that guard the destroy/unlink
 * calls) sit on lines missing from this extract.
 * NOTE(review): path is filled with an unbounded sprintf; its declaration
 * is not visible here -- confirm the buffer is large enough for
 * fileSysPath plus fileName.
 */
3680 MaybeZapVolume(register struct InodeSummary *isp, char *message, int deleteMe,
3683 if (readOnly(isp) || deleteMe) {
3684 if (isp->volSummary && isp->volSummary->fileName) {
3687 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
3689 Log("It will be deleted on this server (you may find it elsewhere)\n");
3692 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
3694 Log("it will be deleted instead. It should be recloned.\n");
3699 sprintf(path, "%s/%s", fileSysPath, isp->volSummary->fileName);
3701 code = VDestroyVolumeDiskHeader(fileSysPartition, isp->volumeId, isp->RWvolumeId);
3703 Log("Error %ld destroying volume disk header for volume %lu\n",
3704 afs_printable_int32_ld(code),
3705 afs_printable_uint32_lu(isp->volumeId));
3708 /* make sure we actually delete the fileName file; ENOENT
3709 * is fine, since VDestroyVolumeDiskHeader probably already
3711 if (unlink(path) && errno != ENOENT) {
3712 Log("Unable to unlink %s (errno = %d)\n", path, errno);
3716 } else if (!check) {
3717 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
3719 Abort("Salvage of volume %u aborted\n", isp->volumeId);
3723 #ifdef AFS_DEMAND_ATTACH_FS
3725 * Locks a volume on disk for salvaging.
3727 * @param[in] volumeId volume ID to lock
3729 * @return operation status
3731 * @retval -1 volume lock raced with a fileserver restart; all volumes must
3732 * be checked out and locked again
/*
 * Implementation outline for LockVolume (demand-attach builds only):
 *
 *  1. The lock type is obtained from VVolLockType(V_VOLUPD, 1) and the
 *     volume is locked without blocking via VLockVolumeByIdNB() on
 *     fileSysPartition.  An EBUSY result aborts with "Someone else appears
 *     to be using volume"; another Abort() message exists for other lock
 *     errors.
 *  2. FSYNC_VerifyCheckout() confirms the volume is still checked out for
 *     salvaging.  SYNC_DENIED means the checkouts must be retried; any
 *     other result that is not SYNC_OK aborts the salvage.
 *  3. The volume disk header is read, converted with DiskToVolumeHeader(),
 *     and used to open the volume info inode.  If the info header can be
 *     read and carries VOLUMEINFOMAGIC, inUse is set to programType and the
 *     header is written back; that write is asserted to be complete.
 *
 * NOTE(review): several guards and return statements are on lines missing
 * from this extract (e.g. the exact condition around the second Abort() and
 * what is returned on SYNC_DENIED) -- confirm against the complete file.
 * NOTE(review): the final IH_IWRITE is performed inside assert(); this
 * relies on the project's assert never being compiled out -- confirm.
 */
3737 LockVolume(VolumeId volumeId)
3742 /* should always be WRITE_LOCK, but keep the lock-type logic all
3743 * in one place, in VVolLockType. Params will be ignored, but
3744 * try to provide what we're logically doing. */
3745 locktype = VVolLockType(V_VOLUPD, 1);
3747 code = VLockVolumeByIdNB(volumeId, fileSysPartition, locktype);
3749 if (code == EBUSY) {
3750 Abort("Someone else appears to be using volume %lu; Aborted\n",
3751 afs_printable_uint32_lu(volumeId));
3753 Abort("Error %ld trying to lock volume %lu; Aborted\n",
3754 afs_printable_int32_ld(code),
3755 afs_printable_uint32_lu(volumeId));
3758 code = FSYNC_VerifyCheckout(volumeId, fileSysPathName, FSYNC_VOL_OFF, FSYNC_SALVAGE);
3759 if (code == SYNC_DENIED) {
3760 /* need to retry checking out volumes */
3763 if (code != SYNC_OK) {
3764 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
3765 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
3768 /* set inUse = programType in the volume header to ensure that nobody
3769 * tries to use this volume again without salvaging, if we somehow crash
3770 * or otherwise exit before finishing the salvage.
3774 struct VolumeHeader header;
3775 struct VolumeDiskHeader diskHeader;
3776 struct VolumeDiskData volHeader;
3778 code = VReadVolumeDiskHeader(volumeId, fileSysPartition, &diskHeader);
3783 DiskToVolumeHeader(&header, &diskHeader);
3785 IH_INIT(h, fileSysDevice, header.parent, header.volumeInfo);
3786 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
3787 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
3793 volHeader.inUse = programType;
3795 /* If we can't re-write the header, bail out and error. We don't
3796 * assert when reading the header, since it's possible the
3797 * header isn't really there (when there's no data associated
3798 * with the volume; we just delete the vol header file in that
3799 * case). But if it's there enough that we can read it, but
3800 * somehow we cannot write to it to signify we're salvaging it,
3801 * we've got a big problem and we cannot continue. */
3802 assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
3809 #endif /* AFS_DEMAND_ATTACH_FS */
/*
 * AskOffline
 *
 * Asks the file server, over FSSYNC, to take a volume offline so that it
 * can be salvaged.
 *
 *   volumeId  - volume to take offline
 *   partition - partition name passed through to FSYNC_VolOp()
 *
 * The request (FSYNC_VOL_OFF with reason FSYNC_SALVAGE) is attempted up to
 * three times.  SYNC_DENIED and SYNC_BAD_COMMAND are treated as fatal: an
 * explanatory message is logged and the salvage is aborted.  For other
 * failures a "trying again" message is logged and the FSSYNC client
 * connection is shut down with FSYNC_clientFinis() before the next attempt.
 * If the result is still not SYNC_OK after the loop, the salvage is
 * aborted.
 *
 * The wording of two messages depends on DEMAND_ATTACH_ENABLE.
 *
 * NOTE(review): the lines that leave the loop on success, the retry guard,
 * and any re-initialisation of the FSSYNC client are missing from this
 * extract -- confirm against the complete file.
 */
3812 AskOffline(VolumeId volumeId, char * partition)
3817 memset(&res, 0, sizeof(res));
3819 for (i = 0; i < 3; i++) {
3820 code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
3822 if (code == SYNC_OK) {
3824 } else if (code == SYNC_DENIED) {
3825 #ifdef DEMAND_ATTACH_ENABLE
3826 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
3828 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
3830 Abort("Salvage aborted\n");
3831 } else if (code == SYNC_BAD_COMMAND) {
3832 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
3834 #ifdef DEMAND_ATTACH_ENABLE
3835 Log("AskOffline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
3837 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
3839 Abort("Salvage aborted\n");
3842 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
3843 FSYNC_clientFinis();
3847 if (code != SYNC_OK) {
3848 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
3849 Abort("Salvage aborted\n");
/*
 * AskOnline
 *
 * Asks the file server, over FSSYNC, to bring a volume back online.
 *
 *   volumeId  - volume to bring online
 *   partition - partition name passed through to FSYNC_VolOp()
 *
 * The request (FSYNC_VOL_ON with reason FSYNC_WHATEVER) is attempted up to
 * three times.  Unlike AskOffline, no failure aborts the salvage here: a
 * denial or a protocol mismatch is only logged.  For other failures a
 * "trying again" message is logged and the FSSYNC client connection is
 * shut down with FSYNC_clientFinis() before the next attempt.
 *
 * The retry message used to say "take volume offline" (copied from
 * AskOffline); it now describes the online request actually being made.
 *
 * NOTE(review): the lines that leave the loop, the retry guard, and any
 * re-initialisation of the FSSYNC client are missing from this extract --
 * confirm against the complete file.
 */
3854 AskOnline(VolumeId volumeId, char *partition)
3858 for (i = 0; i < 3; i++) {
3859 code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
3861 if (code == SYNC_OK) {
3863 } else if (code == SYNC_DENIED) {
3864 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, partition);
3865 } else if (code == SYNC_BAD_COMMAND) {
3866 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
3868 #ifdef DEMAND_ATTACH_ENABLE
3869 Log("AskOnline: please make sure fileserver, volserver, salvageserver and salvager binaries are same version.\n");
3871 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
3876 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
3877 FSYNC_clientFinis();
/*
 * CopyInode
 *
 * Copies the contents of one inode to another on the same device.
 *
 *   device   - device holding both inodes
 *   inode1   - source inode
 *   inode2   - destination inode
 *   rwvolume - volume id used when initialising both inode handles
 *
 * Both inodes are opened through inode handles; the source open is
 * asserted to succeed.  Data is then read from the source in chunks of
 * sizeof(buf) and written to the destination, asserting that every write
 * transfers exactly the number of bytes just read.  Both descriptors are
 * closed with FDH_REALLYCLOSE() afterwards.
 *
 * NOTE(review): the declaration of buf/n, any check on the destination
 * open or the final read result, and the release of the two inode handles
 * are on lines missing from this extract -- confirm against the complete
 * file.
 * NOTE(review): the FDH_WRITE call is made inside assert(); this relies on
 * the project's assert never being compiled out -- confirm.
 */
3884 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
3886 /* Volume parameter is passed in case iopen is upgraded in future to
3887 * require a volume Id to be passed
3890 IHandle_t *srcH, *destH;
3891 FdHandle_t *srcFdP, *destFdP;
3894 IH_INIT(srcH, device, rwvolume, inode1);
3895 srcFdP = IH_OPEN(srcH);
3896 assert(srcFdP != NULL);
3897 IH_INIT(destH, device, rwvolume, inode2);
3898 destFdP = IH_OPEN(destH);
3900 while ((n = FDH_READ(srcFdP, buf, sizeof(buf))) > 0)
3901 assert(FDH_WRITE(destFdP, buf, n) == n);
3903 FDH_REALLYCLOSE(srcFdP);
3904 FDH_REALLYCLOSE(destFdP);
/*
 * PrintInodeList
 *
 * Debugging aid: logs every record of the inode file open on inodeFd.
 *
 * The file size is obtained with afs_fstat(), a buffer of that size is
 * allocated, and the file is read into it with a single read(); each of
 * these steps is asserted to succeed.  The buffer is then walked as an
 * array of struct ViceInodeInfo (st_size / sizeof(struct ViceInodeInfo)
 * entries) and one line is logged per entry with the inode number, link
 * count, byte count and the four parameter words.
 */
3911 PrintInodeList(void)
3913 register struct ViceInodeInfo *ip;
3914 struct ViceInodeInfo *buf;
3915 struct afs_stat status;
3916 register int nInodes;
3918 assert(afs_fstat(inodeFd, &status) == 0);
3919 buf = (struct ViceInodeInfo *)malloc(status.st_size);
3920 assert(buf != NULL);
3921 nInodes = status.st_size / sizeof(struct ViceInodeInfo);
3922 assert(read(inodeFd, buf, status.st_size) == status.st_size);
/* NOTE(review): afs_fstat() and read() above are evaluated inside assert();
 * this relies on the project's assert never being compiled out -- confirm. */
3923 for (ip = buf; nInodes--; ip++) {
3924 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
3925 PrintInode(NULL, ip->inodeNumber), ip->linkCount,
3926 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
3927 ip->u.param[2], ip->u.param[3]);
/* NOTE(review): whether buf is freed is not visible in this extract. */
/*
 * PrintInodeSummary
 *
 * Debugging aid: logs one line for each of the nVolumesInInodeFile entries
 * of the inodeSummary array, showing the volume id, read-write volume id,
 * index, inode counts and maximum uniquifier of the entry.
 */
3933 PrintInodeSummary(void)
3936 struct InodeSummary *isp;
3938 for (i = 0; i < nVolumesInInodeFile; i++) {
3939 isp = &inodeSummary[i];
3940 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/*
 * PrintVolumeSummary
 *
 * Debugging aid: walks the nVolumes entries starting at volumeSummaryp and
 * logs the header file name of each one.  The words "header" and
 * "wouldNeedCallback" in the message are literal text; no values are
 * printed for them.
 */
3945 PrintVolumeSummary(void)
3948 struct VolumeSummary *vsp;
3950 for (i = 0, vsp = volumeSummaryp; i < nVolumes; vsp++, i++) {
3951 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
/*
 * NOTE(review): the header of this function is missing from this extract;
 * judging by the comment on the first line below it is the salvager's fork
 * wrapper -- confirm against the complete file.
 *
 * Visible behaviour: on the NT code path the function asserts immediately,
 * because forking is never done there.  On other platforms, in
 * demand-attach builds, a child process (f == 0) of the salvageserver
 * re-establishes its file server connection with VChildProcReconnectFS_r()
 * when FSSYNC_BUILD_CLIENT is defined; a further step guarded by
 * SALVSYNC_BUILD_CLIENT is on a line not visible here.
 */
3961 assert(0); /* Fork is never executed in the NT code path */
3965 #ifdef AFS_DEMAND_ATTACH_FS
3966 if ((f == 0) && (programType == salvageServer)) {
3967 /* we are a salvageserver child */
3968 #ifdef FSSYNC_BUILD_CLIENT
3969 VChildProcReconnectFS_r();
3971 #ifdef SALVSYNC_BUILD_CLIENT
3975 #endif /* AFS_DEMAND_ATTACH_FS */
3976 #endif /* !AFS_NT40_ENV */
/*
 * NOTE(review): the header of this function is missing from this extract;
 * presumably it is the salvager's exit wrapper taking an exit code --
 * confirm against the complete file.
 *
 * Visible behaviour: in demand-attach builds the salvageserver performs
 * extra steps guarded by SALVSYNC_BUILD_CLIENT and FSSYNC_BUILD_CLIENT
 * (their bodies are on lines not visible here).  When called from a thread
 * other than main_thread, only that thread is terminated, with the code
 * passed as its exit value.
 */
3986 #ifdef AFS_DEMAND_ATTACH_FS
3987 if (programType == salvageServer) {
3988 #ifdef SALVSYNC_BUILD_CLIENT
3991 #ifdef FSSYNC_BUILD_CLIENT
3995 #endif /* AFS_DEMAND_ATTACH_FS */
3998 if (main_thread != pthread_self())
3999 pthread_exit((void *)code);
/*
 * NOTE(review): the header of this function is missing from this extract;
 * presumably it waits for a child process identified to the user by the
 * name in prog -- confirm against the complete file.
 *
 * Visible behaviour: wait() collects the status of a terminated child.  A
 * core dump is reported in the log using prog, and a child that was killed
 * by a signal or exited with a non-zero status is treated specially (the
 * action taken is on a line not visible here).
 */
4012 pid = wait(&status);
4014 if (WCOREDUMP(status))
4015 Log("\"%s\" core dumped!\n", prog);
4016 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp
 *
 * Formats a time value as local time into a 20-byte static buffer.
 *
 *   clock     - time to format
 *   precision - NOTE(review): presumably selects between the two formats
 *               below; the test itself is on a line missing from this
 *               extract.
 *
 * Two formats are available: "MM/DD/YYYY HH:MM:SS" (19 characters, which
 * exactly fills the buffer including the terminator) and the shorter
 * "MM/DD/YYYY HH:MM".
 *
 * Because the buffer is static, each call overwrites the previous result
 * and the function is not safe for concurrent use.
 */
4022 TimeStamp(time_t clock, int precision)
4025 static char timestamp[20];
4026 lt = localtime(&clock);
4028 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M:%S", lt);
4030 (void)strftime(timestamp, 20, "%m/%d/%Y %H:%M", lt);
/*
 * CheckLogFile
 *
 * Prepares the salvager log file.
 *
 *   log_path - path of the log file to open
 *
 * The name "<log_path>.old" is built in a local buffer of AFSDIR_PATH_MAX
 * bytes, the existing log is renamed to it with renamefile(), and log_path
 * is opened in append mode as logFile.  If the open fails, the code falls
 * back to stdout (per the inline comment).  In builds without
 * AFS_NAMEI_ENV, AFS_DEBUG_IOPS_LOG() is invoked on the log file.
 *
 * NOTE(review): the conditions guarding these steps (including the body of
 * the AFS_NT40_ENV conditional) are on lines missing from this extract.
 * NOTE(review): the ".old" name is built with unbounded strcpy/strcat; a
 * log_path within 4 bytes of AFSDIR_PATH_MAX would overflow oldSlvgLog --
 * confirm callers bound the path length.
 */
4035 CheckLogFile(char * log_path)
4037 char oldSlvgLog[AFSDIR_PATH_MAX];
4039 #ifndef AFS_NT40_ENV
4046 strcpy(oldSlvgLog, log_path);
4047 strcat(oldSlvgLog, ".old");
4049 renamefile(log_path, oldSlvgLog);
4050 logFile = afs_fopen(log_path, "a");
4052 if (!logFile) { /* still nothing, use stdout */
4056 #ifndef AFS_NAMEI_ENV
4057 AFS_DEBUG_IOPS_LOG(logFile);
4062 #ifndef AFS_NT40_ENV
/*
 * TimeStampLogFile
 *
 * Gives the log file an additional, timestamped name.
 *
 *   log_path - path of the existing log file
 *
 * The name "<log_path>.YYYY-MM-DD.HH:MM:SS" is built from the local time
 * with a bounded afs_snprintf(), and a hard link with that name is created
 * for log_path.  The result of link() is deliberately ignored: as the
 * inline comments say, nothing can be done if it fails.
 *
 * NOTE(review): how "now" is obtained is on a line missing from this
 * extract.
 */
4064 TimeStampLogFile(char * log_path)
4066 char stampSlvgLog[AFSDIR_PATH_MAX];
4071 lt = localtime(&now);
4072 (void)afs_snprintf(stampSlvgLog, sizeof stampSlvgLog,
4073 "%s.%04d-%02d-%02d.%02d:%02d:%02d",
4074 log_path, lt->tm_year + 1900,
4075 lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min,
4078 /* try to link the logfile to a timestamped filename */
4079 /* if it fails, oh well, nothing we can do */
4080 link(log_path, stampSlvgLog);
/*
 * NOTE(review): the header of this function is missing from this extract;
 * presumably it is the routine that displays the salvager log -- confirm
 * against the complete file.
 *
 * Visible behaviour: on non-NT builds a message explains that the log
 * cannot be shown when syslog is in use (the test is on a line not visible
 * here).  Otherwise AFSDIR_SERVER_SLVGLOG_FILEPATH is opened for reading;
 * a failure is reported on stdout, and on success the file is read line by
 * line with fgets().
 */
4089 #ifndef AFS_NT40_ENV
4091 printf("Can't show log since using syslog.\n");
4102 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
4105 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
4108 while (fgets(line, sizeof(line), logFile))
/*
 * Log
 *
 * printf-style logging routine used throughout the salvager.
 *
 *   format, ... - printf-style format string and arguments
 *
 * The message is formatted into a local buffer with a bounded
 * afs_vsnprintf().  It is then either sent to syslog at LOG_INFO (non-NT
 * builds only) or written to logFile prefixed with the current time as
 * produced by TimeStamp(now.tv_sec, 1).
 *
 * NOTE(review): the test that chooses between syslog and logFile, and any
 * flush of logFile, are on lines missing from this extract.
 */
4115 Log(const char *format, ...)
4121 va_start(args, format);
4122 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4124 #ifndef AFS_NT40_ENV
4126 syslog(LOG_INFO, "%s", tmp);
4130 gettimeofday(&now, 0);
4131 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort
 *
 * printf-style routine used to report a fatal salvage error.
 *
 *   format, ... - printf-style format string and arguments
 *
 * The message is formatted into a local buffer with a bounded
 * afs_vsnprintf().  It is then either sent to syslog at LOG_INFO (non-NT
 * builds only) or written to logFile; unlike Log(), no timestamp is
 * prepended.
 *
 * NOTE(review): the test that chooses between syslog and logFile, and the
 * steps that actually terminate the salvage, are on lines missing from
 * this extract.
 */
4137 Abort(const char *format, ...)
4142 va_start(args, format);
4143 (void)afs_vsnprintf(tmp, sizeof tmp, format, args);
4145 #ifndef AFS_NT40_ENV
4147 syslog(LOG_INFO, "%s", tmp);
4151 fprintf(logFile, "%s", tmp);
/*
 * ToString
 *
 * Allocates a heap buffer large enough to hold the string s including its
 * terminating NUL (strlen(s) + 1 bytes).
 *
 * NOTE(review): the allocation check, the copy of s into the buffer and the
 * return statement are on lines missing from this extract; presumably the
 * function returns a malloc'ed duplicate that the caller must free --
 * confirm against the complete file.
 */
4163 ToString(const char *s)
4166 p = (char *)malloc(strlen(s) + 1);
/*
 * RemoveTheForce
 *
 *   path - directory in which the FORCESALVAGE marker file lives
 *
 * Builds "<path>/FORCESALVAGE" and, when not in Testing mode and
 * ForceSalvage is set, unlinks that file if afs_stat() shows it exists.
 * The result of unlink() is not checked.
 *
 * NOTE(review): target is filled with unbounded strcpy/strcat and its
 * declaration is not visible in this extract -- confirm the buffer can
 * hold path plus "/FORCESALVAGE".
 */
4172 /* Remove the FORCESALVAGE file */
4174 RemoveTheForce(char *path)
4177 struct afs_stat force; /* so we can use afs_stat to find it */
4178 strcpy(target,path);
4179 strcat(target,"/FORCESALVAGE");
4180 if (!Testing && ForceSalvage) {
4181 if (afs_stat(target,&force) == 0) unlink(target);
/*
 * UseTheForceLuke comes in two variants selected by AFS_AIX32_ENV.
 *
 * Non-AIX variant: builds "<path>/FORCESALVAGE" and returns non-zero when
 * afs_stat() succeeds on it, i.e. when the marker file exists.
 * NOTE(review): target is filled with unbounded strcpy/strcat and its
 * declaration is not visible in this extract -- confirm the buffer can
 * hold path plus "/FORCESALVAGE".
 */
4185 #ifndef AFS_AIX32_ENV
4187 * UseTheForceLuke - see if we can use the force
4190 UseTheForceLuke(char *path)
4192 struct afs_stat force;
4194 strcpy(target,path);
4195 strcat(target,"/FORCESALVAGE");
4197 return (afs_stat(target, &force) == 0);
/*
 * AIX variant: per the original notes below, no FORCESALVAGE file is used
 * on this platform and ListViceInodes() sets ForceSalvage itself, so this
 * variant returns 0.
 */
4201 * UseTheForceLuke - see if we can use the force
4204 * The VRMIX fsck will not muck with the filesystem it is supposedly
4205 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4206 * muck directly with the root inode, which is within the normal
4208 * ListViceInodes() has a side effect of setting ForceSalvage if
4209 * it detects a need, based on root inode examination.
4212 UseTheForceLuke(char *path)
4215 return 0; /* sorry OB1 */
4220 /* NT support routines */
/*
 * nt_SalvagePartition
 *
 * Windows NT only: starts a child salvager process for one partition.
 *
 *   partName - partition name copied into the job description
 *   jobn     - job number recorded in the job description
 *
 * The path of the running executable is looked up once with
 * GetModuleFileName() and cached in execpathname.  A childJob_t carrying
 * SALVAGER_MAGIC, the job number and the partition name is then handed to
 * the child through spawnprocveb(), together with the saved argument
 * vector.
 *
 * GetModuleFileName() returns the buffer size it was given when the path
 * had to be truncated.  The size passed is MAX_PATH - 1, so that is the
 * value checked for; the previous comparison against 1023 could never
 * match and let a truncated path through.
 *
 * NOTE(review): the statements executed when the lookup fails and the
 * function's return value are on lines missing from this extract.
 * NOTE(review): partName is copied with an unbounded strcpy; confirm it
 * always fits in job.cj_part.
 */
4222 static char execpathname[MAX_PATH];
4224 nt_SalvagePartition(char *partName, int jobn)
4229 if (!*execpathname) {
4230 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
4231 if (!n || n == MAX_PATH - 1)
4234 job.cj_magic = SALVAGER_MAGIC;
4235 job.cj_number = jobn;
4236 (void)strcpy(job.cj_part, partName);
4237 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage
 *
 * Windows NT only: validates the job description received by a child
 * salvager and opens its per-job log file.
 *
 *   datap - pointer to the data passed by the parent, interpreted as a
 *           childJob_t
 *   len   - size in bytes of that data
 *
 * The data is rejected unless len equals sizeof(childJob_t) and the
 * cj_magic field equals SALVAGER_MAGIC.  The log file name is formed as
 * "<AFSDIR_SERVER_SLVGLOG_FILEPATH>.<n>" and opened for writing as logFile.
 *
 * NOTE(review): the values returned on rejection, the argument supplying
 * <n>, and the handling of a failed open are on lines missing from this
 * extract.
 */
4242 nt_SetupPartitionSalvage(void *datap, int len)
4244 childJob_t *jobp = (childJob_t *) datap;
4245 char logname[AFSDIR_PATH_MAX];
4247 if (len != sizeof(childJob_t))
4249 if (jobp->cj_magic != SALVAGER_MAGIC)
4254 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
4256 logFile = afs_fopen(logname, "w");
4264 #endif /* AFS_NT40_ENV */