2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
89 #include <afs/procmgmt.h>
92 #ifdef HAVE_SYS_FILE_H
93 # include <sys/file.h>
97 #include <WINNT/afsevent.h>
100 #define WCOREDUMP(x) ((x) & 0200)
103 #include <afs/afsint.h>
104 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
105 #if defined(AFS_VFSINCL_ENV)
106 #include <sys/vnode.h>
108 #include <sys/fs/ufs_inode.h>
110 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
111 #include <ufs/ufs/dinode.h>
112 #include <ufs/ffs/fs.h>
114 #include <ufs/inode.h>
117 #else /* AFS_VFSINCL_ENV */
119 #include <ufs/inode.h>
120 #else /* AFS_OSF_ENV */
121 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_DARWIN_ENV)
122 #include <sys/inode.h>
125 #endif /* AFS_VFSINCL_ENV */
126 #endif /* AFS_SGI_ENV */
129 #include <sys/lockf.h>
132 #include <checklist.h>
134 #if defined(AFS_SGI_ENV)
137 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
139 #include <sys/mnttab.h>
140 #include <sys/mntent.h>
145 #endif /* AFS_SGI_ENV */
146 #endif /* AFS_HPUX_ENV */
150 #include <afs/osi_inode.h>
154 #include <afs/afsutil.h>
155 #include <afs/fileutil.h>
160 #include <afs/afssyscalls.h>
164 #include "partition.h"
165 #include "daemon_com.h"
166 #include "daemon_com_inline.h"
168 #include "volume_inline.h"
169 #include "salvsync.h"
170 #include "viceinode.h"
172 #include "volinodes.h" /* header magic number, etc. stuff */
173 #include "vol-salvage.h"
175 #include "vol_internal.h"
177 #include <afs/prs_fs.h>
179 #ifdef FSSYNC_BUILD_CLIENT
180 #include "vg_cache.h"
188 extern void *calloc();
190 static char *TimeStamp(time_t clock, int precision);
193 int debug; /* -d flag */
194 extern int Testing; /* -n flag */
195 int ListInodeOption; /* -i flag */
196 int ShowRootFiles; /* -r flag */
197 int RebuildDirs; /* -sal flag */
198 int Parallel = 4; /* -para X flag */
199 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
200 int forceR = 0; /* -b flag */
201 int ShowLog = 0; /* -showlog flag */
202 int ShowSuid = 0; /* -showsuid flag */
203 int ShowMounts = 0; /* -showmounts flag */
204 int orphans = ORPH_IGNORE; /* -orphans option */
209 int useSyslog = 0; /* -syslog flag */
210 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
219 #define MAXPARALLEL 32
221 int OKToZap; /* -o flag */
222 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
223 * in the volume header */
225 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
227 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
230 * information that is 'global' to a particular salvage job.
233 Device fileSysDevice; /**< The device number of the current partition
235 char fileSysPath[9]; /**< The path of the mounted partition currently
236 * being salvaged, i.e. the directory containing
237 * the volume headers */
238 char *fileSysPathName; /**< NT needs this to make name pretty log. */
239 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
240 int VGLinkH_cnt; /**< # of references to lnk handle. */
241 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
244 char *fileSysDeviceName; /**< The block device where the file system being
245 * salvaged was mounted */
246 char *filesysfulldev;
248 int VolumeChanged; /**< Set by any routine which would change the
249 * volume in a way which would require callbacks
250 * to be broken if the volume was put back
251 * on line by an active file server */
253 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
254 * header dealt with */
256 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
257 FD_t inodeFd; /**< File descriptor for inode file */
259 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
260 int nVolumes; /**< Number of volumes (read-write and read-only)
261 * in volume summary */
262 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
265 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
266 * vnodes in the volume that
267 * we are currently looking
269 int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
270 * to contact the fileserver over FSYNC */
277 /* Forward declarations */
278 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
279 static int AskVolumeSummary(struct SalvInfo *salvinfo,
280 VolumeId singleVolumeNumber);
281 static void MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId);
282 static void AskError(struct SalvInfo *salvinfo, VolumeId volumeId);
284 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
285 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
286 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
288 /* Uniquifier stored in the Inode */
293 return (u & 0x3fffff);
295 #if defined(AFS_SGI_EXMAG)
296 return (u & SGI_UNIQMASK);
299 #endif /* AFS_SGI_EXMAG */
306 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
308 return 0; /* otherwise may be transient, e.g. EMFILE */
313 char *save_args[MAX_ARGS];
315 extern pthread_t main_thread;
316 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
320 * Get the salvage lock if not already held. Hold until process exits.
322 * @param[in] locktype READ_LOCK or WRITE_LOCK
/*
 * Acquire the salvager lock file (AFSDIR_SERVER_SLVGLOCK_FILEPATH) in the
 * mode given by locktype, using VLockFileInit() followed by VLockFileLock().
 * The two message strings below cover a competing salvager and any other
 * error returned while locking.
 */
325 _ObtainSalvageLock(int locktype)
327 struct VLockFile salvageLock;
332 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
334 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
337 "salvager: There appears to be another salvager running! "
342 "salvager: Error %d trying to acquire salvage lock! "
/* Take the salvage lock with WRITE_LOCK; thin wrapper over _ObtainSalvageLock(). */
348 ObtainSalvageLock(void)
350 _ObtainSalvageLock(WRITE_LOCK);
/* Take the salvage lock with READ_LOCK; thin wrapper over _ObtainSalvageLock(). */
353 ObtainSharedSalvageLock(void)
355 _ObtainSalvageLock(READ_LOCK);
359 #ifdef AFS_SGI_XFS_IOPS_ENV
360 /* Check if the given partition is mounted. For XFS, the root inode is not a
361 * constant. So we check the hard way.
/*
 * Scan the MOUNTED table with getmntent() and compare each entry's mnt_dir
 * against part. Intended result: 1 when an entry matched, 0 when the table
 * was read to the end without a match.
 */
364 IsPartitionMounted(char *part)
367 struct mntent *mntent;
369 osi_Assert(mntfp = setmntent(MOUNTED, "r"));
370 while (mntent = getmntent(mntfp)) {
371 if (!strcmp(part, mntent->mnt_dir))
/* getmntent() yields NULL once the table is exhausted. Both arms of this
 * conditional used to be 1, so "not mounted" could never be reported. */
376 return mntent ? 1 : 0;
379 /* Check if the given inode is the root of the filesystem. */
380 #ifndef AFS_SGI_XFS_IOPS_ENV
/* Non-zero when status->st_ino equals ROOTINODE. */
382 IsRootInode(struct afs_stat_st *status)
385 * The root inode is not a fixed value in XFS partitions. So we need to
386 * see if the partition is in the list of mounted partitions. This only
387 * affects the SalvageFileSys path, so we check there.
389 return (status->st_ino == ROOTINODE);
394 #ifndef AFS_NAMEI_ENV
395 /* We don't want to salvage big files filesystems, since we can't put volumes on
/*
 * Build the device path for devName (prefixing "/dev/" when it does not
 * already start with it), read its superblock with ReadSuper() and log when
 * the superblock cannot be read or IsBigFilesFileSystem() reports a big
 * files file system.
 *
 * NOTE(review): the sprintf()/strcpy() into name are unbounded; confirm that
 * name is large enough for every devName a caller can pass.
 */
399 CheckIfBigFilesFS(char *mountPoint, char *devName)
401 struct superblock fs;
404 if (strncmp(devName, "/dev/", 5)) {
405 (void)sprintf(name, "/dev/%s", devName);
407 (void)strcpy(name, devName);
410 if (ReadSuper(&fs, name) < 0) {
411 Log("Unable to read superblock. Not salvaging partition %s.\n",
415 if (IsBigFilesFileSystem(&fs)) {
416 Log("Partition %s is a big files filesystem, not salvaging.\n",
426 #define HDSTR "\\Device\\Harddisk"
427 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
/*
 * QueryDosDevice()-based variant: resolve the devName of both partitions and
 * report whether the two resulting device strings are equal, ignoring case
 * (_strnicmp over at most RES_LEN - 1 characters). A result that does not
 * start with HDSTR produces the WARNING log below; dowarn is a static flag
 * associated with that warning.
 */
429 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
435 static int dowarn = 1;
437 if (!QueryDosDevice(p1->devName, res1, RES_LEN - 1))
439 if (strncmp(res1, HDSTR, HDLEN)) {
442 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
443 res1, HDSTR, p1->devName);
446 if (!QueryDosDevice(p2->devName, res2, RES_LEN - 1))
448 if (strncmp(res2, HDSTR, HDLEN)) {
451 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
452 res2, HDSTR, p2->devName);
456 return (0 == _strnicmp(res1, res2, RES_LEN - 1));
459 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
462 /* This assumes that two partitions with the same device number divided by
463 * PartsPerDisk are on the same disk.
/*
 * Queue partP for salvage and run the salvage jobs as separate processes.
 *
 * - partP != NULL: a job record is allocated for the partition. If a job for
 *   the same disk (SameDisk) is already in the jobs array the new job is
 *   chained behind it; otherwise it is started once fewer than Parallel jobs
 *   are running.
 * - partP == NULL: wait for the outstanding jobs and then, unless logging to
 *   syslog, read back and unlink the per-job log files
 *   (AFSDIR_SERVER_SLVGLOG_FILEPATH.<jobnumb>).
 * - debug set or Parallel == 1: the partition is salvaged directly through
 *   SalvageFileSys() instead.
 *
 * jobs, numjobs and jobcount are static, so state is carried across calls.
 */
466 SalvageFileSysParallel(struct DiskPartition64 *partP)
469 struct DiskPartition64 *partP;
470 int pid; /* Pid for this job */
471 int jobnumb; /* Log file job number */
472 struct job *nextjob; /* Next partition on disk to salvage */
474 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
475 struct job *thisjob = 0;
476 static int numjobs = 0;
477 static int jobcount = 0;
483 char logFileName[256];
487 /* We have a partition to salvage. Copy it into thisjob */
488 thisjob = (struct job *)malloc(sizeof(struct job));
490 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
493 memset(thisjob, 0, sizeof(struct job));
494 thisjob->partP = partP;
495 thisjob->jobnumb = jobcount;
497 } else if (jobcount == 0) {
498 /* We are asking to wait for all jobs (partp == 0), yet we never
501 Log("No file system partitions named %s* found; not salvaged\n",
502 VICE_PARTITION_PREFIX);
506 if (debug || Parallel == 1) {
508 SalvageFileSys(thisjob->partP, 0);
515 /* Check to see if thisjob is for a disk that we are already
516 * salvaging. If it is, link it in as the next job to do. The
517 * jobs array has 1 entry per disk being salvaged. numjobs is
518 * the total number of disks currently being salvaged. In
519 * order to keep the jobs array compact, when a disk is
520 * completed, the highest element in the jobs array is moved
521 * down to the now open slot.
523 for (j = 0; j < numjobs; j++) {
524 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
525 /* On same disk, add it to this list and return */
526 thisjob->nextjob = jobs[j]->nextjob;
527 jobs[j]->nextjob = thisjob;
534 /* Loop until we start thisjob or until all existing jobs are finished */
535 while (thisjob || (!partP && (numjobs > 0))) {
536 startjob = -1; /* No new job to start */
538 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
539 /* Either the max jobs are running or we have to wait for all
540 * the jobs to finish. In either case, we wait for at least one
541 * job to finish. When it's done, clean up after it.
543 pid = wait(&wstatus);
544 osi_Assert(pid != -1);
545 for (j = 0; j < numjobs; j++) { /* Find which job it is */
546 if (pid == jobs[j]->pid)
549 osi_Assert(j < numjobs);
550 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
551 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
554 numjobs--; /* job no longer running */
555 oldjob = jobs[j]; /* remember */
556 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
557 free(oldjob); /* free the old job */
559 /* If there is another partition on the disk to salvage, then
560 * say we will start it (startjob). If not, then put thisjob there
561 * and say we will start it.
563 if (jobs[j]) { /* Another partition to salvage */
564 startjob = j; /* Will start it */
565 } else { /* There is not another partition to salvage */
567 jobs[j] = thisjob; /* Add thisjob */
569 startjob = j; /* Will start it */
571 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
572 startjob = -1; /* Don't start it - already running */
576 /* We don't have to wait for a job to complete */
578 jobs[numjobs] = thisjob; /* Add this job */
580 startjob = numjobs; /* Will start it */
584 /* Start up a new salvage job on a partition in job slot "startjob" */
585 if (startjob != -1) {
587 Log("Starting salvage of file system partition %s\n",
588 jobs[startjob]->partP->name);
590 /* For NT, we not only fork, but re-exec the salvager. Pass in the
591 * commands and pass the child job number via the data path.
594 nt_SalvagePartition(jobs[startjob]->partP->name,
595 jobs[startjob]->jobnumb);
596 jobs[startjob]->pid = pid;
601 jobs[startjob]->pid = pid;
607 for (fd = 0; fd < 16; fd++)
614 openlog("salvager", LOG_PID, useSyslogFacility);
618 snprintf(logFileName, sizeof logFileName, "%s.%d",
619 AFSDIR_SERVER_SLVGLOG_FILEPATH,
620 jobs[startjob]->jobnumb);
621 logFile = afs_fopen(logFileName, "w");
626 SalvageFileSys1(jobs[startjob]->partP, 0);
631 } /* while ( thisjob || (!partP && numjobs > 0) ) */
633 /* If waited for all jobs to complete, now collect log files and return */
635 if (!useSyslog) /* if syslogging - no need to collect */
638 for (i = 0; i < jobcount; i++) {
639 snprintf(logFileName, sizeof logFileName, "%s.%d",
640 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
641 if ((passLog = afs_fopen(logFileName, "r"))) {
642 while (fgets(buf, sizeof(buf), passLog)) {
647 (void)unlink(logFileName);
/*
 * Run SalvageFileSys1() for partP / singleVolumeNumber. When canfork is set
 * and debug is off the work happens in the process where Fork() returned 0
 * and the other side calls Wait("SalvageFileSys"); otherwise it runs in the
 * calling process.
 */
656 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
658 if (!canfork || debug || Fork() == 0) {
659 SalvageFileSys1(partP, singleVolumeNumber);
660 if (canfork && !debug) {
665 Wait("SalvageFileSys");
/*
 * Split the device path held in pbuffer at its last OS_DIRSEPC. A scratch
 * copy (pbuf) is searched for the separator, and pbuffer itself is rewritten
 * in place so that it holds only the component after the last separator.
 */
669 get_DevName(char *pbuffer, char *wpath)
671 char pbuf[128], *ptr;
/* Bounded copy: pbuf is a fixed 128-byte scratch buffer. */
672 snprintf(pbuf, sizeof(pbuf), "%s", pbuffer);
673 ptr = (char *)strrchr(pbuf, OS_DIRSEPC);
679 ptr = (char *)strrchr(pbuffer, OS_DIRSEPC);
/* Source and destination both live in pbuffer and overlap whenever the last
 * component is at least as long as the text before it; strcpy() is undefined
 * for overlapping objects, memmove() is not. */
681 memmove(pbuffer, ptr + 1, strlen(ptr + 1) + 1);
/*
 * Salvage one partition, or only the volume group of singleVolumeNumber when
 * that argument is non-zero. Steps visible in this function:
 *
 *  1. Record the partition (device, path, device name) in a local SalvInfo.
 *  2. Single volume: connect to the fileserver unless running as the
 *     salvageserver, AskOffline() the volume and, under demand attach,
 *     LockVolume() it. Whole partition: VLockPartition().
 *     Aborts once tries exceeds VOL_MAX_CHECKOUT_RETRIES.
 *  3. Remove leftover files whose names start with "salvage.inodes." or
 *     "salvage.temp." from the partition directory.
 *  4. Create the temporary inode list file, unlink it right away, and fill
 *     it through GetInodeSummary().
 *  5. With ListInodeOption set, print the inode list (PrintInodeList).
 *  6. Read the volume headers (GetVolumeSummary), match them against the
 *     inode summaries, salvage each volume group, and delete header files
 *     that have no inodes behind them.
 *  7. Remove the FORCESALVAGE file for whole-partition runs; for a
 *     single-volume run outside Testing, bring the volumes back online.
 *
 * NOTE(review): strcpy(tmpDevName, partP->devName) and the
 * strcpy()/strcat() sequence building npath are unbounded; confirm the
 * destination sizes against the longest possible inputs.
 */
688 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
691 char inodeListPath[256];
692 FD_t inodeFile = INVALID_FD;
693 static char tmpDevName[100];
694 static char wpath[100];
695 struct VolumeSummary *vsp, *esp;
699 struct SalvInfo l_salvinfo;
700 struct SalvInfo *salvinfo = &l_salvinfo;
703 memset(salvinfo, 0, sizeof(*salvinfo));
706 if (inodeFile != INVALID_FD) {
708 inodeFile = INVALID_FD;
710 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
711 Abort("Raced too many times with fileserver restarts while trying to "
712 "checkout/lock volumes; Aborted\n");
714 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
716 /* unlock all previous volume locks, since we're about to lock them
718 VLockFileReinit(&partP->volLockFile);
720 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
722 salvinfo->fileSysPartition = partP;
723 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
724 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
727 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
728 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
729 name = partP->devName;
731 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
732 strcpy(tmpDevName, partP->devName);
733 name = get_DevName(tmpDevName, wpath);
734 salvinfo->fileSysDeviceName = name;
735 salvinfo->filesysfulldev = wpath;
738 if (singleVolumeNumber) {
739 #if !(defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL))
740 /* only non-DAFS locks the partition when salvaging a single volume;
741 * DAFS will lock the individual volumes in the VG */
742 VLockPartition(partP->name);
743 #endif /* !(AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL) */
747 /* salvageserver already setup fssync conn for us */
748 if ((programType != salvageServer) && !VConnectFS()) {
749 Abort("Couldn't connect to file server\n");
752 salvinfo->useFSYNC = 1;
753 AskOffline(salvinfo, singleVolumeNumber);
754 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
755 if (LockVolume(salvinfo, singleVolumeNumber)) {
758 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
761 salvinfo->useFSYNC = 0;
762 VLockPartition(partP->name);
766 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
769 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
770 partP->name, name, (Testing ? "(READONLY mode)" : ""));
772 Log("***Forced salvage of all volumes on this partition***\n");
777 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
784 osi_Assert((dirp = opendir(salvinfo->fileSysPath)) != NULL);
785 while ((dp = readdir(dirp))) {
786 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
787 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
789 Log("Removing old salvager temp files %s\n", dp->d_name);
790 strcpy(npath, salvinfo->fileSysPath);
791 strcat(npath, OS_DIRSEP);
792 strcat(npath, dp->d_name);
798 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
800 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
801 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
803 snprintf(inodeListPath, 255, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
807 inodeFile = OS_OPEN(inodeListPath, O_RDWR|O_TRUNC|O_CREAT, 0666);
808 if (inodeFile == INVALID_FD) {
809 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
812 /* Using nt_unlink here since we're really using the delete on close
813 * semantics of unlink. In most places in the salvager, we really do
814 * mean to unlink the file at that point. Those places have been
815 * modified to actually do that so that the NT crt can be used there.
817 * jaltman - On NT delete on close cannot be applied to a file while the
818 * process has an open file handle that does not have DELETE file
819 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
820 * delete privileges. As a result the nt_unlink() call will always
823 code = nt_unlink(inodeListPath);
825 code = unlink(inodeListPath);
828 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
831 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
835 salvinfo->inodeFd = inodeFile;
836 if (salvinfo->inodeFd == INVALID_FD)
837 Abort("Temporary file %s is missing...\n", inodeListPath);
838 OS_SEEK(salvinfo->inodeFd, 0L, SEEK_SET);
839 if (ListInodeOption) {
840 PrintInodeList(salvinfo);
841 if (singleVolumeNumber) {
842 /* We've checked out the volume from the fileserver, and we need
843 * to give it back. We don't know if the volume exists or not,
844 * so we don't know whether to AskOnline or not. Try to determine
845 * if the volume exists by trying to read the volume header, and
846 * AskOnline if it is readable. */
847 MaybeAskOnline(salvinfo, singleVolumeNumber);
851 /* enumerate volumes in the partition.
852 * figure out sets of read-only + rw volumes.
853 * salvage each set, read-only volumes first, then read-write.
854 * Fix up inodes on last volume in set (whether it is read-write
857 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
861 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
862 i < salvinfo->nVolumesInInodeFile; i = j) {
863 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
865 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
867 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
868 struct VolumeSummary *tsp;
869 /* Scan volume list (from partition root directory) looking for the
870 * current rw volume number in the volume list from the inode scan.
871 * If there is one here that is not in the inode volume list,
873 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
875 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
877 /* Now match up the volume summary info from the root directory with the
878 * entry in the volume list obtained from scanning inodes */
879 salvinfo->inodeSummary[j].volSummary = NULL;
880 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
881 if (tsp->header.id == vid) {
882 salvinfo->inodeSummary[j].volSummary = tsp;
888 /* Salvage the group of volumes (several read-only + 1 read/write)
889 * starting with the current read-only volume we're looking at.
892 nt_SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
894 DoSalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
895 #endif /* AFS_NT40_ENV */
899 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
900 for (; vsp < esp; vsp++) {
902 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
905 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
906 RemoveTheForce(salvinfo->fileSysPath);
908 if (!Testing && singleVolumeNumber) {
910 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
911 /* unlock vol headers so the fs can attach them when we AskOnline */
912 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
913 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
915 /* Step through the volumeSummary list and set all volumes on-line.
916 * Most volumes were taken off-line in GetVolumeSummary.
917 * If a volume was deleted, don't tell the fileserver anything, since
918 * we already told the fileserver the volume was deleted back when we
919 * destroyed the volume header.
920 * Also, make sure we bring the singleVolumeNumber back online first.
923 for (j = 0; j < salvinfo->nVolumes; j++) {
924 if (salvinfo->volumeSummaryp[j].header.id == singleVolumeNumber) {
926 if (!salvinfo->volumeSummaryp[j].deleted) {
927 AskOnline(salvinfo, singleVolumeNumber);
933 /* If singleVolumeNumber is not in our volumeSummary, it means that
934 * at least one other volume in the VG is on the partition, but the
935 * RW volume is not. We've already AskOffline'd it by now, though,
936 * so make sure we don't still have the volume checked out. */
937 AskDelete(salvinfo, singleVolumeNumber);
940 for (j = 0; j < salvinfo->nVolumes; j++) {
941 if (salvinfo->volumeSummaryp[j].header.id != singleVolumeNumber) {
942 if (!salvinfo->volumeSummaryp[j].deleted) {
943 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
949 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
950 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
953 OS_CLOSE(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
/*
 * Dispose of a volume header file that has no inode data behind it.
 * The path is built from salvinfo->fileSysPath and vsp->fileName and the
 * situation is logged (the message says "would have been deleted" under
 * Testing). The header is then destroyed with VDestroyVolumeDiskHeader(),
 * the file itself is unlinked (ENOENT is tolerated) and, when
 * salvinfo->useFSYNC is set, AskDelete() is issued for the volume id.
 *
 * NOTE(review): sprintf() into path is unbounded; confirm path can hold
 * fileSysPath + separator + fileName.
 */
957 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
960 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, vsp->fileName);
963 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
966 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
968 Log("Error %ld destroying volume disk header for volume %lu\n",
969 afs_printable_int32_ld(code),
970 afs_printable_uint32_lu(vsp->header.id));
973 /* make sure we actually delete the fileName file; ENOENT
974 * is fine, since VDestroyVolumeDiskHeader probably already
976 if (unlink(path) && errno != ENOENT) {
977 Log("Unable to unlink %s (errno = %d)\n", path, errno);
979 if (salvinfo->useFSYNC) {
980 AskDelete(salvinfo, vsp->header.id);
/*
 * Comparator over struct ViceInodeInfo records, taking const void pointers
 * in the form qsort() expects.
 *
 * When either record is a special inode (vnodeNumber == INODESPECIAL) the
 * two are first ordered by read-write volume id: parentId for a special
 * inode, volumeId otherwise. The remaining tests in that branch compare
 * each record's volumeId with its read-write id and, for two special
 * inodes of the same volume, the special type.
 *
 * Ordinary inodes are ordered by volumeId, then vnodeNumber. Ties are
 * broken on vnodeUniquifier and then inodeDataVersion with the tests
 * reversed so the most desirable inode comes first. The AFS_3DISPARES and
 * AFS_SGI_EXMAG sections special-case one value above 90% and the other
 * below 10% of the field's range.
 */
988 CompareInodes(const void *_p1, const void *_p2)
990 const struct ViceInodeInfo *p1 = _p1;
991 const struct ViceInodeInfo *p2 = _p2;
992 if (p1->u.vnode.vnodeNumber == INODESPECIAL
993 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
994 VolumeId p1rwid, p2rwid;
996 (p1->u.vnode.vnodeNumber ==
997 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
999 (p2->u.vnode.vnodeNumber ==
1000 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1001 if (p1rwid < p2rwid)
1003 if (p1rwid > p2rwid)
1005 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1006 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
1007 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1008 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1009 if (p1->u.vnode.volumeId == p1rwid)
1011 if (p2->u.vnode.volumeId == p2rwid)
1013 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1015 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1016 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1017 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1019 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1021 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1023 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1025 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1027 /* The following tests are reversed, so that the most desirable
1028 * of several similar inodes comes first */
1029 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1030 #ifdef AFS_3DISPARES
1031 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1032 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1035 #ifdef AFS_SGI_EXMAG
1036 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1037 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1042 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1043 #ifdef AFS_3DISPARES
1044 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1045 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1048 #ifdef AFS_SGI_EXMAG
1049 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1050 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1055 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1056 #ifdef AFS_3DISPARES
1057 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1058 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1061 #ifdef AFS_SGI_EXMAG
1062 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1063 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1068 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1069 #ifdef AFS_3DISPARES
1070 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1071 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1074 #ifdef AFS_SGI_EXMAG
1075 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1076 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/*
 * Summarize the run of consecutive entries starting at ip (at most
 * maxInodes of them) that share the first entry's volumeId, and store the
 * result in summary:
 *   volumeId       - the volume id of the run
 *   RWvolumeId     - parentId of a special inode if one is seen, otherwise
 *                    the volume id itself
 *   nInodes        - taken from n
 *   nSpecialInodes - taken from nSpecial
 *   maxUniquifier  - largest vnodeUniquifier recorded in maxunique
 */
1085 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1086 struct InodeSummary *summary)
1088 VolumeId volume = ip->u.vnode.volumeId;
1089 VolumeId rwvolume = volume;
1094 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1096 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1098 rwvolume = ip->u.special.parentId;
1099 /* This isn't quite right, as there could (in error) be different
1100 * parent inodes in different special vnodes */
1102 if (maxunique < ip->u.vnode.vnodeUniquifier)
1103 maxunique = ip->u.vnode.vnodeUniquifier;
1107 summary->volumeId = volume;
1108 summary->RWvolumeId = rwvolume;
1109 summary->nInodes = n;
1110 summary->nSpecialInodes = nSpecial;
1111 summary->maxUniquifier = maxunique;
/*
 * Inode filter for single-volume salvages (handed to ListViceInodes() by
 * GetInodeSummary()). Non-zero when the inode belongs to
 * singleVolumeNumber: special inodes are matched on parentId, all others on
 * volumeId. rock is not referenced in the lines shown.
 */
1115 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1117 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1118 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1119 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1124 * Collect list of inodes in file named by path. If a truly fatal error,
1125 * unlink the file and abort. For lesser errors, return -1. The file will
1126 * be unlinked by the caller.
1129 GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolumeNumber)
1133 struct ViceInodeInfo *ip, *ip_save;
1134 struct InodeSummary summary;
1135 char summaryFileName[50];
1136 FD_t summaryFile = INVALID_FD;
1138 char *dev = salvinfo->fileSysPath;
1139 char *wpath = salvinfo->fileSysPath;
1141 char *dev = salvinfo->fileSysDeviceName;
1142 char *wpath = salvinfo->filesysfulldev;
1144 char *part = salvinfo->fileSysPath;
1149 afs_sfsize_t st_size;
1151 /* This file used to come from vfsck; cobble it up ourselves now... */
1153 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1154 singleVolumeNumber ? OnlyOneVolume : 0,
1155 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1157 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1161 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1163 if (forceSal && !ForceSalvage) {
1164 Log("***Forced salvage of all volumes on this partition***\n");
1167 OS_SEEK(inodeFile, 0L, SEEK_SET);
1168 salvinfo->inodeFd = inodeFile;
1169 if (salvinfo->inodeFd == INVALID_FD ||
1170 (st_size = OS_SIZE(salvinfo->inodeFd)) == -1) {
1171 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1173 tdir = (tmpdir ? tmpdir : part);
1175 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1176 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp."));
1178 snprintf(summaryFileName, sizeof summaryFileName,
1179 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1181 summaryFile = OS_OPEN(summaryFileName, O_RDWR|O_APPEND|O_CREAT, 0666);
1182 if (summaryFile == INVALID_FD) {
1183 Abort("Unable to create inode summary file\n");
1187 /* Using nt_unlink here since we're really using the delete on close
1188 * semantics of unlink. In most places in the salvager, we really do
1189 * mean to unlink the file at that point. Those places have been
1190 * modified to actually do that so that the NT crt can be used there.
1192 * jaltman - As commented elsewhere, this cannot work because fopen()
1193 * does not open files with DELETE and FILE_SHARE_DELETE.
1195 code = nt_unlink(summaryFileName);
1197 code = unlink(summaryFileName);
1200 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1203 if (!canfork || debug || Fork() == 0) {
1204 int nInodes = st_size / sizeof(struct ViceInodeInfo);
1206 OS_CLOSE(summaryFile);
1207 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1208 RemoveTheForce(salvinfo->fileSysPath);
1210 struct VolumeSummary *vsp;
1214 GetVolumeSummary(salvinfo, singleVolumeNumber);
1216 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1217 if (vsp->fileName) {
1218 if (vsp->header.id == singleVolumeNumber) {
1221 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1227 MaybeAskOnline(salvinfo, singleVolumeNumber);
1229 /* make sure we get rid of stray .vol headers, even if
1230 * they're not in our volume summary (might happen if
1231 * e.g. something else created them and they're not in the
1232 * fileserver VGC) */
1233 VDestroyVolumeDiskHeader(salvinfo->fileSysPartition,
1234 singleVolumeNumber, 0 /*parent*/);
1235 AskDelete(salvinfo, singleVolumeNumber);
1239 Log("%s vice inodes on %s; not salvaged\n",
1240 singleVolumeNumber ? "No applicable" : "No", dev);
1245 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1247 OS_CLOSE(summaryFile);
1249 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1252 if (OS_READ(salvinfo->inodeFd, ip, st_size) != st_size) {
1253 OS_CLOSE(summaryFile);
1254 Abort("Unable to read inode table; %s not salvaged\n", dev);
1256 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1257 if (OS_SEEK(salvinfo->inodeFd, 0, SEEK_SET) == -1
1258 || OS_WRITE(salvinfo->inodeFd, ip, st_size) != st_size) {
1259 OS_CLOSE(summaryFile);
1260 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1265 CountVolumeInodes(ip, nInodes, &summary);
1266 if (OS_WRITE(summaryFile, &summary, sizeof(summary)) != sizeof(summary)) {
1267 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1268 OS_CLOSE(summaryFile);
1272 summary.index += (summary.nInodes);
1273 nInodes -= summary.nInodes;
1274 ip += summary.nInodes;
1277 ip = ip_save = NULL;
1278 /* Following fflush is not fclose, because if it was debug mode would not work */
1279 if (OS_SYNC(summaryFile) == -1) {
1280 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1281 OS_CLOSE(summaryFile);
1285 if (canfork && !debug) {
1290 if (Wait("Inode summary") == -1) {
1291 OS_CLOSE(summaryFile);
1292 Exit(1); /* salvage of this partition aborted */
1296 st_size = OS_SIZE(summaryFile);
1297 osi_Assert(st_size >= 0);
1300 salvinfo->inodeSummary = (struct InodeSummary *)malloc(st_size);
1301 osi_Assert(salvinfo->inodeSummary != NULL);
1302 /* For GNU we need to do lseek to get the file pointer moved. */
1303 osi_Assert(OS_SEEK(summaryFile, 0, SEEK_SET) == 0);
1304 ret = OS_READ(summaryFile, salvinfo->inodeSummary, st_size);
1305 osi_Assert(ret == st_size);
1307 salvinfo->nVolumesInInodeFile = st_size / sizeof(struct InodeSummary);
1308 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1309 salvinfo->inodeSummary[i].volSummary = NULL;
1311 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)st_size);
1312 OS_CLOSE(summaryFile);
1315 if (retcode && singleVolumeNumber && !deleted) {
1316 AskError(salvinfo, singleVolumeNumber);
/*
 * CompareVolumes -- ordering callback taking two const struct VolumeSummary
 * objects passed as const void pointers.
 *
 * Visible ordering rules:
 *   1. entries with different header.parent sort by ascending parent id;
 *   2. within one parent, each side is tested for header.id == header.parent
 *      (i.e. it is the read-write volume);
 *   3. otherwise both are read-only and sort by ascending header.id.
 *
 * NOTE(review): the statements executed when either rw test succeeds are not
 * visible here; going by the comment below they presumably place the rw
 * volume first -- TODO confirm.
 */
1322 /* Comparison routine for volume sort.
1323 This is setup so that a read-write volume comes immediately before
1324 any read-only clones of that volume */
1326 CompareVolumes(const void *_p1, const void *_p2)
1328 const struct VolumeSummary *p1 = _p1;
1329 const struct VolumeSummary *p2 = _p2;
1330 if (p1->header.parent != p2->header.parent)
1331 return p1->header.parent < p2->header.parent ? -1 : 1;
1332 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1334 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1336 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
/*
 * AskVolumeSummary -- build salvinfo->volumeSummaryp from the fileserver's
 * volume group information instead of scanning the partition.
 *
 * Everything between the #if and #endif only exists when both
 * FSSYNC_BUILD_CLIENT and AFS_DEMAND_ATTACH_FS are defined, and is only
 * entered when programType == salvageServer and singleVolumeNumber != 0.
 * Visible steps on that path:
 *   - FSYNC_VGCQuery() is issued, and re-issued for as long as it fails with
 *     reason FSYNC_PART_SCANNING, sleeping between attempts; the sleep
 *     starts at 1 second and is clamped at 10;
 *   - a failure with reason FSYNC_UNKNOWN_VOLID is turned into a zeroed
 *     reply whose rw field is singleVolumeNumber;
 *   - other refusals are logged with the code and reason;
 *   - if the reply names a different rw volume, the request was for a clone:
 *     the group is handed to SALVSYNC_LinkVolume() (when
 *     SALVSYNC_BUILD_CLIENT is defined) and the process exits with
 *     SALSRV_EXIT_VOLGROUP_LINK;
 *   - VOL_VG_MAX_VOLS summaries are calloc'd and q_res.children[] is
 *     walked: children other than singleVolumeNumber get AskOffline() and
 *     LockVolume(); VReadVolumeDiskHeader(), DiskToVolumeHeader() and
 *     VolumeExternalName_r() fill the current summary, fileName is set to
 *     ToString(name) and salvinfo->nVolumes is incremented;
 *   - the collected summaries are sorted with qsort().
 * A fallback message about scanning the entire partition is logged when the
 * summary cannot be obtained from the fileserver.
 */
1340 * Gleans volumeSummary information by asking the fileserver
1342 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1343 * salvaging a whole partition
1345 * @return whether we obtained the volume summary information or not
1346 * @retval 0 success; we obtained the volume summary information
1347 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1349 * @retval 1 we did not get the volume summary information; either the
1350 * fileserver responded with an error, or we are not supposed to
1351 * ask the fileserver for the information (e.g. we are salvaging
1352 * the entire partition or we are not the salvageserver)
1354 * @note for non-DAFS, always returns 1
1357 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1360 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1361 if (programType == salvageServer) {
1362 if (singleVolumeNumber) {
1363 FSSYNC_VGQry_response_t q_res;
1365 struct VolumeSummary *vsp;
1367 struct VolumeDiskHeader diskHdr;
1369 memset(&res, 0, sizeof(res));
1371 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1374 * We must wait for the partition to finish scanning before
1375 * can continue, since we will not know if we got the entire
1376 * VG membership unless the partition is fully scanned.
1377 * We could, in theory, just scan the partition ourselves if
1378 * the VG cache is not ready, but we would be doing the exact
1379 * same scan the fileserver is doing; it will almost always
1380 * be faster to wait for the fileserver. The only exceptions
1381 * are if the partition does not take very long to scan, and
1382 * in that case it's fast either way, so who cares?
1384 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1385 Log("waiting for fileserver to finish scanning partition %s...\n",
1386 salvinfo->fileSysPartition->name);
1388 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1389 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1390 * just so small partitions don't need to wait over 10
1391 * seconds every time, and large partitions are generally
1392 * polled only once every ten seconds. */
1393 sleep((i > 10) ? (i = 10) : i);
1395 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1399 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1400 /* This can happen if there's no header for the volume
1401 * we're salvaging, or no headers exist for the VG (if
1402 * we're salvaging an RW). Act as if we got a response
1403 * with no VG members. The headers may be created during
1404 * salvaging, if there are inodes in this VG. */
1406 memset(&q_res, 0, sizeof(q_res));
1407 q_res.rw = singleVolumeNumber;
1411 Log("fileserver refused VGCQuery request for volume %lu on "
1412 "partition %s, code %ld reason %ld\n",
1413 afs_printable_uint32_lu(singleVolumeNumber),
1414 salvinfo->fileSysPartition->name,
1415 afs_printable_int32_ld(code),
1416 afs_printable_int32_ld(res.hdr.reason));
1420 if (q_res.rw != singleVolumeNumber) {
1421 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1422 afs_printable_uint32_lu(singleVolumeNumber),
1423 afs_printable_uint32_lu(q_res.rw));
1424 #ifdef SALVSYNC_BUILD_CLIENT
1425 if (SALVSYNC_LinkVolume(q_res.rw,
1427 salvinfo->fileSysPartition->name,
1429 Log("schedule request failed\n");
1431 #endif /* SALVSYNC_BUILD_CLIENT */
1432 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1435 salvinfo->volumeSummaryp = calloc(VOL_VG_MAX_VOLS, sizeof(struct VolumeSummary));
1436 osi_Assert(salvinfo->volumeSummaryp != NULL);
1438 salvinfo->nVolumes = 0;
1439 vsp = salvinfo->volumeSummaryp;
1441 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1442 char name[VMAXPATHLEN];
1444 if (!q_res.children[i]) {
1448 /* AskOffline for singleVolumeNumber was called much earlier */
1449 if (q_res.children[i] != singleVolumeNumber) {
1450 AskOffline(salvinfo, q_res.children[i]);
1451 if (LockVolume(salvinfo, q_res.children[i])) {
1457 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1459 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1460 afs_printable_uint32_lu(q_res.children[i]));
1465 DiskToVolumeHeader(&vsp->header, &diskHdr);
1466 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1467 vsp->fileName = ToString(name);
1468 salvinfo->nVolumes++;
1472 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1477 Log("Cannot get volume summary from fileserver; falling back to scanning "
1478 "entire partition\n");
1481 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
/*
 * CountHeader -- header-walk callback that only counts.  The sole visible
 * statement reinterprets rock as a pointer to an afs_int32 counter (nvols);
 * dp, name, hdr and last are documented below as unused.
 */
1486 * count how many volume headers are found by VWalkVolumeHeaders.
1488 * @param[in] dp the disk partition (unused)
1489 * @param[in] name full path to the .vol header (unused)
1490 * @param[in] hdr the header data (unused)
1491 * @param[in] last whether this is the last try or not (unused)
1492 * @param[in] rock actually an afs_int32*; the running count of how many
1493 * volumes we have found
1498 CountHeader(struct DiskPartition64 *dp, const char *name,
1499 struct VolumeDiskHeader *hdr, int last, void *rock)
1501 afs_int32 *nvols = (afs_int32 *)rock;
/*
 * struct SalvageScanParams -- state carried through the void *rock argument
 * of the header-walk callbacks.  In this file RecordHeader() and
 * UnlinkHeader() cast rock back to this type, and GetVolumeSummary() fills
 * in an instance before starting the walk.
 */
1507 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
1510 struct SalvageScanParams {
1511 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1512 * vol id of the VG we're salvaging */
1513 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1514 * we're filling in */
1515 afs_int32 nVolumes; /**< # of vols we've encountered */
1516 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1517 * # of vols we've alloc'd memory for) */
1518 int retry; /**< do we need to retry vol lock/checkout? */
1519 struct SalvInfo *salvinfo; /**< salvage job info */
/*
 * RecordHeader -- header-walk callback that turns one on-disk volume header
 * into a struct VolumeSummary.
 *
 * Visible flow:
 *   - hdr is converted into a local, zero-initialized summary with
 *     DiskToVolumeHeader();
 *   - when singleVolumeNumber is set and the header is that volume but its
 *     parent is a different id (we were asked about a clone), the
 *     salvageserver logs, asks SALVSYNC_LinkVolume() to schedule the parent
 *     (under SALVSYNC_BUILD_CLIENT) and calls
 *     Exit(SALSRV_EXIT_VOLGROUP_LINK); the other visible message on this
 *     path reports a read-only volume that is not salvaged;
 *   - further processing is limited to a partition salvage
 *     (singleVolumeNumber == 0) or to headers whose id or parent equals
 *     singleVolumeNumber;
 *   - the file's base name is compared against the VFORMAT rendering of
 *     the volume id to detect a mis-named header;
 *   - when salvaging a single group, AskOffline() is issued for ids other
 *     than singleVolumeNumber, and under AFS_DEMAND_ATTACH_FS or
 *     AFS_DEMAND_ATTACH_UTIL the volume is locked with LockVolume();
 *   - the summary gets fileName = ToString(base) and is memcpy'd into
 *     params->vsp, guarded by an Abort() when nVolumes > totalVolumes.
 */
1523 * records volume summary info found from VWalkVolumeHeaders.
1525 * Found volumes are also taken offline if they are in the specific volume
1526 * group we are looking for.
1528 * @param[in] dp the disk partition
1529 * @param[in] name full path to the .vol header
1530 * @param[in] hdr the header data
1531 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1532 * @param[in] rock actually a struct SalvageScanParams*, containing the
1533 * information needed to record the volume summary data
1535 * @return operation status
1537 * @retval -1 volume locking raced with fileserver restart; checking out
1538 * and locking volumes needs to be retried
1539 * @retval 1 volume header is mis-named and should be deleted
1542 RecordHeader(struct DiskPartition64 *dp, const char *name,
1543 struct VolumeDiskHeader *hdr, int last, void *rock)
1545 char nameShouldBe[64];
1546 struct SalvageScanParams *params;
1547 struct VolumeSummary summary;
1548 VolumeId singleVolumeNumber;
1549 struct SalvInfo *salvinfo;
1551 params = (struct SalvageScanParams *)rock;
1553 memset(&summary, 0, sizeof(summary));
1555 singleVolumeNumber = params->singleVolumeNumber;
1556 salvinfo = params->salvinfo;
1558 DiskToVolumeHeader(&summary.header, hdr);
1560 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1561 && summary.header.parent != singleVolumeNumber) {
1563 if (programType == salvageServer) {
1564 #ifdef SALVSYNC_BUILD_CLIENT
1565 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1566 summary.header.id, summary.header.parent);
1567 if (SALVSYNC_LinkVolume(summary.header.parent,
1571 Log("schedule request failed\n");
1574 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1577 Log("%u is a read-only volume; not salvaged\n",
1578 singleVolumeNumber);
1583 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1584 || summary.header.parent == singleVolumeNumber) {
1586 /* check if the header file is incorrectly named */
1588 const char *base = strrchr(name, OS_DIRSEPC);
1595 snprintf(nameShouldBe, sizeof nameShouldBe,
1596 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1599 if (strcmp(nameShouldBe, base)) {
1600 /* .vol file has wrong name; retry/delete */
1604 if (!badname || last) {
1605 /* only offline the volume if the header is good, or if this is
1606 * the last try looking at it; avoid AskOffline'ing the same vol
1609 if (singleVolumeNumber
1610 && summary.header.id != singleVolumeNumber) {
1611 /* don't offline singleVolumeNumber; we already did that
1614 AskOffline(salvinfo, summary.header.id);
1616 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
1618 /* don't lock the volume if the header is bad, since we're
1619 * about to delete it anyway. */
1620 if (LockVolume(salvinfo, summary.header.id)) {
1625 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
1629 if (last && !Showmode) {
1630 Log("Volume header file %s is incorrectly named (should be %s "
1631 "not %s); %sdeleted (it will be recreated later, if "
1632 "necessary)\n", name, nameShouldBe, base,
1633 (Testing ? "it would have been " : ""));
1638 summary.fileName = ToString(base);
1641 if (params->nVolumes > params->totalVolumes) {
1642 /* We found more volumes than we found on the first partition walk;
1643 * apparently something created a volume while we were
1644 * partition-salvaging, or we found more than 20 vols when salvaging a
1645 * particular volume. Abort if we detect this, since other programs
1646 * supposed to not touch the partition while it is partition-salvaging,
1647 * and we shouldn't find more than 20 vols in a VG.
1649 Abort("Found %ld vol headers, but should have found at most %ld! "
1650 "Make sure the volserver/fileserver are not running at the "
1651 "same time as a partition salvage\n",
1652 afs_printable_int32_ld(params->nVolumes),
1653 afs_printable_int32_ld(params->totalVolumes));
1656 memcpy(params->vsp, &summary, sizeof(summary));
/*
 * UnlinkHeader -- header-walk callback deciding whether a rejected .vol file
 * is removed.
 *
 * Two cases are distinguished: no header could be read at all (a "not a
 * legitimate volume header file" message is logged), or a header was read
 * and params->singleVolumeNumber is 0 (partition salvage).  The removal
 * itself is the single unlink(name) call, gated on the local dounlink flag;
 * a failing unlink() is logged together with errno.
 *
 * NOTE(review): where dounlink is set, and how Testing affects it beyond the
 * wording of the log message, is not visible here -- TODO confirm.
 */
1664 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1666 * If the header could not be read in at all, the header is always unlinked.
1667 * If instead RecordHeader said the header was bad (that is, the header file
1668 * is mis-named), we only unlink if we are doing a partition salvage, as
1669 * opposed to salvaging a specific volume group.
1671 * @param[in] dp the disk partition
1672 * @param[in] name full path to the .vol header
1673 * @param[in] hdr header data, or NULL if the header could not be read
1674 * @param[in] rock actually a struct SalvageScanParams*, with some information
1678 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1679 struct VolumeDiskHeader *hdr, void *rock)
1681 struct SalvageScanParams *params;
1684 params = (struct SalvageScanParams *)rock;
1687 /* no header; header is too bogus to read in at all */
1689 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1695 } else if (!params->singleVolumeNumber) {
1696 /* We were able to read in a header, but RecordHeader said something
1697 * was wrong with it. We only unlink those if we are doing a partition
1704 if (dounlink && unlink(name)) {
1705 Log("Error %d while trying to unlink %s\n", errno, name);
/*
 * GetVolumeSummary -- fill salvinfo->volumeSummaryp and salvinfo->nVolumes.
 *
 * AskVolumeSummary() is tried first.  Otherwise the summary array is sized
 * either by a counting pass (VWalkVolumeHeaders() with CountHeader, done
 * only when singleVolumeNumber is 0) or as VOL_VG_MAX_VOLS, then calloc'd
 * and populated by a second VWalkVolumeHeaders() pass that uses RecordHeader
 * and UnlinkHeader.  The result is sorted with qsort().
 *
 * NOTE(review): the final argument of the second VWalkVolumeHeaders() call
 * reads as a mis-encoded address-of on the local params struct; verify it
 * against the pristine source before building.
 */
1710 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1711 * the fileserver for VG information, or by scanning the /vicepX partition.
1713 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1714 * are salvaging, or 0 if this is a partition
1717 * @return operation status
1719 * @retval -1 we raced with a fileserver restart; checking out and locking
1720 * volumes must be retried
1723 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1725 afs_int32 nvols = 0;
1726 struct SalvageScanParams params;
1729 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1731 /* we successfully got the vol information from the fileserver; no
1732 * need to scan the partition */
1736 /* we need to retry volume checkout */
1740 if (!singleVolumeNumber) {
1741 /* Count how many volumes we have in /vicepX */
1742 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1745 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1750 nvols = VOL_VG_MAX_VOLS;
1753 salvinfo->volumeSummaryp = calloc(nvols, sizeof(struct VolumeSummary));
1754 osi_Assert(salvinfo->volumeSummaryp != NULL);
1756 params.singleVolumeNumber = singleVolumeNumber;
1757 params.vsp = salvinfo->volumeSummaryp;
1758 params.nVolumes = 0;
1759 params.totalVolumes = nvols;
1761 params.salvinfo = salvinfo;
1763 /* walk the partition directory of volume headers and record the info
1764 * about them; unlinking invalid headers */
1765 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1766 UnlinkHeader, ¶ms);
1768 /* we apparently need to retry checking-out/locking volumes */
1772 Abort("Failed to get volume header summary\n");
1774 salvinfo->nVolumes = params.nVolumes;
1776 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
/*
 * FindLinkHandle -- look for the link table inode of a volume group.
 *
 * For each of the nVols summaries, the special inodes are the first
 * nSpecialInodes entries starting at allInodes + isp[i].index; the first one
 * whose u.special.type is VI_LINKTABLE has its inodeNumber returned.
 *
 * NOTE(review): the value returned when no link table is found is not
 * visible here -- TODO confirm.
 */
1782 /* Find the link table. This should be associated with the RW volume or, if
1783 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
1786 FindLinkHandle(struct InodeSummary *isp, int nVols,
1787 struct ViceInodeInfo *allInodes)
1790 struct ViceInodeInfo *ip;
1792 for (i = 0; i < nVols; i++) {
1793 ip = allInodes + isp[i].index;
1794 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1795 if (ip[j].u.special.type == VI_LINKTABLE)
1796 return ip[j].inodeNumber;
/*
 * CreateLinkTable -- make sure the volume group has a link table inode and
 * an initialized handle for it.
 *
 * If ino is not VALID_INO, IH_CREATE() is called for a new INODESPECIAL /
 * VI_LINKTABLE inode belonging to isp->RWvolumeId.  salvinfo->VGLinkH is
 * then initialized with IH_INIT(), opened, truncated to
 * sizeof(struct versionStamp) + sizeof(short), and stamped with
 * LINKTABLEMAGIC / LINKTABLEVERSION at offset 0 before the descriptor is
 * closed with FDH_REALLYCLOSE().  When isp->volSummary exists, its
 * header.linkTable is updated to ino as well.  Open, truncate and write
 * failures are fatal (Abort) and report errno.
 *
 * NOTE(review): the message attached to the FDH_PWRITE() failure repeats
 * the "Can't truncate link table" text used for the FDH_TRUNC() failure; it
 * looks copy-pasted and should probably say "write".
 */
1803 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1805 struct versionStamp version;
1808 if (!VALID_INO(ino))
1810 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
1811 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1812 if (!VALID_INO(ino))
1814 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1815 isp->RWvolumeId, errno);
1816 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1817 fdP = IH_OPEN(salvinfo->VGLinkH);
1819 Abort("Can't open link table for volume %u (error = %d)\n",
1820 isp->RWvolumeId, errno);
1822 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1823 Abort("Can't truncate link table for volume %u (error = %d)\n",
1824 isp->RWvolumeId, errno);
1826 version.magic = LINKTABLEMAGIC;
1827 version.version = LINKTABLEVERSION;
1829 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1831 Abort("Can't truncate link table for volume %u (error = %d)\n",
1832 isp->RWvolumeId, errno);
1834 FDH_REALLYCLOSE(fdP);
1836 /* If the volume summary exits (i.e., the V*.vol header file exists),
1837 * then set this inode there as well.
1839 if (isp->volSummary)
1840 isp->volSummary->header.linkTable = ino;
/*
 * Thread entry body: unpacks the SVGParms_t passed as arg and runs
 * DoSalvageVolumeGroup() with its salvinfo, inode summary pointer and
 * volume count.
 * NOTE(review): presumably this is nt_SVG, the start routine handed to
 * pthread_create() in nt_SalvageVolumeGroup() -- confirm against the
 * function header, which is not visible here.
 */
1849 SVGParms_t *parms = (SVGParms_t *) arg;
1850 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * nt_SalvageVolumeGroup -- run the salvage of one volume group on a separate
 * thread and wait for it to finish.
 *
 * Resets the per-volume-group fields of salvinfo (VolumeChanged, VGLinkH,
 * VGLinkH_cnt, VolInfo), packs salvinfo, isp and nVols into parms, creates
 * a PTHREAD_CREATE_JOINABLE thread running nt_SVG and joins it.  Failures
 * of pthread_attr_init(), pthread_attr_setdetachstate() and
 * pthread_create() each have their own log message.
 */
1855 nt_SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1858 pthread_attr_t tattr;
1862 /* Initialize per volume global variables, even if later code does so */
1863 salvinfo->VolumeChanged = 0;
1864 salvinfo->VGLinkH = NULL;
1865 salvinfo->VGLinkH_cnt = 0;
1866 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1868 parms.svgp_inodeSummaryp = isp;
1869 parms.svgp_count = nVols;
1870 parms.svgp_salvinfo = salvinfo;
1871 code = pthread_attr_init(&tattr);
1873 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1877 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1879 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1882 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1884 Log("Failed to create thread to salvage volume group %u\n",
1888 (void)pthread_join(tid, NULL);
1890 #endif /* AFS_NT40_ENV */
/*
 * DoSalvageVolumeGroup -- salvage one volume group: the nVols consecutive
 * InodeSummary entries starting at isp.
 *
 * Visible flow:
 *   - haveRWvolume is true when the first summary is the RW volume
 *     (volumeId == RWvolumeId) and has special inodes;
 *   - unless ForceSalvage is set, QuickCheck() is consulted first (subject
 *     to the ShowMounts / haveRWvolume tests);
 *   - when canfork && !debug, the parent of a Fork() only Wait()s;
 *   - the group's inodes (sum of isp[i].nInodes) are read from
 *     salvinfo->inodeFd into a malloc'd array;
 *   - under AFS_NAMEI_ENV the link table is located with FindLinkHandle()
 *     and its header magic verified; when it is missing or unusable it is
 *     recreated through CreateLinkTable() and namei_SetLinkCount() is
 *     applied to every non-special inode of the group;
 *   - volumes are processed from index nVols - 1 down to salvageTo (0 with
 *     an RW volume, 1 without), each in two passes (check = 1, then
 *     check = 0) through SalvageVolumeHeaderFile() and SalvageVnodes();
 *   - afterwards each inode with a positive linkCount gets IH_DEC() calls
 *     and each with a negative one IH_INC() calls; under AFS_NAMEI_ENV the
 *     link table inode itself is deferred and adjusted last;
 *   - finally SalvageVolume() runs the directory checks on the rw volume
 *     and the link handle is released with IH_RELEASE().
 */
1893 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1895 struct ViceInodeInfo *inodes, *allInodes, *ip;
1896 int i, totalInodes, size, salvageTo;
1900 int dec_VGLinkH = 0;
1902 FdHandle_t *fdP = NULL;
1904 salvinfo->VGLinkH_cnt = 0;
1905 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1906 && isp->nSpecialInodes > 0);
1907 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1908 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1911 if (ShowMounts && !haveRWvolume)
1913 if (canfork && !debug && Fork() != 0) {
1914 (void)Wait("Salvage volume group");
1917 for (i = 0, totalInodes = 0; i < nVols; i++)
1918 totalInodes += isp[i].nInodes;
1919 size = totalInodes * sizeof(struct ViceInodeInfo);
/* NOTE(review): no NULL check of the malloc() result is visible before it
 * is used below, and inodes - isp->index forms a pointer outside the
 * allocated object whenever isp->index is non-zero. */
1920 inodes = (struct ViceInodeInfo *)malloc(size);
1921 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1922 * for the partition, if all the inodes
1923 * had been read into memory */
1925 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1927 osi_Assert(OS_READ(salvinfo->inodeFd, inodes, size) == size);
1929 /* Don't try to salvage a read write volume if there isn't one on this
1931 salvageTo = haveRWvolume ? 0 : 1;
1933 #ifdef AFS_NAMEI_ENV
1934 ino = FindLinkHandle(isp, nVols, allInodes);
1935 if (VALID_INO(ino)) {
1936 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1937 fdP = IH_OPEN(salvinfo->VGLinkH);
1939 if (VALID_INO(ino) && fdP != NULL) {
1940 struct versionStamp header;
1941 afs_sfsize_t nBytes;
1943 nBytes = FDH_PREAD(fdP, (char *)&header, sizeof(struct versionStamp), 0);
1944 if (nBytes != sizeof(struct versionStamp)
1945 || header.magic != LINKTABLEMAGIC) {
1946 Log("Bad linktable header for volume %u.\n", isp->RWvolumeId);
1947 FDH_REALLYCLOSE(fdP);
1951 if (!VALID_INO(ino) || fdP == NULL) {
1952 Log("%s link table for volume %u.\n",
1953 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1955 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1958 struct ViceInodeInfo *ip;
1959 CreateLinkTable(salvinfo, isp, ino);
1960 fdP = IH_OPEN(salvinfo->VGLinkH);
1961 /* Sync fake 1 link counts to the link table, now that it exists */
1963 for (i = 0; i < nVols; i++) {
1964 ip = allInodes + isp[i].index;
1965 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1966 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1973 FDH_REALLYCLOSE(fdP);
1975 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1978 /* Salvage in reverse order--read/write volume last; this way any
1979 * Inodes not referenced by the time we salvage the read/write volume
1980 * can be picked up by the read/write volume */
1981 /* ACTUALLY, that's not done right now--the inodes just vanish */
1982 for (i = nVols - 1; i >= salvageTo; i--) {
1984 struct InodeSummary *lisp = &isp[i];
1985 #ifdef AFS_NAMEI_ENV
1986 /* If only the RO is present on this partition, the link table
1987 * shows up as a RW volume special file. Need to make sure the
1988 * salvager doesn't try to salvage the non-existent RW.
1990 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
1991 /* If this only special inode is the link table, continue */
1992 if (inodes->u.special.type == VI_LINKTABLE) {
1999 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
2000 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
2001 /* Check inodes twice. The second time do things seriously. This
2002 * way the whole RO volume can be deleted, below, if anything goes wrong */
2003 for (check = 1; check >= 0; check--) {
2005 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
2007 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
2008 if (rw && deleteMe) {
2009 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
2010 * volume won't be called */
2016 if (rw && check == 1)
2018 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
2019 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
2025 /* Fix actual inode counts */
2028 Log("totalInodes %d\n",totalInodes);
2029 for (ip = inodes; totalInodes; ip++, totalInodes--) {
2030 static int TraceBadLinkCounts = 0;
2031 #ifdef AFS_NAMEI_ENV
2032 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
2033 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
2034 VGLinkH_p1 = ip->u.param[0];
2035 continue; /* Deal with this last. */
2038 if (ip->linkCount != 0 && TraceBadLinkCounts) {
2039 TraceBadLinkCounts--; /* Limit reports, per volume */
2040 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
2042 while (ip->linkCount > 0) {
2043 /* below used to assert, not break */
2045 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2046 Log("idec failed. inode %s errno %d\n",
2047 PrintInode(stmp, ip->inodeNumber), errno);
2053 while (ip->linkCount < 0) {
2054 /* these used to be asserts */
2056 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2057 Log("iinc failed. inode %s errno %d\n",
2058 PrintInode(stmp, ip->inodeNumber), errno);
2065 #ifdef AFS_NAMEI_ENV
2066 while (dec_VGLinkH > 0) {
2067 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2068 Log("idec failed on link table, errno = %d\n", errno);
2072 while (dec_VGLinkH < 0) {
2073 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2074 Log("iinc failed on link table, errno = %d\n", errno);
2081 /* Directory consistency checks on the rw volume */
2083 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2084 IH_RELEASE(salvinfo->VGLinkH);
2086 if (canfork && !debug) {
/*
 * QuickCheck -- inspect the volume info header of each of the nVols volumes
 * without doing a full salvage.
 *
 * For each volume the VolumeDiskData is read through a handle on the
 * summary's header.volumeInfo and tested for: a full-size read,
 * VOLUMEINFOMAGIC, dontSalvage == DONT_SALVAGE, needsSalvaged == 0 and
 * destroyMe == 0.  When those hold and inUse is set, inUse is cleared,
 * inService is set, and the header is written back with IH_IWRITE().
 * The first entry is special-cased when it has no special inodes and the
 * group has more than one volume (a "phantom" rw volume).
 *
 * NOTE(review): the return values and the handling of a failed test are not
 * visible here; DoSalvageVolumeGroup() only tests the result for non-zero
 * -- TODO confirm the exact contract.
 */
2093 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2095 /* Check headers BEFORE forking */
2099 for (i = 0; i < nVols; i++) {
2100 struct VolumeSummary *vs = isp[i].volSummary;
2101 VolumeDiskData volHeader;
2103 /* Don't salvage just because phantom rw volume is there... */
2104 /* (If a read-only volume exists, read/write inodes must also exist) */
2105 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2109 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2110 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2111 == sizeof(volHeader)
2112 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2113 && volHeader.dontSalvage == DONT_SALVAGE
2114 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2115 if (volHeader.inUse != 0) {
2116 volHeader.inUse = 0;
2117 volHeader.inService = 1;
2119 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2120 != sizeof(volHeader)) {
/*
 * Reviewer notes for SalvageVolumeHeaderFile (the original header comment
 * follows):
 *   - skip is malloc'd with one entry per special inode; an allocation
 *     failure is logged and tolerated, and only disables the duplicate
 *     special inode recovery.
 *   - goodspecial[] records, per special type, an inode number that both
 *     appears in the inode list and is referenced by the existing volume
 *     header (looked up through the stuff[] table set up by
 *     init_inode_info()).
 *   - adjacent special inodes with the same type are reported as
 *     duplicates; this relies on the list being sorted by type.
 *   - special inodes whose type is outside 1..MAXINODETYPE are reported as
 *     rubbish header inodes.
 *   - salvinfo->VGLinkH_cnt is incremented once per header.
 *   - each of the MAXINODETYPE entries is passed to SalvageHeader(); for
 *     the VI_LINKTABLE entry the inode is first overridden with
 *     salvinfo->VGLinkH->ih_ino when that is valid.
 *   - when no header summary exists, a VolumeSummary is calloc'd and its
 *     fileName is assigned on the very next statement; no NULL check of the
 *     calloc() result is visible.
 *   - strcpy() into the 64-byte headerName is unbounded and relies on
 *     isp->volSummary->fileName fitting; presumably it always holds a
 *     VFORMAT-style name -- TODO confirm.
 *   - the header is written through writefunc, which is set to
 *     VCreateVolumeDiskHeader for a new header and to
 *     VWriteVolumeDiskHeader when an existing one is repaired.
 */
2136 /* SalvageVolumeHeaderFile
2138 * Salvage the top level V*.vol header file. Make sure the special files
2139 * exist and that there are no duplicates.
2141 * Calls SalvageHeader for each possible type of volume special file.
2145 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2146 struct ViceInodeInfo *inodes, int RW,
2147 int check, int *deleteMe)
2150 struct ViceInodeInfo *ip;
2151 int allinodesobsolete = 1;
2152 struct VolumeDiskHeader diskHeader;
2153 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2155 struct VolumeHeader tempHeader;
2156 struct afs_inode_info stuff[MAXINODETYPE];
2158 /* keeps track of special inodes that are probably 'good'; they are
2159 * referenced in the vol header, and are included in the given inodes
2164 } goodspecial[MAXINODETYPE];
2169 memset(goodspecial, 0, sizeof(goodspecial));
2171 skip = malloc(isp->nSpecialInodes * sizeof(*skip));
2173 memset(skip, 0, isp->nSpecialInodes * sizeof(*skip));
2175 Log("cannot allocate memory for inode skip array when salvaging "
2176 "volume %lu; not performing duplicate special inode recovery\n",
2177 afs_printable_uint32_lu(isp->volumeId));
2178 /* still try to perform the salvage; the skip array only does anything
2179 * if we detect duplicate special inodes */
2182 init_inode_info(&tempHeader, stuff);
2185 * First, look at the special inodes and see if any are referenced by
2186 * the existing volume header. If we find duplicate special inodes, we
2187 * can use this information to use the referenced inode (it's more
2188 * likely to be the 'good' one), and throw away the duplicates.
2190 if (isp->volSummary && skip) {
2191 /* use tempHeader, so we can use the stuff[] array to easily index
2192 * into the isp->volSummary special inodes */
2193 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2195 for (i = 0; i < isp->nSpecialInodes; i++) {
2196 ip = &inodes[isp->index + i];
2197 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2198 /* will get taken care of in a later loop */
2201 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2202 goodspecial[ip->u.special.type-1].valid = 1;
2203 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2208 memset(&tempHeader, 0, sizeof(tempHeader));
2209 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2210 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2211 tempHeader.id = isp->volumeId;
2212 tempHeader.parent = isp->RWvolumeId;
2214 /* Check for duplicates (inodes are sorted by type field) */
2215 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2216 ip = &inodes[isp->index + i];
2217 if (ip->u.special.type == (ip + 1)->u.special.type) {
2218 afs_ino_str_t stmp1, stmp2;
2220 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2221 /* Will be caught in the loop below */
2225 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2226 ip->u.special.type, isp->volumeId,
2227 PrintInode(stmp1, ip->inodeNumber),
2228 PrintInode(stmp2, (ip+1)->inodeNumber));
2230 if (skip && goodspecial[ip->u.special.type-1].valid) {
2231 Inode gi = goodspecial[ip->u.special.type-1].inode;
2234 Log("using special inode referenced by vol header (%s)\n",
2235 PrintInode(stmp1, gi));
2238 /* the volume header references some special inode of
2239 * this type in the inodes array; are we it? */
2240 if (ip->inodeNumber != gi) {
2242 } else if ((ip+1)->inodeNumber != gi) {
2243 /* in case this is the last iteration; we need to
2244 * make sure we check ip+1, too */
2249 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2257 for (i = 0; i < isp->nSpecialInodes; i++) {
2259 ip = &inodes[isp->index + i];
2260 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2262 Log("Rubbish header inode %s of type %d\n",
2263 PrintInode(stmp, ip->inodeNumber),
2264 ip->u.special.type);
2270 Log("Rubbish header inode %s of type %d; deleted\n",
2271 PrintInode(stmp, ip->inodeNumber),
2272 ip->u.special.type);
2273 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2274 if (skip && skip[i]) {
2275 if (orphans == ORPH_REMOVE) {
2276 Log("Removing orphan special inode %s of type %d\n",
2277 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2280 Log("Ignoring orphan special inode %s of type %d\n",
2281 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2282 /* fall through to the ip->linkCount--; line below */
2285 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2286 allinodesobsolete = 0;
2288 if (!check && ip->u.special.type != VI_LINKTABLE)
2289 ip->linkCount--; /* Keep the inode around */
2297 if (allinodesobsolete) {
2304 salvinfo->VGLinkH_cnt++; /* one for every header. */
2306 if (!RW && !check && isp->volSummary) {
2307 ClearROInUseBit(isp->volSummary);
2311 for (i = 0; i < MAXINODETYPE; i++) {
2312 if (stuff[i].inodeType == VI_LINKTABLE) {
2313 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2314 * And we may have recreated the link table earlier, so set the
2315 * RW header as well. The header magic was already checked.
2317 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2318 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2322 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2326 if (isp->volSummary == NULL) {
2328 char headerName[64];
2329 snprintf(headerName, sizeof headerName, VFORMAT,
2330 afs_printable_uint32_lu(isp->volumeId));
2331 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2332 salvinfo->fileSysPath, headerName);
2334 Log("No header file for volume %u\n", isp->volumeId);
2338 Log("No header file for volume %u; %screating %s\n",
2339 isp->volumeId, (Testing ? "it would have been " : ""),
2341 isp->volSummary = calloc(1, sizeof(struct VolumeSummary));
2342 isp->volSummary->fileName = ToString(headerName);
2344 writefunc = VCreateVolumeDiskHeader;
2347 char headerName[64];
2348 /* hack: these two fields are obsolete... */
2349 isp->volSummary->header.volumeAcl = 0;
2350 isp->volSummary->header.volumeMountTable = 0;
2353 (&isp->volSummary->header, &tempHeader,
2354 sizeof(struct VolumeHeader))) {
2355 /* We often remove the name before calling us, so we make a fake one up */
2356 if (isp->volSummary->fileName) {
2357 strcpy(headerName, isp->volSummary->fileName);
2359 snprintf(headerName, sizeof headerName, VFORMAT,
2360 afs_printable_uint32_lu(isp->volumeId));
2361 isp->volSummary->fileName = ToString(headerName);
2363 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2364 salvinfo->fileSysPath, headerName);
2366 Log("Header file %s is damaged or no longer valid%s\n", path,
2367 (check ? "" : "; repairing"));
2371 writefunc = VWriteVolumeDiskHeader;
2375 memcpy(&isp->volSummary->header, &tempHeader,
2376 sizeof(struct VolumeHeader));
2379 Log("It would have written a new header file for volume %u\n",
2383 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2384 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2386 Log("Error %ld writing volume header file for volume %lu\n",
2387 afs_printable_int32_ld(code),
2388 afs_printable_uint32_lu(diskHeader.id));
2393 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2394 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader: check one special (header) inode of a volume and, when it
 * is missing or corrupted, recreate it.
 *
 * salvinfo  state of the current salvage job (device, path, VolInfo).
 * sp        describes the special inode: its type, a pointer to its inode
 *           number, its expected size and version stamp, and a description
 *           string used in the log messages.
 * isp       inode summary of the volume being checked.
 * check     NOTE(review): presumably nonzero for a check-only pass; the
 *           message below reports recreating a header in check mode as an
 *           internal error. Confirm.
 * deleteMe  NOTE(review): the statements that store through this pointer
 *           are not visible here; presumably set when the volume is to be
 *           destroyed. Confirm.
 *
 * NOTE(review): the caller compares the result against -1, so this
 * presumably returns 0 on success and -1 on failure. Confirm.
 */
2399 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2400 struct InodeSummary *isp, int check, int *deleteMe)
2403 VolumeDiskData volumeInfo;
2404 struct versionStamp fileHeader;
2413 #ifndef AFS_NAMEI_ENV
2414 if (sp->inodeType == VI_LINKTABLE)
2415 return 0; /* header magic was already checked */
/* An inode number of 0 means the volume header lists no inode of this type. */
2417 if (*(sp->inode) == 0) {
2419 Log("Missing inode in volume header (%s)\n", sp->description);
2423 Log("Missing inode in volume header (%s); %s\n", sp->description,
2424 (Testing ? "it would have recreated it" : "recreating"));
2427 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2428 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2429 if (!VALID_INO(*(sp->inode)))
2431 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2432 sp->description, errno);
/* Open the special inode so that its contents can be validated. */
2437 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2438 fdP = IH_OPEN(specH);
2439 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2440 /* bail out early and destroy the volume */
2442 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2449 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2450 sp->description, errno);
2453 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2454 || header.fileHeader.magic != sp->stamp.magic)) {
2456 Log("Part of the header (%s) is corrupted\n", sp->description);
2457 FDH_REALLYCLOSE(fdP);
2461 Log("Part of the header (%s) is corrupted; recreating\n",
2464 /* header can be garbage; make sure we don't read garbage data from
2466 memset(&header, 0, sizeof(header));
2468 if (sp->inodeType == VI_VOLINFO
2469 && header.volumeInfo.destroyMe == DESTROY_ME) {
2472 FDH_REALLYCLOSE(fdP);
2476 if (recreate && !Testing) {
2479 ("Internal error: recreating volume header (%s) in check mode\n",
2481 nBytes = FDH_TRUNC(fdP, 0);
2483 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2484 sp->description, errno);
2486 /* The following code should be moved into vutil.c */
/* The volume info inode is rebuilt as a fresh VolumeDiskData: placeholder
 * name, out of service, not blessed, uniquifier above the highest one seen. */
2487 if (sp->inodeType == VI_VOLINFO) {
2489 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2490 header.volumeInfo.stamp = sp->stamp;
2491 header.volumeInfo.id = isp->volumeId;
2492 header.volumeInfo.parentId = isp->RWvolumeId;
2493 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2494 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2495 isp->volumeId, isp->volumeId);
2496 header.volumeInfo.inService = 0;
2497 header.volumeInfo.blessed = 0;
2498 /* The + 1000 is a hack in case there are any files out in venus caches */
2499 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2500 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2501 header.volumeInfo.needsCallback = 0;
2502 gettimeofday(&tp, NULL);
2503 header.volumeInfo.creationDate = tp.tv_sec;
2505 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2506 sizeof(header.volumeInfo), 0);
2507 if (nBytes != sizeof(header.volumeInfo)) {
2510 ("Unable to write volume header file (%s) (errno = %d)\n",
2511 sp->description, errno);
2512 Abort("Unable to write entire volume header file (%s)\n",
/* Only the version stamp is written here (presumably the branch taken for
 * the other special inode types; confirm). */
2516 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2517 if (nBytes != sizeof(sp->stamp)) {
2520 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2521 sp->description, errno);
2523 ("Unable to write entire version stamp in volume header file (%s)\n",
2528 FDH_REALLYCLOSE(fdP);
/* Publish the volume info through salvinfo->VolInfo and log its timestamps. */
2530 if (sp->inodeType == VI_VOLINFO) {
2531 salvinfo->VolInfo = header.volumeInfo;
2535 if (salvinfo->VolInfo.updateDate) {
2536 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2538 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2539 salvinfo->VolInfo.id,
2540 (Testing ? "it would have been " : ""), update);
2542 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2544 Log("%s (%u) not updated (created %s)\n",
2545 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
/*
 * SalvageVnodes: salvage the small and then the large vnode index of the
 * volume described by thisIsp.
 *
 * rwIsp    summary used to locate the inode list: the first non-special
 *          inode is inodes[rwIsp->index + rwIsp->nSpecialInodes], and
 *          rwIsp->nInodes - rwIsp->nSpecialInodes inodes follow it.
 * thisIsp  volume whose index inodes (taken from its volSummary header)
 *          are handed to SalvageIndex; RW is true when thisIsp is rwIsp.
 * inodes   inode array that rwIsp->index refers into.
 * check    passed through unchanged to SalvageIndex.
 *
 * Returns 0 only if both ilarge and ismall are 0, otherwise -1.
 * NOTE(review): ismall and ilarge presumably receive the two SalvageIndex
 * results; the assignments are not visible here. Confirm.
 */
2555 SalvageVnodes(struct SalvInfo *salvinfo,
2556 struct InodeSummary *rwIsp,
2557 struct InodeSummary *thisIsp,
2558 struct ViceInodeInfo *inodes, int check)
2560 int ilarge, ismall, ioffset, RW, nInodes;
2561 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2564 RW = (rwIsp == thisIsp);
2565 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2567 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2568 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
/* In check mode a failure in the small index ends the pass early. */
2569 if (check && ismall == -1)
2572 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2573 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2574 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex: walk the vnode index stored in inode ino and reconcile
 * every allocated vnode with the inode list ip[0..nInodes).
 *
 * class selects the vnode class (disk size and magic); RW is nonzero when
 * the read/write volume is being salvaged; volSummary supplies the parent
 * volume id used to open the index; check marks a check-only pass, in
 * which no vnode may be changed (asserted before the write-back).
 *
 * Per vnode, the visible code:
 *  - zeroes vnodes whose inode number is 0 or whose magic is wrong;
 *  - for a matching inode, repairs uniquifier, data version, inode number
 *    and length, and decrements ip->linkCount to keep the inode;
 *  - with no matching inode, zeroes vnodes that list an inode number or
 *    that are directories.
 * A changed vnode is written back to the index unless Testing is set, and
 * salvinfo->VolumeChanged is raised.
 */
2578 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2579 struct ViceInodeInfo *ip, int nInodes,
2580 struct VolumeSummary *volSummary, int check)
2582 char buf[SIZEOF_LARGEDISKVNODE];
2583 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2585 StreamHandle_t *file;
2586 struct VnodeClassInfo *vcp;
2588 afs_sfsize_t nVnodes;
2589 afs_fsize_t vnodeLength;
2591 afs_ino_str_t stmp1, stmp2;
/* Open the index and derive the vnode count from its size. The file must be
 * a whole number of diskSize slots; the first slot is skipped by the seek. */
2595 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2596 fdP = IH_OPEN(handle);
2597 osi_Assert(fdP != NULL);
2598 file = FDH_FDOPEN(fdP, "r+");
2599 osi_Assert(file != NULL);
2600 vcp = &VnodeClassInfo[class];
2601 size = OS_SIZE(fdP->fd_fd);
2602 osi_Assert(size != -1);
2603 nVnodes = (size / vcp->diskSize) - 1;
2605 osi_Assert((nVnodes + 1) * vcp->diskSize == size);
2606 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
/* One iteration per vnode slot read from the index. */
2610 for (vnodeIndex = 0;
2611 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2612 nVnodes--, vnodeIndex++) {
2613 if (vnode->type != vNull) {
2614 int vnodeChanged = 0;
2615 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2616 if (VNDISK_GET_INO(vnode) == 0) {
2618 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2619 memset(vnode, 0, vcp->diskSize);
2623 if (vcp->magic != vnode->vnodeMagic) {
2624 /* bad magic #, probably partially created vnode */
2626 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2627 vnodeNumber, afs_printable_uint32_lu(vnode->vnodeMagic),
2628 afs_printable_uint32_lu(vcp->magic));
2629 memset(vnode, 0, vcp->diskSize);
2633 Log("Partially allocated vnode %d deleted.\n",
2635 memset(vnode, 0, vcp->diskSize);
2639 /* ****** Should do a bit more salvage here: e.g. make sure
2640 * vnode type matches what it should be given the index */
/* NOTE(review): presumably steps past inodes that belong to lower-numbered
 * vnodes; the loop body is not visible here. Confirm. */
2641 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2642 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2643 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2644 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2651 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2652 /* The following doesn't work, because the version number
2653 * is not maintained correctly by the file server */
2654 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2655 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2657 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2663 /* For RW volume, look for vnode with matching inode number;
2664 * if no such match, take the first determined by our sort
2666 struct ViceInodeInfo *lip = ip;
2667 int lnInodes = nInodes;
2669 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2670 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2679 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2680 /* "Matching" inode */
/* vu and vd come from the vnode; iu and id come from the inode. */
2684 vu = vnode->uniquifier;
2685 iu = ip->u.vnode.vnodeUniquifier;
2686 vd = vnode->dataVersion;
2687 id = ip->u.vnode.inodeDataVersion;
2689 * Because of the possibility of the uniquifier overflows (> 4M)
2690 * we compare them modulo the low 22-bits; we shouldn't worry
2691 * about mismatching since they shouldn't to many old
2692 * uniquifiers of the same vnode...
2694 if (IUnique(vu) != IUnique(iu)) {
2696 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2699 vnode->uniquifier = iu;
2700 #ifdef AFS_3DISPARES
2701 vnode->dataVersion = (id >= vd ?
2704 1887437 ? vd : id) :
2707 1887437 ? id : vd));
2709 #if defined(AFS_SGI_EXMAG)
2710 vnode->dataVersion = (id >= vd ?
2713 15099494 ? vd : id) :
2716 15099494 ? id : vd));
2718 vnode->dataVersion = (id > vd ? id : vd);
2719 #endif /* AFS_SGI_EXMAG */
2720 #endif /* AFS_3DISPARES */
2723 /* don't bother checking for vd > id any more, since
2724 * partial file transfers always result in this state,
2725 * and you can't do much else anyway (you've already
2726 * found the best data you can) */
2727 #ifdef AFS_3DISPARES
2728 if (!vnodeIsDirectory(vnodeNumber)
2729 && ((vd < id && (id - vd) < 1887437)
2730 || ((vd > id && (vd - id) > 1887437)))) {
2732 #if defined(AFS_SGI_EXMAG)
2733 if (!vnodeIsDirectory(vnodeNumber)
2734 && ((vd < id && (id - vd) < 15099494)
2735 || ((vd > id && (vd - id) > 15099494)))) {
2737 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2738 #endif /* AFS_SGI_EXMAG */
2741 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2742 vnode->dataVersion = id;
/* The inode list is authoritative for the inode number stored in the vnode. */
2747 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2750 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2752 VNDISK_SET_INO(vnode, ip->inodeNumber);
2757 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2759 VNDISK_SET_INO(vnode, ip->inodeNumber);
/* Likewise the vnode length is forced to the byte count of the inode. */
2762 VNDISK_GET_LEN(vnodeLength, vnode);
2763 if (ip->byteCount != vnodeLength) {
2766 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2771 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2772 VNDISK_SET_LEN(vnode, ip->byteCount);
2776 ip->linkCount--; /* Keep the inode around */
2779 } else { /* no matching inode */
2781 if (VNDISK_GET_INO(vnode) != 0
2782 || vnode->type == vDirectory) {
2783 /* No matching inode--get rid of the vnode */
2785 if (VNDISK_GET_INO(vnode)) {
2787 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2791 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2796 if (VNDISK_GET_INO(vnode)) {
2798 time_t serverModifyTime = vnode->serverModifyTime;
2799 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2803 time_t serverModifyTime = vnode->serverModifyTime;
2804 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2807 memset(vnode, 0, vcp->diskSize);
2810 /* Should not reach here because we checked for
2811 * (inodeNumber == 0) above. And where we zero the vnode,
2812 * we also goto vnodeDone.
2816 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2820 } /* VNDISK_GET_INO(vnode) != 0 */
/* A check-only pass must never have modified a vnode. */
2822 osi_Assert(!(vnodeChanged && check));
2823 if (vnodeChanged && !Testing) {
2824 osi_Assert(IH_IWRITE
2825 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2826 (char *)vnode, vcp->diskSize)
2828 salvinfo->VolumeChanged = 1; /* For break call back */
2839 struct VnodeEssence *
2840 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2843 struct VnodeInfo *vip;
2846 class = vnodeIdToClass(vnodeNumber);
2847 vip = &salvinfo->vnodeInfo[class];
2848 offset = vnodeIdToBitNumber(vnodeNumber);
2849 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite: give the directory a private copy of its inode before the
 * salvager modifies it.
 *
 * When dir->copied is already set, or Testing is on, the copy is skipped
 * (presumably by an early return; that line is not visible here).
 * Otherwise the visible steps are: flush directory buffers with DFlush,
 * read the directory vnode from the large vnode index, create a new inode,
 * copy the old inode into it with CopyInode, store the new inode number in
 * the vnode, write the vnode back, and point dir->dirHandle at the new
 * inode. The old inode is not released here.
 * NOTE(review): dir->copied is presumably set afterwards; that statement
 * is not visible here. Confirm.
 */
2853 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2855 /* Copy the directory unconditionally if we are going to change it:
2856 * not just if it was cloned.
2858 struct VnodeDiskObject vnode;
2859 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2860 Inode oldinode, newinode;
2863 if (dir->copied || Testing)
2865 DFlush(); /* Well justified paranoia... */
2868 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2869 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2871 osi_Assert(code == sizeof(vnode));
2872 oldinode = VNDISK_GET_INO(&vnode);
2873 /* Increment the version number by a whole lot to avoid problems with
2874 * clients that were promised new version numbers--but the file server
2875 * crashed before the versions were written to disk.
2878 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2879 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2881 osi_Assert(VALID_INO(newinode));
2882 osi_Assert(CopyInode(salvinfo->fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2884 VNDISK_SET_INO(&vnode, newinode);
2886 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2887 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2889 osi_Assert(code == sizeof(vnode));
2891 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2892 salvinfo->fileSysDevice, newinode,
2893 &salvinfo->VolumeChanged);
2894 /* Don't delete the original inode right away, because the directory is
2895 * still being scanned.
2901 * This function should either successfully create a new dir, or give up
2902 * and leave things the way they were. In particular, if it fails to write
2903 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage: rebuild a damaged directory into a newly created inode.
 *
 * Visible steps: read the directory vnode from the large vnode index,
 * create a new inode, run DirSalvage from the old directory into the new
 * one, and verify the result with DirOK. When DirSalvage or DirOK fails,
 * the new inode is released with IH_DEC and the old directory stays in
 * place. On success the vnode receives the new inode number and the new
 * directory length and is written back, the old directory file is closed,
 * the old inode is released, and dir->dirHandle is switched to the new
 * directory.
 */
2907 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2909 struct VnodeDiskObject vnode;
2910 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2911 Inode oldinode, newinode;
2916 afs_int32 parentUnique = 1;
2917 struct VnodeEssence *vnodeEssence;
2922 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2924 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2925 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2927 osi_Assert(lcode == sizeof(vnode));
2928 oldinode = VNDISK_GET_INO(&vnode);
2929 /* Increment the version number by a whole lot to avoid problems with
2930 * clients that were promised new version numbers--but the file server
2931 * crashed before the versions were written to disk.
2934 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2935 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2937 osi_Assert(VALID_INO(newinode));
2938 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2939 &salvinfo->VolumeChanged);
2941 /* Assign . and .. vnode numbers from dir and vnode.parent.
2942 * The uniquifier for . is in the vnode.
2943 * The uniquifier for .. might be set to a bogus value of 1 and
2944 * the salvager will later clean it up.
2946 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2947 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2950 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2952 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2957 /* didn't really build the new directory properly, let's just give up. */
2958 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2959 Log("Directory salvage returned code %d, continuing.\n", code);
2961 Log("also failed to decrement link count on new inode");
/* Second line of defence: the rebuilt directory must itself pass DirOK. */
2965 Log("Checking the results of the directory salvage...\n");
2966 if (!DirOK(&newdir)) {
2967 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2968 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2969 osi_Assert(code == 0);
/* Commit: point the vnode at the new inode and record the new length. */
2973 VNDISK_SET_INO(&vnode, newinode);
2974 length = afs_dir_Length(&newdir);
2975 VNDISK_SET_LEN(&vnode, length);
2977 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2978 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2980 osi_Assert(lcode == sizeof(vnode));
2983 nt_sync(salvinfo->fileSysDevice);
2985 sync(); /* this is slow, but hopefully rarely called. We don't have
2986 * an open FD on the file itself to fsync.
2990 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
2992 /* make sure old directory file is really closed */
/* NOTE(review): fdP goes straight to FDH_REALLYCLOSE with no NULL check in
 * between; confirm IH_OPEN cannot fail here. */
2993 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
2994 FDH_REALLYCLOSE(fdP);
2996 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
2997 osi_Assert(code == 0);
2998 dir->dirHandle = newdir;
3002 * arguments for JudgeEntry.
/* Bundle handed to JudgeEntry through its opaque arock argument; SalvageDir
 * fills in both members before enumerating a directory. */
3004 struct judgeEntry_params {
3005 struct DirSummary *dir; /**< directory we're examining entries in */
3006 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/*
 * JudgeEntry: callback run for each entry while a directory is enumerated
 * (SalvageDir passes it to afs_dir_EnumerateDir).
 *
 * arock        struct judgeEntry_params carrying the directory summary and
 *              the SalvInfo of the current job.
 * name         name of the directory entry being judged.
 * vnodeNumber  vnode number recorded in the entry.
 * unique       uniquifier recorded in the entry (its declaration is on a
 *              signature line not visible here).
 *
 * Visible behaviour, in order:
 *  - an entry whose vnode number has no VnodeEssence is deleted;
 *  - an entry whose uniquifier differs from the vnode is rewritten with
 *    the vnode uniquifier, or deleted when the vnode uniquifier is zero or
 *    the directory is orphaned;
 *  - "." and ".." are rewritten when they carry the wrong vnode/unique;
 *  - ".__afs" entries are deleted and their vnode is marked todelete;
 *  - any other entry is optionally logged (suid/sgid, mount points, root
 *    owned files), has its name remembered for large vnodes, and is
 *    claimed by this directory or deleted when another directory owns it.
 * Every deletion or rewrite goes through CopyOnWrite first. The last
 * visible statement decrements vnodeEssence->count.
 * NOTE(review): only one return statement (return 0) is visible; confirm
 * the remaining return values.
 */
3010 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
3013 struct judgeEntry_params *params = arock;
3014 struct DirSummary *dir = params->dir;
3015 struct SalvInfo *salvinfo = params->salvinfo;
3016 struct VnodeEssence *vnodeEssence;
3017 afs_int32 dirOrphaned, todelete;
/* Whether the directory being scanned is itself orphaned; consulted below
 * when deciding whether an entry is deleted or may be claimed. */
3019 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
3021 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3022 if (vnodeEssence == NULL) {
3024 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
3027 CopyOnWrite(salvinfo, dir);
3028 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3033 #ifndef AFS_NAMEI_ENV
3034 /* On AIX machines, don't allow entries to point to inode 0. That is a special
3035 * mount inode for the partition. If this inode were deleted, it would crash
3038 if (vnodeEssence->InodeNumber == 0) {
3039 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
3041 CopyOnWrite(salvinfo, dir);
3042 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3049 if (!(vnodeNumber & 1) && !Showmode
3050 && !(vnodeEssence->count || vnodeEssence->unique
3051 || vnodeEssence->modeBits)) {
3052 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
3053 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3054 vnodeNumber, unique,
3055 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3059 CopyOnWrite(salvinfo, dir);
3060 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3066 /* Check if the Uniquifiers match. If not, change the directory entry
3067 * so its unique matches the vnode unique. Delete if the unique is zero
3068 * or if the directory is orphaned.
3070 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3071 if (!vnodeEssence->unique
3072 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3073 /* This is an orphaned directory. Don't delete the . or ..
3074 * entry. Otherwise, it will get created in the next
3075 * salvage and deleted again here. So Just skip it.
3080 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3083 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3087 fid.Vnode = vnodeNumber;
3088 fid.Unique = vnodeEssence->unique;
3089 CopyOnWrite(salvinfo, dir);
3090 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3092 osi_Assert(afs_dir_Create(&dir->dirHandle, name, &fid) == 0);
3095 return 0; /* no need to continue */
/* Name-specific handling follows: the dot entry, the dot-dot entry, old
 * .__afs files, and finally every ordinary entry. */
3098 if (strcmp(name, ".") == 0) {
3099 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3102 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3104 CopyOnWrite(salvinfo, dir);
3105 osi_Assert(afs_dir_Delete(&dir->dirHandle, ".") == 0);
3106 fid.Vnode = dir->vnodeNumber;
3107 fid.Unique = dir->unique;
3108 osi_Assert(afs_dir_Create(&dir->dirHandle, ".", &fid) == 0);
3111 vnodeNumber = fid.Vnode; /* Get the new Essence */
3112 unique = fid.Unique;
3113 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3116 } else if (strcmp(name, "..") == 0) {
3119 struct VnodeEssence *dotdot;
3120 pa.Vnode = dir->parent;
3121 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3122 osi_Assert(dotdot != NULL); /* XXX Should not be assert */
3123 pa.Unique = dotdot->unique;
3125 pa.Vnode = dir->vnodeNumber;
3126 pa.Unique = dir->unique;
3128 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3130 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3132 CopyOnWrite(salvinfo, dir);
3133 osi_Assert(afs_dir_Delete(&dir->dirHandle, "..") == 0);
3134 osi_Assert(afs_dir_Create(&dir->dirHandle, "..", &pa) == 0);
3137 vnodeNumber = pa.Vnode; /* Get the new Essence */
3139 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3141 dir->haveDotDot = 1;
3142 } else if (strncmp(name, ".__afs", 6) == 0) {
3144 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3147 CopyOnWrite(salvinfo, dir);
3148 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3150 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3151 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3154 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3155 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/* A symlink with none of the execute bits set is treated as a mount point;
 * its target is read back so that it can be validated. */
3156 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3157 && !(vnodeEssence->modeBits & 0111)) {
3158 afs_sfsize_t nBytes;
3164 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3165 vnodeEssence->InodeNumber);
3168 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3172 size = FDH_SIZE(fdP);
3174 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3175 FDH_REALLYCLOSE(fdP);
3182 nBytes = FDH_PREAD(fdP, buf, size, 0);
3183 if (nBytes == size) {
/* Expected form: starts with # or % and ends with a dot. Anything else is
 * turned into a plain symlink by setting the execute bits. */
3185 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3186 Log("Volume %u (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3187 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3188 Testing ? "would convert" : "converted");
3189 vnodeEssence->modeBits |= 0111;
3190 vnodeEssence->changed = 1;
3191 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3192 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3193 dir->name ? dir->name : "??", name, buf);
3195 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3196 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3198 FDH_REALLYCLOSE(fdP);
3201 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3202 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/* Remember the entry name of a directory vnode the first time it is seen;
 * GetDirName later builds path names from these. */
3203 if (vnodeIdToClass(vnodeNumber) == vLarge
3204 && vnodeEssence->name == NULL) {
3206 if ((n = (char *)malloc(strlen(name) + 1)))
3208 vnodeEssence->name = n;
3211 /* The directory entry points to the vnode. Check to see if the
3212 * vnode points back to the directory. If not, then let the
3213 * directory claim it (else it might end up orphaned). Vnodes
3214 * already claimed by another directory are deleted from this
3215 * directory: hardlinks to the same vnode are not allowed
3216 * from different directories.
3218 if (vnodeEssence->parent != dir->vnodeNumber) {
3219 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3220 /* Vnode does not point back to this directory.
3221 * Orphaned dirs cannot claim a file (it may belong to
3222 * another non-orphaned dir).
3225 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3227 vnodeEssence->parent = dir->vnodeNumber;
3228 vnodeEssence->changed = 1;
3230 /* Vnode was claimed by another directory */
3233 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3234 } else if (vnodeNumber == 1) {
3235 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3237 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3241 CopyOnWrite(salvinfo, dir);
3242 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3247 /* This directory claims the vnode */
3248 vnodeEssence->claimed = 1;
/* This entry accounts for one of the links counted for the vnode. */
3250 vnodeEssence->count--;
3255 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3256 VnodeClass class, Inode ino, Unique * maxu)
3258 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3259 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3260 char buf[SIZEOF_LARGEDISKVNODE];
3261 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3263 StreamHandle_t *file;
3268 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3269 fdP = IH_OPEN(vip->handle);
3270 osi_Assert(fdP != NULL);
3271 file = FDH_FDOPEN(fdP, "r+");
3272 osi_Assert(file != NULL);
3273 size = OS_SIZE(fdP->fd_fd);
3274 osi_Assert(size != -1);
3275 vip->nVnodes = (size / vcp->diskSize) - 1;
3276 if (vip->nVnodes > 0) {
3277 osi_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3278 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
3279 osi_Assert((vip->vnodes = (struct VnodeEssence *)
3280 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3281 if (class == vLarge) {
3282 osi_Assert((vip->inodes = (Inode *)
3283 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3292 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3293 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3294 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3295 nVnodes--, vnodeIndex++) {
3296 if (vnode->type != vNull) {
3297 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3298 afs_fsize_t vnodeLength;
3299 vip->nAllocatedVnodes++;
3300 vep->count = vnode->linkCount;
3301 VNDISK_GET_LEN(vnodeLength, vnode);
3302 vep->blockCount = nBlocks(vnodeLength);
3303 vip->volumeBlockCount += vep->blockCount;
3304 vep->parent = vnode->parent;
3305 vep->unique = vnode->uniquifier;
3306 if (*maxu < vnode->uniquifier)
3307 *maxu = vnode->uniquifier;
3308 vep->modeBits = vnode->modeBits;
3309 vep->InodeNumber = VNDISK_GET_INO(vnode);
3310 vep->type = vnode->type;
3311 vep->author = vnode->author;
3312 vep->owner = vnode->owner;
3313 vep->group = vnode->group;
3314 if (vnode->type == vDirectory) {
3315 if (class != vLarge) {
3316 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3317 vip->nAllocatedVnodes--;
3318 memset(vnode, 0, sizeof(vnode));
3319 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3320 vnodeIndexOffset(vcp, vnodeNumber),
3321 (char *)&vnode, sizeof(vnode));
3322 salvinfo->VolumeChanged = 1;
3324 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName: build the path name of a directory vnode into path.
 *
 * The visible branch handles a vnode that has a parent, a remembered name
 * and a parent essence: it recurses to produce the path of the parent and
 * then appends OS_DIRSEP and vp->name.
 * NOTE(review): the base case and the return statements are not visible
 * here; the recursive call is used as a truth value, so a non-NULL result
 * presumably means success. Confirm.
 * NOTE(review): strcat does not bound the result; confirm path is large
 * enough for the deepest directory chain.
 */
3333 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3336 struct VnodeEssence *parentvp;
3342 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3343 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3344 strcat(path, OS_DIRSEP);
3345 strcat(path, vp->name);
3351 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3352 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
 *
 * Returns 1 when the vnode is orphaned and 0 when it is not. The check
 * climbs the parent chain by calling itself on vep->parent.
 * NOTE(review): nothing visible here bounds that recursion if the parent
 * links of claimed vnodes form a cycle; confirm that cannot happen.
3355 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3357 struct VnodeEssence *vep;
3360 return (1); /* Vnode zero does not exist */
3362 return (0); /* The root dir vnode is always claimed */
3363 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3364 if (!vep || !vep->claimed)
3365 return (1); /* Vnode is not claimed - it is orphaned */
3367 return (IsVnodeOrphaned(salvinfo, vep->parent));
/*
 * SalvageDir: salvage the directory stored in slot i of the large vnode
 * table, after first salvaging its parent directory.
 *
 * salvinfo      state of the current salvage job.
 * name, rwVid   passed on unchanged to the recursive call.
 * dirVnodeInfo  large vnode table; vnodes[i] and inodes[i] describe the
 *               directory.
 * alinkH        link handle stored in the directory summary and used when
 *               the old inode is released.
 * i             bit number (slot) of the directory within dirVnodeInfo.
 * rootdir       receives a copy of the summary of vnode 1, unique 1 once
 *               it has been judged.
 * rootdirfound  NOTE(review): the statement using it is not visible here;
 *               presumably set when rootdir is filled in. Confirm.
 *
 * dir, dirHandle and path are static, so they are shared by every level of
 * the recursion; the recursive call is made before they are filled in for
 * this directory.
 */
3371 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3372 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3373 struct DirSummary *rootdir, int *rootdirfound)
3375 static struct DirSummary dir;
3376 static struct DirHandle dirHandle;
3377 struct VnodeEssence *parent;
3378 static char path[MAXPATHLEN];
3381 if (dirVnodeInfo->vnodes[i].salvaged)
3382 return; /* already salvaged */
3385 dirVnodeInfo->vnodes[i].salvaged = 1;
3387 if (dirVnodeInfo->inodes[i] == 0)
3388 return; /* Not allocated to a directory */
/* Vnode 1 must not name a parent; a nonzero parent is cleared. */
3390 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3391 if (dirVnodeInfo->vnodes[i].parent) {
3392 Log("Bad parent, vnode 1; %s...\n",
3393 (Testing ? "skipping" : "salvaging"));
3394 dirVnodeInfo->vnodes[i].parent = 0;
3395 dirVnodeInfo->vnodes[i].changed = 1;
/* Salvage the parent directory first if that has not been done yet. */
3398 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3399 if (parent && parent->salvaged == 0)
3400 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3401 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3402 rootdir, rootdirfound);
/* Describe this directory in the shared summary. */
3405 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3406 dir.unique = dirVnodeInfo->vnodes[i].unique;
3409 dir.parent = dirVnodeInfo->vnodes[i].parent;
3410 dir.haveDot = dir.haveDotDot = 0;
3411 dir.ds_linkH = alinkH;
3412 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3413 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
/* With RebuildDirs set (and Testing off) the directory is treated as bad
 * without consulting DirOK. */
3415 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3418 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3419 (Testing ? "skipping" : "salvaging"));
3422 CopyAndSalvage(salvinfo, &dir);
3424 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3427 dirHandle = dir.dirHandle;
3430 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3431 &dirVnodeInfo->vnodes[i], path);
3434 /* If enumeration failed for random reasons, we will probably delete
3435 * too much stuff, so we guard against this instead.
3437 struct judgeEntry_params judge_params;
3438 judge_params.salvinfo = salvinfo;
3439 judge_params.dir = &dir;
3441 osi_Assert(afs_dir_EnumerateDir(&dirHandle, JudgeEntry,
3442 &judge_params) == 0);
3445 /* Delete the old directory if it was copied in order to salvage.
3446 * CopyOnWrite has written the new inode # to the disk, but we still
3447 * have the old one in our local structure here. Thus, we idec the
3451 if (dir.copied && !Testing) {
3452 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3453 osi_Assert(code == 0);
3454 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3457 /* Remember rootdir DirSummary _after_ it has been judged */
3458 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3459 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3467 * Get a new FID that can be used to create a new file.
3469 * @param[in] volHeader vol header for the volume
3470 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3471 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3472 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3473 * updated to the new max unique if we create a new
3477 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3478 VnodeClass class, AFSFid *afid, Unique *maxunique)
3481 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3482 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
3486 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3487 /* no free vnodes; make a new one */
3488 salvinfo->vnodeInfo[class].nVnodes++;
3489 salvinfo->vnodeInfo[class].vnodes =
3490 realloc(salvinfo->vnodeInfo[class].vnodes,
3491 sizeof(struct VnodeEssence) * (i+1));
3493 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
3496 afid->Vnode = bitNumberToVnodeNumber(i, class);
3498 if (volHeader->uniquifier < (*maxunique + 1)) {
3499 /* header uniq is bad; it will get bumped by 2000 later */
3500 afid->Unique = *maxunique + 1 + 2000;
3503 /* header uniq seems okay; just use that */
3504 afid->Unique = *maxunique = volHeader->uniquifier++;
3509 * Create a vnode for a README file explaining not to use a recreated-root vol.
3511 * @param[in] volHeader vol header for the volume
3512 * @param[in] alinkH ihandle for i/o for the volume
3513 * @param[in] vid volume id
3514 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3515 * updated to the new max unique if we create a new
3517 * @param[out] afid FID for the new readme vnode
3518 * @param[out] ainode the inode for the new readme file
3520 * @return operation status
3525 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3526 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3530 struct VnodeDiskObject *rvnode = NULL;
3532 IHandle_t *readmeH = NULL;
3533 struct VnodeEssence *vep;
3535 time_t now = time(NULL);
3537 /* Try to make the note brief, but informative. Only administrators should
3538 * be able to read this file at first, so we can hopefully assume they
3539 * know what AFS is, what a volume is, etc. */
3541 "This volume has been salvaged, but has lost its original root directory.\n"
3542 "The root directory that exists now has been recreated from orphan files\n"
3543 "from the rest of the volume. This recreated root directory may interfere\n"
3544 "with old cached data on clients, and there is no way the salvager can\n"
3545 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3546 "use this volume, but only copy the salvaged data to a new volume.\n"
3547 "Continuing to use this volume as it exists now may cause some clients to\n"
3548 "behave oddly when accessing this volume.\n"
3549 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3550 /* ^ the person reading this probably just lost some data, so they could
3551 * use some cheering up. */
3553 /* -1 for the trailing NUL */
3554 length = sizeof(readme) - 1;
3556 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
3558 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3560 /* create the inode and write the contents */
3561 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3562 salvinfo->fileSysPath, 0, vid,
3563 afid->Vnode, afid->Unique, 1);
3564 if (!VALID_INO(readmeinode)) {
3565 Log("CreateReadme: readme IH_CREATE failed\n");
3569 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3570 bytes = IH_IWRITE(readmeH, 0, readme, length);
3571 IH_RELEASE(readmeH);
3573 if (bytes != length) {
3574 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3575 (int)sizeof(readme));
3579 /* create the vnode and write it out */
3580 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
3582 Log("CreateRootDir: error alloc'ing memory\n");
3586 rvnode->type = vFile;
3588 rvnode->modeBits = 0777;
3589 rvnode->linkCount = 1;
3590 VNDISK_SET_LEN(rvnode, length);
3591 rvnode->uniquifier = afid->Unique;
3592 rvnode->dataVersion = 1;
3593 VNDISK_SET_INO(rvnode, readmeinode);
3594 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3599 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
3601 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3602 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3603 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3605 if (bytes != SIZEOF_SMALLDISKVNODE) {
3606 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3607 (int)SIZEOF_SMALLDISKVNODE);
3611 /* update VnodeEssence for new readme vnode */
3612 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
3614 vep->blockCount = nBlocks(length);
3615 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3616 vep->parent = rvnode->parent;
3617 vep->unique = rvnode->uniquifier;
3618 vep->modeBits = rvnode->modeBits;
3619 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3620 vep->type = rvnode->type;
3621 vep->author = rvnode->author;
3622 vep->owner = rvnode->owner;
3623 vep->group = rvnode->group;
3633 *ainode = readmeinode;
3638 if (IH_DEC(alinkH, readmeinode, vid)) {
3639 Log("CreateReadme (recovery): IH_DEC failed\n");
3651 * create a root dir for a volume that lacks one.
3653 * @param[in] volHeader vol header for the volume
3654 * @param[in] alinkH ihandle for disk access for this volume group
3655 * @param[in] vid volume id we're dealing with
3656 * @param[out] rootdir populated with info about the new root dir
3657 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3658 * updated to the new max unique if we create a new
3661 * @return operation status
/*
 * CreateRootDir: build a replacement root directory (vnode 1) for a volume
 * that has none.
 *
 * Visible steps: refuse when neither vnode table exists; allocate one-entry
 * large-vnode tables when only those are missing; refuse when vnode 1 is
 * already non-null; create the README via CreateReadme(); create the root
 * directory inode, initialise it with afs_dir_MakeDir() and add a
 * "README.ROOTDIR" entry; write a large disk vnode whose ACL grants read and
 * lookup to id -204; update the in-memory VnodeEssence and *rootdir.
 * The recovery code at the end releases the root and/or readme inode
 * depending on the decroot / decreadme flags.
 */
3666 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3667 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
/* decroot / decreadme select which inodes the recovery code must IH_DEC. */
3671 int decroot = 0, decreadme = 0;
3672 AFSFid did, readmeid;
3675 struct VnodeDiskObject *rootvnode = NULL;
3676 struct acl_accessList *ACL;
3679 struct VnodeEssence *vep;
3681 time_t now = time(NULL);
3683 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3684 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3688 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3689 /* We don't have any large vnodes in the volume; allocate room
3690 * for one so we can recreate the root dir */
3691 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3692 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3693 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3695 osi_Assert(salvinfo->vnodeInfo[vLarge].vnodes);
3696 osi_Assert(salvinfo->vnodeInfo[vLarge].inodes);
/* Locate the in-memory slot and inode entry that belong to vnode 1. */
3699 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3700 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3701 if (vep->type != vNull) {
3702 Log("Not creating new root dir; existing vnode 1 is non-null\n");
/* The README is created first so its FID can be entered in the new directory. */
3706 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3707 &readmeinode) != 0) {
3712 /* set the DV to a very high number, so it is unlikely that we collide
3713 * with a cached DV */
3716 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3718 if (!VALID_INO(rootinode)) {
3719 Log("CreateRootDir: IH_CREATE failed\n");
3724 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3725 rootinode, &salvinfo->VolumeChanged);
/* The same FID is passed twice to afs_dir_MakeDir. */
3729 if (afs_dir_MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3730 Log("CreateRootDir: MakeDir failed\n");
3733 if (afs_dir_Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3734 Log("CreateRootDir: Create failed\n");
/* Record the directory length for the vnode before DZap() is called on the handle. */
3738 length = afs_dir_Length(&rootdir->dirHandle);
3739 DZap(&rootdir->dirHandle);
3741 /* create the new root dir vnode */
3742 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
3744 Log("CreateRootDir: malloc failed\n");
3748 /* only give 'rl' permissions to 'system:administrators'. We do this to
3749 * try to catch the attention of an administrator, that they should not
3750 * be writing to this directory or continue to use it. */
3751 ACL = VVnodeDiskACL(rootvnode);
3752 ACL->size = sizeof(struct acl_accessList);
3753 ACL->version = ACL_ACLVERSION;
3757 ACL->entries[0].id = -204; /* system:administrators */
3758 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
3760 rootvnode->type = vDirectory;
3761 rootvnode->cloned = 0;
3762 rootvnode->modeBits = 0777;
3763 rootvnode->linkCount = 2;
3764 VNDISK_SET_LEN(rootvnode, length);
3765 rootvnode->uniquifier = 1;
3766 rootvnode->dataVersion = dv;
3767 VNDISK_SET_INO(rootvnode, rootinode);
3768 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3769 rootvnode->author = 0;
3770 rootvnode->owner = 0;
3771 rootvnode->parent = 0;
3772 rootvnode->group = 0;
3773 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3775 /* write it out to disk */
3776 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3777 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3778 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3780 if (bytes != SIZEOF_LARGEDISKVNODE) {
3781 /* just cast to int and don't worry about printing real 64-bit ints;
3782 * a large disk vnode isn't anywhere near the 32-bit limit */
3783 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3784 (int)SIZEOF_LARGEDISKVNODE);
3788 /* update VnodeEssence for the new root vnode */
3789 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3791 vep->blockCount = nBlocks(length);
3792 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3793 vep->parent = rootvnode->parent;
3794 vep->unique = rootvnode->uniquifier;
3795 vep->modeBits = rootvnode->modeBits;
3796 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3797 vep->type = rootvnode->type;
3798 vep->author = rootvnode->author;
3799 vep->owner = rootvnode->owner;
3800 vep->group = rootvnode->group;
3810 /* update DirSummary for the new root vnode */
3811 rootdir->vnodeNumber = 1;
3812 rootdir->unique = 1;
3813 rootdir->haveDot = 1;
3814 rootdir->haveDotDot = 1;
3815 rootdir->rwVid = vid;
3816 rootdir->copied = 0;
3817 rootdir->parent = 0;
/* NOTE(review): the strdup() result is stored unchecked; confirm who frees it. */
3818 rootdir->name = strdup(".");
3819 rootdir->vname = volHeader->name;
3820 rootdir->ds_linkH = alinkH;
/* Recovery: undo whichever inode creations had already succeeded. */
3827 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3828 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3830 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3831 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3841 * salvage a volume group.
3843 * @param[in] salvinfo information for the current salvage job
3844 * @param[in] rwIsp inode summary for rw volume
3845 * @param[in] alinkH link table inode handle
3847 * @return operation status
/*
 * SalvageVolume: salvage the read-write volume described by rwIsp.
 *
 * Visible steps, in order:
 *  - read the volume header and assert its magic and that it is not flagged
 *    DESTROY_ME;
 *  - distil the large and small vnode indexes with DistilVnodeEssence(),
 *    collecting the largest uniquifier seen in maxunique;
 *  - run SalvageDir() on every large (directory) vnode slot;
 *  - when no root directory was found, orphans == ORPH_ATTACH and Testing is
 *    off, try CreateRootDir();
 *  - walk every vnode of every class, skipping unused, claimed and root
 *    vnodes, and (when attaching is requested) enter the remaining orphans
 *    in the root directory under __ORPHANDIR__ / __ORPHANFILE__ names;
 *  - re-read, adjust and write back each vnode whose VnodeEssence has
 *    `changed` set or a non-zero `count`;
 *  - free the recorded vnode names, update filecount, diskused and the
 *    uniquifier in the header, tell the fileserver about a changed volume
 *    (or unlink the fsstate file), and write the header back.
 *
 * VnodeEssence.count is used as a delta that is subtracted from the on-disk
 * link count when the vnode is rewritten, so decrementing `count` results in
 * a larger link count.
 */
3851 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3853 /* This routine, for now, will only be called for read-write volumes */
3855 int BlocksInVolume = 0, FilesInVolume = 0;
3857 struct DirSummary rootdir, oldrootdir;
3858 struct VnodeInfo *dirVnodeInfo;
3859 struct VnodeDiskObject vnode;
3860 VolumeDiskData volHeader;
3862 int orphaned, rootdirfound = 0;
3863 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3864 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3865 struct VnodeEssence *vep;
3868 afs_sfsize_t nBytes;
3870 VnodeId LFVnode, ThisVnode;
3871 Unique LFUnique, ThisUnique;
/* Load the volume header and make sure it is one we should be salvaging. */
3875 vid = rwIsp->volSummary->header.id;
3876 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3877 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3878 osi_Assert(nBytes == sizeof(volHeader));
3879 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3880 osi_Assert(volHeader.destroyMe != DESTROY_ME);
3881 /* (should not have gotten this far with DESTROY_ME flag still set!) */
/* Build the in-memory vnode tables for both classes. */
3883 DistilVnodeEssence(salvinfo, vid, vLarge,
3884 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
3885 DistilVnodeEssence(salvinfo, vid, vSmall,
3886 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
/* Salvage every directory (large vnode) slot. */
3888 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
3889 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3890 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
3891 &rootdir, &rootdirfound);
3894 nt_sync(salvinfo->fileSysDevice);
3896 sync(); /* This used to be done lower level, for every dir */
/* Without a root directory orphans cannot be attached, so try to make one. */
3903 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
3905 Log("Cannot find root directory for volume %lu; attempting to create "
3906 "a new one\n", afs_printable_uint32_lu(vid));
3908 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
3913 salvinfo->VolumeChanged = 1;
3917 /* Parse each vnode looking for orphaned vnodes and
3918 * connect them to the tree as orphaned (if requested).
3920 oldrootdir = rootdir;
3921 for (class = 0; class < nVNODECLASSES; class++) {
3922 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
3923 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
3924 ThisVnode = bitNumberToVnodeNumber(v, class);
3925 ThisUnique = vep->unique;
3927 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3928 continue; /* Ignore unused, claimed, and root vnodes */
3930 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3931 * entry in this vnode had incremented the parent link count (In
3932 * JudgeEntry()). We need to go to the parent and decrement that
3933 * link count. But if the parent's unique is zero, then the parent
3934 * link count was not incremented in JudgeEntry().
3936 if (class == vLarge) { /* directory vnode */
3937 pv = vnodeIdToBitNumber(vep->parent);
3938 if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
3939 if (vep->parent == 1 && newrootdir) {
3940 /* this vnode's parent was the volume root, and
3941 * we just created the volume root. So, the parent
3942 * dir didn't exist during JudgeEntry, so the link
3943 * count was not inc'd there, so don't dec it here.
3949 salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
3955 continue; /* If no rootdir, can't attach orphaned files */
3957 /* Here we attach orphaned files and directories into the
3958 * root directory, LVVnode, making sure link counts stay correct.
3960 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3961 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3962 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3964 /* Update this orphaned vnode's info. Its parent info and
3965 * link count (do for orphaned directories and files).
3967 vep->parent = LFVnode; /* Parent is the root dir */
3968 vep->unique = LFUnique;
3971 vep->count--; /* Inc link count (root dir will pt to it) */
3973 /* If this orphaned vnode is a directory, change '..'.
3974 * The name of the orphaned dir/file is unknown, so we
3975 * build a unique name. No need to CopyOnWrite the directory
3976 * since it is not connected to tree in BK or RO volume and
3977 * won't be visible there.
3979 if (class == vLarge) {
3983 /* Remove and recreate the ".." entry in this orphaned directory */
3984 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
3985 salvinfo->vnodeInfo[class].inodes[v],
3986 &salvinfo->VolumeChanged);
3988 pa.Unique = LFUnique;
3989 osi_Assert(afs_dir_Delete(&dh, "..") == 0);
3990 osi_Assert(afs_dir_Create(&dh, "..", &pa) == 0);
3992 /* The original parent's link count was decremented above.
3993 * Here we increment the new parent's link count.
3995 pv = vnodeIdToBitNumber(LFVnode);
3996 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
4000 /* Go to the root dir and add this entry. The link count of the
4001 * root dir was incremented when ".." was created. Try 10 times.
4003 for (j = 0; j < 10; j++) {
4004 pa.Vnode = ThisVnode;
4005 pa.Unique = ThisUnique;
4007 snprintf(npath, sizeof npath, "%s.%u.%u",
4008 ((class == vLarge) ? "__ORPHANDIR__"
4009 : "__ORPHANFILE__"),
4010 ThisVnode, ThisUnique);
4012 CopyOnWrite(salvinfo, &rootdir);
4013 code = afs_dir_Create(&rootdir.dirHandle, npath, &pa);
4017 ThisUnique += 50; /* Try creating a different file */
4019 osi_Assert(code == 0);
4020 Log("Attaching orphaned %s to volume's root dir as %s\n",
4021 ((class == vLarge) ? "directory" : "file"), npath);
4023 } /* for each vnode in the class */
4024 } /* for each class of vnode */
4026 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
4028 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
4030 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
4032 osi_Assert(code == 0);
4033 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
4036 DFlush(); /* Flush the changes */
4037 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
4038 Log("Cannot attach orphaned files and directories: Root directory not found\n");
4039 orphans = ORPH_IGNORE;
4042 /* Write out all changed vnodes. Orphaned files and directories
4043 * will get removed here also (if requested).
4045 for (class = 0; class < nVNODECLASSES; class++) {
4046 afs_sfsize_t nVnodes = salvinfo->vnodeInfo[class].nVnodes;
4047 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
4048 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
4049 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
4050 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
4051 for (i = 0; i < nVnodes; i++) {
4052 struct VnodeEssence *vnp = &vnodes[i];
4053 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
4055 /* If the vnode is good but is unclaimed (not listed in
4056 * any directory entries), then it is orphaned.
4059 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
4060 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
4064 if (vnp->changed || vnp->count) {
4067 IH_IREAD(salvinfo->vnodeInfo[class].handle,
4068 vnodeIndexOffset(vcp, vnodeNumber),
4069 (char *)&vnode, sizeof(vnode));
4070 osi_Assert(nBytes == sizeof(vnode));
4072 vnode.parent = vnp->parent;
4073 oldCount = vnode.linkCount;
4074 vnode.linkCount = vnode.linkCount - vnp->count;
4077 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4079 if (!vnp->todelete) {
4080 /* Orphans should have already been attached (if requested) */
4081 osi_Assert(orphans != ORPH_ATTACH);
4082 oblocks += vnp->blockCount;
4085 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4087 BlocksInVolume -= vnp->blockCount;
4089 if (VNDISK_GET_INO(&vnode)) {
4091 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4092 osi_Assert(code == 0);
4094 memset(&vnode, 0, sizeof(vnode));
4096 } else if (vnp->count) {
4098 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4101 vnode.modeBits = vnp->modeBits;
4104 vnode.dataVersion++;
4107 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4108 vnodeIndexOffset(vcp, vnodeNumber),
4109 (char *)&vnode, sizeof(vnode));
4110 osi_Assert(nBytes == sizeof(vnode));
4112 salvinfo->VolumeChanged = 1;
4116 if (!Showmode && ofiles) {
4117 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4119 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
/* Release the names recorded for each vnode. */
4123 for (class = 0; class < nVNODECLASSES; class++) {
4124 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4125 for (i = 0; i < vip->nVnodes; i++)
4126 if (vip->vnodes[i].name)
4127 free(vip->vnodes[i].name);
4134 /* Set correct resource utilization statistics */
4135 volHeader.filecount = FilesInVolume;
4136 volHeader.diskused = BlocksInVolume;
4138 /* Make sure the uniquifer is big enough: maxunique is the real maxUniquifier */
4139 if (volHeader.uniquifier < (maxunique + 1)) {
4141 Log("Volume uniquifier is too low; fixed\n");
4142 /* Plus 2,000 in case there are workstations out there with
4143 * cached vnodes that have since been deleted
4145 volHeader.uniquifier = (maxunique + 1 + 2000);
4149 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4150 "Only use this salvaged volume to copy data to another volume; "
4151 "do not continue to use this volume (%lu) as-is.\n",
4152 afs_printable_uint32_lu(vid));
4155 if (!Testing && salvinfo->VolumeChanged) {
4156 #ifdef FSSYNC_BUILD_CLIENT
4157 if (salvinfo->useFSYNC) {
4158 afs_int32 fsync_code;
4160 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4162 Log("Error trying to tell the fileserver to break callbacks for "
4163 "changed volume %lu; error code %ld\n",
4164 afs_printable_uint32_lu(vid),
4165 afs_printable_int32_ld(fsync_code));
4167 salvinfo->VolumeChanged = 0;
4170 #endif /* FSSYNC_BUILD_CLIENT */
4172 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4173 if (!salvinfo->useFSYNC) {
4174 /* A volume's contents have changed, but the fileserver will not
4175 * break callbacks on the volume until it tries to load the vol
4176 * header. So, to reduce the amount of time a client could have
4177 * stale data, remove fsstate.dat, so the fileserver will init
4178 * callback state with all clients. This is a very coarse hammer,
4179 * and in the future we should just record which volumes have
4181 code = unlink(AFSDIR_SERVER_FSSTATE_FILEPATH);
4182 if (code && errno != ENOENT) {
4183 Log("Error %d when trying to unlink FS state file %s\n", errno,
4184 AFSDIR_SERVER_FSSTATE_FILEPATH);
4190 /* Turn off the inUse bit; the volume's been salvaged! */
4191 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4192 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4193 volHeader.inService = 1; /* allow service again */
/* needsCallback records whether the volume is still flagged as changed here. */
4194 volHeader.needsCallback = (salvinfo->VolumeChanged != 0);
4195 volHeader.dontSalvage = DONT_SALVAGE;
4196 salvinfo->VolumeChanged = 0;
4198 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4199 osi_Assert(nBytes == sizeof(volHeader));
4202 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
4203 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
4204 FilesInVolume, BlocksInVolume);
/* Drop the vnode index handles used during this salvage. */
4207 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4208 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
/*
 * ClearROInUseBit: read the volume header through summary->volumeInfoHandle,
 * clear inUse and needsSalvaged, set inService and dontSalvage, and write
 * the header back. Both the read and the write are asserted to transfer the
 * whole header, and the header magic is asserted to be VOLUMEINFOMAGIC.
 */
4214 ClearROInUseBit(struct VolumeSummary *summary)
4216 IHandle_t *h = summary->volumeInfoHandle;
4217 afs_sfsize_t nBytes;
4219 VolumeDiskData volHeader;
4221 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4222 osi_Assert(nBytes == sizeof(volHeader));
4223 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
/* Mark the volume as cleanly salvaged and back in service. */
4224 volHeader.inUse = 0;
4225 volHeader.needsSalvaged = 0;
4226 volHeader.inService = 1;
4227 volHeader.dontSalvage = DONT_SALVAGE;
4229 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4230 osi_Assert(nBytes == sizeof(volHeader));
4235 * Possible delete the volume.
4237 * deleteMe - Always do so, only a partial volume.
4240 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4241 char *message, int deleteMe, int check)
4243 if (readOnly(isp) || deleteMe) {
4244 if (isp->volSummary && isp->volSummary->fileName) {
4247 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
4249 Log("It will be deleted on this server (you may find it elsewhere)\n");
4252 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
4254 Log("it will be deleted instead. It should be recloned.\n");
4259 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, isp->volSummary->fileName);
4261 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4263 Log("Error %ld destroying volume disk header for volume %lu\n",
4264 afs_printable_int32_ld(code),
4265 afs_printable_uint32_lu(isp->volumeId));
4268 /* make sure we actually delete the fileName file; ENOENT
4269 * is fine, since VDestroyVolumeDiskHeader probably already
4271 if (unlink(path) && errno != ENOENT) {
4272 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4274 if (salvinfo->useFSYNC) {
4275 AskDelete(salvinfo, isp->volumeId);
4277 isp->volSummary->deleted = 1;
4280 } else if (!check) {
4281 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
4283 Abort("Salvage of volume %u aborted\n", isp->volumeId);
4287 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4289 * Locks a volume on disk for salvaging.
4291 * @param[in] volumeId volume ID to lock
4293 * @return operation status
4295 * @retval -1 volume lock raced with a fileserver restart; all volumes must
4296 * be checked out and locked again
/*
 * LockVolume: take the on-disk lock for volumeId on the partition being
 * salvaged, verify with the fileserver that the volume is still checked out
 * to the salvager, and stamp inUse = programType into the volume header.
 *
 * Failing to obtain the lock, or any FSYNC_VerifyCheckout result other than
 * SYNC_OK / SYNC_DENIED, aborts the salvage. SYNC_DENIED is the case in
 * which the caller has to check the volumes out again.
 */
4301 LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
4306 /* should always be WRITE_LOCK, but keep the lock-type logic all
4307 * in one place, in VVolLockType. Params will be ignored, but
4308 * try to provide what we're logically doing. */
4309 locktype = VVolLockType(V_VOLUPD, 1);
/* Non-blocking lock attempt, as the NB suffix suggests (confirm in the lock API). */
4311 code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
4313 if (code == EBUSY) {
4314 Abort("Someone else appears to be using volume %lu; Aborted\n",
4315 afs_printable_uint32_lu(volumeId));
4317 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4318 afs_printable_int32_ld(code),
4319 afs_printable_uint32_lu(volumeId));
/* Confirm the fileserver still considers the volume checked out for salvage. */
4322 code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPartition->name, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4323 if (code == SYNC_DENIED) {
4324 /* need to retry checking out volumes */
4327 if (code != SYNC_OK) {
4328 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4329 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4332 /* set inUse = programType in the volume header to ensure that nobody
4333 * tries to use this volume again without salvaging, if we somehow crash
4334 * or otherwise exit before finishing the salvage.
4338 struct VolumeHeader header;
4339 struct VolumeDiskHeader diskHeader;
4340 struct VolumeDiskData volHeader;
4342 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
4347 DiskToVolumeHeader(&header, &diskHeader);
4349 IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
4350 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4351 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4357 volHeader.inUse = programType;
4359 /* If we can't re-write the header, bail out and error. We don't
4360 * assert when reading the header, since it's possible the
4361 * header isn't really there (when there's no data associated
4362 * with the volume; we just delete the vol header file in that
4363 * case). But if it's there enough that we can read it, but
4364 * somehow we cannot write to it to signify we're salvaging it,
4365 * we've got a big problem and we cannot continue. */
4366 osi_Assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
4373 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
/*
 * AskError: in demand-attach builds, ask the fileserver (FSYNC_VOL_FORCE_ERROR)
 * to put volumeId on this partition into the error state. A failure is only
 * logged. In other builds the FSYNC request is compiled out.
 */
4376 AskError(struct SalvInfo *salvinfo, VolumeId volumeId)
4378 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4380 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4381 FSYNC_VOL_FORCE_ERROR, FSYNC_WHATEVER, NULL);
4382 if (code != SYNC_OK) {
4383 Log("AskError: failed to force volume %lu into error state; "
4384 "SYNC error code %ld (%s)\n", (long unsigned)volumeId,
4385 (long)code, SYNC_res2string(code));
4387 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
/*
 * AskOffline: ask the fileserver (FSYNC_VOL_OFF) to take volumeId offline so
 * it can be salvaged. The request is attempted up to three times. A denial
 * or a protocol mismatch (SYNC_BAD_COMMAND) aborts the salvage after logging
 * a hint; other failures log and call FSYNC_clientFinis() before trying
 * again. If the final result is not SYNC_OK the salvage is aborted.
 */
4391 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
4396 memset(&res, 0, sizeof(res));
4398 for (i = 0; i < 3; i++) {
4399 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4400 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4402 if (code == SYNC_OK) {
4404 } else if (code == SYNC_DENIED) {
4406 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4408 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4409 Abort("Salvage aborted\n");
4410 } else if (code == SYNC_BAD_COMMAND) {
4411 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
/* The hint depends on whether this binary was built for demand attach. */
4414 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4415 Log("AskOffline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4417 Log("AskOffline: fileserver is DAFS but we are not.\n");
4420 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4421 Log("AskOffline: fileserver is not DAFS but we are.\n");
4423 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4426 Abort("Salvage aborted\n");
4429 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4430 FSYNC_clientFinis();
4434 if (code != SYNC_OK) {
4435 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4436 Abort("Salvage aborted\n");
/*
 * AskDAFS: find out whether the fileserver is a demand-attach fileserver by
 * sending a FSYNC_VOL_QUERY_VOP request for volume 1 and classifying the
 * response code; the request is attempted up to three times.
 *
 * NOTE(review): the result is presumably cached in the file-scope isDAFS
 * (-1 meaning "not asked yet"); the lines that read and store it are not
 * visible here -- confirm.
 */
4440 /* don't want to pass around state; remember it here */
4441 static int isDAFS = -1;
4445 afs_int32 code, i, ret = 0;
4448 /* we don't care if we race. the answer shouldn't change */
4452 memset(&res, 0, sizeof(res));
4454 for (i = 0; i < 3; i++) {
4455 code = FSYNC_VolOp(1, NULL,
4456 FSYNC_VOL_QUERY_VOP, FSYNC_SALVAGE, &res);
/* Each distinct response code is handled by its own branch below. */
4458 if (code == SYNC_OK) {
4461 } else if (code == SYNC_DENIED) {
4464 } else if (code == SYNC_BAD_COMMAND) {
4467 } else if (code == SYNC_FAILED) {
4468 if (res.hdr.reason == FSYNC_UNKNOWN_VOLID)
4475 Log("AskDAFS: request to query fileserver failed; trying again...\n");
4476 FSYNC_clientFinis();
/*
 * MaybeAskOnline: call AskOnline() for volumeId after first trying to read
 * its disk header from the partition being salvaged. Per the comment below,
 * a volume whose header cannot be read is not brought back online.
 */
4486 MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4488 struct VolumeDiskHeader diskHdr;
4490 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHdr);
4492 /* volume probably does not exist; no need to bring back online */
4495 AskOnline(salvinfo, volumeId);
/*
 * AskOnline: ask the fileserver (FSYNC_VOL_ON) to bring volumeId on this
 * partition back online. The request is attempted up to three times;
 * failures are logged and nothing here aborts the salvage.
 */
4499 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4503 for (i = 0; i < 3; i++) {
4504 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4505 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4507 if (code == SYNC_OK) {
4509 } else if (code == SYNC_DENIED) {
4510 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4511 } else if (code == SYNC_BAD_COMMAND) {
4512 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4514 Log("AskOnline: please make sure file server binaries are same version.\n");
/* Any other failure: log, shut the FSYNC client down and go around again. */
4518 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
4519 FSYNC_clientFinis();
/*
 * AskDelete: tell the fileserver, through FSYNC_VolOp(), that the salvager
 * is done with a volume which should be considered deleted.
 *
 * salvinfo - salvage state; the partition name is taken from
 *            salvinfo->fileSysPartition->name.
 * volumeId - volume being reported.
 *
 * Issues FSYNC_VOL_DONE with reason FSYNC_SALVAGE, making at most three
 * attempts.  A SYNC_FAILED reply whose reason is FSYNC_UNKNOWN_VOLID or
 * FSYNC_WRONG_PART is treated as "already deleted" rather than as an error.
 *
 * Every message logged here is tagged "AskDelete:" so that a failure can be
 * attributed to this function when reading the salvager log.
 */
4526 AskDelete(struct SalvInfo *salvinfo, VolumeId volumeId)
/* one request per iteration, three iterations at most */
4531 for (i = 0; i < 3; i++) {
/* start from a clean response so a stale reason code is never examined */
4532 memset(&res, 0, sizeof(res));
4533 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4534 FSYNC_VOL_DONE, FSYNC_SALVAGE, &res);
4536 if (code == SYNC_OK) {
4538 } else if (code == SYNC_DENIED) {
4539 Log("AskDelete: file server denied DONE request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4540 } else if (code == SYNC_BAD_COMMAND) {
/* the argument for '%d' is on a line that is not part of this listing */
4541 Log("AskDelete: fssync protocol mismatch (bad command word '%d')\n",
4544 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4545 Log("AskDelete: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4547 Log("AskDelete: fileserver is DAFS but we are not.\n");
4550 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4551 Log("AskDelete: fileserver is not DAFS but we are.\n");
4553 Log("AskDelete: please make sure fileserver, volserver and salvager binaries are same version.\n");
4557 } else if (code == SYNC_FAILED &&
4558 (res.hdr.reason == FSYNC_UNKNOWN_VOLID ||
4559 res.hdr.reason == FSYNC_WRONG_PART)) {
4560 /* volume is already effectively 'deleted' */
4564 Log("AskDelete: request for fileserver to delete volume failed; trying again...\n");
/* presumably closes the sync client connection so the retry starts fresh -- confirm */
4565 FSYNC_clientFinis();
/*
 * CopyInode: copy the contents of inode1 into inode2 on the same device.
 *
 * device   - device holding both inodes.
 * inode1   - source inode.
 * inode2   - destination inode.
 * rwvolume - volume id handed to IH_INIT for both handles.
 *
 * Data is moved with FDH_PREAD/FDH_PWRITE in chunks of sizeof(buf), using
 * `size` as the offset for both the read and the write.  A short write, or a
 * read that ends with anything other than 0, trips osi_Assert.  Both
 * descriptors are closed with FDH_REALLYCLOSE when the copy is done.
 *
 * NOTE(review): srcFdP is asserted to be non-NULL but destFdP is used
 * without any visible check; a failed IH_OPEN on the destination would reach
 * FDH_PWRITE with a NULL handle.
 * NOTE(review): the write is performed inside the osi_Assert argument, so
 * the copy relies on osi_Assert always evaluating its argument.
 */
4572 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4574 /* Volume parameter is passed in case iopen is upgraded in future to
4575 * require a volume Id to be passed
4578 IHandle_t *srcH, *destH;
4579 FdHandle_t *srcFdP, *destFdP;
4581 afs_foff_t size = 0;
4583 IH_INIT(srcH, device, rwvolume, inode1);
4584 srcFdP = IH_OPEN(srcH);
4585 osi_Assert(srcFdP != NULL);
4586 IH_INIT(destH, device, rwvolume, inode2);
4587 destFdP = IH_OPEN(destH);
4588 while ((nBytes = FDH_PREAD(srcFdP, buf, sizeof(buf), size)) > 0) {
4589 osi_Assert(FDH_PWRITE(destFdP, buf, nBytes, size) == nBytes);
4592 osi_Assert(nBytes == 0);
4593 FDH_REALLYCLOSE(srcFdP);
4594 FDH_REALLYCLOSE(destFdP);
/*
 * PrintInodeList: log every record found in the salvager's inode file.
 *
 * salvinfo - salvage state; salvinfo->inodeFd is the file that is read.
 *
 * The whole file (OS_SIZE bytes) is read into a heap buffer and walked as an
 * array of struct ViceInodeInfo, one Log() line per record.  A negative
 * size, a failed allocation or a short read trips osi_Assert.
 */
4601 PrintInodeList(struct SalvInfo *salvinfo)
4603 struct ViceInodeInfo *ip;
4604 struct ViceInodeInfo *buf;
4607 afs_sfsize_t st_size;
4609 st_size = OS_SIZE(salvinfo->inodeFd);
4610 osi_Assert(st_size >= 0);
4611 buf = (struct ViceInodeInfo *)malloc(st_size);
4612 osi_Assert(buf != NULL);
/* integer division: a trailing partial record, if any, is not printed */
4613 nInodes = st_size / sizeof(struct ViceInodeInfo);
/* NOTE(review): the read happens inside the osi_Assert argument, so it
 * relies on osi_Assert always evaluating its argument */
4614 osi_Assert(OS_READ(salvinfo->inodeFd, buf, st_size) == st_size);
4615 for (ip = buf; nInodes--; ip++) {
4616 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
4617 PrintInode(stmp, ip->inodeNumber), ip->linkCount,
4618 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
4619 ip->u.param[2], ip->u.param[3]);
/*
 * PrintInodeSummary: log one line for each of the
 * salvinfo->nVolumesInInodeFile entries of salvinfo->inodeSummary, showing
 * the volume id, RW volume id, index, inode counts and maximum uniquifier.
 */
4625 PrintInodeSummary(struct SalvInfo *salvinfo)
4628 struct InodeSummary *isp;
4630 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4631 isp = &salvinfo->inodeSummary[i];
/* the trailing "volSummary" in the format is literal text; no argument goes with it */
4632 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/*
 * PrintVolumeSummary: log the fileName of each of the salvinfo->nVolumes
 * entries reached from salvinfo->volumeSummaryp.
 */
4637 PrintVolumeSummary(struct SalvInfo *salvinfo)
4640 struct VolumeSummary *vsp;
4642 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; vsp++, i++) {
/* "header" and "wouldNeedCallback" are literal text here; only fileName is substituted */
4643 Log("fileName:%s, header, wouldNeedCallback\n", vsp->fileName);
/*
 * Body of the fork wrapper.  Its signature is on a line that is not part of
 * this listing; the comment on the first line below names it Fork.
 * The sync-client reconnection is compiled in only for
 * AFS_DEMAND_ATTACH_FS builds that are not AFS_NT40_ENV.
 */
4653 osi_Assert(0); /* Fork is never executed in the NT code path */
4657 #ifdef AFS_DEMAND_ATTACH_FS
/* f == 0 is presumably the child side of fork() -- confirm against the full source */
4658 if ((f == 0) && (programType == salvageServer)) {
4659 /* we are a salvageserver child */
4660 #ifdef FSSYNC_BUILD_CLIENT
4661 VChildProcReconnectFS_r();
/* the statement guarded by SALVSYNC_BUILD_CLIENT is on a line not shown here */
4663 #ifdef SALVSYNC_BUILD_CLIENT
4667 #endif /* AFS_DEMAND_ATTACH_FS */
4668 #endif /* !AFS_NT40_ENV */
/*
 * Exit path of the salvager (the function header is on a line that is not
 * part of this listing; presumably Exit(code) -- confirm).
 * For a salvageServer under AFS_DEMAND_ATTACH_FS, extra steps are compiled
 * in under the SALVSYNC_BUILD_CLIENT and FSSYNC_BUILD_CLIENT guards; the
 * statements themselves are on lines not shown here.
 */
4678 #ifdef AFS_DEMAND_ATTACH_FS
4679 if (programType == salvageServer) {
4680 #ifdef SALVSYNC_BUILD_CLIENT
4683 #ifdef FSSYNC_BUILD_CLIENT
4687 #endif /* AFS_DEMAND_ATTACH_FS */
/* a thread other than main_thread ends only itself, handing back code as its result */
4690 if (main_thread != pthread_self())
4691 pthread_exit((void *)code);
/*
 * Child-reaping path (the function header is on a line that is not part of
 * this listing; presumably Wait(prog) -- confirm).
 * Blocks in wait() until some child terminates and asserts that the call
 * itself succeeded; `prog` is used only to label the core-dump message.
 */
4704 pid = wait(&status);
4705 osi_Assert(pid != -1);
4706 if (WCOREDUMP(status))
4707 Log("\"%s\" core dumped!\n", prog);
/* death by signal or a non-zero exit status; the action taken is on a line not shown here */
4708 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp: format a time value, in local time, for use in log output.
 *
 * clock     - the time to format.
 * precision - non-zero selects "MM/DD/YYYY HH:MM:SS"; zero omits the
 *             seconds ("MM/DD/YYYY HH:MM").
 *
 * Returns a pointer to a static buffer that every call overwrites, so the
 * text must be used or copied before TimeStamp() is called again.  If the
 * time cannot be converted, or the formatted text does not fit the buffer,
 * an empty string is returned instead of indeterminate buffer contents.
 */
char *
TimeStamp(time_t clock, int precision)
{
    static char timestamp[20];
    const char *fmt = precision ? "%m/%d/%Y %H:%M:%S" : "%m/%d/%Y %H:%M";
    struct tm *lt = localtime(&clock);

    /* localtime() may fail, and strftime() returns 0 when the result does
     * not fit, leaving the buffer contents unspecified */
    if (lt == NULL || strftime(timestamp, sizeof(timestamp), fmt, lt) == 0) {
        timestamp[0] = '\0';
    }
    return timestamp;
}
/*
 * CheckLogFile: set up logFile for the salvager log at log_path.
 *
 * log_path - path of the log file.
 *
 * The existing log is renamed to "<log_path>.old" and log_path is then
 * opened in append mode.  According to the comment below, stdout is used
 * when the open fails; that assignment is on a line not shown here.
 *
 * NOTE(review): oldSlvgLog holds AFSDIR_PATH_MAX bytes and is filled with
 * strcpy/strcat without a length check, so a log_path within four characters
 * of that limit would overflow it.
 */
4727 CheckLogFile(char * log_path)
4729 char oldSlvgLog[AFSDIR_PATH_MAX];
4731 #ifndef AFS_NT40_ENV
4738 strcpy(oldSlvgLog, log_path);
4739 strcat(oldSlvgLog, ".old");
4741 renamefile(log_path, oldSlvgLog);
4742 logFile = afs_fopen(log_path, "a");
4744 if (!logFile) { /* still nothing, use stdout */
4748 #ifndef AFS_NAMEI_ENV
4749 AFS_DEBUG_IOPS_LOG(logFile);
/*
 * TimeStampLogFile: give the log file a second, timestamped name.
 *
 * log_path - path of the existing log file.
 *
 * Builds "<log_path>.YYYY-MM-DD.HH:MM:SS" from localtime(&now) and creates
 * a hard link with that name pointing at log_path.  Compiled only when
 * AFS_NT40_ENV is not defined.  `now` is set on a line not shown here.
 */
4754 #ifndef AFS_NT40_ENV
4756 TimeStampLogFile(char * log_path)
4758 char stampSlvgLog[AFSDIR_PATH_MAX];
4763 lt = localtime(&now);
/* snprintf bounds the name to the buffer; an over-long name is silently truncated */
4764 snprintf(stampSlvgLog, sizeof stampSlvgLog,
4765 "%s.%04d-%02d-%02d.%02d:%02d:%02d", log_path,
4766 lt->tm_year + 1900, lt->tm_mon + 1, lt->tm_mday, lt->tm_hour,
4767 lt->tm_min, lt->tm_sec);
4769 /* try to link the logfile to a timestamped filename */
4770 /* if it fails, oh well, nothing we can do */
4771 link(log_path, stampSlvgLog);
/*
 * Log viewer (the function header is on a line that is not part of this
 * listing; presumably showlog() -- confirm).
 * Opens AFSDIR_SERVER_SLVGLOG_FILEPATH for reading and consumes it line by
 * line with fgets(); what is done with each line is on a line not shown
 * here.  The syslog message is compiled only when AFS_NT40_ENV is not
 * defined; its guarding condition is also not shown.
 */
4780 #ifndef AFS_NT40_ENV
4782 printf("Can't show log since using syslog.\n");
4793 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
4796 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
4799 while (fgets(line, sizeof(line), logFile))
/*
 * Log: printf-style logging routine of the salvager.
 *
 * format, ... - printf-style format string and its arguments.
 *
 * The message is first formatted into the local buffer `tmp`; vsnprintf
 * truncates it to the size of that buffer.  It is then either passed to
 * syslog at LOG_INFO (only where AFS_NT40_ENV is not defined) or written to
 * logFile prefixed with TimeStamp(now.tv_sec, 1).  The condition choosing
 * between the two is on lines not shown here.
 */
4806 Log(const char *format, ...)
4812 va_start(args, format);
4813 vsnprintf(tmp, sizeof tmp, format, args);
4815 #ifndef AFS_NT40_ENV
/* "%s" keeps any '%' inside the formatted message from being reinterpreted */
4817 syslog(LOG_INFO, "%s", tmp);
4821 gettimeofday(&now, NULL);
4822 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort: printf-style reporting routine used on the abort path.
 *
 * format, ... - printf-style format string and its arguments.
 *
 * The message is formatted into the local buffer `tmp` (truncated to its
 * size by vsnprintf) and then either passed to syslog at LOG_INFO (only
 * where AFS_NT40_ENV is not defined) or written to logFile without a
 * timestamp.  Whatever happens after the message is emitted is on lines not
 * shown here.
 */
4828 Abort(const char *format, ...)
4833 va_start(args, format);
4834 vsnprintf(tmp, sizeof tmp, format, args);
4836 #ifndef AFS_NT40_ENV
/* "%s" keeps any '%' inside the formatted message from being reinterpreted */
4838 syslog(LOG_INFO, "%s", tmp);
4842 fprintf(logFile, "%s", tmp);
/*
 * ToString: return a freshly allocated copy of the NUL-terminated string s.
 *
 * s - string to duplicate; must not be NULL.
 *
 * The copy comes from malloc() and belongs to the caller.  Allocation
 * failure is not reported to the caller: it trips osi_Assert.
 */
char *
ToString(const char *s)
{
    size_t nbytes = strlen(s) + 1;	/* text plus terminating NUL */
    char *copy = malloc(nbytes);

    osi_Assert(copy != NULL);
    memcpy(copy, s, nbytes);
    return copy;
}
4863 /* Remove the FORCESALVAGE file */
4865 RemoveTheForce(char *path)
4868 struct afs_stat_st force; /* so we can use afs_stat to find it */
4869 strcpy(target,path);
4870 strcat(target,"/FORCESALVAGE");
4871 if (!Testing && ForceSalvage) {
4872 if (afs_stat(target,&force) == 0) unlink(target);
/*
 * Two alternative definitions of UseTheForceLuke(path) follow, selected on
 * AFS_AIX32_ENV.
 *
 * The first one (AFS_AIX32_ENV not defined) builds "<path>/FORCESALVAGE"
 * and returns non-zero exactly when afs_stat() succeeds on it.  The second
 * one always returns 0; the remarks kept below give the reason.
 *
 * NOTE(review): in the first definition `target` is filled with
 * strcpy/strcat and no length check is visible, so an over-long path would
 * overflow it.
 */
4876 #ifndef AFS_AIX32_ENV
4878 * UseTheForceLuke - see if we can use the force
4881 UseTheForceLuke(char *path)
4883 struct afs_stat_st force;
4885 strcpy(target,path);
4886 strcat(target,"/FORCESALVAGE");
4888 return (afs_stat(target, &force) == 0);
4892 * UseTheForceLuke - see if we can use the force
4895 * The VRMIX fsck will not muck with the filesystem it is supposedly
4896 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4897 * muck directly with the root inode, which is within the normal
4899 * ListViceInodes() has a side effect of setting ForceSalvage if
4900 * it detects a need, based on root inode examination.
4903 UseTheForceLuke(char *path)
4906 return 0; /* sorry OB1 */
4911 /* NT support routines */
/* Path of the running executable; filled in on first use by nt_SalvagePartition(). */
4913 static char execpathname[MAX_PATH];
/*
 * nt_SalvagePartition: start a child process to salvage one partition.
 *
 * partName - partition name, copied into the job record.
 * jobn     - job number, stored in the job record.
 *
 * The child is started from this program's own executable (looked up once
 * and cached in execpathname) with the saved arguments, and is handed a
 * childJob_t holding SALVAGER_MAGIC, the job number and the partition name.
 *
 * NOTE(review): partName is copied into job.cj_part with strcpy and no
 * length check is visible here.
 */
4915 nt_SalvagePartition(char *partName, int jobn)
4920 if (!*execpathname) {
4921 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
/* GetModuleFileName returns 0 on failure and the size it was given when the
 * path had to be truncated; the size given above is MAX_PATH - 1 */
4922 if (!n || n == MAX_PATH - 1)
4925 job.cj_magic = SALVAGER_MAGIC;
4926 job.cj_number = jobn;
4927 (void)strcpy(job.cj_part, partName);
4928 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage: validate a job record and open the log file
 * for that job.
 *
 * datap - buffer expected to hold a childJob_t.
 * len   - size of that buffer; it must equal sizeof(childJob_t).
 *
 * The record is rejected when its size is wrong or its cj_magic is not
 * SALVAGER_MAGIC; the values returned in those cases are on lines not shown
 * here.  logFile is then opened for writing on
 * "<AFSDIR_SERVER_SLVGLOG_FILEPATH>.<number>"; the argument supplying the
 * number is also on a line not shown (presumably the job number -- confirm).
 *
 * NOTE(review): sprintf into logname is unbounded; snprintf with
 * sizeof(logname) would keep it within AFSDIR_PATH_MAX.
 */
4933 nt_SetupPartitionSalvage(void *datap, int len)
4935 childJob_t *jobp = (childJob_t *) datap;
4936 char logname[AFSDIR_PATH_MAX];
4938 if (len != sizeof(childJob_t))
4940 if (jobp->cj_magic != SALVAGER_MAGIC)
4945 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
4947 logFile = afs_fopen(logname, "w");
4955 #endif /* AFS_NT40_ENV */