2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
89 #include <afs/procmgmt.h>
92 #ifdef HAVE_SYS_FILE_H
93 # include <sys/file.h>
97 #include <WINNT/afsevent.h>
100 #define WCOREDUMP(x) ((x) & 0200)
103 #include <afs/afsint.h>
104 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
105 #if defined(AFS_VFSINCL_ENV)
106 #include <sys/vnode.h>
108 #include <sys/fs/ufs_inode.h>
110 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
111 #include <ufs/ufs/dinode.h>
112 #include <ufs/ffs/fs.h>
114 #include <ufs/inode.h>
117 #else /* AFS_VFSINCL_ENV */
119 #include <ufs/inode.h>
120 #else /* AFS_OSF_ENV */
121 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_DARWIN_ENV)
122 #include <sys/inode.h>
125 #endif /* AFS_VFSINCL_ENV */
126 #endif /* AFS_SGI_ENV */
129 #include <sys/lockf.h>
132 #include <checklist.h>
134 #if defined(AFS_SGI_ENV)
137 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
139 #include <sys/mnttab.h>
140 #include <sys/mntent.h>
145 #endif /* AFS_SGI_ENV */
146 #endif /* AFS_HPUX_ENV */
150 #include <afs/osi_inode.h>
154 #include <afs/afsutil.h>
155 #include <afs/fileutil.h>
160 #include <afs/afssyscalls.h>
164 #include "partition.h"
165 #include "daemon_com.h"
166 #include "daemon_com_inline.h"
168 #include "fssync_inline.h"
169 #include "volume_inline.h"
170 #include "salvsync.h"
171 #include "viceinode.h"
173 #include "volinodes.h" /* header magic number, etc. stuff */
174 #include "vol-salvage.h"
176 #include "vol_internal.h"
178 #include <afs/prs_fs.h>
180 #ifdef FSSYNC_BUILD_CLIENT
181 #include "vg_cache.h"
189 extern void *calloc();
191 static char *TimeStamp(time_t clock, int precision);
194 int debug; /* -d flag */
195 extern int Testing; /* -n flag */
196 int ListInodeOption; /* -i flag */
197 int ShowRootFiles; /* -r flag */
198 int RebuildDirs; /* -sal flag */
199 int Parallel = 4; /* -para X flag */
200 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
201 int forceR = 0; /* -b flag */
202 int ShowLog = 0; /* -showlog flag */
203 int ShowSuid = 0; /* -showsuid flag */
204 int ShowMounts = 0; /* -showmounts flag */
205 int orphans = ORPH_IGNORE; /* -orphans option */
210 int useSyslog = 0; /* -syslog flag */
211 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
220 #define MAXPARALLEL 32
222 int OKToZap; /* -o flag */
223 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
224 * in the volume header */
226 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
228 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
231 * information that is 'global' to a particular salvage job.
234 Device fileSysDevice; /**< The device number of the current partition
236 char fileSysPath[9]; /**< The path of the mounted partition currently
237 * being salvaged, i.e. the directory containing
238 * the volume headers */
239 char *fileSysPathName; /**< NT needs this to make name pretty log. */
240 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
241 int VGLinkH_cnt; /**< # of references to lnk handle. */
242 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
245 char *fileSysDeviceName; /**< The block device where the file system being
246 * salvaged was mounted */
247 char *filesysfulldev;
249 int VolumeChanged; /**< Set by any routine which would change the
250 * volume in a way which would require callbacks
251 * to be broken if the volume was put back
252 * on line by an active file server */
254 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
255 * header dealt with */
257 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
258 FD_t inodeFd; /**< File descriptor for inode file */
260 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
261 int nVolumes; /**< Number of volumes (read-write and read-only)
262 * in volume summary */
263 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
266 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
267 * vnodes in the volume that
268 * we are currently looking
270 int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
271 * to contact the fileserver over FSYNC */
278 /* Forward declarations */
279 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
280 static int AskVolumeSummary(struct SalvInfo *salvinfo,
281 VolumeId singleVolumeNumber);
282 static void MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId);
283 static void AskError(struct SalvInfo *salvinfo, VolumeId volumeId);
285 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
286 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
287 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
289 /* Uniquifier stored in the Inode */
294 return (u & 0x3fffff);
296 #if defined(AFS_SGI_EXMAG)
297 return (u & SGI_UNIQMASK);
300 #endif /* AFS_SGI_EXMAG */
307 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
309 return 0; /* otherwise may be transient, e.g. EMFILE */
314 char *save_args[MAX_ARGS];
316 extern pthread_t main_thread;
317 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
321 * Get the salvage lock if not already held. Hold until process exits.
323 * @param[in] locktype READ_LOCK or WRITE_LOCK
/*
 * Worker shared by ObtainSalvageLock() and ObtainSharedSalvageLock().
 * Initializes a local VLockFile for AFSDIR_SERVER_SLVGLOCK_FILEPATH and
 * requests it through VLockFileLock() with the caller-supplied lock type.
 * NOTE(review): the values of 'offset' and 'nonblock', and the branches that
 * print the two messages below, are not visible in this excerpt; presumably
 * the first reports that another salvager holds the lock and the second any
 * other error code -- confirm against the full source.
 */
326 _ObtainSalvageLock(int locktype)
328 struct VLockFile salvageLock;
333 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
335 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
338 "salvager: There appears to be another salvager running! "
343 "salvager: Error %d trying to acquire salvage lock! "
/* Requests the salvage lock with WRITE_LOCK via _ObtainSalvageLock(). */
349 ObtainSalvageLock(void)
351 _ObtainSalvageLock(WRITE_LOCK);
/* Requests the salvage lock with READ_LOCK via _ObtainSalvageLock(). */
354 ObtainSharedSalvageLock(void)
356 _ObtainSalvageLock(READ_LOCK);
360 #ifdef AFS_SGI_XFS_IOPS_ENV
361 /* Check if the given partition is mounted. For XFS, the root inode is not a
362 * constant. So we check the hard way.
/*
 * Scan the mounted-filesystem table (MOUNTED) for an entry whose mount
 * directory equals 'part'.
 *
 * Returns 1 when the scan stops on a matching entry and 0 when getmntent()
 * reaches the end of the table (NULL) without a match.  The previous
 * expression, "mntent ? 1 : 1", yielded 1 on both paths, so every partition
 * was reported as mounted.
 * NOTE(review): the statement that leaves the loop on a match and the call
 * that closes the table are not visible in this excerpt.
 */
365 IsPartitionMounted(char *part)
368 struct mntent *mntent;
370 osi_Assert(mntfp = setmntent(MOUNTED, "r"));
371 while (mntent = getmntent(mntfp)) {
372 if (!strcmp(part, mntent->mnt_dir))
377 return mntent ? 1 : 0;
380 /* Check if the given inode is the root of the filesystem. */
381 #ifndef AFS_SGI_XFS_IOPS_ENV
/*
 * Report whether 'status' describes the filesystem root by comparing its
 * st_ino with ROOTINODE; the result of that comparison is returned directly.
 * Sits under the "#ifndef AFS_SGI_XFS_IOPS_ENV" guard directly above.
 */
383 IsRootInode(struct afs_stat_st *status)
386 * The root inode is not a fixed value in XFS partitions. So we need to
387 * see if the partition is in the list of mounted partitions. This only
388 * affects the SalvageFileSys path, so we check there.
390 return (status->st_ino == ROOTINODE);
395 #ifndef AFS_NAMEI_ENV
396 /* We don't want to salvage big files filesystems, since we can't put volumes on
/*
 * Check whether the partition on device 'devName' is a "big files"
 * filesystem that must not be salvaged.
 *
 * A device path is built in 'name' ("/dev/" is prepended unless devName
 * already starts with it), the superblock is read with ReadSuper(), and
 * IsBigFilesFileSystem() is consulted.  A failed superblock read and a
 * big-files filesystem are each logged as "not salvaging".
 * NOTE(review): the declaration of 'name', the arguments completing the two
 * Log() calls and all return statements are not visible in this excerpt.
 * The sprintf()/strcpy() into 'name' are unbounded -- confirm the buffer is
 * large enough for any devName.
 */
400 CheckIfBigFilesFS(char *mountPoint, char *devName)
402 struct superblock fs;
405 if (strncmp(devName, "/dev/", 5)) {
406 (void)sprintf(name, "/dev/%s", devName);
408 (void)strcpy(name, devName);
411 if (ReadSuper(&fs, name) < 0) {
412 Log("Unable to read superblock. Not salvaging partition %s.\n",
416 if (IsBigFilesFileSystem(&fs)) {
417 Log("Partition %s is a big files filesystem, not salvaging.\n",
427 #define HDSTR "\\Device\\Harddisk"
428 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
/*
 * Function form: resolve each partition's devName with QueryDosDevice() and
 * compare the two results case-insensitively (_strnicmp over at most
 * RES_LEN - 1 characters); the function returns non-zero when they compare
 * equal.  A result that does not begin with HDSTR is reported with a
 * WARNING log line.
 * NOTE(review): the early-return values, the handling of 'dowarn', the
 * declarations of res1/res2 and the preprocessor guard that selects between
 * this function and the macro form at the end of the block are not visible
 * in this excerpt.
 */
430 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
436 static int dowarn = 1;
438 if (!QueryDosDevice(p1->devName, res1, RES_LEN - 1))
440 if (strncmp(res1, HDSTR, HDLEN)) {
443 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
444 res1, HDSTR, p1->devName);
447 if (!QueryDosDevice(p2->devName, res2, RES_LEN - 1))
449 if (strncmp(res2, HDSTR, HDLEN)) {
452 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
453 res2, HDSTR, p2->devName);
457 return (0 == _strnicmp(res1, res2, RES_LEN - 1));
/* Macro form: two partitions match when device / PartsPerDisk is equal. */
460 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
463 /* This assumes that two partitions with the same device number divided by
464 * PartsPerDisk are on the same disk.
467 SalvageFileSysParallel(struct DiskPartition64 *partP)
470 struct DiskPartition64 *partP;
471 int pid; /* Pid for this job */
472 int jobnumb; /* Log file job number */
473 struct job *nextjob; /* Next partition on disk to salvage */
475 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
476 struct job *thisjob = 0;
477 static int numjobs = 0;
478 static int jobcount = 0;
484 char logFileName[256];
488 /* We have a partition to salvage. Copy it into thisjob */
489 thisjob = calloc(1, sizeof(struct job));
491 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
494 thisjob->partP = partP;
495 thisjob->jobnumb = jobcount;
497 } else if (jobcount == 0) {
498 /* We are asking to wait for all jobs (partp == 0), yet we never
501 Log("No file system partitions named %s* found; not salvaged\n",
502 VICE_PARTITION_PREFIX);
506 if (debug || Parallel == 1) {
508 SalvageFileSys(thisjob->partP, 0);
515 /* Check to see if thisjob is for a disk that we are already
516 * salvaging. If it is, link it in as the next job to do. The
517 * jobs array has 1 entry per disk being salvaged. numjobs is
518 * the total number of disks currently being salvaged. In
519 * order to keep the jobs array compact, when a disk is
520 * completed, the highest element in the jobs array is moved
521 * down to the now open slot.
523 for (j = 0; j < numjobs; j++) {
524 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
525 /* On same disk, add it to this list and return */
526 thisjob->nextjob = jobs[j]->nextjob;
527 jobs[j]->nextjob = thisjob;
534 /* Loop until we start thisjob or until all existing jobs are finished */
535 while (thisjob || (!partP && (numjobs > 0))) {
536 startjob = -1; /* No new job to start */
538 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
539 /* Either the max jobs are running or we have to wait for all
540 * the jobs to finish. In either case, we wait for at least one
541 * job to finish. When it's done, clean up after it.
543 pid = wait(&wstatus);
544 osi_Assert(pid != -1);
545 for (j = 0; j < numjobs; j++) { /* Find which job it is */
546 if (pid == jobs[j]->pid)
549 osi_Assert(j < numjobs);
550 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
551 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
554 numjobs--; /* job no longer running */
555 oldjob = jobs[j]; /* remember */
556 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
557 free(oldjob); /* free the old job */
559 /* If there is another partition on the disk to salvage, then
560 * say we will start it (startjob). If not, then put thisjob there
561 * and say we will start it.
563 if (jobs[j]) { /* Another partitions to salvage */
564 startjob = j; /* Will start it */
565 } else { /* There is not another partition to salvage */
567 jobs[j] = thisjob; /* Add thisjob */
569 startjob = j; /* Will start it */
571 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
572 startjob = -1; /* Don't start it - already running */
576 /* We don't have to wait for a job to complete */
578 jobs[numjobs] = thisjob; /* Add this job */
580 startjob = numjobs; /* Will start it */
584 /* Start up a new salvage job on a partition in job slot "startjob" */
585 if (startjob != -1) {
587 Log("Starting salvage of file system partition %s\n",
588 jobs[startjob]->partP->name);
590 /* For NT, we not only fork, but re-exec the salvager. Pass in the
591 * commands and pass the child job number via the data path.
594 nt_SalvagePartition(jobs[startjob]->partP->name,
595 jobs[startjob]->jobnumb);
596 jobs[startjob]->pid = pid;
601 jobs[startjob]->pid = pid;
607 for (fd = 0; fd < 16; fd++)
614 openlog("salvager", LOG_PID, useSyslogFacility);
618 snprintf(logFileName, sizeof logFileName, "%s.%d",
619 AFSDIR_SERVER_SLVGLOG_FILEPATH,
620 jobs[startjob]->jobnumb);
621 logFile = afs_fopen(logFileName, "w");
626 SalvageFileSys1(jobs[startjob]->partP, 0);
631 } /* while ( thisjob || (!partP && numjobs > 0) ) */
633 /* If waited for all jobs to complete, now collect log files and return */
635 if (!useSyslog) /* if syslogging - no need to collect */
638 for (i = 0; i < jobcount; i++) {
639 snprintf(logFileName, sizeof logFileName, "%s.%d",
640 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
641 if ((passLog = afs_fopen(logFileName, "r"))) {
642 while (fgets(buf, sizeof(buf), passLog)) {
647 (void)unlink(logFileName);
/*
 * Salvage the partition 'partP' by delegating to SalvageFileSys1(), passing
 * 'singleVolumeNumber' through unchanged.
 *
 * SalvageFileSys1() runs in the current process when forking is not
 * possible (!canfork) or debug is set; otherwise it runs in the process for
 * which Fork() returns 0.
 * NOTE(review): the contents of the "canfork && !debug" branch and the
 * branch structure around Wait("SalvageFileSys") are not visible in this
 * excerpt; presumably the child exits there and the parent waits for it --
 * confirm against the full source.
 */
656 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
658 if (!canfork || debug || Fork() == 0) {
659 SalvageFileSys1(partP, singleVolumeNumber);
660 if (canfork && !debug) {
665 Wait("SalvageFileSys");
/*
 * Split a device path at its last OS_DIRSEPC.
 *
 * pbuffer - in: device path; out: overwritten in place with the text that
 *           follows the last OS_DIRSEPC.
 * wpath   - output buffer.  NOTE(review): the statement that fills it is
 *           not visible in this excerpt.
 *
 * The working copy in 'pbuf' is made with strlcpy() so an over-long path is
 * truncated instead of overrunning the 128-byte buffer.  The final
 * component is moved to the front of 'pbuffer' with memmove() because the
 * source (ptr + 1) lies inside the destination buffer, and strcpy() has
 * undefined behavior for overlapping objects.
 * NOTE(review): the NULL checks on 'ptr' and the return statements are not
 * visible in this excerpt.
 */
669 get_DevName(char *pbuffer, char *wpath)
671 char pbuf[128], *ptr;
672 strlcpy(pbuf, pbuffer, sizeof(pbuf));
673 ptr = (char *)strrchr(pbuf, OS_DIRSEPC);
679 ptr = (char *)strrchr(pbuffer, OS_DIRSEPC);
681 memmove(pbuffer, ptr + 1, strlen(ptr + 1) + 1);
688 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
691 char inodeListPath[256];
692 FD_t inodeFile = INVALID_FD;
693 static char tmpDevName[100];
694 static char wpath[100];
695 struct VolumeSummary *vsp, *esp;
699 struct SalvInfo l_salvinfo;
700 struct SalvInfo *salvinfo = &l_salvinfo;
703 memset(salvinfo, 0, sizeof(*salvinfo));
706 if (inodeFile != INVALID_FD) {
708 inodeFile = INVALID_FD;
710 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
711 Abort("Raced too many times with fileserver restarts while trying to "
712 "checkout/lock volumes; Aborted\n");
714 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
716 /* unlock all previous volume locks, since we're about to lock them
718 VLockFileReinit(&partP->volLockFile);
720 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
722 salvinfo->fileSysPartition = partP;
723 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
724 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
727 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
728 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
729 name = partP->devName;
731 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
732 strcpy(tmpDevName, partP->devName);
733 name = get_DevName(tmpDevName, wpath);
734 salvinfo->fileSysDeviceName = name;
735 salvinfo->filesysfulldev = wpath;
738 if (singleVolumeNumber) {
739 #if !(defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL))
740 /* only non-DAFS locks the partition when salvaging a single volume;
741 * DAFS will lock the individual volumes in the VG */
742 VLockPartition(partP->name);
743 #endif /* !(AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL) */
747 /* salvageserver already setup fssync conn for us */
748 if ((programType != salvageServer) && !VConnectFS()) {
749 Abort("Couldn't connect to file server\n");
752 salvinfo->useFSYNC = 1;
753 AskOffline(salvinfo, singleVolumeNumber);
754 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
755 if (LockVolume(salvinfo, singleVolumeNumber)) {
758 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
761 salvinfo->useFSYNC = 0;
762 VLockPartition(partP->name);
766 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
769 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
770 partP->name, name, (Testing ? "(READONLY mode)" : ""));
772 Log("***Forced salvage of all volumes on this partition***\n");
777 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
784 osi_Assert((dirp = opendir(salvinfo->fileSysPath)) != NULL);
785 while ((dp = readdir(dirp))) {
786 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
787 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
789 Log("Removing old salvager temp files %s\n", dp->d_name);
790 strcpy(npath, salvinfo->fileSysPath);
791 strcat(npath, OS_DIRSEP);
792 strcat(npath, dp->d_name);
798 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
800 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
801 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
803 snprintf(inodeListPath, 255, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
807 inodeFile = OS_OPEN(inodeListPath, O_RDWR|O_TRUNC|O_CREAT, 0666);
808 if (inodeFile == INVALID_FD) {
809 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
812 /* Using nt_unlink here since we're really using the delete on close
813 * semantics of unlink. In most places in the salvager, we really do
814 * mean to unlink the file at that point. Those places have been
815 * modified to actually do that so that the NT crt can be used there.
817 * jaltman - On NT delete on close cannot be applied to a file while the
818 * process has an open file handle that does not have DELETE file
819 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
820 * delete privileges. As a result the nt_unlink() call will always
823 code = nt_unlink(inodeListPath);
825 code = unlink(inodeListPath);
828 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
831 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
835 salvinfo->inodeFd = inodeFile;
836 if (salvinfo->inodeFd == INVALID_FD)
837 Abort("Temporary file %s is missing...\n", inodeListPath);
838 OS_SEEK(salvinfo->inodeFd, 0L, SEEK_SET);
839 if (ListInodeOption) {
840 PrintInodeList(salvinfo);
841 if (singleVolumeNumber) {
842 /* We've checked out the volume from the fileserver, and we need
843 * to give it back. We don't know if the volume exists or not,
844 * so we don't know whether to AskOnline or not. Try to determine
845 * if the volume exists by trying to read the volume header, and
846 * AskOnline if it is readable. */
847 MaybeAskOnline(salvinfo, singleVolumeNumber);
851 /* enumerate volumes in the partition.
852 * figure out sets of read-only + rw volumes.
853 * salvage each set, read-only volumes first, then read-write.
854 * Fix up inodes on last volume in set (whether it is read-write
857 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
861 if (singleVolumeNumber) {
862 /* If we delete a volume during the salvage, we indicate as such by
863 * setting the volsummary->deleted field. We need to know if we
864 * deleted a volume or not in order to know which volumes to bring
865 * back online after the salvage. If we fork, we will lose this
866 * information, since volsummary->deleted will not get set in the
867 * parent. So, don't fork. */
871 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
872 i < salvinfo->nVolumesInInodeFile; i = j) {
873 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
875 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
877 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
878 struct VolumeSummary *tsp;
879 /* Scan volume list (from partition root directory) looking for the
880 * current rw volume number in the volume list from the inode scan.
881 * If there is one here that is not in the inode volume list,
883 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
885 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
887 /* Now match up the volume summary info from the root directory with the
888 * entry in the volume list obtained from scanning inodes */
889 salvinfo->inodeSummary[j].volSummary = NULL;
890 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
891 if (tsp->header.id == vid) {
892 salvinfo->inodeSummary[j].volSummary = tsp;
898 /* Salvage the group of volumes (several read-only + 1 read/write)
899 * starting with the current read-only volume we're looking at.
902 nt_SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
904 DoSalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
905 #endif /* AFS_NT40_ENV */
909 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
910 for (; vsp < esp; vsp++) {
912 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
915 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
916 RemoveTheForce(salvinfo->fileSysPath);
918 if (!Testing && singleVolumeNumber) {
920 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
921 /* unlock vol headers so the fs can attach them when we AskOnline */
922 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
923 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
925 /* Step through the volumeSummary list and set all volumes on-line.
926 * Most volumes were taken off-line in GetVolumeSummary.
927 * If a volume was deleted, don't tell the fileserver anything, since
928 * we already told the fileserver the volume was deleted back when we
929 * destroyed the volume header.
930 * Also, make sure we bring the singleVolumeNumber back online first.
933 for (j = 0; j < salvinfo->nVolumes; j++) {
934 if (salvinfo->volumeSummaryp[j].header.id == singleVolumeNumber) {
936 if (!salvinfo->volumeSummaryp[j].deleted) {
937 AskOnline(salvinfo, singleVolumeNumber);
943 /* If singleVolumeNumber is not in our volumeSummary, it means that
944 * at least one other volume in the VG is on the partition, but the
945 * RW volume is not. We've already AskOffline'd it by now, though,
946 * so make sure we don't still have the volume checked out. */
947 AskDelete(salvinfo, singleVolumeNumber);
950 for (j = 0; j < salvinfo->nVolumes; j++) {
951 if (salvinfo->volumeSummaryp[j].header.id != singleVolumeNumber) {
952 if (!salvinfo->volumeSummaryp[j].deleted) {
953 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
959 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
960 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
963 OS_CLOSE(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
/*
 * Dispose of a volume header file that is not associated with any actual
 * data.
 *
 * The header's path is built from salvinfo->fileSysPath and the name
 * produced by VolumeExternalName_r(); a log line records that the header is
 * (or, when Testing is set, would have been) deleted.  The visible steps
 * then destroy the disk header with VDestroyVolumeDiskHeader(), unlink the
 * file while tolerating ENOENT, and call AskDelete() for the volume when
 * salvinfo->useFSYNC is set.
 * NOTE(review): the declaration of 'path', the conditions guarding the
 * destroy/unlink steps (for example whether they are skipped under Testing)
 * and any bookkeeping on 'vsp' are not visible in this excerpt.  The
 * sprintf() into 'path' is unbounded -- confirm the buffer size against the
 * full source.
 */
967 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
970 char filename[VMAXPATHLEN];
976 VolumeExternalName_r(vsp->header.id, filename, sizeof(filename));
977 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, filename);
980 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
983 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
985 Log("Error %ld destroying volume disk header for volume %lu\n",
986 afs_printable_int32_ld(code),
987 afs_printable_uint32_lu(vsp->header.id));
990 /* make sure we actually delete the header file; ENOENT
991 * is fine, since VDestroyVolumeDiskHeader probably already
993 if (unlink(path) && errno != ENOENT) {
994 Log("Unable to unlink %s (errno = %d)\n", path, errno);
996 if (salvinfo->useFSYNC) {
997 AskDelete(salvinfo, vsp->header.id);
/*
 * Comparator with the qsort() signature over struct ViceInodeInfo records.
 *
 * When either record is a special inode (vnodeNumber == INODESPECIAL), the
 * two are first compared on their read-write volume id, taken from
 * u.special.parentId for a special inode and from u.vnode.volumeId
 * otherwise.  Further checks then look at u.vnode.volumeId and, for two
 * special inodes of the same volume, u.special.type.  Ordinary inodes are
 * compared on volumeId and then vnodeNumber.  Records that are still equal
 * are compared on vnodeUniquifier and inodeDataVersion with the sense
 * reversed, so that the most desirable of several similar inodes comes
 * first.  Under AFS_3DISPARES and AFS_SGI_EXMAG those last two comparisons
 * single out the case where one value is above roughly 90% and the other
 * below roughly 10% of the counter range.
 * NOTE(review): most return statements, the assignments to p1rwid/p2rwid
 * and the closing #endif lines are not visible in this excerpt, so the sign
 * returned on each path, including the 90%/10% cases, must be confirmed
 * against the full source.
 */
1004 CompareInodes(const void *_p1, const void *_p2)
1006 const struct ViceInodeInfo *p1 = _p1;
1007 const struct ViceInodeInfo *p2 = _p2;
1008 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1009 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
1010 VolumeId p1rwid, p2rwid;
1012 (p1->u.vnode.vnodeNumber ==
1013 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
1015 (p2->u.vnode.vnodeNumber ==
1016 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1017 if (p1rwid < p2rwid)
1019 if (p1rwid > p2rwid)
1021 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1022 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
1023 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1024 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1025 if (p1->u.vnode.volumeId == p1rwid)
1027 if (p2->u.vnode.volumeId == p2rwid)
1029 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1031 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1032 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1033 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1035 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1037 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1039 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1041 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1043 /* The following tests are reversed, so that the most desirable
1044 * of several similar inodes comes first */
1045 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1046 #ifdef AFS_3DISPARES
1047 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1048 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1051 #ifdef AFS_SGI_EXMAG
1052 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1053 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1058 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1059 #ifdef AFS_3DISPARES
1060 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1061 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1064 #ifdef AFS_SGI_EXMAG
1065 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1066 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1071 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1072 #ifdef AFS_3DISPARES
1073 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1074 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1077 #ifdef AFS_SGI_EXMAG
1078 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1079 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1084 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1085 #ifdef AFS_3DISPARES
1086 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1087 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1090 #ifdef AFS_SGI_EXMAG
1091 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1092 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/*
 * Summarise the leading run of entries in 'ip' that share one volume id.
 *
 * The loop examines at most 'maxInodes' entries and stops at the first
 * entry whose u.vnode.volumeId differs from that of the initial entry.  For
 * a special inode (vnodeNumber == INODESPECIAL) the read-write volume id is
 * taken from u.special.parentId; 'maxunique' is raised to any larger
 * vnodeUniquifier that reaches that check.  The results are stored in
 * *summary: volumeId, RWvolumeId, nInodes, nSpecialInodes, maxUniquifier.
 * NOTE(review): the declarations and updates of 'n', 'nSpecial' and
 * 'maxunique', the advance of 'ip' and the return value are not visible in
 * this excerpt.
 */
1101 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1102 struct InodeSummary *summary)
1104 VolumeId volume = ip->u.vnode.volumeId;
1105 VolumeId rwvolume = volume;
1110 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1112 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1114 rwvolume = ip->u.special.parentId;
1115 /* This isn't quite right, as there could (in error) be different
1116 * parent inodes in different special vnodes */
1118 if (maxunique < ip->u.vnode.vnodeUniquifier)
1119 maxunique = ip->u.vnode.vnodeUniquifier;
1123 summary->volumeId = volume;
1124 summary->RWvolumeId = rwvolume;
1125 summary->nInodes = n;
1126 summary->nSpecialInodes = nSpecial;
1127 summary->maxUniquifier = maxunique;
/*
 * Inode filter handed to ListViceInodes() when a single volume is being
 * salvaged.  Returns non-zero when the inode belongs to
 * 'singleVolumeNumber': a special inode (vnodeNumber == INODESPECIAL) is
 * matched on u.special.parentId, any other inode on u.vnode.volumeId.
 * 'rock' is not referenced by the visible lines.
 */
1131 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, afs_uint32 singleVolumeNumber, void *rock)
1133 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1134 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1135 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1140 * Collect list of inodes in file named by path. If a truly fatal error,
1141 * unlink the file and abort. For lesser errors, return -1. The file will
1142 * be unlinked by the caller.
1145 GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolumeNumber)
1149 struct ViceInodeInfo *ip, *ip_save;
1150 struct InodeSummary summary;
1151 char summaryFileName[50];
1152 FD_t summaryFile = INVALID_FD;
1154 char *dev = salvinfo->fileSysPath;
1155 char *wpath = salvinfo->fileSysPath;
1157 char *dev = salvinfo->fileSysDeviceName;
1158 char *wpath = salvinfo->filesysfulldev;
1160 char *part = salvinfo->fileSysPath;
1165 afs_sfsize_t st_size;
1167 /* This file used to come from vfsck; cobble it up ourselves now... */
1169 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1170 singleVolumeNumber ? OnlyOneVolume : 0,
1171 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1173 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1177 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1179 if (forceSal && !ForceSalvage) {
1180 Log("***Forced salvage of all volumes on this partition***\n");
1183 OS_SEEK(inodeFile, 0L, SEEK_SET);
1184 salvinfo->inodeFd = inodeFile;
1185 if (salvinfo->inodeFd == INVALID_FD ||
1186 (st_size = OS_SIZE(salvinfo->inodeFd)) == -1) {
1187 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1189 tdir = (tmpdir ? tmpdir : part);
1191 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1192 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp."));
1194 snprintf(summaryFileName, sizeof summaryFileName,
1195 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1197 summaryFile = OS_OPEN(summaryFileName, O_RDWR|O_APPEND|O_CREAT, 0666);
1198 if (summaryFile == INVALID_FD) {
1199 Abort("Unable to create inode summary file\n");
1203 /* Using nt_unlink here since we're really using the delete on close
1204 * semantics of unlink. In most places in the salvager, we really do
1205 * mean to unlink the file at that point. Those places have been
1206 * modified to actually do that so that the NT crt can be used there.
1208 * jaltman - As commented elsewhere, this cannot work because fopen()
1209 * does not open files with DELETE and FILE_SHARE_DELETE.
1211 code = nt_unlink(summaryFileName);
1213 code = unlink(summaryFileName);
1216 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1219 if (!canfork || debug || Fork() == 0) {
1220 int nInodes = st_size / sizeof(struct ViceInodeInfo);
1222 OS_CLOSE(summaryFile);
1223 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1224 RemoveTheForce(salvinfo->fileSysPath);
1226 struct VolumeSummary *vsp;
1230 GetVolumeSummary(salvinfo, singleVolumeNumber);
1232 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1234 if (vsp->header.id == singleVolumeNumber) {
1237 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1243 MaybeAskOnline(salvinfo, singleVolumeNumber);
1245 /* make sure we get rid of stray .vol headers, even if
1246 * they're not in our volume summary (might happen if
1247 * e.g. something else created them and they're not in the
1248 * fileserver VGC) */
1249 VDestroyVolumeDiskHeader(salvinfo->fileSysPartition,
1250 singleVolumeNumber, 0 /*parent*/);
1251 AskDelete(salvinfo, singleVolumeNumber);
1255 Log("%s vice inodes on %s; not salvaged\n",
1256 singleVolumeNumber ? "No applicable" : "No", dev);
1261 ip = (struct ViceInodeInfo *)malloc(nInodes*sizeof(struct ViceInodeInfo));
1263 OS_CLOSE(summaryFile);
1265 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1268 if (OS_READ(salvinfo->inodeFd, ip, st_size) != st_size) {
1269 OS_CLOSE(summaryFile);
1270 Abort("Unable to read inode table; %s not salvaged\n", dev);
1272 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1273 if (OS_SEEK(salvinfo->inodeFd, 0, SEEK_SET) == -1
1274 || OS_WRITE(salvinfo->inodeFd, ip, st_size) != st_size) {
1275 OS_CLOSE(summaryFile);
1276 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1281 CountVolumeInodes(ip, nInodes, &summary);
1282 if (OS_WRITE(summaryFile, &summary, sizeof(summary)) != sizeof(summary)) {
1283 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1284 OS_CLOSE(summaryFile);
1288 summary.index += (summary.nInodes);
1289 nInodes -= summary.nInodes;
1290 ip += summary.nInodes;
1293 ip = ip_save = NULL;
1294 /* Following fflush is not fclose, because if it was debug mode would not work */
1295 if (OS_SYNC(summaryFile) == -1) {
1296 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1297 OS_CLOSE(summaryFile);
1301 if (canfork && !debug) {
1306 if (Wait("Inode summary") == -1) {
1307 OS_CLOSE(summaryFile);
1308 Exit(1); /* salvage of this partition aborted */
1312 st_size = OS_SIZE(summaryFile);
1313 osi_Assert(st_size >= 0);
1316 salvinfo->inodeSummary = (struct InodeSummary *)malloc(st_size);
1317 osi_Assert(salvinfo->inodeSummary != NULL);
1318 /* For GNU we need to do lseek to get the file pointer moved. */
1319 osi_Assert(OS_SEEK(summaryFile, 0, SEEK_SET) == 0);
1320 ret = OS_READ(summaryFile, salvinfo->inodeSummary, st_size);
1321 osi_Assert(ret == st_size);
1323 salvinfo->nVolumesInInodeFile = st_size / sizeof(struct InodeSummary);
1324 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1325 salvinfo->inodeSummary[i].volSummary = NULL;
1327 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)st_size);
1328 OS_CLOSE(summaryFile);
1331 if (retcode && singleVolumeNumber && !deleted) {
1332 AskError(salvinfo, singleVolumeNumber);
1338 /* Comparison routine for volume sort.
1339 This is set up so that a read-write volume comes immediately before
1340 any read-only clones of that volume */
/* Both arguments are treated as pointers to struct VolumeSummary.
 * NOTE(review): presumably this is the comparator given to the qsort() calls
 * over salvinfo->volumeSummaryp; the argument line naming the comparator is
 * not visible in this excerpt -- confirm. */
1342 CompareVolumes(const void *_p1, const void *_p2)
1344 const struct VolumeSummary *p1 = _p1;
1345 const struct VolumeSummary *p2 = _p2;
/* Primary key: the parent volume id, ascending. */
1346 if (p1->header.parent != p2->header.parent)
1347 return p1->header.parent < p2->header.parent ? -1 : 1;
/* Same parent from here on. A summary whose id equals its parent id is the
 * read-write volume. NOTE(review): the statements executed by the next two
 * tests are not visible here; per the header comment the read-write volume
 * should sort first -- confirm. */
1348 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1350 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
/* Neither is the read-write volume: order by volume id. This expression
 * yields 1, not 0, when the two ids are equal. */
1352 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
1356 * Gleans volumeSummary information by asking the fileserver
1358 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1359 * salvaging a whole partition
1361 * @return whether we obtained the volume summary information or not
1362 * @retval 0 success; we obtained the volume summary information
1363 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1365 * @retval 1 we did not get the volume summary information; either the
1366 * fileserver responded with an error, or we are not supposed to
1367 * ask the fileserver for the information (e.g. we are salvaging
1368 * the entire partition or we are not the salvageserver)
1370 * @note for non-DAFS, always returns 1
/*
 * Ask the fileserver which volumes belong to the volume group of
 * singleVolumeNumber and build salvinfo->volumeSummaryp from the answer.
 *
 * Everything between the #if and #endif below is compiled only when both
 * FSSYNC_BUILD_CLIENT and AFS_DEMAND_ATTACH_FS are defined, and it only acts
 * when programType is salvageServer and singleVolumeNumber is non-zero.
 *
 * Visible flow:
 *  - FSYNC_VGCQuery() is issued for the partition name and volume id; while
 *    it fails with SYNC_FAILED and reason FSYNC_PART_SCANNING the query is
 *    repeated after a sleep() that ramps from 1 up to a cap of 10 seconds.
 *  - SYNC_FAILED with reason FSYNC_UNKNOWN_VOLID is handled by zeroing q_res
 *    and setting q_res.rw to singleVolumeNumber.
 *  - A "fileserver refused VGCQuery request" message is logged with the code
 *    and reason (the condition guarding it is not visible in this excerpt).
 *  - If q_res.rw differs from singleVolumeNumber, a salvage of q_res.rw is
 *    requested through SALVSYNC_LinkVolume() (when SALVSYNC_BUILD_CLIENT is
 *    defined) and the process leaves via Exit(SALSRV_EXIT_VOLGROUP_LINK).
 *  - Otherwise VOL_VG_MAX_VOLS summaries are allocated with calloc(); for
 *    the entries of q_res.children[], volumes other than singleVolumeNumber
 *    go through AskOffline() and LockVolume(), the disk header is read with
 *    VReadVolumeDiskHeader() and converted with DiskToVolumeHeader(),
 *    salvinfo->nVolumes is incremented, and the array is finally sorted
 *    with qsort().
 *  - A "falling back to scanning entire partition" message is logged on the
 *    path where no summary was obtained.
 *
 * NOTE(review): several statements (returns, loop exits, closing braces) are
 * not visible in this excerpt, so the exact return paths cannot be confirmed
 * from here.
 */
1373 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1376 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1377 if (programType == salvageServer) {
1378 if (singleVolumeNumber) {
1379 FSSYNC_VGQry_response_t q_res;
1381 struct VolumeSummary *vsp;
1383 struct VolumeDiskHeader diskHdr;
/* res is zeroed before the first query; res.hdr.reason is inspected below. */
1385 memset(&res, 0, sizeof(res));
1387 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1390 * We must wait for the partition to finish scanning before
1391 * can continue, since we will not know if we got the entire
1392 * VG membership unless the partition is fully scanned.
1393 * We could, in theory, just scan the partition ourselves if
1394 * the VG cache is not ready, but we would be doing the exact
1395 * same scan the fileserver is doing; it will almost always
1396 * be faster to wait for the fileserver. The only exceptions
1397 * are if the partition does not take very long to scan, and
1398 * in that case it's fast either way, so who cares?
1400 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1401 Log("waiting for fileserver to finish scanning partition %s...\n",
1402 salvinfo->fileSysPartition->name);
1404 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1405 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1406 * just so small partitions don't need to wait over 10
1407 * seconds every time, and large partitions are generally
1408 * polled only once every ten seconds. */
1409 sleep((i > 10) ? (i = 10) : i);
1411 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1415 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1416 /* This can happen if there's no header for the volume
1417 * we're salvaging, or no headers exist for the VG (if
1418 * we're salvaging an RW). Act as if we got a response
1419 * with no VG members. The headers may be created during
1420 * salvaging, if there are inodes in this VG. */
1422 memset(&q_res, 0, sizeof(q_res));
1423 q_res.rw = singleVolumeNumber;
1427 Log("fileserver refused VGCQuery request for volume %lu on "
1428 "partition %s, code %ld reason %ld\n",
1429 afs_printable_uint32_lu(singleVolumeNumber),
1430 salvinfo->fileSysPartition->name,
1431 afs_printable_int32_ld(code),
1432 afs_printable_int32_ld(res.hdr.reason));
1436 if (q_res.rw != singleVolumeNumber) {
1437 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1438 afs_printable_uint32_lu(singleVolumeNumber),
1439 afs_printable_uint32_lu(q_res.rw));
1440 #ifdef SALVSYNC_BUILD_CLIENT
1441 if (SALVSYNC_LinkVolume(q_res.rw,
1443 salvinfo->fileSysPartition->name,
1445 Log("schedule request failed\n");
1447 #endif /* SALVSYNC_BUILD_CLIENT */
1448 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1451 salvinfo->volumeSummaryp = calloc(VOL_VG_MAX_VOLS, sizeof(struct VolumeSummary));
1452 osi_Assert(salvinfo->volumeSummaryp != NULL);
1454 salvinfo->nVolumes = 0;
1455 vsp = salvinfo->volumeSummaryp;
1457 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1458 char name[VMAXPATHLEN];
1460 if (!q_res.children[i]) {
1464 /* AskOffline for singleVolumeNumber was called much earlier */
1465 if (q_res.children[i] != singleVolumeNumber) {
1466 AskOffline(salvinfo, q_res.children[i]);
1467 if (LockVolume(salvinfo, q_res.children[i])) {
1473 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1475 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1476 afs_printable_uint32_lu(q_res.children[i]));
1481 DiskToVolumeHeader(&vsp->header, &diskHdr);
1482 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1484 salvinfo->nVolumes++;
1488 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1493 Log("Cannot get volume summary from fileserver; falling back to scanning "
1494 "entire partition\n");
1497 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
1502 * count how many volume headers are found by VWalkVolumeHeaders.
1504 * @param[in] dp the disk partition (unused)
1505 * @param[in] name full path to the .vol header (unused)
1506 * @param[in] hdr the header data (unused)
1507 * @param[in] last whether this is the last try or not (unused)
1508 * @param[in] rock actually an afs_int32*; the running count of how many
1509 * volumes we have found
/* Header-visit callback: GetVolumeSummary passes this function to
 * VWalkVolumeHeaders when it counts the headers of a whole partition.
 * Only the cast of rock to afs_int32 * is visible in this excerpt.
 * NOTE(review): the statement that updates the count and the return value
 * are not shown here; the doc comment above describes rock as the running
 * count of volumes found -- confirm against the full file. */
1514 CountHeader(struct DiskPartition64 *dp, const char *name,
1515 struct VolumeDiskHeader *hdr, int last, void *rock)
1517 afs_int32 *nvols = (afs_int32 *)rock;
1523 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
/* State shared with the VWalkVolumeHeaders callbacks through their rock
 * argument: GetVolumeSummary fills the fields in before the walk, and
 * RecordHeader and UnlinkHeader cast rock back to this type.
 * vsp points into the array stored in salvinfo->volumeSummaryp, and
 * totalVolumes is the element count that array was allocated with. */
1526 struct SalvageScanParams {
1527 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1528 * vol id of the VG we're salvaging */
1529 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1530 * we're filling in */
1531 afs_int32 nVolumes; /**< # of vols we've encountered */
1532 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1533 * # of vols we've alloc'd memory for) */
1534 int retry; /**< do we need to retry vol lock/checkout? */
1535 struct SalvInfo *salvinfo; /**< salvage job info */
1539 * records volume summary info found from VWalkVolumeHeaders.
1541 * Found volumes are also taken offline if they are in the specific volume
1542 * group we are looking for.
1544 * @param[in] dp the disk partition
1545 * @param[in] name full path to the .vol header
1546 * @param[in] hdr the header data
1547 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1548 * @param[in] rock actually a struct SalvageScanParams*, containing the
1549 * information needed to record the volume summary data
1551 * @return operation status
1553 * @retval -1 volume locking raced with fileserver restart; checking out
1554 * and locking volumes needs to be retried
1555 * @retval 1 volume header is mis-named and should be deleted
/*
 * Header-visit callback given to VWalkVolumeHeaders by GetVolumeSummary.
 * rock is cast to struct SalvageScanParams *.
 *
 * Visible flow:
 *  - The on-disk header hdr is converted into a zeroed local VolumeSummary
 *    with DiskToVolumeHeader().
 *  - If a single volume is being salvaged and this header is that volume
 *    but its parent is a different id, the salvageServer logs the request,
 *    calls SALVSYNC_LinkVolume() for the parent (when SALVSYNC_BUILD_CLIENT
 *    is defined) and leaves through Exit(SALSRV_EXIT_VOLGROUP_LINK); the
 *    other branch logs that the volume is read-only and is not salvaged.
 *  - For a partition salvage, or when the header's id or parent equals
 *    singleVolumeNumber, the text after the last OS_DIRSEPC in name is
 *    compared with the VFORMAT rendering of the header id to detect a
 *    mis-named header file.
 *  - When the name is good, or this is the last try, volumes other than
 *    singleVolumeNumber are passed to AskOffline(), and LockVolume() is
 *    called when AFS_DEMAND_ATTACH_FS or AFS_DEMAND_ATTACH_UTIL is defined.
 *  - On the last try (and not in Showmode) a mis-named header is reported
 *    with the "incorrectly named" log message.
 *  - Abort() is called if params->nVolumes exceeds params->totalVolumes;
 *    otherwise the local summary is copied to params->vsp with memcpy().
 *
 * NOTE(review): the statements that set badname, advance params->vsp and
 * params->nVolumes, and return a value are not visible in this excerpt.
 */
1558 RecordHeader(struct DiskPartition64 *dp, const char *name,
1559 struct VolumeDiskHeader *hdr, int last, void *rock)
1561 char nameShouldBe[64];
1562 struct SalvageScanParams *params;
1563 struct VolumeSummary summary;
1564 VolumeId singleVolumeNumber;
1565 struct SalvInfo *salvinfo;
1567 params = (struct SalvageScanParams *)rock;
1569 memset(&summary, 0, sizeof(summary));
1571 singleVolumeNumber = params->singleVolumeNumber;
1572 salvinfo = params->salvinfo;
1574 DiskToVolumeHeader(&summary.header, hdr);
/* The requested volume itself was found, but its parent is another id. */
1576 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1577 && summary.header.parent != singleVolumeNumber) {
1579 if (programType == salvageServer) {
1580 #ifdef SALVSYNC_BUILD_CLIENT
1581 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1582 summary.header.id, summary.header.parent);
1583 if (SALVSYNC_LinkVolume(summary.header.parent,
1587 Log("schedule request failed\n");
1590 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1593 Log("%u is a read-only volume; not salvaged\n",
1594 singleVolumeNumber);
/* Only headers relevant to this salvage are examined further: every header
 * for a partition salvage, otherwise members of the requested group. */
1599 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1600 || summary.header.parent == singleVolumeNumber) {
1602 /* check if the header file is incorrectly named */
1604 const char *base = strrchr(name, OS_DIRSEPC);
1611 snprintf(nameShouldBe, sizeof nameShouldBe,
1612 VFORMAT, afs_printable_uint32_lu(summary.header.id));
1615 if (strcmp(nameShouldBe, base)) {
1616 /* .vol file has wrong name; retry/delete */
1620 if (!badname || last) {
1621 /* only offline the volume if the header is good, or if this is
1622 * the last try looking at it; avoid AskOffline'ing the same vol
1625 if (singleVolumeNumber
1626 && summary.header.id != singleVolumeNumber) {
1627 /* don't offline singleVolumeNumber; we already did that
1630 AskOffline(salvinfo, summary.header.id);
1632 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
1634 /* don't lock the volume if the header is bad, since we're
1635 * about to delete it anyway. */
1636 if (LockVolume(salvinfo, summary.header.id)) {
1641 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
1645 if (last && !Showmode) {
1646 Log("Volume header file %s is incorrectly named (should be %s "
1647 "not %s); %sdeleted (it will be recreated later, if "
1648 "necessary)\n", name, nameShouldBe, base,
1649 (Testing ? "it would have been " : ""));
1657 if (params->nVolumes > params->totalVolumes) {
1658 /* We found more volumes than we found on the first partition walk;
1659 * apparently something created a volume while we were
1660 * partition-salvaging, or we found more than 20 vols when salvaging a
1661 * particular volume. Abort if we detect this, since other programs
1662 * supposed to not touch the partition while it is partition-salvaging,
1663 * and we shouldn't find more than 20 vols in a VG.
1665 Abort("Found %ld vol headers, but should have found at most %ld! "
1666 "Make sure the volserver/fileserver are not running at the "
1667 "same time as a partition salvage\n",
1668 afs_printable_int32_ld(params->nVolumes),
1669 afs_printable_int32_ld(params->totalVolumes));
1672 memcpy(params->vsp, &summary, sizeof(summary));
1680 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1682 * If the header could not be read in at all, the header is always unlinked.
1683 * If instead RecordHeader said the header was bad (that is, the header file
1684 * is mis-named), we only unlink if we are doing a partition salvage, as
1685 * opposed to salvaging a specific volume group.
1687 * @param[in] dp the disk partition
1688 * @param[in] name full path to the .vol header
1689 * @param[in] hdr header data, or NULL if the header could not be read
1690 * @param[in] rock actually a struct SalvageScanParams*, with some information
/*
 * Bad-header callback given to VWalkVolumeHeaders by GetVolumeSummary
 * together with RecordHeader. rock is cast to struct SalvageScanParams *.
 *
 * Two cases are visible: one where no header could be read, which logs
 * "... is not a legitimate volume header file", and an else-branch that is
 * taken only when params->singleVolumeNumber is 0 (a partition salvage).
 * When dounlink is set, unlink(name) is attempted and a failure is logged
 * with errno.
 *
 * NOTE(review): the assignments to dounlink, and whether Testing suppresses
 * the unlink, are not visible in this excerpt.
 */
1694 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1695 struct VolumeDiskHeader *hdr, void *rock)
1697 struct SalvageScanParams *params;
1700 params = (struct SalvageScanParams *)rock;
1703 /* no header; header is too bogus to read in at all */
1705 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1711 } else if (!params->singleVolumeNumber) {
1712 /* We were able to read in a header, but RecordHeader said something
1713 * was wrong with it. We only unlink those if we are doing a partition
1720 if (dounlink && unlink(name)) {
1721 Log("Error %d while trying to unlink %s\n", errno, name);
1726 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1727 * the fileserver for VG information, or by scanning the /vicepX partition.
1729 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1730 * are salvaging, or 0 if this is a partition
1733 * @return operation status
1735 * @retval -1 we raced with a fileserver restart; checking out and locking
1736 * volumes must be retried
/*
 * Fill salvinfo->volumeSummaryp and salvinfo->nVolumes.
 *
 * AskVolumeSummary() is tried first. If the fileserver did not supply the
 * information, the volume headers on the partition are walked instead:
 *  - for a partition salvage (singleVolumeNumber == 0) a first
 *    VWalkVolumeHeaders() pass with CountHeader sizes the array;
 *  - for a single volume group the array is sized to VOL_VG_MAX_VOLS;
 *  - a second VWalkVolumeHeaders() pass with RecordHeader and UnlinkHeader
 *    records each header, using a struct SalvageScanParams as the rock.
 * The resulting array is sorted with qsort().
 *
 * The rock for the recording walk must be the address of the local params
 * structure, because RecordHeader and UnlinkHeader cast it back to
 * struct SalvageScanParams *.
 *
 * NOTE(review): the return statements and the tests on code are not visible
 * in this excerpt; see the doc comment above for the documented results.
 */
1739 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1741 afs_int32 nvols = 0;
1742 struct SalvageScanParams params;
1745 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1747 /* we successfully got the vol information from the fileserver; no
1748 * need to scan the partition */
1752 /* we need to retry volume checkout */
1756 if (!singleVolumeNumber) {
1757 /* Count how many volumes we have in /vicepX */
1758 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1761 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1766 nvols = VOL_VG_MAX_VOLS;
1769 salvinfo->volumeSummaryp = calloc(nvols, sizeof(struct VolumeSummary));
1770 osi_Assert(salvinfo->volumeSummaryp != NULL);
1772 params.singleVolumeNumber = singleVolumeNumber;
1773 params.vsp = salvinfo->volumeSummaryp;
1774 params.nVolumes = 0;
1775 params.totalVolumes = nvols;
1777 params.salvinfo = salvinfo;
1779 /* walk the partition directory of volume headers and record the info
1780 * about them; unlinking invalid headers */
1781 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1782 UnlinkHeader, &params);
1784 /* we apparently need to retry checking-out/locking volumes */
1788 Abort("Failed to get volume header summary\n");
1790 salvinfo->nVolumes = params.nVolumes;
1792 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1798 /* Find the link table. This should be associated with the RW volume or, if
1799 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
/* Search the special inodes of each of the nVols summaries at isp for one
 * whose type is VI_LINKTABLE and return its inode number.
 * allInodes is indexed with each summary's index field, so it must be the
 * base that those index values are relative to.
 * NOTE(review): the value returned when no link table inode is found is not
 * visible in this excerpt; the caller tests the result with VALID_INO(). */
1802 FindLinkHandle(struct InodeSummary *isp, int nVols,
1803 struct ViceInodeInfo *allInodes)
1806 struct ViceInodeInfo *ip;
1808 for (i = 0; i < nVols; i++) {
/* First inode record belonging to volume i. */
1809 ip = allInodes + isp[i].index;
/* Only the leading nSpecialInodes records of the volume are examined. */
1810 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1811 if (ip[j].u.special.type == VI_LINKTABLE)
1812 return ip[j].inodeNumber;
/*
 * Create (if needed) and initialise the link table of the volume group
 * described by isp.
 *
 * If ino is not a valid inode, a new special inode of type VI_LINKTABLE is
 * created with IH_CREATE() for isp->RWvolumeId. salvinfo->VGLinkH is then
 * initialised on that inode and opened, the file is truncated to a version
 * stamp plus one short, and a version stamp carrying LINKTABLEMAGIC and
 * LINKTABLEVERSION is written at offset 0. Failures reported through Abort()
 * include errno in the message; the message for a failed header write now
 * says "write" instead of repeating the truncate message.
 *
 * If isp->volSummary is set, its header.linkTable field is updated to ino.
 */
1819 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1821 struct versionStamp version;
1824 if (!VALID_INO(ino))
1826 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->RWvolumeId,
1827 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1828 if (!VALID_INO(ino))
1830 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1831 isp->RWvolumeId, errno);
1832 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1833 fdP = IH_OPEN(salvinfo->VGLinkH);
1835 Abort("Can't open link table for volume %u (error = %d)\n",
1836 isp->RWvolumeId, errno);
1838 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1839 Abort("Can't truncate link table for volume %u (error = %d)\n",
1840 isp->RWvolumeId, errno);
1842 version.magic = LINKTABLEMAGIC;
1843 version.version = LINKTABLEVERSION;
1845 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1847 Abort("Can't write link table header for volume %u (error = %d)\n",
1848 isp->RWvolumeId, errno);
1850 FDH_REALLYCLOSE(fdP);
1852 /* If the volume summary exists (i.e., the V*.vol header file exists),
1853 * then set this inode there as well.
1855 if (isp->volSummary)
1856 isp->volSummary->header.linkTable = ino;
/* Body of the thread start routine that nt_SalvageVolumeGroup hands to
 * pthread_create() under the name nt_SVG: arg is cast to SVGParms_t * and
 * its three fields are forwarded to DoSalvageVolumeGroup().
 * NOTE(review): the signature and return statement of this routine are not
 * visible in this excerpt. */
1865 SVGParms_t *parms = (SVGParms_t *) arg;
1866 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * Run the salvage of one volume group on a separate thread and wait for it.
 *
 * The per-volume fields of salvinfo (VolumeChanged, VGLinkH, VGLinkH_cnt,
 * VolInfo) are reset, the arguments are packed into parms, and a thread is
 * created with the PTHREAD_CREATE_JOINABLE attribute running nt_SVG. The
 * caller then blocks in pthread_join(), so parms stays valid for the whole
 * lifetime of the thread.
 *
 * Each pthread setup step that fails is reported with Log().
 * NOTE(review): the tests on code, the early exits after a failure, and any
 * pthread_attr_destroy() call are not visible in this excerpt.
 */
1871 nt_SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1874 pthread_attr_t tattr;
1878 /* Initialize per volume global variables, even if later code does so */
1879 salvinfo->VolumeChanged = 0;
1880 salvinfo->VGLinkH = NULL;
1881 salvinfo->VGLinkH_cnt = 0;
1882 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
/* Arguments for the worker thread; read back in nt_SVG. */
1884 parms.svgp_inodeSummaryp = isp;
1885 parms.svgp_count = nVols;
1886 parms.svgp_salvinfo = salvinfo;
1887 code = pthread_attr_init(&tattr);
1889 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1893 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1895 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1898 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1900 Log("Failed to create thread to salvage volume group %u\n",
/* Wait for the worker; its return value is ignored. */
1904 (void)pthread_join(tid, NULL);
1906 #endif /* AFS_NT40_ENV */
/*
 * Salvage one volume group: the nVols consecutive inode summaries that
 * start at isp.
 *
 * Visible flow:
 *  - haveRWvolume is set when the first summary's volumeId equals its
 *    RWvolumeId and it has at least one special inode.
 *  - When ShowMounts is off, or on without a read-write volume, and
 *    ForceSalvage is off, QuickCheck() is called (what happens when it
 *    returns non-zero is not visible in this excerpt).
 *  - When canfork is set and debug is not, Fork() is called and the process
 *    that gets a non-zero result only calls Wait().
 *  - The group's ViceInodeInfo records are read from salvinfo->inodeFd,
 *    starting at record isp->index, into a buffer obtained from malloc().
 *  - Under AFS_NAMEI_ENV the link table inode is looked up with
 *    FindLinkHandle(), opened, and its version stamp is checked against
 *    LINKTABLEMAGIC. Without a usable link table a "Recreating" (or "Would
 *    have recreated") message is logged, CreateLinkTable() is called and
 *    namei_SetLinkCount(..., 1, 1) is applied to every inode that follows
 *    the special inodes of each volume.
 *  - Volumes are processed from index nVols - 1 down to salvageTo, each with
 *    a check == 1 pass followed by a check == 0 pass through
 *    SalvageVolumeHeaderFile() and SalvageVnodes(); failures are handed to
 *    MaybeZapVolume().
 *  - For every inode record, IH_DEC() is called while linkCount is positive
 *    and IH_INC() while it is negative; under AFS_NAMEI_ENV the link table
 *    inode itself is skipped in that loop and adjusted afterwards.
 *  - SalvageVolume() is called and salvinfo->VGLinkH is released.
 *
 * NOTE(review): many short statements (returns, breaks, counter updates,
 * closing braces) are not visible in this excerpt.
 */
1909 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1911 struct ViceInodeInfo *inodes, *allInodes, *ip;
1912 int i, totalInodes, size, salvageTo;
1916 int dec_VGLinkH = 0;
1918 FdHandle_t *fdP = NULL;
1920 salvinfo->VGLinkH_cnt = 0;
1921 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1922 && isp->nSpecialInodes > 0);
1923 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1924 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1927 if (ShowMounts && !haveRWvolume)
1929 if (canfork && !debug && Fork() != 0) {
1930 (void)Wait("Salvage volume group");
/* Total number of inode records belonging to this volume group. */
1933 for (i = 0, totalInodes = 0; i < nVols; i++)
1934 totalInodes += isp[i].nInodes;
1935 size = totalInodes * sizeof(struct ViceInodeInfo);
/* NOTE(review): no NULL check on the malloc() result is visible before the
 * pointer arithmetic and the read below -- confirm against the full file. */
1936 inodes = (struct ViceInodeInfo *)malloc(size);
1937 allInodes = inodes - isp->index; /* this would be the base of all the inodes
1938 * for the partition, if all the inodes
1939 * had been read into memory */
1941 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1943 osi_Assert(OS_READ(salvinfo->inodeFd, inodes, size) == size);
1945 /* Don't try to salvage a read write volume if there isn't one on this
1947 salvageTo = haveRWvolume ? 0 : 1;
1949 #ifdef AFS_NAMEI_ENV
1950 ino = FindLinkHandle(isp, nVols, allInodes);
1951 if (VALID_INO(ino)) {
1952 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1953 fdP = IH_OPEN(salvinfo->VGLinkH);
1955 if (VALID_INO(ino) && fdP != NULL) {
1956 struct versionStamp header;
1957 afs_sfsize_t nBytes;
1959 nBytes = FDH_PREAD(fdP, (char *)&header, sizeof(struct versionStamp), 0);
1960 if (nBytes != sizeof(struct versionStamp)
1961 || header.magic != LINKTABLEMAGIC) {
1962 Log("Bad linktable header for volume %u.\n", isp->RWvolumeId);
1963 FDH_REALLYCLOSE(fdP);
1967 if (!VALID_INO(ino) || fdP == NULL) {
1968 Log("%s link table for volume %u.\n",
1969 Testing ? "Would have recreated" : "Recreating", isp->RWvolumeId);
1971 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1974 struct ViceInodeInfo *ip;
1975 CreateLinkTable(salvinfo, isp, ino);
1976 fdP = IH_OPEN(salvinfo->VGLinkH);
1977 /* Sync fake 1 link counts to the link table, now that it exists */
1979 for (i = 0; i < nVols; i++) {
1980 ip = allInodes + isp[i].index;
1981 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1982 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1989 FDH_REALLYCLOSE(fdP);
1991 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1994 /* Salvage in reverse order--read/write volume last; this way any
1995 * Inodes not referenced by the time we salvage the read/write volume
1996 * can be picked up by the read/write volume */
1997 /* ACTUALLY, that's not done right now--the inodes just vanish */
1998 for (i = nVols - 1; i >= salvageTo; i--) {
2000 struct InodeSummary *lisp = &isp[i];
2001 #ifdef AFS_NAMEI_ENV
2002 /* If only the RO is present on this partition, the link table
2003 * shows up as a RW volume special file. Need to make sure the
2004 * salvager doesn't try to salvage the non-existent RW.
2006 if (rw && nVols > 1 && isp[i].nSpecialInodes == 1) {
2007 /* If this only special inode is the link table, continue */
2008 if (inodes->u.special.type == VI_LINKTABLE) {
2015 Log("%s VOLUME %u%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
2016 lisp->volumeId, (Testing ? "(READONLY mode)" : ""));
2017 /* Check inodes twice. The second time do things seriously. This
2018 * way the whole RO volume can be deleted, below, if anything goes wrong */
2019 for (check = 1; check >= 0; check--) {
2021 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
2023 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
2024 if (rw && deleteMe) {
2025 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
2026 * volume won't be called */
2032 if (rw && check == 1)
2034 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
2035 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
2041 /* Fix actual inode counts */
2044 Log("totalInodes %d\n",totalInodes);
2045 for (ip = inodes; totalInodes; ip++, totalInodes--) {
2046 static int TraceBadLinkCounts = 0;
2047 #ifdef AFS_NAMEI_ENV
2048 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
2049 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
2050 VGLinkH_p1 = ip->u.param[0];
2051 continue; /* Deal with this last. */
2054 if (ip->linkCount != 0 && TraceBadLinkCounts) {
2055 TraceBadLinkCounts--; /* Limit reports, per volume */
2056 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]);
2058 while (ip->linkCount > 0) {
2059 /* below used to assert, not break */
2061 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2062 Log("idec failed. inode %s errno %d\n",
2063 PrintInode(stmp, ip->inodeNumber), errno);
2069 while (ip->linkCount < 0) {
2070 /* these used to be asserts */
2072 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2073 Log("iinc failed. inode %s errno %d\n",
2074 PrintInode(stmp, ip->inodeNumber), errno);
2081 #ifdef AFS_NAMEI_ENV
2082 while (dec_VGLinkH > 0) {
2083 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2084 Log("idec failed on link table, errno = %d\n", errno);
2088 while (dec_VGLinkH < 0) {
2089 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2090 Log("iinc failed on link table, errno = %d\n", errno);
2097 /* Directory consistency checks on the rw volume */
2099 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2100 IH_RELEASE(salvinfo->VGLinkH);
2102 if (canfork && !debug) {
/*
 * Inspect the volume info header of each of the nVols summaries at isp.
 *
 * For each summary a handle is initialised on the inode named by
 * vs->header.volumeInfo (with vs->header.parent as the volume id) and a
 * VolumeDiskData is read from offset 0. The header passes the test when the
 * read returns the full size, the magic is VOLUMEINFOMAGIC, dontSalvage
 * equals DONT_SALVAGE, and needsSalvaged and destroyMe are both 0. For a
 * passing header whose inUse field is non-zero, inUse is cleared, inService
 * is set to 1, and the header is written back with IH_IWRITE().
 *
 * NOTE(review): the return statements are not visible in this excerpt; the
 * caller uses the result as a truth value next to !ForceSalvage.
 * NOTE(review): vs is dereferenced in IH_INIT(); a NULL test on vs, if any,
 * is not visible here -- confirm against the full file.
 */
2109 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2111 /* Check headers BEFORE forking */
2115 for (i = 0; i < nVols; i++) {
2116 struct VolumeSummary *vs = isp[i].volSummary;
2117 VolumeDiskData volHeader;
2119 /* Don't salvage just because phantom rw volume is there... */
2120 /* (If a read-only volume exists, read/write inodes must also exist) */
2121 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2125 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2126 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2127 == sizeof(volHeader)
2128 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2129 && volHeader.dontSalvage == DONT_SALVAGE
2130 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
/* The header says the volume is still in use: reset the flags on disk. */
2131 if (volHeader.inUse != 0) {
2132 volHeader.inUse = 0;
2133 volHeader.inService = 1;
2135 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2136 != sizeof(volHeader)) {
2152 /* SalvageVolumeHeaderFile
2154 * Salvage the top level V*.vol header file. Make sure the special files
2155 * exist and that there are no duplicates.
2157 * Calls SalvageHeader for each possible type of volume special file.
/*
 * Rebuild and, if necessary, rewrite the V*.vol header of the volume
 * described by isp.
 *
 * Parameters as used below:
 *  - inodes is indexed with isp->index + i, so it must be the base that the
 *    summary's index field is relative to.
 *  - RW is non-zero for the read-write volume; for other volumes
 *    ClearROInUseBit() is applied to the existing summary when check is 0.
 *  - check selects the dry-run pass: link counts are only decremented, and
 *    the "; repairing" log suffix is only printed, when check is 0.
 *  - deleteMe is forwarded to SalvageHeader().
 *
 * Visible flow:
 *  - A skip array with one entry per special inode is allocated with
 *    calloc(); if that fails a message is logged and the salvage continues
 *    without duplicate recovery.
 *  - If a volume summary already exists, special inodes whose number matches
 *    the one referenced by the existing header are remembered in
 *    goodspecial[].
 *  - tempHeader is reset and given VOLUMEHEADERMAGIC, VOLUMEHEADERVERSION,
 *    the volume id and the parent id.
 *  - Adjacent special inodes of the same type are reported as duplicates;
 *    the one referenced by the existing header is preferred, otherwise
 *    "cannot determine which is correct" is logged.
 *  - Each special inode with a valid, non-obsolete type is either treated as
 *    an orphan (when marked in skip[]) or stored into tempHeader through
 *    stuff[]; inodes with an out-of-range type are logged as rubbish.
 *  - salvinfo->VGLinkH_cnt is incremented once per header.
 *  - SalvageHeader() is called for every entry of stuff[]; the link table
 *    entry is first pointed at salvinfo->VGLinkH->ih_ino when that is valid.
 *  - If there is no volume summary one is allocated and the header will be
 *    created with VCreateVolumeDiskHeader; if the existing header differs
 *    from tempHeader it will be rewritten with VWriteVolumeDiskHeader.
 *  - The new header is copied into isp->volSummary->header, converted with
 *    VolumeHeaderToDisk() and written through writefunc; a write error is
 *    logged. Finally volumeInfoHandle is initialised with IH_INIT().
 *
 * NOTE(review): return statements, several guards (including those on
 * Testing and check) and the name of the call comparing the two headers are
 * not visible in this excerpt.
 */
2161 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2162 struct ViceInodeInfo *inodes, int RW,
2163 int check, int *deleteMe)
2166 struct ViceInodeInfo *ip;
2167 int allinodesobsolete = 1;
2168 struct VolumeDiskHeader diskHeader;
2169 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2171 struct VolumeHeader tempHeader;
2172 struct afs_inode_info stuff[MAXINODETYPE];
2174 /* keeps track of special inodes that are probably 'good'; they are
2175 * referenced in the vol header, and are included in the given inodes
2180 } goodspecial[MAXINODETYPE];
2185 memset(goodspecial, 0, sizeof(goodspecial));
2187 skip = calloc(isp->nSpecialInodes, sizeof(*skip));
2189 Log("cannot allocate memory for inode skip array when salvaging "
2190 "volume %lu; not performing duplicate special inode recovery\n",
2191 afs_printable_uint32_lu(isp->volumeId));
2192 /* still try to perform the salvage; the skip array only does anything
2193 * if we detect duplicate special inodes */
2196 init_inode_info(&tempHeader, stuff);
2199 * First, look at the special inodes and see if any are referenced by
2200 * the existing volume header. If we find duplicate special inodes, we
2201 * can use this information to use the referenced inode (it's more
2202 * likely to be the 'good' one), and throw away the duplicates.
2204 if (isp->volSummary && skip) {
2205 /* use tempHeader, so we can use the stuff[] array to easily index
2206 * into the isp->volSummary special inodes */
2207 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2209 for (i = 0; i < isp->nSpecialInodes; i++) {
2210 ip = &inodes[isp->index + i];
2211 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2212 /* will get taken care of in a later loop */
2215 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2216 goodspecial[ip->u.special.type-1].valid = 1;
2217 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2222 memset(&tempHeader, 0, sizeof(tempHeader));
2223 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2224 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2225 tempHeader.id = isp->volumeId;
2226 tempHeader.parent = isp->RWvolumeId;
2228 /* Check for duplicates (inodes are sorted by type field) */
2229 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2230 ip = &inodes[isp->index + i];
2231 if (ip->u.special.type == (ip + 1)->u.special.type) {
2232 afs_ino_str_t stmp1, stmp2;
2234 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2235 /* Will be caught in the loop below */
2239 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2240 ip->u.special.type, isp->volumeId,
2241 PrintInode(stmp1, ip->inodeNumber),
2242 PrintInode(stmp2, (ip+1)->inodeNumber));
2244 if (skip && goodspecial[ip->u.special.type-1].valid) {
2245 Inode gi = goodspecial[ip->u.special.type-1].inode;
2248 Log("using special inode referenced by vol header (%s)\n",
2249 PrintInode(stmp1, gi));
2252 /* the volume header references some special inode of
2253 * this type in the inodes array; are we it? */
2254 if (ip->inodeNumber != gi) {
2256 } else if ((ip+1)->inodeNumber != gi) {
2257 /* in case this is the last iteration; we need to
2258 * make sure we check ip+1, too */
2263 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp->volumeId);
2271 for (i = 0; i < isp->nSpecialInodes; i++) {
2273 ip = &inodes[isp->index + i];
2274 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2276 Log("Rubbish header inode %s of type %d\n",
2277 PrintInode(stmp, ip->inodeNumber),
2278 ip->u.special.type);
2284 Log("Rubbish header inode %s of type %d; deleted\n",
2285 PrintInode(stmp, ip->inodeNumber),
2286 ip->u.special.type);
2287 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2288 if (skip && skip[i]) {
2289 if (orphans == ORPH_REMOVE) {
2290 Log("Removing orphan special inode %s of type %d\n",
2291 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2294 Log("Ignoring orphan special inode %s of type %d\n",
2295 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2296 /* fall through to the ip->linkCount--; line below */
2299 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2300 allinodesobsolete = 0;
2302 if (!check && ip->u.special.type != VI_LINKTABLE)
2303 ip->linkCount--; /* Keep the inode around */
2311 if (allinodesobsolete) {
2318 salvinfo->VGLinkH_cnt++; /* one for every header. */
2320 if (!RW && !check && isp->volSummary) {
2321 ClearROInUseBit(isp->volSummary);
2325 for (i = 0; i < MAXINODETYPE; i++) {
2326 if (stuff[i].inodeType == VI_LINKTABLE) {
2327 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2328 * And we may have recreated the link table earlier, so set the
2329 * RW header as well. The header magic was already checked.
2331 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2332 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2336 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2340 if (isp->volSummary == NULL) {
2342 char headerName[64];
2343 snprintf(headerName, sizeof headerName, VFORMAT,
2344 afs_printable_uint32_lu(isp->volumeId));
2345 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2346 salvinfo->fileSysPath, headerName);
2348 Log("No header file for volume %u\n", isp->volumeId);
2352 Log("No header file for volume %u; %screating %s\n",
2353 isp->volumeId, (Testing ? "it would have been " : ""),
2355 isp->volSummary = calloc(1, sizeof(struct VolumeSummary));
2357 writefunc = VCreateVolumeDiskHeader;
2360 char headerName[64];
2361 /* hack: these two fields are obsolete... */
2362 isp->volSummary->header.volumeAcl = 0;
2363 isp->volSummary->header.volumeMountTable = 0;
2366 (&isp->volSummary->header, &tempHeader,
2367 sizeof(struct VolumeHeader))) {
2368 VolumeExternalName_r(isp->volumeId, headerName, sizeof(headerName));
2369 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2370 salvinfo->fileSysPath, headerName);
2372 Log("Header file %s is damaged or no longer valid%s\n", path,
2373 (check ? "" : "; repairing"));
2377 writefunc = VWriteVolumeDiskHeader;
2381 memcpy(&isp->volSummary->header, &tempHeader,
2382 sizeof(struct VolumeHeader));
2385 Log("It would have written a new header file for volume %u\n",
2389 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2390 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2392 Log("Error %ld writing volume header file for volume %lu\n",
2393 afs_printable_int32_ld(code),
2394 afs_printable_uint32_lu(diskHeader.id));
2399 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2400 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader: check, and if necessary recreate, one special header
 * inode (described by sp) of the volume summarized by isp.
 *
 * As far as the visible code shows:
 *  - without AFS_NAMEI_ENV the link table entry is accepted immediately;
 *  - a zero inode number in the volume header is logged and a replacement
 *    inode is created with IH_CREATE;
 *  - the inode is opened and sp->size bytes are read; a short read or a
 *    magic number different from sp->stamp.magic is logged as corruption;
 *  - with AFS_NAMEI_ENV, namei_FixSpecialOGM is run on the open handle;
 *  - when recreate is set and Testing is off, the file is truncated and
 *    rewritten: a fresh VolumeDiskData (name bogus.<volumeId>, not in
 *    service, not blessed, uniquifier maxUniquifier + 1 + 1000) for
 *    VI_VOLINFO, or only the version stamp for the other inode types;
 *  - for VI_VOLINFO the header data is copied to salvinfo->VolInfo and the
 *    update (or creation) time is logged.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 * The caller compares the result with -1, so presumably 0 means success
 * and -1 failure -- confirm.  deleteMe is presumably set when the volume
 * info carries destroyMe == DESTROY_ME -- confirm.
 */
2405 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2406 struct InodeSummary *isp, int check, int *deleteMe)
2409 VolumeDiskData volumeInfo;
2410 struct versionStamp fileHeader;
2419 #ifndef AFS_NAMEI_ENV
2420 if (sp->inodeType == VI_LINKTABLE)
2421 return 0; /* header magic was already checked */
2423 if (*(sp->inode) == 0) {
2425 Log("Missing inode in volume header (%s)\n", sp->description);
2429 Log("Missing inode in volume header (%s); %s\n", sp->description,
2430 (Testing ? "it would have recreated it" : "recreating"));
2433 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2434 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2435 if (!VALID_INO(*(sp->inode)))
2437 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2438 sp->description, errno);
2443 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2444 fdP = IH_OPEN(specH);
2445 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2446 /* bail out early and destroy the volume */
2448 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2455 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2456 sp->description, errno);
2459 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2460 || header.fileHeader.magic != sp->stamp.magic)) {
2462 Log("Part of the header (%s) is corrupted\n", sp->description);
2463 FDH_REALLYCLOSE(fdP);
2467 Log("Part of the header (%s) is corrupted; recreating\n",
2470 /* header can be garbage; make sure we don't read garbage data from
2472 memset(&header, 0, sizeof(header));
2474 #ifdef AFS_NAMEI_ENV
2475 if (namei_FixSpecialOGM(fdP, check)) {
2476 Log("Error with namei header OGM data (%s)\n", sp->description);
2477 FDH_REALLYCLOSE(fdP);
2482 if (sp->inodeType == VI_VOLINFO
2483 && header.volumeInfo.destroyMe == DESTROY_ME) {
2486 FDH_REALLYCLOSE(fdP);
2490 if (recreate && !Testing) {
2493 ("Internal error: recreating volume header (%s) in check mode\n",
2495 nBytes = FDH_TRUNC(fdP, 0);
2497 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2498 sp->description, errno);
2500 /* The following code should be moved into vutil.c */
2501 if (sp->inodeType == VI_VOLINFO) {
2503 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2504 header.volumeInfo.stamp = sp->stamp;
2505 header.volumeInfo.id = isp->volumeId;
2506 header.volumeInfo.parentId = isp->RWvolumeId;
2507 sprintf(header.volumeInfo.name, "bogus.%u", isp->volumeId);
2508 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2509 isp->volumeId, isp->volumeId);
2510 header.volumeInfo.inService = 0;
2511 header.volumeInfo.blessed = 0;
2512 /* The + 1000 is a hack in case there are any files out in venus caches */
2513 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2514 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2515 header.volumeInfo.needsCallback = 0;
2516 gettimeofday(&tp, NULL);
2517 header.volumeInfo.creationDate = tp.tv_sec;
2519 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2520 sizeof(header.volumeInfo), 0);
2521 if (nBytes != sizeof(header.volumeInfo)) {
2524 ("Unable to write volume header file (%s) (errno = %d)\n",
2525 sp->description, errno);
2526 Abort("Unable to write entire volume header file (%s)\n",
2530 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2531 if (nBytes != sizeof(sp->stamp)) {
2534 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2535 sp->description, errno);
2537 ("Unable to write entire version stamp in volume header file (%s)\n",
2542 FDH_REALLYCLOSE(fdP);
2544 if (sp->inodeType == VI_VOLINFO) {
2545 salvinfo->VolInfo = header.volumeInfo;
2549 if (salvinfo->VolInfo.updateDate) {
2550 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2552 Log("%s (%u) %supdated %s\n", salvinfo->VolInfo.name,
2553 salvinfo->VolInfo.id,
2554 (Testing ? "it would have been " : ""), update);
2556 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2558 Log("%s (%u) not updated (created %s)\n",
2559 salvinfo->VolInfo.name, salvinfo->VolInfo.id, update);
/*
 * SalvageVnodes: salvage both vnode indexes (small first, then large) of
 * the volume described by thisIsp against the inode list of its group.
 *
 * rwIsp   - summary of the read/write volume; supplies the position of
 *           the first non-special inode and the number of inodes handed
 *           to SalvageIndex.
 * thisIsp - summary of the volume whose index files are salvaged; RW is
 *           true when this is the same object as rwIsp.
 * inodes  - inode array; the slice starting at ioffset is passed down.
 * check   - passed through to SalvageIndex unchanged.
 *
 * Returns 0 only when both SalvageIndex results are 0, otherwise -1.
 * NOTE(review): the statement guarded by (check && ismall == -1) is not
 * visible in this excerpt; presumably an early failure return -- confirm.
 */
2569 SalvageVnodes(struct SalvInfo *salvinfo,
2570 struct InodeSummary *rwIsp,
2571 struct InodeSummary *thisIsp,
2572 struct ViceInodeInfo *inodes, int check)
2574 int ilarge, ismall, ioffset, RW, nInodes;
2575 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
2578 RW = (rwIsp == thisIsp);
2579 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2581 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2582 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2583 if (check && ismall == -1)
2586 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2587 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2588 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex: walk one vnode index file (inode ino, vnode class given by
 * class) and reconcile every allocated vnode with the inode list ip.
 *
 * The index file is opened through an inode handle and read one on-disk
 * vnode (vcp->diskSize bytes) at a time, skipping the first slot.  The
 * file size must be an exact multiple of the vnode size (asserted).
 *
 * For each vnode whose type is not vNull the visible code:
 *  - zeroes vnodes that have inode number 0 or a wrong vnodeMagic;
 *  - advances ip past inodes that belong to lower vnode numbers, then
 *    looks for an inode of this vnode number, preferring the one whose
 *    inode number equals the one stored in the vnode;
 *  - with a matching inode: repairs uniquifier, dataVersion, inode number
 *    and length in the vnode and decrements ip->linkCount so the inode is
 *    kept;
 *  - without a matching inode: logs and zeroes the vnode when it names an
 *    inode or is a directory;
 *  - writes a modified vnode back with IH_IWRITE unless Testing is set and
 *    flags salvinfo->VolumeChanged.
 *
 * A vnode change while check is set trips an assertion.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * SalvageVnodes treats 0 as success and -1 as failure -- confirm.
 */
2592 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2593 struct ViceInodeInfo *ip, int nInodes,
2594 struct VolumeSummary *volSummary, int check)
2596 char buf[SIZEOF_LARGEDISKVNODE];
2597 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2599 StreamHandle_t *file;
2600 struct VnodeClassInfo *vcp;
2602 afs_sfsize_t nVnodes;
2603 afs_fsize_t vnodeLength;
2605 afs_ino_str_t stmp1, stmp2;
2609 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2610 fdP = IH_OPEN(handle);
2611 osi_Assert(fdP != NULL);
2612 file = FDH_FDOPEN(fdP, "r+");
2613 osi_Assert(file != NULL);
2614 vcp = &VnodeClassInfo[class];
2615 size = OS_SIZE(fdP->fd_fd);
2616 osi_Assert(size != -1);
2617 nVnodes = (size / vcp->diskSize) - 1;
2619 osi_Assert((nVnodes + 1) * vcp->diskSize == size);
2620 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
2624 for (vnodeIndex = 0;
2625 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2626 nVnodes--, vnodeIndex++) {
2627 if (vnode->type != vNull) {
2628 int vnodeChanged = 0;
2629 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
2630 if (VNDISK_GET_INO(vnode) == 0) {
2632 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2633 memset(vnode, 0, vcp->diskSize);
2637 if (vcp->magic != vnode->vnodeMagic) {
2638 /* bad magic #, probably partially created vnode */
2640 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2641 vnodeNumber, afs_printable_uint32_lu(vnode->vnodeMagic),
2642 afs_printable_uint32_lu(vcp->magic));
2643 memset(vnode, 0, vcp->diskSize);
2647 Log("Partially allocated vnode %d deleted.\n",
2649 memset(vnode, 0, vcp->diskSize);
2653 /* ****** Should do a bit more salvage here: e.g. make sure
2654 * vnode type matches what it should be given the index */
2655 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2656 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2657 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2658 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2665 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2666 /* The following doesn't work, because the version number
2667 * is not maintained correctly by the file server */
2668 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2669 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2671 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2677 /* For RW volume, look for vnode with matching inode number;
2678 * if no such match, take the first determined by our sort
2680 struct ViceInodeInfo *lip = ip;
2681 int lnInodes = nInodes;
2683 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2684 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2693 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2694 /* "Matching" inode */
2698 vu = vnode->uniquifier;
2699 iu = ip->u.vnode.vnodeUniquifier;
2700 vd = vnode->dataVersion;
2701 id = ip->u.vnode.inodeDataVersion;
2703 * Because of the possibility of the uniquifier overflows (> 4M)
2704 * we compare them modulo the low 22-bits; we shouldn't worry
2705 * about mismatching since they shouldn't to many old
2706 * uniquifiers of the same vnode...
2708 if (IUnique(vu) != IUnique(iu)) {
2710 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2713 vnode->uniquifier = iu;
2714 #ifdef AFS_3DISPARES
2715 vnode->dataVersion = (id >= vd ?
2718 1887437 ? vd : id) :
2721 1887437 ? id : vd));
2723 #if defined(AFS_SGI_EXMAG)
2724 vnode->dataVersion = (id >= vd ?
2727 15099494 ? vd : id) :
2730 15099494 ? id : vd));
2732 vnode->dataVersion = (id > vd ? id : vd);
2733 #endif /* AFS_SGI_EXMAG */
2734 #endif /* AFS_3DISPARES */
2737 /* don't bother checking for vd > id any more, since
2738 * partial file transfers always result in this state,
2739 * and you can't do much else anyway (you've already
2740 * found the best data you can) */
2741 #ifdef AFS_3DISPARES
2742 if (!vnodeIsDirectory(vnodeNumber)
2743 && ((vd < id && (id - vd) < 1887437)
2744 || ((vd > id && (vd - id) > 1887437)))) {
2746 #if defined(AFS_SGI_EXMAG)
2747 if (!vnodeIsDirectory(vnodeNumber)
2748 && ((vd < id && (id - vd) < 15099494)
2749 || ((vd > id && (vd - id) > 15099494)))) {
2751 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2752 #endif /* AFS_SGI_EXMAG */
2755 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2756 vnode->dataVersion = id;
2761 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2764 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2766 VNDISK_SET_INO(vnode, ip->inodeNumber);
2771 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2773 VNDISK_SET_INO(vnode, ip->inodeNumber);
2776 VNDISK_GET_LEN(vnodeLength, vnode);
2777 if (ip->byteCount != vnodeLength) {
2780 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2785 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2786 VNDISK_SET_LEN(vnode, ip->byteCount);
2790 ip->linkCount--; /* Keep the inode around */
2793 } else { /* no matching inode */
2795 if (VNDISK_GET_INO(vnode) != 0
2796 || vnode->type == vDirectory) {
2797 /* No matching inode--get rid of the vnode */
2799 if (VNDISK_GET_INO(vnode)) {
2801 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2805 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2810 if (VNDISK_GET_INO(vnode)) {
2812 time_t serverModifyTime = vnode->serverModifyTime;
2813 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2817 time_t serverModifyTime = vnode->serverModifyTime;
2818 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2821 memset(vnode, 0, vcp->diskSize);
2824 /* Should not reach here becuase we checked for
2825 * (inodeNumber == 0) above. And where we zero the vnode,
2826 * we also goto vnodeDone.
2830 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2834 } /* VNDISK_GET_INO(vnode) != 0 */
2836 osi_Assert(!(vnodeChanged && check));
2837 if (vnodeChanged && !Testing) {
2838 osi_Assert(IH_IWRITE
2839 (handle, vnodeIndexOffset(vcp, vnodeNumber),
2840 (char *)vnode, vcp->diskSize)
2842 salvinfo->VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber: look up the in-memory summary of a vnode.
 *
 * The vnode number is split into its class (vnodeIdToClass) and its slot
 * within that class (vnodeIdToBitNumber).  Returns a pointer into
 * salvinfo->vnodeInfo[class].vnodes, or NULL when the slot lies at or
 * beyond nVnodes for that class.
 */
2853 struct VnodeEssence *
2854 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2857 struct VnodeInfo *vip;
2860 class = vnodeIdToClass(vnodeNumber);
2861 vip = &salvinfo->vnodeInfo[class];
2862 offset = vnodeIdToBitNumber(vnodeNumber);
2863 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite: give the directory a private copy of its data inode before
 * it is modified.
 *
 * Guarded by (dir->copied || Testing); the guarded statement is not
 * visible in this excerpt -- presumably an early return, confirm.
 * Otherwise the directory vnode is read from the large vnode index, a new
 * inode is created with IH_CREATE (the data version is bumped in the same
 * expression), the old inode contents are copied with CopyInode, the
 * vnode is pointed at the new inode and written back, and dir->dirHandle
 * is re-targeted at the new inode.  Every step is asserted.  The original
 * inode is deliberately left in place because the directory is still being
 * scanned.
 */
2867 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2869 /* Copy the directory unconditionally if we are going to change it:
2870 * not just if was cloned.
2872 struct VnodeDiskObject vnode;
2873 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2874 Inode oldinode, newinode;
2877 if (dir->copied || Testing)
2879 DFlush(); /* Well justified paranoia... */
2882 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2883 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2885 osi_Assert(code == sizeof(vnode));
2886 oldinode = VNDISK_GET_INO(&vnode);
2887 /* Increment the version number by a whole lot to avoid problems with
2888 * clients that were promised new version numbers--but the file server
2889 * crashed before the versions were written to disk.
2892 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2893 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2895 osi_Assert(VALID_INO(newinode));
2896 osi_Assert(CopyInode(salvinfo->fileSysDevice, oldinode, newinode, dir->rwVid) == 0);
2898 VNDISK_SET_INO(&vnode, newinode);
2900 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2901 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2903 osi_Assert(code == sizeof(vnode));
2905 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2906 salvinfo->fileSysDevice, newinode,
2907 &salvinfo->VolumeChanged);
2908 /* Don't delete the original inode right away, because the directory is
2909 * still being scanned.
2915 * This function should either successfully create a new dir, or give up
2916 * and leave things the way they were. In particular, if it fails to write
2917 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage: rebuild a damaged directory into a fresh inode.
 *
 * Steps shown by the code: read the directory vnode from the large index,
 * create a new inode (bumping the data version), open it as newdir, pick
 * the uniquifier for the parent entry (the parent vnode essence if it is
 * known and non-zero, else 1), and run DirSalvage from the old directory
 * handle into newdir.  If DirSalvage reports an error, or DirOK rejects
 * the result, the new inode is released with IH_DEC and the old directory
 * stays in use.  On success the vnode gets the new inode number and the
 * new directory length, is written back, the data is synced, the old
 * directory file is closed and its inode released, and dir->dirHandle is
 * replaced by newdir.
 *
 * NOTE(review): the handle returned by IH_OPEN for the old directory is
 * passed straight to FDH_REALLYCLOSE; no NULL check is visible in this
 * excerpt -- confirm FDH_REALLYCLOSE tolerates NULL.
 */
2921 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2923 struct VnodeDiskObject vnode;
2924 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2925 Inode oldinode, newinode;
2930 afs_int32 parentUnique = 1;
2931 struct VnodeEssence *vnodeEssence;
2936 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2938 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2939 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2941 osi_Assert(lcode == sizeof(vnode));
2942 oldinode = VNDISK_GET_INO(&vnode);
2943 /* Increment the version number by a whole lot to avoid problems with
2944 * clients that were promised new version numbers--but the file server
2945 * crashed before the versions were written to disk.
2948 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2949 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2951 osi_Assert(VALID_INO(newinode));
2952 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2953 &salvinfo->VolumeChanged);
2955 /* Assign . and .. vnode numbers from dir and vnode.parent.
2956 * The uniquifier for . is in the vnode.
2957 * The uniquifier for .. might be set to a bogus value of 1 and
2958 * the salvager will later clean it up.
2960 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2961 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
2964 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
2966 (vnode.parent ? vnode.parent : dir->vnodeNumber),
2971 /* didn't really build the new directory properly, let's just give up. */
2972 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2973 Log("Directory salvage returned code %d, continuing.\n", code);
2975 Log("also failed to decrement link count on new inode");
2979 Log("Checking the results of the directory salvage...\n");
2980 if (!DirOK(&newdir)) {
2981 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
2982 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
2983 osi_Assert(code == 0);
2987 VNDISK_SET_INO(&vnode, newinode);
2988 length = afs_dir_Length(&newdir);
2989 VNDISK_SET_LEN(&vnode, length);
2991 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2992 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2994 osi_Assert(lcode == sizeof(vnode));
2997 nt_sync(salvinfo->fileSysDevice);
2999 sync(); /* this is slow, but hopefully rarely called. We don't have
3000 * an open FD on the file itself to fsync.
3004 salvinfo->vnodeInfo[vLarge].handle->ih_synced = 1;
3006 /* make sure old directory file is really closed */
3007 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
3008 FDH_REALLYCLOSE(fdP);
3010 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
3011 osi_Assert(code == 0);
3012 dir->dirHandle = newdir;
3016 * arguments for JudgeEntry.
/*
 * Context handed to JudgeEntry through its opaque first argument; filled
 * in by SalvageDir before the directory is enumerated.
 */
3018 struct judgeEntry_params {
3019 struct DirSummary *dir; /**< directory we're examining entries in */
3020 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/*
 * JudgeEntry: per-entry callback used while a directory is enumerated
 * (SalvageDir passes it to afs_dir_EnumerateDir).  arock is a
 * struct judgeEntry_params naming the directory and the salvage job.
 *
 * Checks applied to the entry (name, vnodeNumber, unique), in order:
 *  - vnode number outside the known index range: entry deleted;
 *  - without AFS_NAMEI_ENV, vnode with inode number 0: entry deleted;
 *  - even vnode number whose essence has no count, unique or mode bits:
 *    logged as invalid and deleted;
 *  - uniquifier differs from the vnode: entry deleted and re-created with
 *    the vnode uniquifier (the todelete flag computed here covers a zero
 *    vnode uniquifier or an orphaned directory); a dot or dot-dot entry of
 *    a vnode with zero uniquifier is left alone;
 *  - the dot entry must name this directory and the dot-dot entry its
 *    parent; wrong ones are replaced;
 *  - names starting with .__afs are removed and the vnode is marked
 *    unclaimed and todelete;
 *  - any other entry: optional reporting of suid/sgid files, mount points
 *    and root-owned files; a symlink without execute bits whose contents
 *    do not look like a mount point gets mode bits 0111 set; large vnodes
 *    remember their name; a vnode whose parent field names another
 *    directory is either adopted by this directory or the entry is
 *    deleted; finally the vnode is marked claimed and its count is
 *    decremented.
 *
 * Directory modifications go through CopyOnWrite first and are asserted
 * to succeed.
 *
 * NOTE(review): several guards (Showmode, Testing) and all but one return
 * statement are not visible in this excerpt; the one visible return is 0.
 */
3024 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
3027 struct judgeEntry_params *params = arock;
3028 struct DirSummary *dir = params->dir;
3029 struct SalvInfo *salvinfo = params->salvinfo;
3030 struct VnodeEssence *vnodeEssence;
3031 afs_int32 dirOrphaned, todelete;
3033 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
3035 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3036 if (vnodeEssence == NULL) {
3038 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
3041 CopyOnWrite(salvinfo, dir);
3042 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3047 #ifndef AFS_NAMEI_ENV
3048 /* On AIX machines, don't allow entries to point to inode 0. That is a special
3049 * mount inode for the partition. If this inode were deleted, it would crash
3052 if (vnodeEssence->InodeNumber == 0) {
3053 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
3055 CopyOnWrite(salvinfo, dir);
3056 osi_Assert(Delete(&dir->dirHandle, name) == 0);
3063 if (!(vnodeNumber & 1) && !Showmode
3064 && !(vnodeEssence->count || vnodeEssence->unique
3065 || vnodeEssence->modeBits)) {
3066 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
3067 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3068 vnodeNumber, unique,
3069 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3073 CopyOnWrite(salvinfo, dir);
3074 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3080 /* Check if the Uniquifiers match. If not, change the directory entry
3081 * so its unique matches the vnode unique. Delete if the unique is zero
3082 * or if the directory is orphaned.
3084 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3085 if (!vnodeEssence->unique
3086 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3087 /* This is an orphaned directory. Don't delete the . or ..
3088 * entry. Otherwise, it will get created in the next
3089 * salvage and deleted again here. So Just skip it.
3094 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3097 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3101 fid.Vnode = vnodeNumber;
3102 fid.Unique = vnodeEssence->unique;
3103 CopyOnWrite(salvinfo, dir);
3104 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3106 osi_Assert(afs_dir_Create(&dir->dirHandle, name, &fid) == 0);
3109 return 0; /* no need to continue */
3112 if (strcmp(name, ".") == 0) {
3113 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3115 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3118 CopyOnWrite(salvinfo, dir);
3119 osi_Assert(afs_dir_Delete(&dir->dirHandle, ".") == 0);
3120 fid.Vnode = dir->vnodeNumber;
3121 fid.Unique = dir->unique;
3122 osi_Assert(afs_dir_Create(&dir->dirHandle, ".", &fid) == 0);
3123 vnodeNumber = fid.Vnode; /* Get the new Essence */
3124 unique = fid.Unique;
3125 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3129 } else if (strcmp(name, "..") == 0) {
3132 struct VnodeEssence *dotdot;
3133 pa.Vnode = dir->parent;
3134 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3135 osi_Assert(dotdot != NULL); /* XXX Should not be assert */
3136 pa.Unique = dotdot->unique;
3138 pa.Vnode = dir->vnodeNumber;
3139 pa.Unique = dir->unique;
3141 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3143 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3145 CopyOnWrite(salvinfo, dir);
3146 osi_Assert(afs_dir_Delete(&dir->dirHandle, "..") == 0);
3147 osi_Assert(afs_dir_Create(&dir->dirHandle, "..", &pa) == 0);
3150 vnodeNumber = pa.Vnode; /* Get the new Essence */
3152 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3154 dir->haveDotDot = 1;
3155 } else if (strncmp(name, ".__afs", 6) == 0) {
3157 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3160 CopyOnWrite(salvinfo, dir);
3161 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3163 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3164 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3167 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3168 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3169 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3170 && !(vnodeEssence->modeBits & 0111)) {
3171 afs_sfsize_t nBytes;
3177 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3178 vnodeEssence->InodeNumber);
3181 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3185 size = FDH_SIZE(fdP);
3187 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3188 FDH_REALLYCLOSE(fdP);
3195 nBytes = FDH_PREAD(fdP, buf, size, 0);
3196 if (nBytes == size) {
3198 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3199 Log("Volume %u (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3200 dir->dirHandle.dirh_handle->ih_vid, dir->vname, dir->name ? dir->name : "??", name, buf,
3201 Testing ? "would convert" : "converted");
3202 vnodeEssence->modeBits |= 0111;
3203 vnodeEssence->changed = 1;
3204 } else if (ShowMounts) Log("In volume %u (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3205 dir->dirHandle.dirh_handle->ih_vid, dir->vname,
3206 dir->name ? dir->name : "??", name, buf);
3208 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3209 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3211 FDH_REALLYCLOSE(fdP);
3214 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3215 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
3216 if (vnodeIdToClass(vnodeNumber) == vLarge
3217 && vnodeEssence->name == NULL) {
3218 vnodeEssence->name = strdup(name);
3221 /* The directory entry points to the vnode. Check to see if the
3222 * vnode points back to the directory. If not, then let the
3223 * directory claim it (else it might end up orphaned). Vnodes
3224 * already claimed by another directory are deleted from this
3225 * directory: hardlinks to the same vnode are not allowed
3226 * from different directories.
3228 if (vnodeEssence->parent != dir->vnodeNumber) {
3229 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3230 /* Vnode does not point back to this directory.
3231 * Orphaned dirs cannot claim a file (it may belong to
3232 * another non-orphaned dir).
3235 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3237 vnodeEssence->parent = dir->vnodeNumber;
3238 vnodeEssence->changed = 1;
3240 /* Vnode was claimed by another directory */
3243 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3244 } else if (vnodeNumber == 1) {
3245 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3247 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3251 CopyOnWrite(salvinfo, dir);
3252 osi_Assert(afs_dir_Delete(&dir->dirHandle, name) == 0);
3257 /* This directory claims the vnode */
3258 vnodeEssence->claimed = 1;
3260 vnodeEssence->count--;
3265 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3266 VnodeClass class, Inode ino, Unique * maxu)
3268 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3269 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3270 char buf[SIZEOF_LARGEDISKVNODE];
3271 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3273 StreamHandle_t *file;
3278 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3279 fdP = IH_OPEN(vip->handle);
3280 osi_Assert(fdP != NULL);
3281 file = FDH_FDOPEN(fdP, "r+");
3282 osi_Assert(file != NULL);
3283 size = OS_SIZE(fdP->fd_fd);
3284 osi_Assert(size != -1);
3285 vip->nVnodes = (size / vcp->diskSize) - 1;
3286 if (vip->nVnodes > 0) {
3287 osi_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3288 osi_Assert(STREAM_ASEEK(file, vcp->diskSize) == 0);
3289 osi_Assert((vip->vnodes = (struct VnodeEssence *)
3290 calloc(vip->nVnodes, sizeof(struct VnodeEssence))) != NULL);
3291 if (class == vLarge) {
3292 osi_Assert((vip->inodes = (Inode *)
3293 calloc(vip->nVnodes, sizeof(Inode))) != NULL);
3302 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3303 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3304 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3305 nVnodes--, vnodeIndex++) {
3306 if (vnode->type != vNull) {
3307 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3308 afs_fsize_t vnodeLength;
3309 vip->nAllocatedVnodes++;
3310 vep->count = vnode->linkCount;
3311 VNDISK_GET_LEN(vnodeLength, vnode);
3312 vep->blockCount = nBlocks(vnodeLength);
3313 vip->volumeBlockCount += vep->blockCount;
3314 vep->parent = vnode->parent;
3315 vep->unique = vnode->uniquifier;
3316 if (*maxu < vnode->uniquifier)
3317 *maxu = vnode->uniquifier;
3318 vep->modeBits = vnode->modeBits;
3319 vep->InodeNumber = VNDISK_GET_INO(vnode);
3320 vep->type = vnode->type;
3321 vep->author = vnode->author;
3322 vep->owner = vnode->owner;
3323 vep->group = vnode->group;
3324 if (vnode->type == vDirectory) {
3325 if (class != vLarge) {
3326 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3327 vip->nAllocatedVnodes--;
3328 memset(vnode, 0, sizeof(*vnode));
3329 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3330 vnodeIndexOffset(vcp, vnodeNumber),
3331 (char *)&vnode, sizeof(vnode));
3332 salvinfo->VolumeChanged = 1;
3334 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName: build the path of directory vnode vp into the caller buffer
 * path by first resolving the parent directory recursively and then
 * appending OS_DIRSEP and vp->name.
 *
 * The recursive branch is taken only when vp has a parent, has a recorded
 * name, the parent essence can be found, and the recursive call for the
 * parent succeeds.
 *
 * NOTE(review): the base case and the return statements are not visible
 * in this excerpt.  The strcat calls are not bounded by any buffer size
 * (none is passed in), and the recursion relies on the parent chain being
 * free of cycles -- worth confirming against the callers.
 */
3343 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3346 struct VnodeEssence *parentvp;
3352 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3353 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3354 strcat(path, OS_DIRSEP);
3355 strcat(path, vp->name);
3361 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3362 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
/*
 * IsVnodeOrphaned: report whether a vnode is cut off from the root.
 *
 * Returns 1 for vnode zero and 0 for the root directory vnode (per the
 * comments on those returns; the tests themselves are not visible in this
 * excerpt).  Otherwise returns 1 when the vnode has no essence or is not
 * claimed, and else the answer for its parent, computed recursively.
 *
 * NOTE(review): termination of the recursion depends on the parent chain
 * reaching vnode zero, the root, or an unclaimed vnode; nothing visible
 * here guards against a cycle among claimed vnodes -- confirm.
 */
3365 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3367 struct VnodeEssence *vep;
3370 return (1); /* Vnode zero does not exist */
3372 return (0); /* The root dir vnode is always claimed */
3373 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3374 if (!vep || !vep->claimed)
3375 return (1); /* Vnode is not claimed - it is orphaned */
3377 return (IsVnodeOrphaned(salvinfo, vep->parent));
/*
 * SalvageDir: salvage the directory in slot i of the large vnode index.
 *
 * Order of work shown by the code:
 *  - return at once if the slot is already marked salvaged, otherwise
 *    mark it; return if no directory inode is recorded for the slot;
 *  - for vnode 1 a non-zero parent is reset to 0; for any other vnode the
 *    parent directory is salvaged first (recursive call) if it has not
 *    been salvaged yet;
 *  - fill in the DirSummary and check the directory with DirOK (treated as
 *    bad without checking when RebuildDirs is set and Testing is off);
 *    a bad directory is rebuilt with CopyAndSalvage;
 *  - resolve the directory path with GetDirName and run JudgeEntry over
 *    every entry via afs_dir_EnumerateDir (asserted to succeed);
 *  - if the directory was copied during judging and Testing is off, the
 *    old inode is released with IH_DEC and the recorded inode updated;
 *  - the summary of vnode 1 with uniquifier 1 is copied to *rootdir.
 *
 * dir, dirHandle and path are static, so they are shared by all
 * invocations; the recursive call for the parent is made before they are
 * filled in for this directory.
 *
 * NOTE(review): rootdirfound is presumably set where rootdir is copied;
 * that statement is not visible in this excerpt -- confirm.
 */
3381 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3382 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3383 struct DirSummary *rootdir, int *rootdirfound)
3385 static struct DirSummary dir;
3386 static struct DirHandle dirHandle;
3387 struct VnodeEssence *parent;
3388 static char path[MAXPATHLEN];
3391 if (dirVnodeInfo->vnodes[i].salvaged)
3392 return; /* already salvaged */
3395 dirVnodeInfo->vnodes[i].salvaged = 1;
3397 if (dirVnodeInfo->inodes[i] == 0)
3398 return; /* Not allocated to a directory */
3400 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3401 if (dirVnodeInfo->vnodes[i].parent) {
3402 Log("Bad parent, vnode 1; %s...\n",
3403 (Testing ? "skipping" : "salvaging"));
3404 dirVnodeInfo->vnodes[i].parent = 0;
3405 dirVnodeInfo->vnodes[i].changed = 1;
3408 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3409 if (parent && parent->salvaged == 0)
3410 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3411 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3412 rootdir, rootdirfound);
3415 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3416 dir.unique = dirVnodeInfo->vnodes[i].unique;
3419 dir.parent = dirVnodeInfo->vnodes[i].parent;
3420 dir.haveDot = dir.haveDotDot = 0;
3421 dir.ds_linkH = alinkH;
3422 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3423 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
3425 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3428 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3429 (Testing ? "skipping" : "salvaging"));
3432 CopyAndSalvage(salvinfo, &dir);
3434 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3437 dirHandle = dir.dirHandle;
3440 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3441 &dirVnodeInfo->vnodes[i], path);
3444 /* If enumeration failed for random reasons, we will probably delete
3445 * too much stuff, so we guard against this instead.
3447 struct judgeEntry_params judge_params;
3448 judge_params.salvinfo = salvinfo;
3449 judge_params.dir = &dir;
3451 osi_Assert(afs_dir_EnumerateDir(&dirHandle, JudgeEntry,
3452 &judge_params) == 0);
3455 /* Delete the old directory if it was copied in order to salvage.
3456 * CopyOnWrite has written the new inode # to the disk, but we still
3457 * have the old one in our local structure here. Thus, we idec the
3461 if (dir.copied && !Testing) {
3462 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3463 osi_Assert(code == 0);
3464 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3467 /* Remember rootdir DirSummary _after_ it has been judged */
3468 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3469 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3477 * Get a new FID that can be used to create a new file.
3479 * @param[in] volHeader vol header for the volume
3480 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3481 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3482 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3483 * updated to the new max unique if we create a new
/*
 * Pick a free vnode slot of the given class and build a FID for it.
 *
 * The vnodes array of the class is scanned for a slot whose type is vNull.
 * When the scan reaches nVnodes, the array is grown by one element and the
 * new slot is used.  afid->Vnode is derived from the slot index;
 * afid->Unique comes either from *maxunique (when the header uniquifier is
 * too low) or from the header uniquifier, which is post-incremented and
 * also stored in *maxunique.
 * NOTE(review): some statements of this function are not visible in this
 * excerpt; only the visible lines are described.
 */
3487 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3488 VnodeClass class, AFSFid *afid, Unique *maxunique)
3491 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3492 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
3496 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3497 /* no free vnodes; make a new one */
3498 salvinfo->vnodeInfo[class].nVnodes++;
3499 salvinfo->vnodeInfo[class].vnodes =
3500 realloc(salvinfo->vnodeInfo[class].vnodes,
3501 sizeof(struct VnodeEssence) * (i+1));
/* realloc() can fail and never initializes the added element: insist on
 * success (as the other allocations in this file do) and zero the new
 * VnodeEssence so that no field holds stale heap contents. */
osi_Assert(salvinfo->vnodeInfo[class].vnodes != NULL);
memset(&salvinfo->vnodeInfo[class].vnodes[i], 0, sizeof(struct VnodeEssence));
3503 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
3506 afid->Vnode = bitNumberToVnodeNumber(i, class);
3508 if (volHeader->uniquifier < (*maxunique + 1)) {
3509 /* header uniq is bad; it will get bumped by 2000 later */
3510 afid->Unique = *maxunique + 1 + 2000;
3513 /* header uniq seems okay; just use that */
3514 afid->Unique = *maxunique = volHeader->uniquifier++;
3519 * Create a vnode for a README file explaining not to use a recreated-root vol.
3521 * @param[in] volHeader vol header for the volume
3522 * @param[in] alinkH ihandle for i/o for the volume
3523 * @param[in] vid volume id
3524 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3525 * updated to the new max unique if we create a new
3527 * @param[out] afid FID for the new readme vnode
3528 * @param[out] ainode the inode for the new readme file
3530 * @return operation status
/*
 * Create the README file that is placed in a recreated root directory.
 *
 * Visible steps: obtain a small-vnode FID with GetNewFID(), create an inode
 * and write the note into it, build a VnodeDiskObject for the file and
 * write it to the small vnode index, then update the in-memory VnodeEssence
 * and the per-class counters.  On the recovery path the inode is released
 * again with IH_DEC.
 * NOTE(review): several statements (including the returns and the labels of
 * the error path) are not visible in this excerpt.
 */
3535 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3536 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3540 struct VnodeDiskObject *rvnode = NULL;
3542 IHandle_t *readmeH = NULL;
3543 struct VnodeEssence *vep;
3545 time_t now = time(NULL);
3547 /* Try to make the note brief, but informative. Only administrators should
3548 * be able to read this file at first, so we can hopefully assume they
3549 * know what AFS is, what a volume is, etc. */
3551 "This volume has been salvaged, but has lost its original root directory.\n"
3552 "The root directory that exists now has been recreated from orphan files\n"
3553 "from the rest of the volume. This recreated root directory may interfere\n"
3554 "with old cached data on clients, and there is no way the salvager can\n"
3555 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3556 "use this volume, but only copy the salvaged data to a new volume.\n"
3557 "Continuing to use this volume as it exists now may cause some clients to\n"
3558 "behave oddly when accessing this volume.\n"
3559 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3560 /* ^ the person reading this probably just lost some data, so they could
3561 * use some cheering up. */
3563 /* -1 for the trailing NUL */
3564 length = sizeof(readme) - 1;
3566 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
3568 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3570 /* create the inode and write the contents */
3571 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3572 salvinfo->fileSysPath, 0, vid,
3573 afid->Vnode, afid->Unique, 1);
3574 if (!VALID_INO(readmeinode)) {
3575 Log("CreateReadme: readme IH_CREATE failed\n");
3579 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3580 bytes = IH_IWRITE(readmeH, 0, readme, length);
3581 IH_RELEASE(readmeH);
3583 if (bytes != length) {
/* report the size the write was compared against (length, which excludes
 * the trailing NUL), not sizeof(readme) */
3584 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3585 (int)length);
3589 /* create the vnode and write it out */
3590 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
3592 Log("CreateReadme: error alloc'ing memory\n");
3596 rvnode->type = vFile;
3598 rvnode->modeBits = 0777;
3599 rvnode->linkCount = 1;
3600 VNDISK_SET_LEN(rvnode, length);
3601 rvnode->uniquifier = afid->Unique;
3602 rvnode->dataVersion = 1;
3603 VNDISK_SET_INO(rvnode, readmeinode);
3604 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3609 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
3611 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3612 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3613 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3615 if (bytes != SIZEOF_SMALLDISKVNODE) {
3616 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3617 (int)SIZEOF_SMALLDISKVNODE);
3621 /* update VnodeEssence for new readme vnode */
3622 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
3624 vep->blockCount = nBlocks(length);
3625 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3626 vep->parent = rvnode->parent;
3627 vep->unique = rvnode->uniquifier;
3628 vep->modeBits = rvnode->modeBits;
3629 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3630 vep->type = rvnode->type;
3631 vep->author = rvnode->author;
3632 vep->owner = rvnode->owner;
3633 vep->group = rvnode->group;
3643 *ainode = readmeinode;
3648 if (IH_DEC(alinkH, readmeinode, vid)) {
3649 Log("CreateReadme (recovery): IH_DEC failed\n");
3661 * create a root dir for a volume that lacks one.
3663 * @param[in] volHeader vol header for the volume
3664 * @param[in] alinkH ihandle for disk access for this volume group
3665 * @param[in] vid volume id we're dealing with
3666 * @param[out] rootdir populated with info about the new root dir
3667 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3668 * updated to the new max unique if we create a new
3671 * @return operation status
/*
 * Build a replacement root directory (vnode 1, uniquifier 1).
 *
 * Visible steps: refuse when the volume has no vnode arrays at all or when
 * vnode 1 is already in use; allocate one-element large vnode arrays when
 * the large class is empty; create the README through CreateReadme();
 * create the directory inode, initialise it with afs_dir_MakeDir() and add
 * the README.ROOTDIR entry; build the large disk vnode (with an ACL that has
 * a single entry for id -204) and write it to the large vnode index; then
 * update the in-memory VnodeEssence, the class counters and *rootdir.
 * On the recovery path the inodes are released with IH_DEC according to the
 * decroot and decreadme flags.
 * NOTE(review): the returns, the error-path labels and the statements that
 * set did, dv, decroot and decreadme are not visible in this excerpt.
 */
3676 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3677 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
3681 int decroot = 0, decreadme = 0;
3682 AFSFid did, readmeid;
3685 struct VnodeDiskObject *rootvnode = NULL;
3686 struct acl_accessList *ACL;
3689 struct VnodeEssence *vep;
3690 Inode readmeinode = 0;
3691 time_t now = time(NULL);
3693 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3694 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3698 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3699 /* We don't have any large vnodes in the volume; allocate room
3700 * for one so we can recreate the root dir */
3701 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3702 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3703 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3705 osi_Assert(salvinfo->vnodeInfo[vLarge].vnodes);
3706 osi_Assert(salvinfo->vnodeInfo[vLarge].inodes);
/* Essence and inode slots of vnode number 1 in the large vnode class */
3709 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3710 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3711 if (vep->type != vNull) {
3712 Log("Not creating new root dir; existing vnode 1 is non-null\n");
/* Create the README first; its FID is entered into the new directory below */
3716 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3717 &readmeinode) != 0) {
3722 /* set the DV to a very high number, so it is unlikely that we collide
3723 * with a cached DV */
3726 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3728 if (!VALID_INO(rootinode)) {
3729 Log("CreateRootDir: IH_CREATE failed\n");
3734 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3735 rootinode, &salvinfo->VolumeChanged);
/* did is passed as both FID arguments of afs_dir_MakeDir() */
3739 if (afs_dir_MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3740 Log("CreateRootDir: MakeDir failed\n");
3743 if (afs_dir_Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3744 Log("CreateRootDir: Create failed\n");
3748 length = afs_dir_Length(&rootdir->dirHandle);
3749 DZap(&rootdir->dirHandle);
3751 /* create the new root dir vnode */
3752 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
3754 Log("CreateRootDir: malloc failed\n");
3758 /* only give 'rl' permissions to 'system:administrators'. We do this to
3759 * try to catch the attention of an administrator, that they should not
3760 * be writing to this directory or continue to use it. */
3761 ACL = VVnodeDiskACL(rootvnode);
3762 ACL->size = sizeof(struct acl_accessList);
3763 ACL->version = ACL_ACLVERSION;
3767 ACL->entries[0].id = -204; /* system:administrators */
3768 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
3770 rootvnode->type = vDirectory;
3771 rootvnode->cloned = 0;
3772 rootvnode->modeBits = 0777;
3773 rootvnode->linkCount = 2;
3774 VNDISK_SET_LEN(rootvnode, length);
3775 rootvnode->uniquifier = 1;
3776 rootvnode->dataVersion = dv;
3777 VNDISK_SET_INO(rootvnode, rootinode);
3778 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3779 rootvnode->author = 0;
3780 rootvnode->owner = 0;
3781 rootvnode->parent = 0;
3782 rootvnode->group = 0;
3783 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3785 /* write it out to disk */
3786 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3787 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3788 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3790 if (bytes != SIZEOF_LARGEDISKVNODE) {
3791 /* just cast to int and don't worry about printing real 64-bit ints;
3792 * a large disk vnode isn't anywhere near the 32-bit limit */
3793 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3794 (int)SIZEOF_LARGEDISKVNODE);
3798 /* update VnodeEssence for the new root vnode */
3799 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3801 vep->blockCount = nBlocks(length);
3802 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3803 vep->parent = rootvnode->parent;
3804 vep->unique = rootvnode->uniquifier;
3805 vep->modeBits = rootvnode->modeBits;
3806 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3807 vep->type = rootvnode->type;
3808 vep->author = rootvnode->author;
3809 vep->owner = rootvnode->owner;
3810 vep->group = rootvnode->group;
3820 /* update DirSummary for the new root vnode */
3821 rootdir->vnodeNumber = 1;
3822 rootdir->unique = 1;
3823 rootdir->haveDot = 1;
3824 rootdir->haveDotDot = 1;
3825 rootdir->rwVid = vid;
3826 rootdir->copied = 0;
3827 rootdir->parent = 0;
/* NOTE(review): the strdup() result is not checked for NULL */
3828 rootdir->name = strdup(".");
3829 rootdir->vname = volHeader->name;
3830 rootdir->ds_linkH = alinkH;
/* Recovery: release each inode whose flag was set */
3837 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3838 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3840 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3841 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3851 * salvage a volume group.
3853 * @param[in] salvinfo information for the current salvage job
3854 * @param[in] rwIsp inode summary for rw volume
3855 * @param[in] alinkH link table inode handle
3857 * @return operation status
/*
 * Salvage one read-write volume.  Visible phases, in order:
 *   1. read the volume header through handle h and assert on its size,
 *      magic and destroyMe flag;
 *   2. DistilVnodeEssence() for the large and the small vnode index;
 *   3. SalvageDir() on every slot of the large vnode class, then sync;
 *   4. if no root directory was found, orphans == ORPH_ATTACH and Testing
 *      is off, try CreateRootDir();
 *   5. walk all vnodes, adjusting link-count deltas of orphans and, with
 *      ORPH_ATTACH, entering them in the root directory under
 *      __ORPHANDIR__ / __ORPHANFILE__ names;
 *   6. write back every vnode that is changed or has a non-zero count,
 *      zeroing vnodes that are marked todelete or are orphaned under
 *      ORPH_REMOVE;
 *   7. free the per-vnode names, update filecount, diskused and the
 *      uniquifier, notify the fileserver (FSYNC_VOL_BREAKCBKS) or unlink
 *      the fsstate file, and write the header back.
 * NOTE(review): several statements of this function are not visible in
 * this excerpt; the summary covers only what can be seen.
 */
3861 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3863 /* This routine, for now, will only be called for read-write volumes */
3865 int BlocksInVolume = 0, FilesInVolume = 0;
3867 struct DirSummary rootdir, oldrootdir;
3868 struct VnodeInfo *dirVnodeInfo;
3869 struct VnodeDiskObject vnode;
3870 VolumeDiskData volHeader;
3872 int orphaned, rootdirfound = 0;
3873 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3874 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3875 struct VnodeEssence *vep;
3878 afs_sfsize_t nBytes;
3880 VnodeId LFVnode, ThisVnode;
3881 Unique LFUnique, ThisUnique;
/* Load the volume header and sanity-check it before doing anything else */
3885 vid = rwIsp->volSummary->header.id;
3886 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3887 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3888 osi_Assert(nBytes == sizeof(volHeader));
3889 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3890 osi_Assert(volHeader.destroyMe != DESTROY_ME);
3891 /* (should not have gotten this far with DESTROY_ME flag still set!) */
/* Distil both vnode indexes; maxunique is passed by address to both calls */
3893 DistilVnodeEssence(salvinfo, vid, vLarge,
3894 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
3895 DistilVnodeEssence(salvinfo, vid, vSmall,
3896 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
/* Salvage every slot of the large (directory) vnode class */
3898 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
3899 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3900 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
3901 &rootdir, &rootdirfound);
3904 nt_sync(salvinfo->fileSysDevice);
3906 sync(); /* This used to be done lower level, for every dir */
/* No root directory was found: try to build one so that orphans can be
 * attached to it */
3913 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
3915 Log("Cannot find root directory for volume %lu; attempting to create "
3916 "a new one\n", afs_printable_uint32_lu(vid));
3918 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
3923 salvinfo->VolumeChanged = 1;
3927 /* Parse each vnode looking for orphaned vnodes and
3928 * connect them to the tree as orphaned (if requested).
3930 oldrootdir = rootdir;
3931 for (class = 0; class < nVNODECLASSES; class++) {
3932 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
3933 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
3934 ThisVnode = bitNumberToVnodeNumber(v, class);
3935 ThisUnique = vep->unique;
3937 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3938 continue; /* Ignore unused, claimed, and root vnodes */
3940 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3941 * entry in this vnode had incremented the parent link count (In
3942 * JudgeEntry()). We need to go to the parent and decrement that
3943 * link count. But if the parent's unique is zero, then the parent
3944 * link count was not incremented in JudgeEntry().
3946 if (class == vLarge) { /* directory vnode */
3947 pv = vnodeIdToBitNumber(vep->parent);
3948 if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
3949 if (vep->parent == 1 && newrootdir) {
3950 /* this vnode's parent was the volume root, and
3951 * we just created the volume root. So, the parent
3952 * dir didn't exist during JudgeEntry, so the link
3953 * count was not inc'd there, so don't dec it here.
3959 salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
3965 continue; /* If no rootdir, can't attach orphaned files */
3967 /* Here we attach orphaned files and directories into the
3968 * root directory, LVVnode, making sure link counts stay correct.
3970 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
3971 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
3972 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
3974 /* Update this orphaned vnode's info. Its parent info and
3975 * link count (do for orphaned directories and files).
3977 vep->parent = LFVnode; /* Parent is the root dir */
3978 vep->unique = LFUnique;
3981 vep->count--; /* Inc link count (root dir will pt to it) */
3983 /* If this orphaned vnode is a directory, change '..'.
3984 * The name of the orphaned dir/file is unknown, so we
3985 * build a unique name. No need to CopyOnWrite the directory
3986 * since it is not connected to tree in BK or RO volume and
3987 * won't be visible there.
3989 if (class == vLarge) {
3993 /* Remove and recreate the ".." entry in this orphaned directory */
3994 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
3995 salvinfo->vnodeInfo[class].inodes[v],
3996 &salvinfo->VolumeChanged);
3998 pa.Unique = LFUnique;
3999 osi_Assert(afs_dir_Delete(&dh, "..") == 0);
4000 osi_Assert(afs_dir_Create(&dh, "..", &pa) == 0);
4002 /* The original parent's link count was decremented above.
4003 * Here we increment the new parent's link count.
4005 pv = vnodeIdToBitNumber(LFVnode);
4006 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
4010 /* Go to the root dir and add this entry. The link count of the
4011 * root dir was incremented when ".." was created. Try 10 times.
4013 for (j = 0; j < 10; j++) {
4014 pa.Vnode = ThisVnode;
4015 pa.Unique = ThisUnique;
4017 snprintf(npath, sizeof npath, "%s.%u.%u",
4018 ((class == vLarge) ? "__ORPHANDIR__"
4019 : "__ORPHANFILE__"),
4020 ThisVnode, ThisUnique);
4022 CopyOnWrite(salvinfo, &rootdir);
4023 code = afs_dir_Create(&rootdir.dirHandle, npath, &pa);
4027 ThisUnique += 50; /* Try creating a different file */
4029 osi_Assert(code == 0);
4030 Log("Attaching orphaned %s to volume's root dir as %s\n",
4031 ((class == vLarge) ? "directory" : "file"), npath);
4033 } /* for each vnode in the class */
4034 } /* for each class of vnode */
4036 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
4038 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
4040 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
4042 osi_Assert(code == 0);
4043 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
4046 DFlush(); /* Flush the changes */
4047 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
4048 Log("Cannot attach orphaned files and directories: Root directory not found\n");
4049 orphans = ORPH_IGNORE;
4052 /* Write out all changed vnodes. Orphaned files and directories
4053 * will get removed here also (if requested).
4055 for (class = 0; class < nVNODECLASSES; class++) {
4056 afs_sfsize_t nVnodes = salvinfo->vnodeInfo[class].nVnodes;
4057 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
4058 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
4059 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
4060 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
4061 for (i = 0; i < nVnodes; i++) {
4062 struct VnodeEssence *vnp = &vnodes[i];
4063 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
4065 /* If the vnode is good but is unclaimed (not listed in
4066 * any directory entries), then it is orphaned.
4069 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
4070 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
4074 if (vnp->changed || vnp->count) {
4077 IH_IREAD(salvinfo->vnodeInfo[class].handle,
4078 vnodeIndexOffset(vcp, vnodeNumber),
4079 (char *)&vnode, sizeof(vnode));
4080 osi_Assert(nBytes == sizeof(vnode));
4082 vnode.parent = vnp->parent;
4083 oldCount = vnode.linkCount;
/* vnp->count is subtracted from the on-disk link count to correct it */
4084 vnode.linkCount = vnode.linkCount - vnp->count;
4087 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4089 if (!vnp->todelete) {
4090 /* Orphans should have already been attached (if requested) */
4091 osi_Assert(orphans != ORPH_ATTACH);
4092 oblocks += vnp->blockCount;
4095 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4097 BlocksInVolume -= vnp->blockCount;
4099 if (VNDISK_GET_INO(&vnode)) {
4101 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4102 osi_Assert(code == 0);
4104 memset(&vnode, 0, sizeof(vnode));
4106 } else if (vnp->count) {
4108 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4111 vnode.modeBits = vnp->modeBits;
4114 vnode.dataVersion++;
4117 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4118 vnodeIndexOffset(vcp, vnodeNumber),
4119 (char *)&vnode, sizeof(vnode));
4120 osi_Assert(nBytes == sizeof(vnode));
4122 salvinfo->VolumeChanged = 1;
4126 if (!Showmode && ofiles) {
4127 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4129 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
4133 for (class = 0; class < nVNODECLASSES; class++) {
4134 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4135 for (i = 0; i < vip->nVnodes; i++)
4136 if (vip->vnodes[i].name)
4137 free(vip->vnodes[i].name);
4144 /* Set correct resource utilization statistics */
4145 volHeader.filecount = FilesInVolume;
4146 volHeader.diskused = BlocksInVolume;
4148 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
4149 if (volHeader.uniquifier < (maxunique + 1)) {
4151 Log("Volume uniquifier is too low; fixed\n");
4152 /* Plus 2,000 in case there are workstations out there with
4153 * cached vnodes that have since been deleted
4155 volHeader.uniquifier = (maxunique + 1 + 2000);
4159 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4160 "Only use this salvaged volume to copy data to another volume; "
4161 "do not continue to use this volume (%lu) as-is.\n",
4162 afs_printable_uint32_lu(vid));
4165 if (!Testing && salvinfo->VolumeChanged) {
4166 #ifdef FSSYNC_BUILD_CLIENT
4167 if (salvinfo->useFSYNC) {
4168 afs_int32 fsync_code;
4170 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4172 Log("Error trying to tell the fileserver to break callbacks for "
4173 "changed volume %lu; error code %ld\n",
4174 afs_printable_uint32_lu(vid),
4175 afs_printable_int32_ld(fsync_code));
4177 salvinfo->VolumeChanged = 0;
4180 #endif /* FSSYNC_BUILD_CLIENT */
4182 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4183 if (!salvinfo->useFSYNC) {
4184 /* A volume's contents have changed, but the fileserver will not
4185 * break callbacks on the volume until it tries to load the vol
4186 * header. So, to reduce the amount of time a client could have
4187 * stale data, remove fsstate.dat, so the fileserver will init
4188 * callback state with all clients. This is a very coarse hammer,
4189 * and in the future we should just record which volumes have
4191 code = unlink(AFSDIR_SERVER_FSSTATE_FILEPATH);
4192 if (code && errno != ENOENT) {
4193 Log("Error %d when trying to unlink FS state file %s\n", errno,
4194 AFSDIR_SERVER_FSSTATE_FILEPATH);
4200 /* Turn off the inUse bit; the volume's been salvaged! */
4201 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4202 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4203 volHeader.inService = 1; /* allow service again */
/* needsCallback records whether VolumeChanged is still set at this point;
 * the FSYNC branch above can clear it (presumably once callbacks were broken) */
4204 volHeader.needsCallback = (salvinfo->VolumeChanged != 0);
4205 volHeader.dontSalvage = DONT_SALVAGE;
4206 salvinfo->VolumeChanged = 0;
/* Write the updated volume header back through the same handle */
4208 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4209 osi_Assert(nBytes == sizeof(volHeader));
4212 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
4213 (Testing ? "It would have " : ""), volHeader.name, volHeader.id,
4214 FilesInVolume, BlocksInVolume);
4217 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4218 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
/*
 * Rewrite the volume header reached through summary->volumeInfoHandle with
 * inUse and needsSalvaged cleared, inService set and dontSalvage set to
 * DONT_SALVAGE.  The read, the magic number and the write are all checked
 * with osi_Assert.
 */
4224 ClearROInUseBit(struct VolumeSummary *summary)
4226 IHandle_t *h = summary->volumeInfoHandle;
4227 afs_sfsize_t nBytes;
4229 VolumeDiskData volHeader;
4231 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4232 osi_Assert(nBytes == sizeof(volHeader));
4233 osi_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
4234 volHeader.inUse = 0;
4235 volHeader.needsSalvaged = 0;
4236 volHeader.inService = 1;
4237 volHeader.dontSalvage = DONT_SALVAGE;
4239 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4240 osi_Assert(nBytes == sizeof(volHeader));
4245 * Possibly delete the volume.
4247 * deleteMe - Always do so, only a partial volume.
/*
 * Delete a volume that cannot be salvaged, or abort.
 *
 * When the volume is read-only or deleteMe is set, and it has a volume
 * summary that is not yet marked deleted, a message is logged, the volume
 * disk header is destroyed with VDestroyVolumeDiskHeader(), the header file
 * path is unlinked, the fileserver is informed through AskDelete() when
 * useFSYNC is set, and the summary is marked deleted.  Otherwise, when check
 * is zero, the failure is logged and Abort() is called.
 * NOTE(review): the tests that choose between the log messages and guard
 * the deletion are not visible in this excerpt.
 */
4250 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4251 char *message, int deleteMe, int check)
4253 if (readOnly(isp) || deleteMe) {
4254 if (isp->volSummary && !isp->volSummary->deleted) {
4257 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp->volumeId);
4259 Log("It will be deleted on this server (you may find it elsewhere)\n");
4262 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp->volumeId);
4264 Log("it will be deleted instead. It should be recloned.\n");
4269 char filename[VMAXPATHLEN];
4270 VolumeExternalName_r(isp->volumeId, filename, sizeof(filename));
/* NOTE(review): unbounded sprintf; the declaration of path is not visible
 * here, so its capacity cannot be checked from this excerpt */
4271 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, filename);
4273 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4275 Log("Error %ld destroying volume disk header for volume %lu\n",
4276 afs_printable_int32_ld(code),
4277 afs_printable_uint32_lu(isp->volumeId));
4280 /* make sure we actually delete the header file; ENOENT
4281 * is fine, since VDestroyVolumeDiskHeader probably already
4283 if (unlink(path) && errno != ENOENT) {
4284 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4286 if (salvinfo->useFSYNC) {
4287 AskDelete(salvinfo, isp->volumeId);
4289 isp->volSummary->deleted = 1;
4292 } else if (!check) {
4293 Log("%s salvage was unsuccessful: read-write volume %u\n", message,
4295 Abort("Salvage of volume %u aborted\n", isp->volumeId);
4299 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4301 * Locks a volume on disk for salvaging.
4303 * @param[in] volumeId volume ID to lock
4305 * @return operation status
4307 * @retval -1 volume lock raced with a fileserver restart; all volumes must be
4308 * checked out and locked again
/*
 * Lock a volume on disk and mark it as in use by the salvager.
 *
 * Visible steps: take the lock with VLockVolumeByIdNB() using the type
 * returned by VVolLockType(); Abort() when the volume is busy or the lock
 * fails; confirm the checkout with FSYNC_VerifyCheckout(), where
 * SYNC_DENIED means the checkout must be retried and any other failure
 * aborts; finally read the volume header, set inUse to programType and
 * write it back, asserting that the write succeeds.
 * NOTE(review): the return statements and some of the guarding tests are
 * not visible in this excerpt.
 */
4313 LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
4318 /* should always be WRITE_LOCK, but keep the lock-type logic all
4319 * in one place, in VVolLockType. Params will be ignored, but
4320 * try to provide what we're logically doing. */
4321 locktype = VVolLockType(V_VOLUPD, 1);
4323 code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
4325 if (code == EBUSY) {
4326 Abort("Someone else appears to be using volume %lu; Aborted\n",
4327 afs_printable_uint32_lu(volumeId));
4329 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4330 afs_printable_int32_ld(code),
4331 afs_printable_uint32_lu(volumeId));
/* Check with the fileserver that the volume checkout still stands */
4334 code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPartition->name, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4335 if (code == SYNC_DENIED) {
4336 /* need to retry checking out volumes */
4339 if (code != SYNC_OK) {
4340 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4341 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4344 /* set inUse = programType in the volume header to ensure that nobody
4345 * tries to use this volume again without salvaging, if we somehow crash
4346 * or otherwise exit before finishing the salvage.
4350 struct VolumeHeader header;
4351 struct VolumeDiskHeader diskHeader;
4352 struct VolumeDiskData volHeader;
4354 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
4359 DiskToVolumeHeader(&header, &diskHeader);
4361 IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
4362 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4363 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4369 volHeader.inUse = programType;
4371 /* If we can't re-write the header, bail out and error. We don't
4372 * assert when reading the header, since it's possible the
4373 * header isn't really there (when there's no data associated
4374 * with the volume; we just delete the vol header file in that
4375 * case). But if it's there enough that we can read it, but
4376 * somehow we cannot write to it to signify we're salvaging it,
4377 * we've got a big problem and we cannot continue. */
4378 osi_Assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
4385 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
/*
 * Ask the fileserver to force a volume into an error state, using
 * FSYNC_VOL_FORCE_ERROR.  A failure is only logged.  The request is compiled
 * in only for demand-attach builds (AFS_DEMAND_ATTACH_FS or
 * AFS_DEMAND_ATTACH_UTIL).
 */
4388 AskError(struct SalvInfo *salvinfo, VolumeId volumeId)
4390 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4392 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4393 FSYNC_VOL_FORCE_ERROR, FSYNC_WHATEVER, NULL);
4394 if (code != SYNC_OK) {
4395 Log("AskError: failed to force volume %lu into error state; "
4396 "SYNC error code %ld (%s)\n", (long unsigned)volumeId,
4397 (long)code, SYNC_res2string(code));
4399 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
/*
 * Ask the fileserver to take a volume offline for salvaging.
 *
 * FSYNC_VOL_OFF is sent up to three times.  A denial or a protocol mismatch
 * (SYNC_BAD_COMMAND) is logged and the salvage is aborted; any other failure
 * is logged and FSYNC_clientFinis() is called before the next attempt.  If
 * the request has still not succeeded after the loop, the salvage is
 * aborted.
 * NOTE(review): the tests that select between the alternative log messages
 * and the statements ending the loop are not visible in this excerpt.
 */
4403 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
4408 memset(&res, 0, sizeof(res));
4410 for (i = 0; i < 3; i++) {
4411 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4412 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4414 if (code == SYNC_OK) {
4416 } else if (code == SYNC_DENIED) {
4418 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4420 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4421 Abort("Salvage aborted\n");
4422 } else if (code == SYNC_BAD_COMMAND) {
4423 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
4426 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4427 Log("AskOffline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4429 Log("AskOffline: fileserver is DAFS but we are not.\n");
4432 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4433 Log("AskOffline: fileserver is not DAFS but we are.\n");
4435 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4438 Abort("Salvage aborted\n");
/* Any other failure: log it and shut the FSYNC client down before retrying */
4441 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4442 FSYNC_clientFinis();
4446 if (code != SYNC_OK) {
4447 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4448 Abort("Salvage aborted\n");
4452 /* don't want to pass around state; remember it here */
4453 static int isDAFS = -1;
/*
 * Find out whether the fileserver is a demand-attach (DAFS) fileserver.
 *
 * FSYNC_VOL_LISTVOLUMES is sent up to three times while the result code is
 * non-zero; failures are logged and FSYNC_clientFinis() is called between
 * attempts.  When no attempt succeeds the fileserver is assumed not to be
 * DAFS.  Otherwise the SYNC_FLAG_DAFS_EXTENSIONS bit of the response header
 * flags is examined.
 * NOTE(review): the function signature (presumably AskDAFS, going by the
 * log prefix) and the statements that store to and return isDAFS are not
 * visible in this excerpt.
 */
4458 afs_int32 code = 1, i;
4460 /* we don't care if we race. the answer shouldn't change */
4464 memset(&res, 0, sizeof(res));
4466 for (i = 0; code && i < 3; i++) {
4467 code = FSYNC_VolOp(0, NULL, FSYNC_VOL_LISTVOLUMES, FSYNC_SALVAGE, &res);
4469 Log("AskDAFS: FSYNC_VOL_LISTVOLUMES failed with code %ld reason "
4470 "%ld (%s); trying again...\n", (long)code, (long)res.hdr.reason,
4471 FSYNC_reason2string(res.hdr.reason));
4472 FSYNC_clientFinis();
4478 Log("AskDAFS: could not determine DAFS-ness, assuming not DAFS\n");
4482 if ((res.hdr.flags & SYNC_FLAG_DAFS_EXTENSIONS)) {
/*
 * Bring a volume back online only if it still exists: the volume disk
 * header is read first, and per the inline comment a volume whose header
 * cannot be read is left alone; otherwise AskOnline() is called.
 * NOTE(review): the test on code is not visible in this excerpt.
 */
4492 MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4494 struct VolumeDiskHeader diskHdr;
4496 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHdr);
4498 /* volume probably does not exist; no need to bring back online */
4501 AskOnline(salvinfo, volumeId);
/*
 * Ask the fileserver to put a volume back online, using FSYNC_VOL_ON.
 *
 * The request is sent up to three times.  A denial and a protocol mismatch
 * (SYNC_BAD_COMMAND) are logged; any other failure is logged and
 * FSYNC_clientFinis() is called before the next attempt.  No Abort() call is
 * visible in this function.
 * NOTE(review): the statements ending the loop are not visible in this
 * excerpt.
 */
4505 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4509 for (i = 0; i < 3; i++) {
4510 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4511 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4513 if (code == SYNC_OK) {
4515 } else if (code == SYNC_DENIED) {
4516 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4517 } else if (code == SYNC_BAD_COMMAND) {
4518 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4520 Log("AskOnline: please make sure file server binaries are same version.\n");
4524 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
4525 FSYNC_clientFinis();
/*
 * Send FSYNC_VOL_DONE for a volume to the fileserver, retrying up to three
 * times.
 *
 * A denial and a protocol mismatch (SYNC_BAD_COMMAND) are logged.  A
 * SYNC_FAILED answer whose reason is FSYNC_UNKNOWN_VOLID or FSYNC_WRONG_PART
 * is treated as the volume already being gone.  Any other failure is logged
 * and FSYNC_clientFinis() is called before the next attempt.
 * Log messages carry the AskDelete prefix so that they can be told apart
 * from those of AskOnline().
 * NOTE(review): the statements ending the loop are not visible in this
 * excerpt.
 */
4532 AskDelete(struct SalvInfo *salvinfo, VolumeId volumeId)
4537 for (i = 0; i < 3; i++) {
4538 memset(&res, 0, sizeof(res));
4539 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4540 FSYNC_VOL_DONE, FSYNC_SALVAGE, &res);
4542 if (code == SYNC_OK) {
4544 } else if (code == SYNC_DENIED) {
4545 Log("AskDelete: file server denied DONE request to volume %u partition %s; trying again...\n", volumeId, salvinfo->fileSysPartition->name);
4546 } else if (code == SYNC_BAD_COMMAND) {
4547 Log("AskDelete: fssync protocol mismatch (bad command word '%d')\n",
4550 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4551 Log("AskDelete: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4553 Log("AskDelete: fileserver is DAFS but we are not.\n");
4556 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4557 Log("AskDelete: fileserver is not DAFS but we are.\n");
4559 Log("AskDelete: please make sure fileserver, volserver and salvager binaries are same version.\n");
4563 } else if (code == SYNC_FAILED &&
4564 (res.hdr.reason == FSYNC_UNKNOWN_VOLID ||
4565 res.hdr.reason == FSYNC_WRONG_PART)) {
4566 /* volume is already effectively 'deleted' */
4570 Log("AskDelete: request for fileserver to delete volume failed; trying again...\n");
4571 FSYNC_clientFinis();
/*
 * CopyInode - copy the contents of inode1 into inode2 on the same device.
 *
 * device    device both inode handles are initialised with.
 * inode1    source inode.
 * inode2    destination inode.
 * rwvolume  volume id handed to IH_INIT for both handles.
 *
 * Both inodes are opened through IH_INIT/IH_OPEN. Data is moved in chunks
 * of up to sizeof(buf) bytes: each chunk is read with FDH_PREAD at offset
 * size and written with FDH_PWRITE at the same offset. A short write trips
 * osi_Assert, and so does a read loop that ends with anything other than 0
 * (i.e. a negative read result). Both descriptors are then released with
 * FDH_REALLYCLOSE.
 *
 * NOTE(review): srcFdP is asserted non-NULL but no equivalent check on
 * destFdP is visible before it is written to.
 * NOTE(review): the statement advancing size between chunks, and the end
 * of the comment that follows the signature, are not visible in this
 * extract -- confirm against the complete file.
 */
4578 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4580 /* Volume parameter is passed in case iopen is upgraded in future to
4581 * require a volume Id to be passed
4584 IHandle_t *srcH, *destH;
4585 FdHandle_t *srcFdP, *destFdP;
4587 afs_foff_t size = 0;
4589 IH_INIT(srcH, device, rwvolume, inode1);
4590 srcFdP = IH_OPEN(srcH);
4591 osi_Assert(srcFdP != NULL);
4592 IH_INIT(destH, device, rwvolume, inode2);
4593 destFdP = IH_OPEN(destH);
4594 while ((nBytes = FDH_PREAD(srcFdP, buf, sizeof(buf), size)) > 0) {
4595 osi_Assert(FDH_PWRITE(destFdP, buf, nBytes, size) == nBytes);
4598 osi_Assert(nBytes == 0);
4599 FDH_REALLYCLOSE(srcFdP);
4600 FDH_REALLYCLOSE(destFdP);
/*
 * PrintInodeList - log one line for every struct ViceInodeInfo record
 * stored in the file behind salvinfo->inodeFd.
 *
 * The file size is taken from OS_SIZE(), a buffer of that size is
 * allocated, and the data is read with a single OS_READ(). osi_Assert
 * fires on a negative size, a failed allocation, or a short read. The
 * record count is the size divided by sizeof(struct ViceInodeInfo); any
 * trailing partial record is ignored by that integer division.
 *
 * NOTE(review): no free(buf) is visible in this extract -- confirm the
 * buffer is released.
 * NOTE(review): when the file is empty, malloc(0) may legitimately return
 * NULL on some platforms, which would trip the allocation assert.
 */
4607 PrintInodeList(struct SalvInfo *salvinfo)
4609 struct ViceInodeInfo *ip;
4610 struct ViceInodeInfo *buf;
4613 afs_sfsize_t st_size;
4615 st_size = OS_SIZE(salvinfo->inodeFd);
4616 osi_Assert(st_size >= 0);
4617 buf = (struct ViceInodeInfo *)malloc(st_size);
4618 osi_Assert(buf != NULL);
4619 nInodes = st_size / sizeof(struct ViceInodeInfo);
4620 osi_Assert(OS_READ(salvinfo->inodeFd, buf, st_size) == st_size);
/* Walk the records in file order; nInodes counts down to zero. */
4621 for (ip = buf; nInodes--; ip++) {
4622 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
4623 PrintInode(stmp, ip->inodeNumber), ip->linkCount,
4624 (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1],
4625 ip->u.param[2], ip->u.param[3]);
/*
 * PrintInodeSummary - log one line for each of the
 * salvinfo->nVolumesInInodeFile entries of salvinfo->inodeSummary,
 * showing volume id, RW volume id, index, inode counts and the maximum
 * uniquifier.
 *
 * NOTE(review): the format string ends with the literal text volSummary
 * but has no conversion or argument for it; this looks like a leftover
 * and is printed verbatim.
 */
4631 PrintInodeSummary(struct SalvInfo *salvinfo)
4634 struct InodeSummary *isp;
4636 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4637 isp = &salvinfo->inodeSummary[i];
4638 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp->volumeId, isp->RWvolumeId, isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
/*
 * Body of the process-fork wrapper (the signature line is not visible in
 * this extract; the name Fork is taken from the comment on the assert).
 *
 * The NT code path must never get here, hence the unconditional assert.
 * In AFS_DEMAND_ATTACH_FS builds, when f is 0 and the program is running
 * as salvageServer, the FSSYNC client build calls
 * VChildProcReconnectFS_r().
 * NOTE(review): f presumably holds the fork() result, so f == 0 would
 * select the child process -- the assignment is not visible here; confirm.
 * The SALVSYNC_BUILD_CLIENT section body is likewise not visible.
 */
4648 osi_Assert(0); /* Fork is never executed in the NT code path */
4652 #ifdef AFS_DEMAND_ATTACH_FS
4653 if ((f == 0) && (programType == salvageServer)) {
4654 /* we are a salvageserver child */
4655 #ifdef FSSYNC_BUILD_CLIENT
4656 VChildProcReconnectFS_r();
4658 #ifdef SALVSYNC_BUILD_CLIENT
4662 #endif /* AFS_DEMAND_ATTACH_FS */
4663 #endif /* !AFS_NT40_ENV */
/*
 * Body of the exit wrapper (the signature line is not visible in this
 * extract; presumably Exit taking an integer code -- confirm).
 *
 * In AFS_DEMAND_ATTACH_FS builds a salvageServer process runs extra
 * SALVSYNC and FSSYNC client steps first; those statements are not
 * visible here. When the caller is not main_thread, only the calling
 * thread is ended, with code passed as the thread exit value.
 * NOTE(review): code is cast straight to a pointer; if code is an int,
 * converting through intptr_t avoids size-mismatch warnings on LP64.
 */
4673 #ifdef AFS_DEMAND_ATTACH_FS
4674 if (programType == salvageServer) {
4675 #ifdef SALVSYNC_BUILD_CLIENT
4678 #ifdef FSSYNC_BUILD_CLIENT
4682 #endif /* AFS_DEMAND_ATTACH_FS */
4685 if (main_thread != pthread_self())
4686 pthread_exit((void *)code);
/*
 * Body of the child-wait wrapper (the signature line is not visible in
 * this extract; prog is the program name used in the log message).
 *
 * Blocks in wait() for any child and asserts that the call succeeded,
 * logs when the child dumped core, then tests whether the child was
 * killed by a signal or exited with a nonzero status (the statement
 * controlled by that test is not visible here).
 * NOTE(review): WCOREDUMP is only meaningful when WIFSIGNALED is true and
 * WEXITSTATUS only when WIFEXITED is true; both are evaluated here without
 * that guard. A wait() interrupted by a signal would also trip the assert.
 */
4699 pid = wait(&status);
4700 osi_Assert(pid != -1);
4701 if (WCOREDUMP(status))
4702 Log("\"%s\" core dumped!\n", prog);
4703 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * TimeStamp - format a time value as local time for log output.
 *
 * clock      time to format, in seconds since the epoch.
 * precision  nonzero selects MM/DD/YYYY HH:MM:SS, zero selects
 *            MM/DD/YYYY HH:MM.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call, so the result must be used (or copied) before calling again and
 * the function must not be used from several threads at once.
 * If the time cannot be converted, or the formatted text does not fit in
 * the buffer, the string "(invalid time)" is returned instead of
 * unspecified buffer contents.
 */
char *
TimeStamp(time_t clock, int precision)
{
    static char timestamp[20];
    const char *fmt = precision ? "%m/%d/%Y %H:%M:%S" : "%m/%d/%Y %H:%M";
    struct tm *lt = localtime(&clock);

    /*
     * localtime() yields NULL for times it cannot represent, and
     * strftime() returns 0 and leaves the buffer indeterminate when the
     * result does not fit (for instance with a five-digit year).
     */
    if (lt == NULL || strftime(timestamp, sizeof timestamp, fmt, lt) == 0)
        snprintf(timestamp, sizeof timestamp, "%s", "(invalid time)");

    return timestamp;
}
/*
 * CheckLogFile - rotate the previous log and open a fresh one.
 *
 * log_path  path of the log file.
 *
 * The existing file is renamed to log_path with the suffix .old, then
 * log_path is opened in append mode and stored in the global logFile.
 * Going by the existing comment, a failed open falls back to stdout; the
 * assignment itself is not visible in this extract. Builds without
 * AFS_NAMEI_ENV additionally pass logFile to AFS_DEBUG_IOPS_LOG.
 *
 * NOTE(review): the name is built with strcpy/strcat and no length check,
 * so a log_path within four bytes of AFSDIR_PATH_MAX overruns oldSlvgLog.
 * TimeStampLogFile already uses snprintf with sizeof for the same job;
 * doing the same here would be consistent and bounded.
 * NOTE(review): the statements inside the non-NT conditional that precede
 * the rename are not visible in this extract.
 */
4722 CheckLogFile(char * log_path)
4724 char oldSlvgLog[AFSDIR_PATH_MAX];
4726 #ifndef AFS_NT40_ENV
4733 strcpy(oldSlvgLog, log_path);
4734 strcat(oldSlvgLog, ".old");
4736 renamefile(log_path, oldSlvgLog);
4737 logFile = afs_fopen(log_path, "a");
4739 if (!logFile) { /* still nothing, use stdout */
4743 #ifndef AFS_NAMEI_ENV
4744 AFS_DEBUG_IOPS_LOG(logFile);
/*
 * TimeStampLogFile - give the current log file an additional,
 * timestamped name (non-NT builds only).
 *
 * log_path  path of the log file.
 *
 * The new name is log_path followed by .YYYY-MM-DD.HH:MM:SS, taken from
 * the local time of now, and is created as a hard link to log_path. A
 * failing link() is deliberately ignored, as the comments below state.
 *
 * NOTE(review): snprintf truncates silently when the name exceeds
 * AFSDIR_PATH_MAX, which would yield a shortened link name. The
 * assignment of now, and any check of the localtime() result, are not
 * visible in this extract.
 */
4749 #ifndef AFS_NT40_ENV
4751 TimeStampLogFile(char * log_path)
4753 char stampSlvgLog[AFSDIR_PATH_MAX];
4758 lt = localtime(&now);
4759 snprintf(stampSlvgLog, sizeof stampSlvgLog,
4760 "%s.%04d-%02d-%02d.%02d:%02d:%02d", log_path,
4761 lt->tm_year + 1900, lt->tm_mon + 1, lt->tm_mday, lt->tm_hour,
4762 lt->tm_min, lt->tm_sec);
4764 /* try to link the logfile to a timestamped filename */
4765 /* if it fails, oh well, nothing we can do */
4766 link(log_path, stampSlvgLog);
/*
 * Body of the log display routine (the signature line is not visible in
 * this extract; the name showlog is inferred from the messages below).
 *
 * On non-NT builds a message explains that the log cannot be shown when
 * syslog is in use. Otherwise AFSDIR_SERVER_SLVGLOG_FILEPATH is opened for
 * reading into logFile, a message is printed when it cannot be read, and
 * the file is consumed line by line with fgets() into line.
 * NOTE(review): the conditions guarding both messages and the statement
 * executed for each line are not visible in this extract.
 */
4775 #ifndef AFS_NT40_ENV
4777 printf("Can't show log since using syslog.\n");
4788 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
4791 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
4794 while (fgets(line, sizeof(line), logFile))
/*
 * Log - printf-style logging routine.
 *
 * format  printf format string, followed by its arguments.
 *
 * The message is formatted into tmp with vsnprintf, so anything longer
 * than sizeof tmp is truncated. On non-NT builds it can be sent to syslog
 * at LOG_INFO; otherwise it is written to logFile prefixed with
 * TimeStamp(now.tv_sec, 1).
 * NOTE(review): the condition choosing between syslog and logFile, the
 * matching va_end, and any flush of logFile are not visible in this
 * extract.
 */
4801 Log(const char *format, ...)
4807 va_start(args, format);
4808 vsnprintf(tmp, sizeof tmp, format, args);
4810 #ifndef AFS_NT40_ENV
4812 syslog(LOG_INFO, "%s", tmp);
4816 gettimeofday(&now, NULL);
4817 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
/*
 * Abort - printf-style routine that reports a message on the abort path.
 *
 * format  printf format string, followed by its arguments.
 *
 * The message is formatted into tmp with vsnprintf (truncated at
 * sizeof tmp). On non-NT builds it can be sent to syslog at LOG_INFO;
 * otherwise it is written to logFile as is, without the timestamp prefix
 * that Log adds.
 * NOTE(review): the condition choosing between syslog and logFile, the
 * matching va_end, and the statements that actually end the program are
 * not visible in this extract.
 */
4823 Abort(const char *format, ...)
4828 va_start(args, format);
4829 vsnprintf(tmp, sizeof tmp, format, args);
4831 #ifndef AFS_NT40_ENV
4833 syslog(LOG_INFO, "%s", tmp);
4837 fprintf(logFile, "%s", tmp);
/*
 * ToString - derive p from the string s and assert that p is not NULL.
 *
 * NOTE(review): how p is produced and what is returned are not visible in
 * this extract; presumably s is duplicated into newly allocated storage
 * and the assert guards the allocation -- confirm against the complete
 * file.
 */
4849 ToString(const char *s)
4853 osi_Assert(p != NULL);
4857 /* Remove the FORCESALVAGE file */
/*
 * path  directory in which the FORCESALVAGE file is looked for.
 *
 * The file name is path followed by /FORCESALVAGE. It is unlinked only
 * when Testing is off, ForceSalvage is set, and afs_stat() finds the file.
 *
 * NOTE(review): target is filled with strcpy/strcat and no length check;
 * its declaration is not visible in this extract, but any fixed size can
 * be overrun by a long path. A bounded snprintf would be safer.
 * NOTE(review): the unlink() result is ignored, and the preceding
 * afs_stat() is not needed for correctness since unlink() reports a
 * missing file itself.
 */
4859 RemoveTheForce(char *path)
4862 struct afs_stat_st force; /* so we can use afs_stat to find it */
4863 strcpy(target,path);
4864 strcat(target,"/FORCESALVAGE");
4865 if (!Testing && ForceSalvage) {
4866 if (afs_stat(target,&force) == 0) unlink(target);
/*
 * Variant of UseTheForceLuke built when AFS_AIX32_ENV is not defined.
 *
 * path  directory in which the FORCESALVAGE file is looked for.
 *
 * Returns 1 when afs_stat() succeeds on path followed by /FORCESALVAGE,
 * and 0 otherwise.
 *
 * NOTE(review): target is filled with strcpy/strcat and no length check;
 * its declaration is not visible in this extract, but any fixed size can
 * be overrun by a long path. A bounded snprintf would be safer.
 */
4870 #ifndef AFS_AIX32_ENV
4872 * UseTheForceLuke - see if we can use the force
4875 UseTheForceLuke(char *path)
4877 struct afs_stat_st force;
4879 strcpy(target,path);
4880 strcat(target,"/FORCESALVAGE");
4882 return (afs_stat(target, &force) == 0);
/*
 * Second definition of UseTheForceLuke; presumably the alternative branch
 * of the AFS_AIX32_ENV conditional (the directive itself is not visible in
 * this extract). It ignores path and always returns 0: as the text below
 * explains, no FORCESALVAGE file is used on this platform and
 * ListViceInodes() sets ForceSalvage itself when needed.
 */
4886 * UseTheForceLuke - see if we can use the force
4889 * The VRMIX fsck will not muck with the filesystem it is supposedly
4890 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4891 * muck directly with the root inode, which is within the normal
4893 * ListViceInodes() has a side effect of setting ForceSalvage if
4894 * it detects a need, based on root inode examination.
4897 UseTheForceLuke(char *path)
4900 return 0; /* sorry OB1 */
4905 /* NT support routines */
/* Path of the running executable; filled in on first use below. */
4907 static char execpathname[MAX_PATH];
/*
 * nt_SalvagePartition - start a child salvager for one partition.
 *
 * partName  partition name, copied into the job record.
 * jobn      job number, stored in the job record.
 *
 * On the first call (execpathname still empty) the path of the current
 * executable is fetched with GetModuleFileName(). A job record is then
 * filled with SALVAGER_MAGIC, the job number and the partition name, and
 * handed to spawnprocveb() together with execpathname and save_args; the
 * result is stored in pid.
 *
 * NOTE(review): the truncation test compares n with the constant 1023
 * while the size passed to GetModuleFileName() is MAX_PATH - 1; unless
 * MAX_PATH is 1024 the test can never match. Comparing with
 * MAX_PATH - 1 looks like the intent.
 * NOTE(review): strcpy into job.cj_part is unbounded; the size of cj_part
 * is not visible here. The return statements are not visible either.
 */
4909 nt_SalvagePartition(char *partName, int jobn)
4914 if (!*execpathname) {
4915 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
4916 if (!n || n == 1023)
4919 job.cj_magic = SALVAGER_MAGIC;
4920 job.cj_number = jobn;
4921 (void)strcpy(job.cj_part, partName);
4922 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
/*
 * nt_SetupPartitionSalvage - child-side setup from the job record passed
 * by the parent.
 *
 * datap  pointer to the data block, interpreted as a childJob_t.
 * len    size of that block in bytes.
 *
 * The block is checked for the expected size (sizeof(childJob_t)) and for
 * the SALVAGER_MAGIC marker. A log file name is then built from
 * AFSDIR_SERVER_SLVGLOG_FILEPATH plus a dot and a number, and opened for
 * writing into the global logFile.
 *
 * NOTE(review): the name is built with sprintf and no length limit on
 * logname; snprintf with sizeof logname would be bounded.
 * NOTE(review): the actions taken when a check fails, the numeric argument
 * of the sprintf call, and any test of the afs_fopen() result are not
 * visible in this extract.
 */
4927 nt_SetupPartitionSalvage(void *datap, int len)
4929 childJob_t *jobp = (childJob_t *) datap;
4930 char logname[AFSDIR_PATH_MAX];
4932 if (len != sizeof(childJob_t))
4934 if (jobp->cj_magic != SALVAGER_MAGIC)
4939 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
4941 logFile = afs_fopen(logname, "w");
4949 #endif /* AFS_NT40_ENV */