2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
89 #include <afs/procmgmt.h>
92 #ifdef HAVE_SYS_FILE_H
93 # include <sys/file.h>
97 #include <WINNT/afsevent.h>
100 #define WCOREDUMP(x) ((x) & 0200)
103 #ifdef AFS_PTHREAD_ENV
104 # include <opr/lock.h>
107 #include <afs/afsint.h>
108 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
109 #if defined(AFS_VFSINCL_ENV)
110 #include <sys/vnode.h>
112 #include <sys/fs/ufs_inode.h>
114 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
115 #include <ufs/ufs/dinode.h>
116 #include <ufs/ffs/fs.h>
118 #include <ufs/inode.h>
121 #else /* AFS_VFSINCL_ENV */
123 #include <ufs/inode.h>
124 #else /* AFS_OSF_ENV */
125 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_DARWIN_ENV)
126 #include <sys/inode.h>
129 #endif /* AFS_VFSINCL_ENV */
130 #endif /* AFS_SGI_ENV */
133 #include <sys/lockf.h>
136 #include <checklist.h>
138 #if defined(AFS_SGI_ENV)
141 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
143 #include <sys/mnttab.h>
144 #include <sys/mntent.h>
149 #endif /* AFS_SGI_ENV */
150 #endif /* AFS_HPUX_ENV */
154 #include <afs/osi_inode.h>
158 #include <afs/afsutil.h>
159 #include <afs/fileutil.h>
160 #include <rx/rx_queue.h>
165 #include <afs/afssyscalls.h>
169 #include "partition.h"
170 #include "daemon_com.h"
171 #include "daemon_com_inline.h"
173 #include "fssync_inline.h"
174 #include "volume_inline.h"
175 #include "salvsync.h"
176 #include "viceinode.h"
178 #include "volinodes.h" /* header magic number, etc. stuff */
179 #include "vol-salvage.h"
181 #include "vol_internal.h"
183 #include <afs/prs_fs.h>
185 #ifdef FSSYNC_BUILD_CLIENT
186 #include "vg_cache.h"
/*
 * File-scope state. Most of these variables mirror a command-line flag, as
 * named in the trailing comment on each declaration.
 */
194 extern void *calloc();
196 static char *TimeStamp(time_t clock, int precision);
199 int debug; /* -d flag */
200 extern int Testing; /* -n flag */
201 int ListInodeOption; /* -i flag */
202 int ShowRootFiles; /* -r flag */
203 int RebuildDirs; /* -sal flag */
204 int Parallel = 4; /* -para X flag */
205 int PartsPerDisk = 8; /* Salvage up to 8 partitions on same disk sequentially */
206 int forceR = 0; /* -b flag */
207 int ShowLog = 0; /* -showlog flag */
208 int ShowSuid = 0; /* -showsuid flag */
209 int ShowMounts = 0; /* -showmounts flag */
210 int orphans = ORPH_IGNORE; /* -orphans option */
215 int useSyslog = 0; /* -syslog flag */
216 int useSyslogFacility = LOG_DAEMON; /* -syslogfacility option */
/* Size of the static jobs[] table in SalvageFileSysParallel(). */
225 #define MAXPARALLEL 32
227 int OKToZap; /* -o flag */
228 int ForceSalvage; /* If salvage should occur despite the DONT_SALVAGE flag
229 * in the volume header */
231 FILE *logFile = 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
233 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
236 * information that is 'global' to a particular salvage job.
239 Device fileSysDevice; /**< The device number of the current partition
241 char fileSysPath[9]; /**< The path of the mounted partition currently
242 * being salvaged, i.e. the directory containing
243 * the volume headers */
244 char *fileSysPathName; /**< NT needs this to make name pretty log. */
245 IHandle_t *VGLinkH; /**< Link handle for current volume group. */
246 int VGLinkH_cnt; /**< # of references to lnk handle. */
247 struct DiskPartition64 *fileSysPartition; /**< Partition being salvaged */
250 char *fileSysDeviceName; /**< The block device where the file system being
251 * salvaged was mounted */
252 char *filesysfulldev;
254 int VolumeChanged; /**< Set by any routine which would change the
255 * volume in a way which would require callbacks
256 * to be broken if the volume was put back on
257 * on line by an active file server */
259 VolumeDiskData VolInfo; /**< A copy of the last good or salvaged volume
260 * header dealt with */
262 int nVolumesInInodeFile; /**< Number of read-write volumes summarized */
263 FD_t inodeFd; /**< File descriptor for inode file */
265 struct VolumeSummary *volumeSummaryp; /**< Holds all the volumes in a part */
266 int nVolumes; /**< Number of volumes (read-write and read-only)
267 * in volume summary */
268 struct InodeSummary *inodeSummary; /**< contains info on all the relevant
271 struct VnodeInfo vnodeInfo[nVNODECLASSES]; /**< contains info on all of the
272 * vnodes in the volume that
273 * we are currently looking
275 int useFSYNC; /**< 0 if the fileserver is unavailable; 1 if we should try
276 * to contact the fileserver over FSYNC */
283 /* Forward declarations */
284 static int IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode);
285 static int AskVolumeSummary(struct SalvInfo *salvinfo,
286 VolumeId singleVolumeNumber);
287 static void MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId);
288 static void AskError(struct SalvInfo *salvinfo, VolumeId volumeId);
290 #ifdef AFS_DEMAND_ATTACH_FS
291 static int LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId);
292 #endif /* AFS_DEMAND_ATTACH_FS */
294 /* Uniquifier stored in the Inode */
299 return (u & 0x3fffff);
301 #if defined(AFS_SGI_EXMAG)
302 return (u & SGI_UNIQMASK);
305 #endif /* AFS_SGI_EXMAG */
312 if (aerror == EPERM || aerror == ENXIO || aerror == ENOENT)
314 return 0; /* otherwise may be transient, e.g. EMFILE */
319 char *save_args[MAX_ARGS];
321 extern pthread_t main_thread;
322 childJob_t myjob = { SALVAGER_MAGIC, NOT_CHILD, "" };
326 * Get the salvage lock if not already held. Hold until process exits.
328 * @param[in] locktype READ_LOCK or WRITE_LOCK
/*
 * Initialise a VLockFile for AFSDIR_SERVER_SLVGLOCK_FILEPATH and try to lock
 * it with the caller-supplied lock type. The two strings below are the
 * diagnostics printed when the lock cannot be taken.
 * NOTE(review): the conditions selecting each message, and what happens
 * after it is printed, are not visible in this excerpt -- confirm against
 * the complete file.
 */
331 _ObtainSalvageLock(int locktype)
333 struct VLockFile salvageLock;
338 VLockFileInit(&salvageLock, AFSDIR_SERVER_SLVGLOCK_FILEPATH);
340 code = VLockFileLock(&salvageLock, offset, locktype, nonblock);
343 "salvager: There appears to be another salvager running! "
348 "salvager: Error %d trying to acquire salvage lock! "
/* Request the salvage lock in WRITE_LOCK mode via _ObtainSalvageLock(). */
354 ObtainSalvageLock(void)
356 _ObtainSalvageLock(WRITE_LOCK);
/* Request the salvage lock in READ_LOCK mode via _ObtainSalvageLock(). */
359 ObtainSharedSalvageLock(void)
361 _ObtainSalvageLock(READ_LOCK);
365 #ifdef AFS_SGI_XFS_IOPS_ENV
/**
 * Check whether the given partition is currently mounted.
 *
 * For XFS the root inode number is not a constant, so instead of comparing
 * inode numbers we scan the mounted-filesystem table (MOUNTED) for an entry
 * whose mount directory equals @p part.
 *
 * @param[in] part  mount point to look for
 *
 * @return 1 if a matching entry was found, 0 otherwise
 */
int
IsPartitionMounted(char *part)
{
    FILE *mntfp;
    struct mntent *mntent;

    opr_Verify((mntfp = setmntent(MOUNTED, "r")) != NULL);
    while ((mntent = getmntent(mntfp)) != NULL) {
        if (!strcmp(part, mntent->mnt_dir))
            break;
    }
    endmntent(mntfp);

    /* mntent is NULL when the table was exhausted without finding a match */
    return mntent ? 1 : 0;
}
385 /* Check if the given inode is the root of the filesystem. */
386 #ifndef AFS_SGI_XFS_IOPS_ENV
/*
 * Report whether the stat result describes the filesystem root, i.e. whether
 * its st_ino equals ROOTINODE.
 */
388 IsRootInode(struct afs_stat_st *status)
391 * The root inode is not a fixed value in XFS partitions. So we need to
392 * see if the partition is in the list of mounted partitions. This only
393 * affects the SalvageFileSys path, so we check there.
395 return (status->st_ino == ROOTINODE);
400 #ifndef AFS_NAMEI_ENV
401 /* We don't want to salvage big files filesystems, since we can't put volumes on
/*
 * Read the superblock of the device backing mountPoint and log a "not
 * salvaging" message when it cannot be read or when IsBigFilesFileSystem()
 * reports a big-files filesystem.
 * NOTE(review): the return values for each outcome are not visible in this
 * excerpt -- confirm against the complete file.
 */
405 CheckIfBigFilesFS(char *mountPoint, char *devName)
407 struct superblock fs;
/* strncmp() != 0: devName does not start with "/dev/", so prepend it */
410 if (strncmp(devName, "/dev/", 5)) {
411 (void)sprintf(name, "/dev/%s", devName);
413 (void)strcpy(name, devName);
416 if (ReadSuper(&fs, name) < 0) {
417 Log("Unable to read superblock. Not salvaging partition %s.\n",
421 if (IsBigFilesFileSystem(&fs)) {
422 Log("Partition %s is a big files filesystem, not salvaging.\n",
432 #define HDSTR "\\Device\\Harddisk"
433 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
/*
 * QueryDosDevice()-based variant: resolve the devName of both partitions and
 * treat them as the same disk when the resolved names compare equal,
 * ignoring case, over RES_LEN - 1 characters. A WARNING is logged when a
 * resolved name does not begin with HDSTR.
 * NOTE(review): the early-return values and the exact role of dowarn are not
 * visible in this excerpt -- confirm against the complete file.
 */
435 SameDisk(struct DiskPartition64 *p1, struct DiskPartition64 *p2)
441 static int dowarn = 1;
443 if (!QueryDosDevice(p1->devName, res1, RES_LEN - 1))
445 if (strncmp(res1, HDSTR, HDLEN)) {
448 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
449 res1, HDSTR, p1->devName);
452 if (!QueryDosDevice(p2->devName, res2, RES_LEN - 1))
454 if (strncmp(res2, HDSTR, HDLEN)) {
457 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
458 res2, HDSTR, p2->devName);
462 return (0 == _strnicmp(res1, res2, RES_LEN - 1));
465 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
468 /* This assumes that two partitions with the same device number divided by
469 * PartsPerDisk are on the same disk.
/*
 * Queue the salvage of one partition, running up to Parallel disks at once.
 *
 * A non-NULL partP is wrapped in a job record. Jobs whose partition is on a
 * disk that already has a job (per SameDisk) are chained through nextjob so
 * that partitions sharing a disk are salvaged one after another. A NULL
 * partP means "wait for every outstanding job"; afterwards the per-job log
 * files (AFSDIR_SERVER_SLVGLOG_FILEPATH.<jobnumb>) are read back and
 * unlinked unless syslog is in use. With debug set or Parallel == 1 the
 * partition is handed straight to SalvageFileSys().
 *
 * jobs[], numjobs and jobcount are static, so this state persists across
 * calls.
 */
472 SalvageFileSysParallel(struct DiskPartition64 *partP)
475 struct DiskPartition64 *partP;
476 int pid; /* Pid for this job */
477 int jobnumb; /* Log file job number */
478 struct job *nextjob; /* Next partition on disk to salvage */
480 static struct job *jobs[MAXPARALLEL] = { 0 }; /* Need to zero this */
481 struct job *thisjob = 0;
482 static int numjobs = 0;
483 static int jobcount = 0;
489 char logFileName[256];
493 /* We have a partition to salvage. Copy it into thisjob */
494 thisjob = calloc(1, sizeof(struct job));
496 Log("Can't salvage '%s'. Not enough memory\n", partP->name);
499 thisjob->partP = partP;
500 thisjob->jobnumb = jobcount;
502 } else if (jobcount == 0) {
503 /* We are asking to wait for all jobs (partp == 0), yet we never
506 Log("No file system partitions named %s* found; not salvaged\n",
507 VICE_PARTITION_PREFIX);
511 if (debug || Parallel == 1) {
513 SalvageFileSys(thisjob->partP, 0);
520 /* Check to see if thisjob is for a disk that we are already
521 * salvaging. If it is, link it in as the next job to do. The
522 * jobs array has 1 entry per disk being salvaged. numjobs is
523 * the total number of disks currently being salvaged. In
524 * order to keep the jobs array compact, when a disk is
525 * completed, the highest element in the jobs array is moved
526 * down to the now open slot.
528 for (j = 0; j < numjobs; j++) {
529 if (SameDisk(jobs[j]->partP, thisjob->partP)) {
530 /* On same disk, add it to this list and return */
531 thisjob->nextjob = jobs[j]->nextjob;
532 jobs[j]->nextjob = thisjob;
539 /* Loop until we start thisjob or until all existing jobs are finished */
540 while (thisjob || (!partP && (numjobs > 0))) {
541 startjob = -1; /* No new job to start */
543 if ((numjobs >= Parallel) || (!partP && (numjobs > 0))) {
544 /* Either the max jobs are running or we have to wait for all
545 * the jobs to finish. In either case, we wait for at least one
546 * job to finish. When it's done, clean up after it.
548 pid = wait(&wstatus);
549 opr_Assert(pid != -1);
550 for (j = 0; j < numjobs; j++) { /* Find which job it is */
551 if (pid == jobs[j]->pid)
554 opr_Assert(j < numjobs);
555 if (WCOREDUMP(wstatus)) { /* Say if the job core dumped */
556 Log("Salvage of %s core dumped!\n", jobs[j]->partP->name);
559 numjobs--; /* job no longer running */
560 oldjob = jobs[j]; /* remember */
561 jobs[j] = jobs[j]->nextjob; /* Step to next part on same disk */
562 free(oldjob); /* free the old job */
564 /* If there is another partition on the disk to salvage, then
565 * say we will start it (startjob). If not, then put thisjob there
566 * and say we will start it.
568 if (jobs[j]) { /* Another partitions to salvage */
569 startjob = j; /* Will start it */
570 } else { /* There is not another partition to salvage */
572 jobs[j] = thisjob; /* Add thisjob */
574 startjob = j; /* Will start it */
576 jobs[j] = jobs[numjobs]; /* Move last job up to this slot */
577 startjob = -1; /* Don't start it - already running */
581 /* We don't have to wait for a job to complete */
583 jobs[numjobs] = thisjob; /* Add this job */
585 startjob = numjobs; /* Will start it */
589 /* Start up a new salvage job on a partition in job slot "startjob" */
590 if (startjob != -1) {
592 Log("Starting salvage of file system partition %s\n",
593 jobs[startjob]->partP->name);
595 /* For NT, we not only fork, but re-exec the salvager. Pass in the
596 * commands and pass the child job number via the data path.
599 nt_SalvagePartition(jobs[startjob]->partP->name,
600 jobs[startjob]->jobnumb);
601 jobs[startjob]->pid = pid;
606 jobs[startjob]->pid = pid;
612 for (fd = 0; fd < 16; fd++)
619 openlog("salvager", LOG_PID, useSyslogFacility);
623 snprintf(logFileName, sizeof logFileName, "%s.%d",
624 AFSDIR_SERVER_SLVGLOG_FILEPATH,
625 jobs[startjob]->jobnumb);
626 logFile = afs_fopen(logFileName, "w");
631 SalvageFileSys1(jobs[startjob]->partP, 0);
636 } /* while ( thisjob || (!partP && numjobs > 0) ) */
638 /* If waited for all jobs to complete, now collect log files and return */
640 if (!useSyslog) /* if syslogging - no need to collect */
643 for (i = 0; i < jobcount; i++) {
644 snprintf(logFileName, sizeof logFileName, "%s.%d",
645 AFSDIR_SERVER_SLVGLOG_FILEPATH, i);
646 if ((passLog = afs_fopen(logFileName, "r"))) {
647 while (fgets(buf, sizeof(buf), passLog)) {
652 (void)unlink(logFileName);
/*
 * Salvage a partition (or a single volume on it) via SalvageFileSys1().
 * The work runs in the current process when forking is unavailable
 * (!canfork) or debug is set; otherwise it runs in the child returned by
 * Fork() while the parent blocks in Wait("SalvageFileSys").
 */
661 SalvageFileSys(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
663 if (!canfork || debug || Fork() == 0) {
664 SalvageFileSys1(partP, singleVolumeNumber);
665 if (canfork && !debug) {
670 Wait("SalvageFileSys");
/**
 * Split a device path into its directory part and its final component.
 *
 * @param[in,out] pbuffer  NUL-terminated device path; on success it is
 *                         overwritten in place with the component that
 *                         follows the last OS_DIRSEPC
 * @param[out]    wpath    receives everything before the last OS_DIRSEPC;
 *                         the caller must provide at least strlen(pbuffer)
 *                         bytes, and it must not overlap pbuffer
 *
 * @return pbuffer on success; NULL, with both buffers left untouched, when
 *         the path contains no OS_DIRSEPC
 */
static char *
get_DevName(char *pbuffer, char *wpath)
{
    char *sep = strrchr(pbuffer, OS_DIRSEPC);
    size_t dirlen;

    if (sep == NULL)
        return NULL;

    /* Copy the directory part directly; no fixed-size scratch buffer that a
     * long device path could overrun. */
    dirlen = (size_t)(sep - pbuffer);
    memcpy(wpath, pbuffer, dirlen);
    wpath[dirlen] = '\0';

    /* Source and destination overlap, so strcpy() would be undefined here. */
    memmove(pbuffer, sep + 1, strlen(sep + 1) + 1);
    return pbuffer;
}
/*
 * Salvage one partition, or only the volume group of singleVolumeNumber
 * when that argument is non-zero.
 *
 * Visible steps, in order: fill a local SalvInfo with the partition's
 * device and path; take the partition lock, or (single-volume case) connect
 * to the fileserver, AskOffline the volume and lock it under DAFS; remove
 * leftover "salvage.inodes." and "salvage.temp." files; create and
 * immediately unlink the inode list temp file; build the inode summary
 * (GetInodeSummary) and the volume summary (GetVolumeSummary); walk the
 * inode summaries grouped by RW volume id, deleting header files with no
 * matching inodes and salvaging each group; finally, in the single-volume
 * non-Testing case, bring the surviving volumes back online, the requested
 * volume first.
 *
 * NOTE(review): the retry label/loop around the "tries" check and several
 * branch bodies are not visible in this excerpt.
 */
693 SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber)
696 char inodeListPath[256];
697 FD_t inodeFile = INVALID_FD;
698 static char tmpDevName[100];
699 static char wpath[100];
700 struct VolumeSummary *vsp, *esp;
704 struct SalvInfo l_salvinfo;
705 struct SalvInfo *salvinfo = &l_salvinfo;
708 memset(salvinfo, 0, sizeof(*salvinfo));
711 if (inodeFile != INVALID_FD) {
713 inodeFile = INVALID_FD;
715 if (tries > VOL_MAX_CHECKOUT_RETRIES) {
716 Abort("Raced too many times with fileserver restarts while trying to "
717 "checkout/lock volumes; Aborted\n");
719 #ifdef AFS_DEMAND_ATTACH_FS
721 /* unlock all previous volume locks, since we're about to lock them
723 VLockFileReinit(&partP->volLockFile);
725 #endif /* AFS_DEMAND_ATTACH_FS */
727 salvinfo->fileSysPartition = partP;
728 salvinfo->fileSysDevice = salvinfo->fileSysPartition->device;
729 salvinfo->fileSysPathName = VPartitionPath(salvinfo->fileSysPartition);
732 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
733 (void)sprintf(salvinfo->fileSysPath, "%s" OS_DIRSEP, salvinfo->fileSysPathName);
734 name = partP->devName;
736 strlcpy(salvinfo->fileSysPath, salvinfo->fileSysPathName, sizeof(salvinfo->fileSysPath));
737 strcpy(tmpDevName, partP->devName);
738 name = get_DevName(tmpDevName, wpath);
739 salvinfo->fileSysDeviceName = name;
740 salvinfo->filesysfulldev = wpath;
743 if (singleVolumeNumber) {
744 #ifndef AFS_DEMAND_ATTACH_FS
745 /* only non-DAFS locks the partition when salvaging a single volume;
746 * DAFS will lock the individual volumes in the VG */
747 VLockPartition(partP->name);
748 #endif /* !AFS_DEMAND_ATTACH_FS */
752 /* salvageserver already setup fssync conn for us */
753 if ((programType != salvageServer) && !VConnectFS()) {
754 Abort("Couldn't connect to file server\n");
757 salvinfo->useFSYNC = 1;
758 AskOffline(salvinfo, singleVolumeNumber);
759 #ifdef AFS_DEMAND_ATTACH_FS
760 if (LockVolume(salvinfo, singleVolumeNumber)) {
763 #endif /* AFS_DEMAND_ATTACH_FS */
766 salvinfo->useFSYNC = 0;
767 VLockPartition(partP->name);
771 ForceSalvage = UseTheForceLuke(salvinfo->fileSysPath);
774 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
775 partP->name, name, (Testing ? "(READONLY mode)" : ""));
777 Log("***Forced salvage of all volumes on this partition***\n");
782 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
789 opr_Verify((dirp = opendir(salvinfo->fileSysPath)) != NULL);
790 while ((dp = readdir(dirp))) {
791 if (!strncmp(dp->d_name, "salvage.inodes.", 15)
792 || !strncmp(dp->d_name, "salvage.temp.", 13)) {
794 Log("Removing old salvager temp files %s\n", dp->d_name);
795 strcpy(npath, salvinfo->fileSysPath);
796 strcat(npath, OS_DIRSEP);
797 strcat(npath, dp->d_name);
803 tdir = (tmpdir ? tmpdir : salvinfo->fileSysPath);
805 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
806 (void)strncpy(inodeListPath, _tempnam(tdir, "salvage.inodes."), 255);
808 snprintf(inodeListPath, 255, "%s" OS_DIRSEP "salvage.inodes.%s.%d", tdir, name,
812 inodeFile = OS_OPEN(inodeListPath, O_RDWR|O_TRUNC|O_CREAT, 0666);
813 if (inodeFile == INVALID_FD) {
814 Abort("Error %d when creating inode description file %s; not salvaged\n", errno, inodeListPath);
817 /* Using nt_unlink here since we're really using the delete on close
818 * semantics of unlink. In most places in the salvager, we really do
819 * mean to unlink the file at that point. Those places have been
820 * modified to actually do that so that the NT crt can be used there.
822 * jaltman - On NT delete on close cannot be applied to a file while the
823 * process has an open file handle that does not have DELETE file
824 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
825 * delete privileges. As a result the nt_unlink() call will always
828 code = nt_unlink(inodeListPath);
830 code = unlink(inodeListPath);
833 Log("Error %d when trying to unlink %s\n", errno, inodeListPath);
836 if (GetInodeSummary(salvinfo, inodeFile, singleVolumeNumber) < 0) {
840 salvinfo->inodeFd = inodeFile;
841 if (salvinfo->inodeFd == INVALID_FD)
842 Abort("Temporary file %s is missing...\n", inodeListPath);
843 OS_SEEK(salvinfo->inodeFd, 0L, SEEK_SET);
844 if (ListInodeOption) {
845 PrintInodeList(salvinfo);
846 if (singleVolumeNumber) {
847 /* We've checked out the volume from the fileserver, and we need
848 * to give it back. We don't know if the volume exists or not,
849 * so we don't know whether to AskOnline or not. Try to determine
850 * if the volume exists by trying to read the volume header, and
851 * AskOnline if it is readable. */
852 MaybeAskOnline(salvinfo, singleVolumeNumber);
856 /* enumerate volumes in the partition.
857 * figure out sets of read-only + rw volumes.
858 * salvage each set, read-only volumes first, then read-write.
859 * Fix up inodes on last volume in set (whether it is read-write
862 if (GetVolumeSummary(salvinfo, singleVolumeNumber)) {
866 if (singleVolumeNumber) {
867 /* If we delete a volume during the salvage, we indicate as such by
868 * setting the volsummary->deleted field. We need to know if we
869 * deleted a volume or not in order to know which volumes to bring
870 * back online after the salvage. If we fork, we will lose this
871 * information, since volsummary->deleted will not get set in the
872 * parent. So, don't fork. */
876 for (i = j = 0, vsp = salvinfo->volumeSummaryp, esp = vsp + salvinfo->nVolumes;
877 i < salvinfo->nVolumesInInodeFile; i = j) {
878 VolumeId rwvid = salvinfo->inodeSummary[i].RWvolumeId;
880 j < salvinfo->nVolumesInInodeFile && salvinfo->inodeSummary[j].RWvolumeId == rwvid;
882 VolumeId vid = salvinfo->inodeSummary[j].volumeId;
883 struct VolumeSummary *tsp;
884 /* Scan volume list (from partition root directory) looking for the
885 * current rw volume number in the volume list from the inode scan.
886 * If there is one here that is not in the inode volume list,
888 for (; vsp < esp && (vsp->header.parent < rwvid); vsp++) {
890 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
892 /* Now match up the volume summary info from the root directory with the
893 * entry in the volume list obtained from scanning inodes */
894 salvinfo->inodeSummary[j].volSummary = NULL;
895 for (tsp = vsp; tsp < esp && (tsp->header.parent == rwvid); tsp++) {
896 if (tsp->header.id == vid) {
897 salvinfo->inodeSummary[j].volSummary = tsp;
903 /* Salvage the group of volumes (several read-only + 1 read/write)
904 * starting with the current read-only volume we're looking at.
907 nt_SalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
909 DoSalvageVolumeGroup(salvinfo, &salvinfo->inodeSummary[i], j - i);
910 #endif /* AFS_NT40_ENV */
914 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
915 for (; vsp < esp; vsp++) {
917 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
920 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
921 RemoveTheForce(salvinfo->fileSysPath);
923 if (!Testing && singleVolumeNumber) {
925 #ifdef AFS_DEMAND_ATTACH_FS
926 /* unlock vol headers so the fs can attach them when we AskOnline */
927 VLockFileReinit(&salvinfo->fileSysPartition->volLockFile);
928 #endif /* AFS_DEMAND_ATTACH_FS */
930 /* Step through the volumeSummary list and set all volumes on-line.
931 * Most volumes were taken off-line in GetVolumeSummary.
932 * If a volume was deleted, don't tell the fileserver anything, since
933 * we already told the fileserver the volume was deleted back when we
934 * destroyed the volume header.
935 * Also, make sure we bring the singleVolumeNumber back online first.
938 for (j = 0; j < salvinfo->nVolumes; j++) {
939 if (salvinfo->volumeSummaryp[j].header.id == singleVolumeNumber) {
941 if (!salvinfo->volumeSummaryp[j].deleted) {
942 AskOnline(salvinfo, singleVolumeNumber);
948 /* If singleVolumeNumber is not in our volumeSummary, it means that
949 * at least one other volume in the VG is on the partition, but the
950 * RW volume is not. We've already AskOffline'd it by now, though,
951 * so make sure we don't still have the volume checked out. */
952 AskDelete(salvinfo, singleVolumeNumber);
955 for (j = 0; j < salvinfo->nVolumes; j++) {
956 if (salvinfo->volumeSummaryp[j].header.id != singleVolumeNumber) {
957 if (!salvinfo->volumeSummaryp[j].deleted) {
958 AskOnline(salvinfo, salvinfo->volumeSummaryp[j].header.id);
964 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
965 salvinfo->fileSysPartition->name, (Testing ? " (READONLY mode)" : ""));
968 OS_CLOSE(inodeFile); /* SalvageVolumeGroup was the last which needed it. */
/*
 * Dispose of a volume header file that has no inodes behind it.
 *
 * Builds the header's path from salvinfo->fileSysPath and the name produced
 * by VolumeExternalName_r(), logs that the header has no associated data,
 * destroys the on-disk header with VDestroyVolumeDiskHeader(), unlinks the
 * path (ENOENT is tolerated), and, when FSYNC is in use, reports the
 * deletion with AskDelete().
 * NOTE(review): the log text changes to "would have been deleted" under
 * Testing; which of the later steps are skipped in that mode is not visible
 * in this excerpt.
 */
972 DeleteExtraVolumeHeaderFile(struct SalvInfo *salvinfo, struct VolumeSummary *vsp)
975 char filename[VMAXPATHLEN];
981 VolumeExternalName_r(vsp->header.id, filename, sizeof(filename));
982 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, filename);
985 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path, (Testing ? "would have been " : ""));
988 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, vsp->header.id, vsp->header.parent);
990 Log("Error %ld destroying volume disk header for volume %" AFS_VOLID_FMT "\n",
991 afs_printable_int32_ld(code),
992 afs_printable_VolumeId_lu(vsp->header.id));
995 /* make sure we actually delete the header file; ENOENT
996 * is fine, since VDestroyVolumeDiskHeader probably already
998 if (unlink(path) && errno != ENOENT) {
999 Log("Unable to unlink %s (errno = %d)\n", path, errno);
1001 if (salvinfo->useFSYNC) {
1002 AskDelete(salvinfo, vsp->header.id);
/*
 * Comparison callback over two struct ViceInodeInfo records, taking the
 * const void * arguments that qsort() passes.
 *
 * When either record is a special inode (vnodeNumber == INODESPECIAL) the
 * records are first compared by RW volume id, taken from u.special.parentId
 * for special inodes and u.vnode.volumeId otherwise. Two special inodes of
 * the same volume compare by u.special.type, and that branch never reports
 * equality. Ordinary inodes compare by volumeId, then vnodeNumber, then
 * vnodeUniquifier and inodeDataVersion. The AFS_3DISPARES and AFS_SGI_EXMAG
 * sections test for one value above 90% and the other below 10% of the
 * counter range.
 * NOTE(review): most return statements are missing from this excerpt, so
 * the sign returned by each branch must be confirmed against the full file.
 */
1009 CompareInodes(const void *_p1, const void *_p2)
1011 const struct ViceInodeInfo *p1 = _p1;
1012 const struct ViceInodeInfo *p2 = _p2;
1013 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1014 || p2->u.vnode.vnodeNumber == INODESPECIAL) {
1015 VolumeId p1rwid, p2rwid;
1017 (p1->u.vnode.vnodeNumber ==
1018 INODESPECIAL ? p1->u.special.parentId : p1->u.vnode.volumeId);
1020 (p2->u.vnode.vnodeNumber ==
1021 INODESPECIAL ? p2->u.special.parentId : p2->u.vnode.volumeId);
1022 if (p1rwid < p2rwid)
1024 if (p1rwid > p2rwid)
1026 if (p1->u.vnode.vnodeNumber == INODESPECIAL
1027 && p2->u.vnode.vnodeNumber == INODESPECIAL) {
1028 if (p1->u.vnode.volumeId == p2->u.vnode.volumeId)
1029 return (p1->u.special.type < p2->u.special.type ? -1 : 1);
1030 if (p1->u.vnode.volumeId == p1rwid)
1032 if (p2->u.vnode.volumeId == p2rwid)
1034 return (p1->u.vnode.volumeId < p2->u.vnode.volumeId ? -1 : 1);
1036 if (p1->u.vnode.vnodeNumber != INODESPECIAL)
1037 return (p2->u.vnode.volumeId == p2rwid ? 1 : -1);
1038 return (p1->u.vnode.volumeId == p1rwid ? -1 : 1);
1040 if (p1->u.vnode.volumeId < p2->u.vnode.volumeId)
1042 if (p1->u.vnode.volumeId > p2->u.vnode.volumeId)
1044 if (p1->u.vnode.vnodeNumber < p2->u.vnode.vnodeNumber)
1046 if (p1->u.vnode.vnodeNumber > p2->u.vnode.vnodeNumber)
1048 /* The following tests are reversed, so that the most desirable
1049 * of several similar inodes comes first */
1050 if (p1->u.vnode.vnodeUniquifier > p2->u.vnode.vnodeUniquifier) {
1051 #ifdef AFS_3DISPARES
1052 if (p1->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1053 p2->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1056 #ifdef AFS_SGI_EXMAG
1057 if (p1->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1058 p2->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1063 if (p1->u.vnode.vnodeUniquifier < p2->u.vnode.vnodeUniquifier) {
1064 #ifdef AFS_3DISPARES
1065 if (p2->u.vnode.vnodeUniquifier > 3775414 /* 90% of 4.2M */ &&
1066 p1->u.vnode.vnodeUniquifier < 419490 /* 10% of 4.2M */ )
1069 #ifdef AFS_SGI_EXMAG
1070 if (p2->u.vnode.vnodeUniquifier > 15099494 /* 90% of 16M */ &&
1071 p1->u.vnode.vnodeUniquifier < 1677721 /* 10% of 16M */ )
1076 if (p1->u.vnode.inodeDataVersion > p2->u.vnode.inodeDataVersion) {
1077 #ifdef AFS_3DISPARES
1078 if (p1->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1079 p2->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1082 #ifdef AFS_SGI_EXMAG
1083 if (p1->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1084 p2->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
1089 if (p1->u.vnode.inodeDataVersion < p2->u.vnode.inodeDataVersion) {
1090 #ifdef AFS_3DISPARES
1091 if (p2->u.vnode.inodeDataVersion > 1887437 /* 90% of 2.1M */ &&
1092 p1->u.vnode.inodeDataVersion < 209716 /* 10% of 2.1M */ )
1095 #ifdef AFS_SGI_EXMAG
1096 if (p2->u.vnode.inodeDataVersion > 15099494 /* 90% of 16M */ &&
1097 p1->u.vnode.inodeDataVersion < 1677721 /* 10% of 16M */ )
/*
 * Summarise the run of inode records at ip that share ip[0]'s volume id.
 *
 * Walks at most maxInodes records while the volume id stays the same. For
 * special inodes it takes the RW volume id from u.special.parentId; for
 * ordinary inodes it tracks the largest vnodeUniquifier. The results are
 * stored in *summary (volumeId, RWvolumeId, nInodes, nSpecialInodes,
 * maxUniquifier).
 *
 * ip[0] is read before maxInodes is tested, so ip must point at a valid
 * record even when maxInodes is 0.
 */
1106 CountVolumeInodes(struct ViceInodeInfo *ip, int maxInodes,
1107 struct InodeSummary *summary)
1109 VolumeId volume = ip->u.vnode.volumeId;
1110 VolumeId rwvolume = volume;
1115 while (maxInodes-- && volume == ip->u.vnode.volumeId) {
1117 if (ip->u.vnode.vnodeNumber == INODESPECIAL) {
1119 rwvolume = ip->u.special.parentId;
1120 /* This isn't quite right, as there could (in error) be different
1121 * parent inodes in different special vnodes */
1123 if (maxunique < ip->u.vnode.vnodeUniquifier)
1124 maxunique = ip->u.vnode.vnodeUniquifier;
1128 summary->volumeId = volume;
1129 summary->RWvolumeId = rwvolume;
1130 summary->nInodes = n;
1131 summary->nSpecialInodes = nSpecial;
1132 summary->maxUniquifier = maxunique;
/*
 * Predicate: non-zero when the inode belongs to singleVolumeNumber. Special
 * inodes are matched on u.special.parentId, all others on u.vnode.volumeId.
 * The rock argument is not used by the visible code.
 */
1136 OnlyOneVolume(struct ViceInodeInfo *inodeinfo, VolumeId singleVolumeNumber, void *rock)
1138 if (inodeinfo->u.vnode.vnodeNumber == INODESPECIAL)
1139 return (inodeinfo->u.special.parentId == singleVolumeNumber);
1140 return (inodeinfo->u.vnode.volumeId == singleVolumeNumber);
1145 * Collect list of inodes in file named by path. If a truly fatal error,
1146 unlink the file and abort. For lesser errors, return -1. The file will
1147 * be unlinked by the caller.
1150 GetInodeSummary(struct SalvInfo *salvinfo, FD_t inodeFile, VolumeId singleVolumeNumber)
1154 struct ViceInodeInfo *ip, *ip_save;
1155 struct InodeSummary summary;
1156 char summaryFileName[50];
1157 FD_t summaryFile = INVALID_FD;
1159 char *dev = salvinfo->fileSysPath;
1160 char *wpath = salvinfo->fileSysPath;
1162 char *dev = salvinfo->fileSysDeviceName;
1163 char *wpath = salvinfo->filesysfulldev;
1165 char *part = salvinfo->fileSysPath;
1170 afs_sfsize_t st_size;
1172 /* This file used to come from vfsck; cobble it up ourselves now... */
1174 ListViceInodes(dev, salvinfo->fileSysPath, inodeFile,
1175 singleVolumeNumber ? OnlyOneVolume : 0,
1176 singleVolumeNumber, &forceSal, forceR, wpath, NULL)) < 0) {
1178 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno, dev);
1182 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev);
1184 if (forceSal && !ForceSalvage) {
1185 Log("***Forced salvage of all volumes on this partition***\n");
1188 OS_SEEK(inodeFile, 0L, SEEK_SET);
1189 salvinfo->inodeFd = inodeFile;
1190 if (salvinfo->inodeFd == INVALID_FD ||
1191 (st_size = OS_SIZE(salvinfo->inodeFd)) == -1) {
1192 Abort("No inode description file for \"%s\"; not salvaged\n", dev);
1194 tdir = (tmpdir ? tmpdir : part);
1196 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1197 (void)strcpy(summaryFileName, _tempnam(tdir, "salvage.temp."));
1199 snprintf(summaryFileName, sizeof summaryFileName,
1200 "%s" OS_DIRSEP "salvage.temp.%d", tdir, getpid());
1202 summaryFile = OS_OPEN(summaryFileName, O_RDWR|O_APPEND|O_CREAT, 0666);
1203 if (summaryFile == INVALID_FD) {
1204 Abort("Unable to create inode summary file\n");
1208 /* Using nt_unlink here since we're really using the delete on close
1209 * semantics of unlink. In most places in the salvager, we really do
1210 * mean to unlink the file at that point. Those places have been
1211 * modified to actually do that so that the NT crt can be used there.
1213 * jaltman - As commented elsewhere, this cannot work because fopen()
1214 * does not open files with DELETE and FILE_SHARE_DELETE.
1216 code = nt_unlink(summaryFileName);
1218 code = unlink(summaryFileName);
1221 Log("Error %d when trying to unlink %s\n", errno, summaryFileName);
1224 if (!canfork || debug || Fork() == 0) {
1225 int nInodes = st_size / sizeof(struct ViceInodeInfo);
1227 OS_CLOSE(summaryFile);
1228 if (!singleVolumeNumber) /* Remove the FORCESALVAGE file */
1229 RemoveTheForce(salvinfo->fileSysPath);
1231 struct VolumeSummary *vsp;
1235 GetVolumeSummary(salvinfo, singleVolumeNumber);
1237 for (i = 0, vsp = salvinfo->volumeSummaryp; i < salvinfo->nVolumes; i++) {
1239 if (vsp->header.id == singleVolumeNumber) {
1242 DeleteExtraVolumeHeaderFile(salvinfo, vsp);
1248 MaybeAskOnline(salvinfo, singleVolumeNumber);
1250 /* make sure we get rid of stray .vol headers, even if
1251 * they're not in our volume summary (might happen if
1252 * e.g. something else created them and they're not in the
1253 * fileserver VGC) */
1254 VDestroyVolumeDiskHeader(salvinfo->fileSysPartition,
1255 singleVolumeNumber, 0 /*parent*/);
1256 AskDelete(salvinfo, singleVolumeNumber);
1260 Log("%s vice inodes on %s; not salvaged\n",
1261 singleVolumeNumber ? "No applicable" : "No", dev);
1266 ip = malloc(nInodes*sizeof(struct ViceInodeInfo));
1268 OS_CLOSE(summaryFile);
1270 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1273 if (OS_READ(salvinfo->inodeFd, ip, st_size) != st_size) {
1274 OS_CLOSE(summaryFile);
1275 Abort("Unable to read inode table; %s not salvaged\n", dev);
1277 qsort(ip, nInodes, sizeof(struct ViceInodeInfo), CompareInodes);
1278 if (OS_SEEK(salvinfo->inodeFd, 0, SEEK_SET) == -1
1279 || OS_WRITE(salvinfo->inodeFd, ip, st_size) != st_size) {
1280 OS_CLOSE(summaryFile);
1281 Abort("Unable to rewrite inode table; %s not salvaged\n", dev);
1286 CountVolumeInodes(ip, nInodes, &summary);
1287 if (OS_WRITE(summaryFile, &summary, sizeof(summary)) != sizeof(summary)) {
1288 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno, dev);
1289 OS_CLOSE(summaryFile);
1293 summary.index += (summary.nInodes);
1294 nInodes -= summary.nInodes;
1295 ip += summary.nInodes;
1298 ip = ip_save = NULL;
1299 /* Following fflush is not fclose, because if it was debug mode would not work */
1300 if (OS_SYNC(summaryFile) == -1) {
1301 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno, dev);
1302 OS_CLOSE(summaryFile);
1306 if (canfork && !debug) {
1311 if (Wait("Inode summary") == -1) {
1312 OS_CLOSE(summaryFile);
1313 Exit(1); /* salvage of this partition aborted */
1317 st_size = OS_SIZE(summaryFile);
1318 opr_Assert(st_size >= 0);
1321 salvinfo->inodeSummary = malloc(st_size);
1322 opr_Assert(salvinfo->inodeSummary != NULL);
1323 /* For GNU we need to do lseek to get the file pointer moved. */
1324 opr_Assert(OS_SEEK(summaryFile, 0, SEEK_SET) == 0);
1325 ret = OS_READ(summaryFile, salvinfo->inodeSummary, st_size);
1326 opr_Assert(ret == st_size);
1328 salvinfo->nVolumesInInodeFile = st_size / sizeof(struct InodeSummary);
1329 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
1330 salvinfo->inodeSummary[i].volSummary = NULL;
1332 Log("%d nVolumesInInodeFile %lu \n",salvinfo->nVolumesInInodeFile,(unsigned long)st_size);
1333 OS_CLOSE(summaryFile);
1336 if (retcode && singleVolumeNumber && !deleted) {
1337 AskError(salvinfo, singleVolumeNumber);
/*
 * Comparator with the (const void *, const void *) signature that qsort()
 * expects; both arguments are read as struct VolumeSummary.
 *
 * Ordering visible here:
 *   1. ascending header.parent, so members of one volume group end up
 *      adjacent;
 *   2. within a group, an entry whose header.id equals header.parent (the
 *      read-write volume) is special-cased, first for p1 and then for p2;
 *   3. otherwise ascending header.id.
 * The ids are compared with a less-than test rather than by subtraction, so
 * the result does not depend on the width or signedness of the id type.
 *
 * NOTE(review): the statements executed by the two rw-volume branches are
 * not shown in this listing; presumably they return -1 and 1 respectively so
 * that the rw volume sorts first, as the comment below states -- confirm.
 */
1343 /* Comparison routine for volume sort.
1344 This is setup so that a read-write volume comes immediately before
1345 any read-only clones of that volume */
1347 CompareVolumes(const void *_p1, const void *_p2)
1349 const struct VolumeSummary *p1 = _p1;
1350 const struct VolumeSummary *p2 = _p2;
1351 if (p1->header.parent != p2->header.parent)
1352 return p1->header.parent < p2->header.parent ? -1 : 1;
1353 if (p1->header.id == p1->header.parent) /* p1 is rw volume */
1355 if (p2->header.id == p2->header.parent) /* p2 is rw volume */
1357 return p1->header.id < p2->header.id ? -1 : 1; /* Both read-only */
/*
 * Implementation walk-through for AskVolumeSummary (the calling contract is
 * given by the doc comment that follows):
 *
 *  - All of the work is compiled only when both FSSYNC_BUILD_CLIENT and
 *    AFS_DEMAND_ATTACH_FS are defined, and is attempted only when
 *    programType is salvageServer and singleVolumeNumber is non-zero.
 *  - Volume group membership is requested with FSYNC_VGCQuery. While the
 *    answer is SYNC_FAILED with reason FSYNC_PART_SCANNING, the query is
 *    repeated after a sleep that grows by one second per attempt and is
 *    capped at ten seconds (the sleep argument also clamps the counter i).
 *  - SYNC_FAILED with reason FSYNC_UNKNOWN_VOLID is treated as an empty
 *    group: q_res is zeroed and q_res.rw is set to singleVolumeNumber.
 *  - If the reported rw id differs from singleVolumeNumber the request was
 *    for a clone: a salvage of the whole group is scheduled through
 *    SALVSYNC_LinkVolume (when SALVSYNC_BUILD_CLIENT is defined) and the
 *    process exits with SALSRV_EXIT_VOLGROUP_LINK.
 *  - Otherwise salvinfo->volumeSummaryp is allocated with room for
 *    VOL_VG_MAX_VOLS entries and q_res.children is walked. Children other
 *    than singleVolumeNumber are taken offline (AskOffline) and locked
 *    (LockVolume); the disk header is read with VReadVolumeDiskHeader and
 *    converted with DiskToVolumeHeader, and salvinfo->nVolumes is
 *    incremented. The collected summaries are finally sorted with qsort.
 *
 * NOTE(review): several branch bodies and all return statements are missing
 * from this listing (for example the handling of a zero child id, of a
 * LockVolume or VReadVolumeDiskHeader failure, and the point where vsp is
 * advanced); the description above covers only the visible statements.
 */
1361 * Gleans volumeSummary information by asking the fileserver
1363 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1364 * salvaging a whole partition
1366 * @return whether we obtained the volume summary information or not
1367 * @retval 0 success; we obtained the volume summary information
1368 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1370 * @retval 1 we did not get the volume summary information; either the
1371 * fileserver responded with an error, or we are not supposed to
1372 * ask the fileserver for the information (e.g. we are salvaging
1373 * the entire partition or we are not the salvageserver)
1375 * @note for non-DAFS, always returns 1
1378 AskVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1381 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1382 if (programType == salvageServer) {
1383 if (singleVolumeNumber) {
1384 FSSYNC_VGQry_response_t q_res;
1386 struct VolumeSummary *vsp;
1388 struct VolumeDiskHeader diskHdr;
1390 memset(&res, 0, sizeof(res));
1392 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1395 * We must wait for the partition to finish scanning before
1396 * can continue, since we will not know if we got the entire
1397 * VG membership unless the partition is fully scanned.
1398 * We could, in theory, just scan the partition ourselves if
1399 * the VG cache is not ready, but we would be doing the exact
1400 * same scan the fileserver is doing; it will almost always
1401 * be faster to wait for the fileserver. The only exceptions
1402 * are if the partition does not take very long to scan, and
1403 * in that case it's fast either way, so who cares?
1405 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING) {
1406 Log("waiting for fileserver to finish scanning partition %s...\n",
1407 salvinfo->fileSysPartition->name);
1409 for (i = 1; code == SYNC_FAILED && res.hdr.reason == FSYNC_PART_SCANNING; i++) {
1410 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1411 * just so small partitions don't need to wait over 10
1412 * seconds every time, and large partitions are generally
1413 * polled only once every ten seconds. */
1414 sleep((i > 10) ? (i = 10) : i);
1416 code = FSYNC_VGCQuery(salvinfo->fileSysPartition->name, singleVolumeNumber, &q_res, &res);
1420 if (code == SYNC_FAILED && res.hdr.reason == FSYNC_UNKNOWN_VOLID) {
1421 /* This can happen if there's no header for the volume
1422 * we're salvaging, or no headers exist for the VG (if
1423 * we're salvaging an RW). Act as if we got a response
1424 * with no VG members. The headers may be created during
1425 * salvaging, if there are inodes in this VG. */
1427 memset(&q_res, 0, sizeof(q_res));
1428 q_res.rw = singleVolumeNumber;
1432 Log("fileserver refused VGCQuery request for volume %" AFS_VOLID_FMT " on "
1433 "partition %s, code %ld reason %ld\n",
1434 afs_printable_VolumeId_lu(singleVolumeNumber),
1435 salvinfo->fileSysPartition->name,
1436 afs_printable_int32_ld(code),
1437 afs_printable_int32_ld(res.hdr.reason));
1441 if (q_res.rw != singleVolumeNumber) {
1442 Log("fileserver requested salvage of clone %" AFS_VOLID_FMT "; scheduling salvage of volume group %" AFS_VOLID_FMT "...\n",
1443 afs_printable_VolumeId_lu(singleVolumeNumber),
1444 afs_printable_VolumeId_lu(q_res.rw));
1445 #ifdef SALVSYNC_BUILD_CLIENT
1446 if (SALVSYNC_LinkVolume(q_res.rw,
1448 salvinfo->fileSysPartition->name,
1450 Log("schedule request failed\n");
1452 #endif /* SALVSYNC_BUILD_CLIENT */
1453 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1456 salvinfo->volumeSummaryp = calloc(VOL_VG_MAX_VOLS, sizeof(struct VolumeSummary));
1457 opr_Assert(salvinfo->volumeSummaryp != NULL);
1459 salvinfo->nVolumes = 0;
1460 vsp = salvinfo->volumeSummaryp;
1462 for (i = 0; i < VOL_VG_MAX_VOLS; i++) {
1463 char name[VMAXPATHLEN];
1465 if (!q_res.children[i]) {
1469 /* AskOffline for singleVolumeNumber was called much earlier */
1470 if (q_res.children[i] != singleVolumeNumber) {
1471 AskOffline(salvinfo, q_res.children[i]);
1472 if (LockVolume(salvinfo, q_res.children[i])) {
1478 code = VReadVolumeDiskHeader(q_res.children[i], salvinfo->fileSysPartition, &diskHdr);
1480 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1481 afs_printable_uint32_lu(q_res.children[i]));
1486 DiskToVolumeHeader(&vsp->header, &diskHdr);
1487 VolumeExternalName_r(q_res.children[i], name, sizeof(name));
1489 salvinfo->nVolumes++;
1493 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
1498 Log("Cannot get volume summary from fileserver; falling back to scanning "
1499 "entire partition\n");
1502 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
/*
 * VWalkVolumeHeaders callback that GetVolumeSummary passes on its first
 * walk of a partition, to learn how many summary slots to allocate.
 * Of the five arguments only rock is used: it is cast to an afs_int32
 * pointer that holds the running total.
 * NOTE(review): the statement that updates *nvols and the return statement
 * are not visible in this listing -- confirm against the full source.
 */
1507 * count how many volume headers are found by VWalkVolumeHeaders.
1509 * @param[in] dp the disk partition (unused)
1510 * @param[in] name full path to the .vol header (unused)
1511 * @param[in] hdr the header data (unused)
1512 * @param[in] last whether this is the last try or not (unused)
1513 * @param[in] rock actually an afs_int32*; the running count of how many
1514 * volumes we have found
1519 CountHeader(struct DiskPartition64 *dp, const char *name,
1520 struct VolumeDiskHeader *hdr, int last, void *rock)
1522 afs_int32 *nvols = (afs_int32 *)rock;
/*
 * Shared state handed, as the rock argument, from GetVolumeSummary to the
 * RecordHeader and UnlinkHeader callbacks of VWalkVolumeHeaders.
 * GetVolumeSummary fills in singleVolumeNumber, vsp, nVolumes (starting at
 * zero), totalVolumes and salvinfo before the walk, and copies nVolumes
 * back into salvinfo->nVolumes once the walk has finished. RecordHeader
 * compares nVolumes against totalVolumes and aborts when the allocated
 * capacity would be exceeded.
 */
1528 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
1531 struct SalvageScanParams {
1532 VolumeId singleVolumeNumber; /**< 0 for a partition-salvage, otherwise the
1533 * vol id of the VG we're salvaging */
1534 struct VolumeSummary *vsp; /**< ptr to the current volume summary object
1535 * we're filling in */
1536 afs_int32 nVolumes; /**< # of vols we've encountered */
1537 afs_int32 totalVolumes; /**< max # of vols we should encounter (the
1538 * # of vols we've alloc'd memory for) */
1539 int retry; /**< do we need to retry vol lock/checkout? */
1540 struct SalvInfo *salvinfo; /**< salvage job info */
/*
 * Implementation walk-through for RecordHeader (the calling contract is
 * given by the doc comment that follows):
 *
 *  - The on-disk header is converted into a zero-initialised local
 *    VolumeSummary with DiskToVolumeHeader.
 *  - Clone requested directly: when a single volume is being salvaged and
 *    this header has that id but a different parent, the salvageserver logs
 *    the fact, asks for the parent group to be salvaged instead through
 *    SALVSYNC_LinkVolume (when SALVSYNC_BUILD_CLIENT is defined) and exits
 *    with SALSRV_EXIT_VOLGROUP_LINK. In the other branch the volume is
 *    reported as read-only and not salvaged.
 *  - Header of interest (partition salvage, or id or parent equal to
 *    singleVolumeNumber): the file name after the last OS_DIRSEPC is
 *    compared with the name derived from the volume id through VFORMAT.
 *    When the name is good, or this is the last attempt, volumes other than
 *    singleVolumeNumber are taken offline with AskOffline and, under
 *    AFS_DEMAND_ATTACH_FS, locked with LockVolume. A bad name on the last
 *    attempt is logged unless Showmode is set.
 *  - Capacity guard: the walk is aborted when params->nVolumes exceeds
 *    params->totalVolumes, and only then is the summary copied into the
 *    slot that params->vsp points at.
 *
 * NOTE(review): the statements that set badname, advance params->vsp,
 * increment params->nVolumes and produce the return values are not visible
 * in this listing; the description covers only the visible statements.
 */
1544 * records volume summary info found from VWalkVolumeHeaders.
1546 * Found volumes are also taken offline if they are in the specific volume
1547 * group we are looking for.
1549 * @param[in] dp the disk partition
1550 * @param[in] name full path to the .vol header
1551 * @param[in] hdr the header data
1552 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1553 * @param[in] rock actually a struct SalvageScanParams*, containing the
1554 * information needed to record the volume summary data
1556 * @return operation status
1558 * @retval -1 volume locking raced with fileserver restart; checking out
1559 * and locking volumes needs to be retried
1560 * @retval 1 volume header is mis-named and should be deleted
1563 RecordHeader(struct DiskPartition64 *dp, const char *name,
1564 struct VolumeDiskHeader *hdr, int last, void *rock)
1566 char nameShouldBe[64];
1567 struct SalvageScanParams *params;
1568 struct VolumeSummary summary;
1569 VolumeId singleVolumeNumber;
1570 struct SalvInfo *salvinfo;
1572 params = (struct SalvageScanParams *)rock;
1574 memset(&summary, 0, sizeof(summary));
1576 singleVolumeNumber = params->singleVolumeNumber;
1577 salvinfo = params->salvinfo;
1579 DiskToVolumeHeader(&summary.header, hdr);
1581 if (singleVolumeNumber && summary.header.id == singleVolumeNumber
1582 && summary.header.parent != singleVolumeNumber) {
1584 if (programType == salvageServer) {
1585 #ifdef SALVSYNC_BUILD_CLIENT
1586 Log("fileserver requested salvage of clone %" AFS_VOLID_FMT "; scheduling salvage of volume group %" AFS_VOLID_FMT "...\n",
1587 afs_printable_VolumeId_lu(summary.header.id),
1588 afs_printable_VolumeId_lu(summary.header.parent));
1589 if (SALVSYNC_LinkVolume(summary.header.parent,
1593 Log("schedule request failed\n");
1596 Exit(SALSRV_EXIT_VOLGROUP_LINK);
1599 Log("%" AFS_VOLID_FMT " is a read-only volume; not salvaged\n",
1600 afs_printable_VolumeId_lu(singleVolumeNumber));
1605 if (!singleVolumeNumber || summary.header.id == singleVolumeNumber
1606 || summary.header.parent == singleVolumeNumber) {
1608 /* check if the header file is incorrectly named */
1610 const char *base = strrchr(name, OS_DIRSEPC);
1617 snprintf(nameShouldBe, sizeof nameShouldBe,
1618 VFORMAT, afs_printable_VolumeId_lu(summary.header.id));
1621 if (strcmp(nameShouldBe, base)) {
1622 /* .vol file has wrong name; retry/delete */
1626 if (!badname || last) {
1627 /* only offline the volume if the header is good, or if this is
1628 * the last try looking at it; avoid AskOffline'ing the same vol
1631 if (singleVolumeNumber
1632 && summary.header.id != singleVolumeNumber) {
1633 /* don't offline singleVolumeNumber; we already did that
1636 AskOffline(salvinfo, summary.header.id);
1638 #ifdef AFS_DEMAND_ATTACH_FS
1640 /* don't lock the volume if the header is bad, since we're
1641 * about to delete it anyway. */
1642 if (LockVolume(salvinfo, summary.header.id)) {
1647 #endif /* AFS_DEMAND_ATTACH_FS */
1651 if (last && !Showmode) {
1652 Log("Volume header file %s is incorrectly named (should be %s "
1653 "not %s); %sdeleted (it will be recreated later, if "
1654 "necessary)\n", name, nameShouldBe, base,
1655 (Testing ? "it would have been " : ""));
1663 if (params->nVolumes > params->totalVolumes) {
1664 /* We found more volumes than we found on the first partition walk;
1665 * apparently something created a volume while we were
1666 * partition-salvaging, or we found more than 20 vols when salvaging a
1667 * particular volume. Abort if we detect this, since other programs
1668 * supposed to not touch the partition while it is partition-salvaging,
1669 * and we shouldn't find more than 20 vols in a VG.
1671 Abort("Found %ld vol headers, but should have found at most %ld! "
1672 "Make sure the volserver/fileserver are not running at the "
1673 "same time as a partition salvage\n",
1674 afs_printable_int32_ld(params->nVolumes),
1675 afs_printable_int32_ld(params->totalVolumes));
1678 memcpy(params->vsp, &summary, sizeof(summary));
/*
 * Implementation notes for UnlinkHeader (the calling contract is given by
 * the doc comment that follows):
 *
 *  - rock is cast to struct SalvageScanParams so that the callback can tell
 *    a partition salvage (singleVolumeNumber == 0) from a volume-group one.
 *  - A header that could not be read at all is reported as not being a
 *    legitimate volume header file; the log text is prefixed with
 *    "it would have been " when Testing is set.
 *  - A header that was read but rejected is only considered for removal
 *    during a partition salvage.
 *  - The file is removed with unlink() only when dounlink is set, and an
 *    unlink failure is logged together with errno rather than treated as
 *    fatal.
 *
 * NOTE(review): the assignments to dounlink are not visible in this
 * listing, so how Testing influences the actual removal cannot be confirmed
 * from here.
 */
1686 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1688 * If the header could not be read in at all, the header is always unlinked.
1689 * If instead RecordHeader said the header was bad (that is, the header file
1690 * is mis-named), we only unlink if we are doing a partition salvage, as
1691 * opposed to salvaging a specific volume group.
1693 * @param[in] dp the disk partition
1694 * @param[in] name full path to the .vol header
1695 * @param[in] hdr header data, or NULL if the header could not be read
1696 * @param[in] rock actually a struct SalvageScanParams*, with some information
1700 UnlinkHeader(struct DiskPartition64 *dp, const char *name,
1701 struct VolumeDiskHeader *hdr, void *rock)
1703 struct SalvageScanParams *params;
1706 params = (struct SalvageScanParams *)rock;
1709 /* no header; header is too bogus to read in at all */
1711 Log("%s is not a legitimate volume header file; %sdeleted\n", name, (Testing ? "it would have been " : ""));
1717 } else if (!params->singleVolumeNumber) {
1718 /* We were able to read in a header, but RecordHeader said something
1719 * was wrong with it. We only unlink those if we are doing a partition
1726 if (dounlink && unlink(name)) {
1727 Log("Error %d while trying to unlink %s\n", errno, name);
/*
 * Implementation walk-through for GetVolumeSummary (the calling contract is
 * given by the doc comment that follows):
 *
 *  1. AskVolumeSummary is tried first; its result decides whether the
 *     partition has to be scanned at all or whether volume checkout must be
 *     retried.
 *  2. Sizing: for a partition salvage (singleVolumeNumber == 0) the headers
 *     are counted with a first VWalkVolumeHeaders pass using CountHeader,
 *     and the salvage is aborted if the directory cannot be read. For a
 *     single volume group the capacity is fixed at VOL_VG_MAX_VOLS.
 *  3. salvinfo->volumeSummaryp is allocated zero-filled for that many
 *     entries; allocation failure trips opr_Assert.
 *  4. A struct SalvageScanParams is filled in and its address is passed as
 *     the rock of a second VWalkVolumeHeaders pass, with RecordHeader
 *     recording each header and UnlinkHeader disposing of invalid ones.
 *  5. The number of recorded volumes is stored in salvinfo->nVolumes and
 *     the summaries are sorted with qsort.
 *
 * NOTE(review): the conditions tested on code after each call and the
 * return statements are not visible in this listing.
 */
1732 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1733 * the fileserver for VG information, or by scanning the /vicepX partition.
1735 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1736 * are salvaging, or 0 if this is a partition
1739 * @return operation status
1741 * @retval -1 we raced with a fileserver restart; checking out and locking
1742 * volumes must be retried
1745 GetVolumeSummary(struct SalvInfo *salvinfo, VolumeId singleVolumeNumber)
1747 afs_int32 nvols = 0;
1748 struct SalvageScanParams params;
1751 code = AskVolumeSummary(salvinfo, singleVolumeNumber);
1753 /* we successfully got the vol information from the fileserver; no
1754 * need to scan the partition */
1758 /* we need to retry volume checkout */
1762 if (!singleVolumeNumber) {
1763 /* Count how many volumes we have in /vicepX */
1764 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, CountHeader,
1767 Abort("Can't read directory %s; not salvaged\n", salvinfo->fileSysPath);
1772 nvols = VOL_VG_MAX_VOLS;
1775 salvinfo->volumeSummaryp = calloc(nvols, sizeof(struct VolumeSummary));
1776 opr_Assert(salvinfo->volumeSummaryp != NULL);
1778 params.singleVolumeNumber = singleVolumeNumber;
1779 params.vsp = salvinfo->volumeSummaryp;
1780 params.nVolumes = 0;
1781 params.totalVolumes = nvols;
1783 params.salvinfo = salvinfo;
1785 /* walk the partition directory of volume headers and record the info
1786 * about them; unlinking invalid headers */
1787 code = VWalkVolumeHeaders(salvinfo->fileSysPartition, salvinfo->fileSysPath, RecordHeader,
1788 UnlinkHeader, &params);
1790 /* we apparently need to retry checking-out/locking volumes */
1794 Abort("Failed to get volume header summary\n");
1796 salvinfo->nVolumes = params.nVolumes;
1798 qsort(salvinfo->volumeSummaryp, salvinfo->nVolumes, sizeof(struct VolumeSummary),
/*
 * Searches the special inodes of a volume group for the link table.
 *
 * isp points at nVols consecutive InodeSummary entries and allInodes is the
 * base that their index fields are relative to. For each summary the first
 * nSpecialInodes records starting at allInodes + index are examined, and
 * the inode number of the first record whose special type is VI_LINKTABLE
 * is returned.
 *
 * NOTE(review): the value returned when no link table inode is found is not
 * visible in this listing; callers test the result with VALID_INO, so it is
 * presumably an invalid inode number -- confirm.
 */
1804 /* Find the link table. This should be associated with the RW volume or, if
1805 * a RO only site, then the RO volume. For now, be cautious and hunt carefully.
1808 FindLinkHandle(struct InodeSummary *isp, int nVols,
1809 struct ViceInodeInfo *allInodes)
1812 struct ViceInodeInfo *ip;
1814 for (i = 0; i < nVols; i++) {
1815 ip = allInodes + isp[i].index;
1816 for (j = 0; j < isp[i].nSpecialInodes; j++) {
1817 if (ip[j].u.special.type == VI_LINKTABLE)
1818 return ip[j].inodeNumber;
/*
 * Creates (when needed) and initialises the link table of a volume group.
 *
 * salvinfo  salvage job state; VGLinkH is set up here to refer to the table
 * isp       summary of the volume whose RWvolumeId names the group
 * ino       existing link table inode, or an invalid inode number to have a
 *           new one created with IH_CREATE
 *
 * Steps: obtain a valid inode (aborting if one cannot be allocated),
 * initialise and open salvinfo->VGLinkH on it, truncate the file to one
 * versionStamp plus one short, write a stamp carrying LINKTABLEMAGIC and
 * LINKTABLEVERSION at offset 0, and close the descriptor. Every failure is
 * fatal (Abort). When the volume already has a VolumeSummary, the inode is
 * also recorded in its header.linkTable field.
 *
 * The diagnostic for a failed header write used to repeat the truncate
 * message, which pointed at the wrong operation; it now says that the write
 * failed.
 */
1825 CreateLinkTable(struct SalvInfo *salvinfo, struct InodeSummary *isp, Inode ino)
1827 struct versionStamp version;
1830 if (!VALID_INO(ino))
1832 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->RWvolumeId,
1833 INODESPECIAL, VI_LINKTABLE, isp->RWvolumeId);
1834 if (!VALID_INO(ino))
1836 ("Unable to allocate link table inode for volume %" AFS_VOLID_FMT " (error = %d)\n",
1837 afs_printable_VolumeId_lu(isp->RWvolumeId), errno);
1838 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1839 fdP = IH_OPEN(salvinfo->VGLinkH);
1841 Abort("Can't open link table for volume %" AFS_VOLID_FMT " (error = %d)\n",
1842 afs_printable_VolumeId_lu(isp->RWvolumeId), errno);
1844 if (FDH_TRUNC(fdP, sizeof(version) + sizeof(short)) < 0)
1845 Abort("Can't truncate link table for volume %" AFS_VOLID_FMT " (error = %d)\n",
1846 afs_printable_VolumeId_lu(isp->RWvolumeId), errno);
1848 version.magic = LINKTABLEMAGIC;
1849 version.version = LINKTABLEVERSION;
1851 if (FDH_PWRITE(fdP, (char *)&version, sizeof(version), 0)
1853 Abort("Can't write link table header for volume %" AFS_VOLID_FMT " (error = %d)\n",
1854 afs_printable_VolumeId_lu(isp->RWvolumeId), errno);
1856 FDH_REALLYCLOSE(fdP);
1858 /* If the volume summary exists (i.e., the V*.vol header file exists),
1859 * then set this inode there as well.
1861 if (isp->volSummary)
1862 isp->volSummary->header.linkTable = ino;
/*
 * Body of a thread start routine: the opaque argument is interpreted as an
 * SVGParms_t and its three fields (salvage info, inode summary pointer and
 * volume count) are forwarded to DoSalvageVolumeGroup.
 * NOTE(review): the function header is not visible in this listing; this is
 * presumably nt_SVG, the routine that nt_SalvageVolumeGroup hands to
 * pthread_create -- confirm.
 */
1871 SVGParms_t *parms = (SVGParms_t *) arg;
1872 DoSalvageVolumeGroup(parms->svgp_salvinfo, parms->svgp_inodeSummaryp, parms->svgp_count);
/*
 * Runs the salvage of one volume group on a separate, joinable thread and
 * waits for that thread to finish (the closing preprocessor line ties this
 * code to AFS_NT40_ENV).
 *
 * Before the thread is started the per-volume-group fields of salvinfo are
 * reset: VolumeChanged and VGLinkH_cnt to zero, VGLinkH to NULL and VolInfo
 * to all zero bytes. The arguments are packed into parms, whose address is
 * given to the thread; because the thread is joined before this function
 * returns, passing the address of a local is safe here.
 *
 * A failure of pthread_attr_init, pthread_attr_setdetachstate or
 * pthread_create is logged with the read-write volume id.
 * NOTE(review): what happens after each of those log calls is not visible
 * in this listing. The messages print the id with %u while the rest of the
 * file uses AFS_VOLID_FMT together with afs_printable_VolumeId_lu -- worth
 * aligning if the id type ever differs from unsigned int.
 */
1877 nt_SalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1880 pthread_attr_t tattr;
1884 /* Initialize per volume global variables, even if later code does so */
1885 salvinfo->VolumeChanged = 0;
1886 salvinfo->VGLinkH = NULL;
1887 salvinfo->VGLinkH_cnt = 0;
1888 memset(&salvinfo->VolInfo, 0, sizeof(salvinfo->VolInfo));
1890 parms.svgp_inodeSummaryp = isp;
1891 parms.svgp_count = nVols;
1892 parms.svgp_salvinfo = salvinfo;
1893 code = pthread_attr_init(&tattr);
1895 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1899 code = pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_JOINABLE);
1901 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp->RWvolumeId);
1904 code = pthread_create(&tid, &tattr, nt_SVG, &parms);
1906 Log("Failed to create thread to salvage volume group %u\n",
1910 (void)pthread_join(tid, NULL);
1912 #endif /* AFS_NT40_ENV */
/*
 * Salvages one volume group: the nVols consecutive InodeSummary entries
 * starting at isp, all belonging to the read-write id isp->RWvolumeId.
 *
 * Visible phases:
 *  1. haveRWvolume is true when the first entry is the rw volume itself
 *     (volumeId equals RWvolumeId) and it has special inodes. Unless
 *     ForceSalvage is set, QuickCheck is consulted first.
 *  2. When canfork is set and debug is not, the work is done in a forked
 *     child while the parent only waits for it.
 *  3. The ViceInodeInfo records of the whole group are read from
 *     salvinfo->inodeFd, starting at record isp->index, into one
 *     heap-allocated array. allInodes is that array rebased by isp->index
 *     so that the partition-wide indexes stored in the summaries can be
 *     used directly.
 *  4. (AFS_NAMEI_ENV) The link table inode is located with FindLinkHandle
 *     and the magic of its versionStamp is verified. A missing or bad table
 *     is reported and recreated through CreateLinkTable, after which every
 *     non-special inode of the group gets a link count of 1, both in the
 *     table (namei_SetLinkCount) and in the in-memory record.
 *  5. Volumes are processed from the last entry down to salvageTo; index 0
 *     is left out when there is no rw volume. Under AFS_NAMEI_ENV an rw
 *     entry that consists only of link table inodes is detected first.
 *     Each volume gets two passes, check == 1 and then check == 0, through
 *     SalvageVolumeHeaderFile and SalvageVnodes; failures are handed to
 *     MaybeZapVolume, and an rw volume flagged deleteMe clears
 *     haveRWvolume.
 *  6. Remaining link count discrepancies are repaired with IH_DEC while
 *     ip->linkCount is positive and IH_INC while it is negative. Under
 *     AFS_NAMEI_ENV the link table inode itself is skipped in that loop and
 *     adjusted last, using dec_VGLinkH.
 *  7. SalvageVolume performs the directory consistency checks and the link
 *     table handle is dropped with IH_RELEASE.
 *
 * NOTE(review): the result of malloc(size) is used without a visible NULL
 * check, and allInodes points before the start of the allocation whenever
 * isp->index is non-zero, which is outside what C pointer arithmetic
 * guarantees -- worth confirming and tightening in the full source.
 * NOTE(review): many statements (branch bodies, loop exits, the handling of
 * Testing, the final exit or return) are missing from this listing; the
 * description covers only the visible statements.
 */
1915 DoSalvageVolumeGroup(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
1917 struct ViceInodeInfo *inodes, *allInodes, *ip;
1918 int i, totalInodes, size, salvageTo;
1922 int dec_VGLinkH = 0;
1924 FdHandle_t *fdP = NULL;
1926 salvinfo->VGLinkH_cnt = 0;
1927 haveRWvolume = (isp->volumeId == isp->RWvolumeId
1928 && isp->nSpecialInodes > 0);
1929 if ((!ShowMounts) || (ShowMounts && !haveRWvolume)) {
1930 if (!ForceSalvage && QuickCheck(salvinfo, isp, nVols))
1933 if (ShowMounts && !haveRWvolume)
1935 if (canfork && !debug && Fork() != 0) {
1936 (void)Wait("Salvage volume group");
1939 for (i = 0, totalInodes = 0; i < nVols; i++)
1940 totalInodes += isp[i].nInodes;
1941 size = totalInodes * sizeof(struct ViceInodeInfo);
1942 inodes = malloc(size);
1943 allInodes = inodes - isp->index; /* this would the base of all the inodes
1944 * for the partition, if all the inodes
1945 * had been read into memory */
1947 (salvinfo->inodeFd, isp->index * sizeof(struct ViceInodeInfo),
1949 opr_Verify(OS_READ(salvinfo->inodeFd, inodes, size) == size);
1951 /* Don't try to salvage a read write volume if there isn't one on this
1953 salvageTo = haveRWvolume ? 0 : 1;
1955 #ifdef AFS_NAMEI_ENV
1956 ino = FindLinkHandle(isp, nVols, allInodes);
1957 if (VALID_INO(ino)) {
1958 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, isp->RWvolumeId, ino);
1959 fdP = IH_OPEN(salvinfo->VGLinkH);
1961 if (VALID_INO(ino) && fdP != NULL) {
1962 struct versionStamp header;
1963 afs_sfsize_t nBytes;
1965 nBytes = FDH_PREAD(fdP, (char *)&header, sizeof(struct versionStamp), 0);
1966 if (nBytes != sizeof(struct versionStamp)
1967 || header.magic != LINKTABLEMAGIC) {
1968 Log("Bad linktable header for volume %" AFS_VOLID_FMT ".\n", afs_printable_VolumeId_lu(isp->RWvolumeId));
1969 FDH_REALLYCLOSE(fdP);
1973 if (!VALID_INO(ino) || fdP == NULL) {
1974 Log("%s link table for volume %" AFS_VOLID_FMT ".\n",
1975 Testing ? "Would have recreated" : "Recreating", afs_printable_VolumeId_lu(isp->RWvolumeId));
1977 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
1980 struct ViceInodeInfo *ip;
1981 CreateLinkTable(salvinfo, isp, ino);
1982 fdP = IH_OPEN(salvinfo->VGLinkH);
1983 /* Sync fake 1 link counts to the link table, now that it exists */
1985 for (i = 0; i < nVols; i++) {
1986 ip = allInodes + isp[i].index;
1987 for (j = isp[i].nSpecialInodes; j < isp[i].nInodes; j++) {
1988 namei_SetLinkCount(fdP, ip[j].inodeNumber, 1, 1);
1989 ip[j].linkCount = 1;
1996 FDH_REALLYCLOSE(fdP);
1998 IH_INIT(salvinfo->VGLinkH, salvinfo->fileSysDevice, -1, -1);
2001 /* Salvage in reverse order--read/write volume last; this way any
2002 * Inodes not referenced by the time we salvage the read/write volume
2003 * can be picked up by the read/write volume */
2004 /* ACTUALLY, that's not done right now--the inodes just vanish */
2005 for (i = nVols - 1; i >= salvageTo; i--) {
2007 struct InodeSummary *lisp = &isp[i];
2008 #ifdef AFS_NAMEI_ENV
2009 if (rw && (nVols > 1 || isp[i].nSpecialInodes == isp[i].nInodes)) {
2010 /* If nVols > 1, we have more than one vol in this volgroup, so
2011 * the RW inodes we detected may just be for the linktable, and
2012 * there is no actual RW volume.
2014 * Additionally, if we only have linktable inodes (no other
2015 * special inodes, no data inodes), there is also no actual RW
2016 * volume to salvage; this is just cruft left behind by something
2017 * else. In that case nVols will only be 1, though, so also
2018 * perform this linktables-only check if we don't have any
2019 * non-special inodes. */
2021 int all_linktables = 1;
2022 for (inode_i = 0; inode_i < isp[i].nSpecialInodes; inode_i++) {
2023 if (inodes[inode_i].u.special.type != VI_LINKTABLE) {
2028 if (all_linktables) {
2029 /* All we have are linktable special inodes, so skip salvaging
2030 * the RW; there was never an RW volume here. If we don't do
2031 * this, we risk creating a new "phantom" RW that the VLDB
2032 * doesn't know about, which is confusing and can cause
2040 Log("%s VOLUME %" AFS_VOLID_FMT "%s.\n", rw ? "SALVAGING" : "CHECKING CLONED",
2041 afs_printable_VolumeId_lu(lisp->volumeId), (Testing ? "(READONLY mode)" : ""));
2042 /* Check inodes twice. The second time do things seriously. This
2043 * way the whole RO volume can be deleted, below, if anything goes wrong */
2044 for (check = 1; check >= 0; check--) {
2046 if (SalvageVolumeHeaderFile(salvinfo, lisp, allInodes, rw, check, &deleteMe)
2048 MaybeZapVolume(salvinfo, lisp, "Volume header", deleteMe, check);
2049 if (rw && deleteMe) {
2050 haveRWvolume = 0; /* This will cause its inodes to be deleted--since salvage
2051 * volume won't be called */
2057 if (rw && check == 1)
2059 if (SalvageVnodes(salvinfo, isp, lisp, allInodes, check) == -1) {
2060 MaybeZapVolume(salvinfo, lisp, "Vnode index", 0, check);
2066 /* Fix actual inode counts */
2069 Log("totalInodes %d\n",totalInodes);
2070 for (ip = inodes; totalInodes; ip++, totalInodes--) {
2071 static int TraceBadLinkCounts = 0;
2072 #ifdef AFS_NAMEI_ENV
2073 if (salvinfo->VGLinkH->ih_ino == ip->inodeNumber) {
2074 dec_VGLinkH = ip->linkCount - salvinfo->VGLinkH_cnt;
2075 VGLinkH_p1 = ip->u.param[0];
2076 continue; /* Deal with this last. */
2079 if (ip->linkCount != 0 && TraceBadLinkCounts) {
2080 TraceBadLinkCounts--; /* Limit reports, per volume */
2081 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip->linkCount, PrintInode(stmp, ip->inodeNumber), (afs_uintmax_t) ip->byteCount, ip->u.param[0], ip->u.param[1], ip->u.param[2], ip->u.param[3]); /* VolumeId in param */
2084 /* If ip->linkCount is non-zero at this point, then the linkcount
2085 * for the inode on disk is wrong. Initially linkCount is set to
2086 * the actual link count of the inode on disk, and then we (the
2087 * salvager) decrement it for every reference to that inode that we
2088 * find. So if linkCount is still positive by this point, it means
2089 * that the linkcount on disk is too high, so we should DEC the
2090 * inode. If linkCount is negative, it means the linkcount is too
2091 * low, so we should INC the inode.
2093 * If we get an error while INC'ing or DEC'ing, that's a little
2094 * odd and indicates a bug, but try to continue anyway, so the
2095 * volume may still be made accessible. */
2096 while (ip->linkCount > 0) {
2098 if (IH_DEC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2099 Log("idec failed. inode %s errno %d\n",
2100 PrintInode(stmp, ip->inodeNumber), errno);
2106 while (ip->linkCount < 0) {
2108 if (IH_INC(salvinfo->VGLinkH, ip->inodeNumber, ip->u.param[0])) {
2109 Log("iinc failed. inode %s errno %d\n",
2110 PrintInode(stmp, ip->inodeNumber), errno);
2117 #ifdef AFS_NAMEI_ENV
2118 while (dec_VGLinkH > 0) {
2119 if (IH_DEC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2120 Log("idec failed on link table, errno = %d\n", errno);
2124 while (dec_VGLinkH < 0) {
2125 if (IH_INC(salvinfo->VGLinkH, salvinfo->VGLinkH->ih_ino, VGLinkH_p1) < 0) {
2126 Log("iinc failed on link table, errno = %d\n", errno);
2133 /* Directory consistency checks on the rw volume */
2135 SalvageVolume(salvinfo, isp, salvinfo->VGLinkH);
2136 IH_RELEASE(salvinfo->VGLinkH);
2138 if (canfork && !debug) {
/*
 * Cheap pre-check of the on-disk volume headers of a volume group, run
 * before any fork.
 *
 * For each of the nVols summaries the VolumeDiskData is read through an
 * inode handle built from the summary header (parent id and volumeInfo
 * inode). The visible acceptance test requires that the whole structure was
 * read, that its stamp magic is VOLUMEINFOMAGIC, that dontSalvage equals
 * DONT_SALVAGE and that both needsSalvaged and destroyMe are zero. An
 * accepted header that is still marked inUse is rewritten with inUse
 * cleared and inService set.
 * The first entry is special-cased when it has no special inodes and the
 * group holds more than one volume (a phantom rw volume).
 *
 * NOTE(review): the return statements, the handling of a missing
 * volSummary and the release of the inode handle are not visible in this
 * listing. The caller, DoSalvageVolumeGroup, tests the result for non-zero
 * when ForceSalvage is not set.
 */
2145 QuickCheck(struct SalvInfo *salvinfo, struct InodeSummary *isp, int nVols)
2147 /* Check headers BEFORE forking */
2151 for (i = 0; i < nVols; i++) {
2152 struct VolumeSummary *vs = isp[i].volSummary;
2153 VolumeDiskData volHeader;
2155 /* Don't salvage just because phantom rw volume is there... */
2156 /* (If a read-only volume exists, read/write inodes must also exist) */
2157 if (i == 0 && isp->nSpecialInodes == 0 && nVols > 1)
2161 IH_INIT(h, salvinfo->fileSysDevice, vs->header.parent, vs->header.volumeInfo);
2162 if (IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader))
2163 == sizeof(volHeader)
2164 && volHeader.stamp.magic == VOLUMEINFOMAGIC
2165 && volHeader.dontSalvage == DONT_SALVAGE
2166 && volHeader.needsSalvaged == 0 && volHeader.destroyMe == 0) {
2167 if (volHeader.inUse != 0) {
2168 volHeader.inUse = 0;
2169 volHeader.inService = 1;
2171 if (IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader))
2172 != sizeof(volHeader)) {
2188 /* SalvageVolumeHeaderFile
2190 * Salvage the top level V*.vol header file. Make sure the special files
2191 * exist and that there are no duplicates.
2193 * Calls SalvageHeader for each possible type of volume special file.
2197 SalvageVolumeHeaderFile(struct SalvInfo *salvinfo, struct InodeSummary *isp,
2198 struct ViceInodeInfo *inodes, int RW,
2199 int check, int *deleteMe)
2202 struct ViceInodeInfo *ip;
2203 int allinodesobsolete = 1;
2204 struct VolumeDiskHeader diskHeader;
2205 afs_int32 (*writefunc)(VolumeDiskHeader_t *, struct DiskPartition64 *) = NULL;
2207 struct VolumeHeader tempHeader;
2208 struct afs_inode_info stuff[MAXINODETYPE];
2210 /* keeps track of special inodes that are probably 'good'; they are
2211 * referenced in the vol header, and are included in the given inodes
2216 } goodspecial[MAXINODETYPE];
2221 memset(goodspecial, 0, sizeof(goodspecial));
2223 skip = calloc(isp->nSpecialInodes, sizeof(*skip));
2225 Log("cannot allocate memory for inode skip array when salvaging "
2226 "volume %lu; not performing duplicate special inode recovery\n",
2227 afs_printable_uint32_lu(isp->volumeId));
2228 /* still try to perform the salvage; the skip array only does anything
2229 * if we detect duplicate special inodes */
2232 init_inode_info(&tempHeader, stuff);
2235 * First, look at the special inodes and see if any are referenced by
2236 * the existing volume header. If we find duplicate special inodes, we
2237 * can use this information to use the referenced inode (it's more
2238 * likely to be the 'good' one), and throw away the duplicates.
2240 if (isp->volSummary && skip) {
2241 /* use tempHeader, so we can use the stuff[] array to easily index
2242 * into the isp->volSummary special inodes */
2243 memcpy(&tempHeader, &isp->volSummary->header, sizeof(struct VolumeHeader));
2245 for (i = 0; i < isp->nSpecialInodes; i++) {
2246 ip = &inodes[isp->index + i];
2247 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2248 /* will get taken care of in a later loop */
2251 if (ip->inodeNumber == *(stuff[ip->u.special.type - 1].inode)) {
2252 goodspecial[ip->u.special.type-1].valid = 1;
2253 goodspecial[ip->u.special.type-1].inode = ip->inodeNumber;
2258 memset(&tempHeader, 0, sizeof(tempHeader));
2259 tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
2260 tempHeader.stamp.version = VOLUMEHEADERVERSION;
2261 tempHeader.id = isp->volumeId;
2262 tempHeader.parent = isp->RWvolumeId;
2264 /* Check for duplicates (inodes are sorted by type field) */
2265 for (i = 0; i < isp->nSpecialInodes - 1; i++) {
2266 ip = &inodes[isp->index + i];
2267 if (ip->u.special.type == (ip + 1)->u.special.type) {
2268 afs_ino_str_t stmp1, stmp2;
2270 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2271 /* Will be caught in the loop below */
2275 Log("Duplicate special %d inodes for volume %" AFS_VOLID_FMT " found (%s, %s);\n",
2276 ip->u.special.type, afs_printable_VolumeId_lu(isp->volumeId),
2277 PrintInode(stmp1, ip->inodeNumber),
2278 PrintInode(stmp2, (ip+1)->inodeNumber));
2280 if (skip && goodspecial[ip->u.special.type-1].valid) {
2281 Inode gi = goodspecial[ip->u.special.type-1].inode;
2284 Log("using special inode referenced by vol header (%s)\n",
2285 PrintInode(stmp1, gi));
2288 /* the volume header references some special inode of
2289 * this type in the inodes array; are we it? */
2290 if (ip->inodeNumber != gi) {
2292 } else if ((ip+1)->inodeNumber != gi) {
2293 /* in case this is the last iteration; we need to
2294 * make sure we check ip+1, too */
2299 Log("cannot determine which is correct; salvage of volume %" AFS_VOLID_FMT " aborted\n", afs_printable_VolumeId_lu(isp->volumeId));
2307 for (i = 0; i < isp->nSpecialInodes; i++) {
2309 ip = &inodes[isp->index + i];
2310 if (ip->u.special.type <= 0 || ip->u.special.type > MAXINODETYPE) {
2312 Log("Rubbish header inode %s of type %d\n",
2313 PrintInode(stmp, ip->inodeNumber),
2314 ip->u.special.type);
2320 Log("Rubbish header inode %s of type %d; deleted\n",
2321 PrintInode(stmp, ip->inodeNumber),
2322 ip->u.special.type);
2323 } else if (!stuff[ip->u.special.type - 1].obsolete) {
2324 if (skip && skip[i]) {
2325 if (orphans == ORPH_REMOVE) {
2326 Log("Removing orphan special inode %s of type %d\n",
2327 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2330 Log("Ignoring orphan special inode %s of type %d\n",
2331 PrintInode(stmp, ip->inodeNumber), ip->u.special.type);
2332 /* fall through to the ip->linkCount--; line below */
2335 *(stuff[ip->u.special.type - 1].inode) = ip->inodeNumber;
2336 allinodesobsolete = 0;
2338 if (!check && ip->u.special.type != VI_LINKTABLE)
2339 ip->linkCount--; /* Keep the inode around */
2347 if (allinodesobsolete) {
2354 salvinfo->VGLinkH_cnt++; /* one for every header. */
2356 if (!RW && !check && isp->volSummary) {
2357 ClearROInUseBit(isp->volSummary);
2361 for (i = 0; i < MAXINODETYPE; i++) {
2362 if (stuff[i].inodeType == VI_LINKTABLE) {
2363 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2364 * And we may have recreated the link table earlier, so set the
2365 * RW header as well. The header magic was already checked.
2367 if (VALID_INO(salvinfo->VGLinkH->ih_ino)) {
2368 *stuff[i].inode = salvinfo->VGLinkH->ih_ino;
2372 if (SalvageHeader(salvinfo, &stuff[i], isp, check, deleteMe) == -1 && check)
2376 if (isp->volSummary == NULL) {
2378 char headerName[64];
2379 snprintf(headerName, sizeof headerName, VFORMAT,
2380 afs_printable_VolumeId_lu(isp->volumeId));
2381 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2382 salvinfo->fileSysPath, headerName);
2384 Log("No header file for volume %" AFS_VOLID_FMT "\n", afs_printable_VolumeId_lu(isp->volumeId));
2388 Log("No header file for volume %" AFS_VOLID_FMT "; %screating %s\n",
2389 afs_printable_VolumeId_lu(isp->volumeId), (Testing ? "it would have been " : ""),
2391 isp->volSummary = calloc(1, sizeof(struct VolumeSummary));
2393 writefunc = VCreateVolumeDiskHeader;
2396 char headerName[64];
2397 /* hack: these two fields are obsolete... */
2398 isp->volSummary->header.volumeAcl = 0;
2399 isp->volSummary->header.volumeMountTable = 0;
2402 (&isp->volSummary->header, &tempHeader,
2403 sizeof(struct VolumeHeader))) {
2404 VolumeExternalName_r(isp->volumeId, headerName, sizeof(headerName));
2405 snprintf(path, sizeof path, "%s" OS_DIRSEP "%s",
2406 salvinfo->fileSysPath, headerName);
2408 Log("Header file %s is damaged or no longer valid%s\n", path,
2409 (check ? "" : "; repairing"));
2413 writefunc = VWriteVolumeDiskHeader;
2417 memcpy(&isp->volSummary->header, &tempHeader,
2418 sizeof(struct VolumeHeader));
2421 Log("It would have written a new header file for volume %" AFS_VOLID_FMT "\n",
2422 afs_printable_VolumeId_lu(isp->volumeId));
2425 VolumeHeaderToDisk(&diskHeader, &tempHeader);
2426 code = (*writefunc)(&diskHeader, salvinfo->fileSysPartition);
2428 Log("Error %ld writing volume header file for volume %" AFS_VOLID_FMT "\n",
2429 afs_printable_int32_ld(code),
2430 afs_printable_VolumeId_lu(diskHeader.id));
2435 IH_INIT(isp->volSummary->volumeInfoHandle, salvinfo->fileSysDevice, isp->RWvolumeId,
2436 isp->volSummary->header.volumeInfo);
/*
 * SalvageHeader: check, and where permitted recreate, the single special
 * (volume header) inode described by sp for the volume summarized by isp.
 * The visible statements cover: a missing inode number, failure to open the
 * inode, a short read or wrong magic, the destroyMe marker in the volume
 * info, and rewriting the stamp (or a fresh "bogus.<id>" volume info).
 * Several lines of this function, including its return statements, are not
 * visible in this view.
 */
2441 SalvageHeader(struct SalvInfo *salvinfo, struct afs_inode_info *sp,
2442 struct InodeSummary *isp, int check, int *deleteMe)
2445 VolumeDiskData volumeInfo;
2446 struct versionStamp fileHeader;
2455 #ifndef AFS_NAMEI_ENV
2456 if (sp->inodeType == VI_LINKTABLE)
2457 return 0; /* header magic was already checked */
/* A zero inode number means the volume header names no inode of this type. */
2459 if (*(sp->inode) == 0) {
2461 Log("Missing inode in volume header (%s)\n", sp->description);
2465 Log("Missing inode in volume header (%s); %s\n", sp->description,
2466 (Testing ? "it would have recreated it" : "recreating"));
2469 IH_CREATE(NULL, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, isp->volumeId,
2470 INODESPECIAL, sp->inodeType, isp->RWvolumeId);
2471 if (!VALID_INO(*(sp->inode)))
2473 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2474 sp->description, errno);
/* Open the special inode so that its stamp can be read and compared. */
2479 IH_INIT(specH, salvinfo->fileSysDevice, isp->RWvolumeId, *(sp->inode));
2480 fdP = IH_OPEN(specH);
2481 if (OKToZap && (fdP == NULL) && BadError(errno)) {
2482 /* bail out early and destroy the volume */
2484 Log("Still can't open volume header inode (%s), destroying volume\n", sp->description);
2491 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2492 sp->description, errno);
2495 && (FDH_PREAD(fdP, (char *)&header, sp->size, 0) != sp->size
2496 || header.fileHeader.magic != sp->stamp.magic)) {
2498 Log("Part of the header (%s) is corrupted\n", sp->description);
2499 FDH_REALLYCLOSE(fdP);
2503 Log("Part of the header (%s) is corrupted; recreating\n",
2506 /* header can be garbage; make sure we don't read garbage data from
2508 memset(&header, 0, sizeof(header));
2510 #ifdef AFS_NAMEI_ENV
2511 if (namei_FixSpecialOGM(fdP, check)) {
2512 Log("Error with namei header OGM data (%s)\n", sp->description);
2513 FDH_REALLYCLOSE(fdP);
2518 if (sp->inodeType == VI_VOLINFO
2519 && header.volumeInfo.destroyMe == DESTROY_ME) {
2522 FDH_REALLYCLOSE(fdP);
2526 if (recreate && !Testing) {
2529 ("Internal error: recreating volume header (%s) in check mode\n",
2531 nBytes = FDH_TRUNC(fdP, 0);
2533 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2534 sp->description, errno);
2536 /* The following code should be moved into vutil.c */
/* For the volume info inode a whole placeholder VolumeDiskData is built. */
2537 if (sp->inodeType == VI_VOLINFO) {
2539 memset(&header.volumeInfo, 0, sizeof(header.volumeInfo));
2540 header.volumeInfo.stamp = sp->stamp;
2541 header.volumeInfo.id = isp->volumeId;
2542 header.volumeInfo.parentId = isp->RWvolumeId;
2543 sprintf(header.volumeInfo.name, "bogus.%" AFS_VOLID_FMT, afs_printable_VolumeId_lu(isp->volumeId));
2544 Log("Warning: the name of volume %" AFS_VOLID_FMT " is now \"bogus.%" AFS_VOLID_FMT "\"\n",
2545 afs_printable_VolumeId_lu(isp->volumeId), afs_printable_VolumeId_lu(isp->volumeId));
2546 header.volumeInfo.inService = 0;
2547 header.volumeInfo.blessed = 0;
2548 /* The + 1000 is a hack in case there are any files out in venus caches */
2549 header.volumeInfo.uniquifier = (isp->maxUniquifier + 1) + 1000;
2550 header.volumeInfo.type = (isp->volumeId == isp->RWvolumeId ? readwriteVolume : readonlyVolume); /* XXXX */
2551 header.volumeInfo.needsCallback = 0;
2552 gettimeofday(&tp, NULL);
2553 header.volumeInfo.creationDate = tp.tv_sec;
2555 FDH_PWRITE(fdP, (char *)&header.volumeInfo,
2556 sizeof(header.volumeInfo), 0);
2557 if (nBytes != sizeof(header.volumeInfo)) {
2560 ("Unable to write volume header file (%s) (errno = %d)\n",
2561 sp->description, errno);
2562 Abort("Unable to write entire volume header file (%s)\n",
/* Any other special inode only gets its version stamp rewritten. */
2566 nBytes = FDH_PWRITE(fdP, (char *)&sp->stamp, sizeof(sp->stamp), 0);
2567 if (nBytes != sizeof(sp->stamp)) {
2570 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2571 sp->description, errno);
2573 ("Unable to write entire version stamp in volume header file (%s)\n",
2578 FDH_REALLYCLOSE(fdP);
/* Copy the volume info that was read (or rebuilt) into salvinfo->VolInfo. */
2580 if (sp->inodeType == VI_VOLINFO) {
2581 salvinfo->VolInfo = header.volumeInfo;
/* Report the update time when one is recorded, otherwise the creation time. */
2585 if (salvinfo->VolInfo.updateDate) {
2586 strcpy(update, TimeStamp(salvinfo->VolInfo.updateDate, 0));
2588 Log("%s (%" AFS_VOLID_FMT ") %supdated %s\n", salvinfo->VolInfo.name,
2589 afs_printable_VolumeId_lu(salvinfo->VolInfo.id),
2590 (Testing ? "it would have been " : ""), update);
2592 strcpy(update, TimeStamp(salvinfo->VolInfo.creationDate, 0));
2594 Log("%s (%" AFS_VOLID_FMT ") not updated (created %s)\n",
2595 salvinfo->VolInfo.name, afs_printable_VolumeId_lu(salvinfo->VolInfo.id), update);
/*
 * SalvageVnodes: run SalvageIndex over the small and then the large vnode
 * index of thisIsp's volume.  The inode list handed to SalvageIndex is the
 * part of the inodes array that follows rwIsp's special inodes.
 * Returns 0 only when both ilarge and ismall are 0, otherwise -1.
 * NOTE(review): the assignments to ismall/ilarge are on lines outside this
 * view; presumably they receive the two SalvageIndex results.
 */
2605 SalvageVnodes(struct SalvInfo *salvinfo,
2606 struct InodeSummary *rwIsp,
2607 struct InodeSummary *thisIsp,
2608 struct ViceInodeInfo *inodes, int check)
2610 int ilarge, ismall, ioffset, RW, nInodes;
2611 ioffset = rwIsp->index + rwIsp->nSpecialInodes; /* first inode */
/* RW is true when the volume being salvaged is the read/write volume itself. */
2614 RW = (rwIsp == thisIsp);
2615 nInodes = (rwIsp->nInodes - rwIsp->nSpecialInodes);
2617 SalvageIndex(salvinfo, thisIsp->volSummary->header.smallVnodeIndex, vSmall, RW,
2618 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
/* In check mode a failure on the small index is tested before the large one. */
2619 if (check && ismall == -1)
2622 SalvageIndex(salvinfo, thisIsp->volSummary->header.largeVnodeIndex, vLarge, RW,
2623 &inodes[ioffset], nInodes, thisIsp->volSummary, check);
2624 return (ilarge == 0 && ismall == 0 ? 0 : -1);
/*
 * SalvageIndex: walk the vnode index file stored in inode "ino" ("class"
 * selects the VnodeClassInfo entry) and reconcile every non-null vnode with
 * the inode list ip[0..nInodes).
 * Repairs visible here: vnodes with no inode number or a bad magic may be
 * zeroed; the vnode's uniquifier, data version, inode number and length are
 * brought in line with the matching inode; vnodes whose inode is missing
 * are zeroed.  A changed vnode is written back with IH_IWRITE unless
 * Testing is set.
 * Many lines, including the return statement, are outside this view.
 */
2628 SalvageIndex(struct SalvInfo *salvinfo, Inode ino, VnodeClass class, int RW,
2629 struct ViceInodeInfo *ip, int nInodes,
2630 struct VolumeSummary *volSummary, int check)
2632 char buf[SIZEOF_LARGEDISKVNODE];
2633 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
2635 StreamHandle_t *file;
2636 struct VnodeClassInfo *vcp;
2638 afs_sfsize_t nVnodes;
2639 afs_fsize_t vnodeLength;
2641 afs_ino_str_t stmp1, stmp2;
2645 IH_INIT(handle, salvinfo->fileSysDevice, volSummary->header.parent, ino);
2646 fdP = IH_OPEN(handle);
2647 opr_Assert(fdP != NULL);
2648 file = FDH_FDOPEN(fdP, "r+");
2649 opr_Assert(file != NULL);
2650 vcp = &VnodeClassInfo[class];
/*
 * The file must be a whole number of vcp->diskSize records.  The first
 * record is not counted as a vnode and reading starts just after it.
 */
2651 size = OS_SIZE(fdP->fd_fd);
2652 opr_Assert(size != -1);
2653 nVnodes = (size / vcp->diskSize) - 1;
2655 opr_Assert((nVnodes + 1) * vcp->diskSize == size);
2656 opr_Verify(STREAM_ASEEK(file, vcp->diskSize) == 0);
2660 for (vnodeIndex = 0;
2661 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
2662 nVnodes--, vnodeIndex++) {
2663 if (vnode->type != vNull) {
2664 int vnodeChanged = 0;
2665 int vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
/* A vnode that names no inode at all is handled first. */
2666 if (VNDISK_GET_INO(vnode) == 0) {
2668 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2669 memset(vnode, 0, vcp->diskSize);
2673 if (vcp->magic != vnode->vnodeMagic) {
2674 /* bad magic #, probably partially created vnode */
2676 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2677 vnodeNumber, afs_printable_uint32_lu(vnode->vnodeMagic),
2678 afs_printable_uint32_lu(vcp->magic));
2679 memset(vnode, 0, vcp->diskSize);
2683 Log("Partially allocated vnode %d deleted.\n",
2685 memset(vnode, 0, vcp->diskSize);
2689 /* ****** Should do a bit more salvage here: e.g. make sure
2690 * vnode type matches what it should be given the index */
/* Inodes that claim a lower vnode number than the current one are passed over
 * (the loop body is outside this view; presumably it advances ip). */
2691 while (nInodes && ip->u.vnode.vnodeNumber < vnodeNumber) {
2692 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2693 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2694 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2701 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2702 /* The following doesn't work, because the version number
2703 * is not maintained correctly by the file server */
2704 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2705 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2707 if (VNDISK_GET_INO(vnode) == ip->inodeNumber)
2713 /* For RW volume, look for vnode with matching inode number;
2714 * if no such match, take the first determined by our sort
2716 struct ViceInodeInfo *lip = ip;
2717 int lnInodes = nInodes;
2719 && lip->u.vnode.vnodeNumber == vnodeNumber) {
2720 if (VNDISK_GET_INO(vnode) == lip->inodeNumber) {
2729 if (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2730 /* "Matching" inode */
/* vu/vd are the vnode's uniquifier and data version; iu/id the inode's. */
2734 vu = vnode->uniquifier;
2735 iu = ip->u.vnode.vnodeUniquifier;
2736 vd = vnode->dataVersion;
2737 id = ip->u.vnode.inodeDataVersion;
2739 * Because of the possibility of the uniquifier overflows (> 4M)
2740 * we compare them modulo the low 22-bits; we shouldn't worry
2741 * about mismatching since they shouldn't to many old
2742 * uniquifiers of the same vnode...
2744 if (IUnique(vu) != IUnique(iu)) {
2746 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber, IUnique(vu), IUnique(iu));
2749 vnode->uniquifier = iu;
2750 #ifdef AFS_3DISPARES
2751 vnode->dataVersion = (id >= vd ?
2754 1887437 ? vd : id) :
2757 1887437 ? id : vd));
2759 #if defined(AFS_SGI_EXMAG)
2760 vnode->dataVersion = (id >= vd ?
2763 15099494 ? vd : id) :
2766 15099494 ? id : vd));
2768 vnode->dataVersion = (id > vd ? id : vd);
2769 #endif /* AFS_SGI_EXMAG */
2770 #endif /* AFS_3DISPARES */
2773 /* don't bother checking for vd > id any more, since
2774 * partial file transfers always result in this state,
2775 * and you can't do much else anyway (you've already
2776 * found the best data you can) */
2777 #ifdef AFS_3DISPARES
2778 if (!vnodeIsDirectory(vnodeNumber)
2779 && ((vd < id && (id - vd) < 1887437)
2780 || ((vd > id && (vd - id) > 1887437)))) {
2782 #if defined(AFS_SGI_EXMAG)
2783 if (!vnodeIsDirectory(vnodeNumber)
2784 && ((vd < id && (id - vd) < 15099494)
2785 || ((vd > id && (vd - id) > 15099494)))) {
2787 if (!vnodeIsDirectory(vnodeNumber) && vd < id) {
2788 #endif /* AFS_SGI_EXMAG */
2791 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber);
2792 vnode->dataVersion = id;
/* Point the vnode at the selected inode when it names a different one. */
2797 if (ip->inodeNumber != VNDISK_GET_INO(vnode)) {
2800 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2802 VNDISK_SET_INO(vnode, ip->inodeNumber);
2807 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber, PrintInode(stmp1, VNDISK_GET_INO(vnode)), PrintInode(stmp2, ip->inodeNumber), (afs_uintmax_t) ip->byteCount);
2809 VNDISK_SET_INO(vnode, ip->inodeNumber);
/* The inode's byteCount replaces the vnode's recorded length when they differ. */
2812 VNDISK_GET_LEN(vnodeLength, vnode);
2813 if (ip->byteCount != vnodeLength) {
2816 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2821 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber, (afs_uintmax_t) vnodeLength, (afs_uintmax_t) ip->byteCount);
2822 VNDISK_SET_LEN(vnode, ip->byteCount);
2826 ip->linkCount--; /* Keep the inode around */
2829 } else { /* no matching inode */
2831 if (VNDISK_GET_INO(vnode) != 0
2832 || vnode->type == vDirectory) {
2833 /* No matching inode--get rid of the vnode */
2835 if (VNDISK_GET_INO(vnode)) {
2837 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)));
2841 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber, vnode->uniquifier);
2846 if (VNDISK_GET_INO(vnode)) {
2848 time_t serverModifyTime = vnode->serverModifyTime;
2849 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, PrintInode(stmp, VNDISK_GET_INO(vnode)), ctime(&serverModifyTime));
2853 time_t serverModifyTime = vnode->serverModifyTime;
2854 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber, vnode->uniquifier, ctime(&serverModifyTime));
2857 memset(vnode, 0, vcp->diskSize);
2860 /* Should not reach here because we checked for
2861 * (inodeNumber == 0) above. And where we zero the vnode,
2862 * we also goto vnodeDone.
2866 while (nInodes && ip->u.vnode.vnodeNumber == vnodeNumber) {
2870 } /* VNDISK_GET_INO(vnode) != 0 */
/* Check mode must never have modified a vnode. */
2872 opr_Assert(!(vnodeChanged && check));
/* Write the repaired record back into the index, unless only testing. */
2873 if (vnodeChanged && !Testing) {
2874 opr_Verify(IH_IWRITE(handle,
2875 vnodeIndexOffset(vcp, vnodeNumber),
2876 (char *)vnode, vcp->diskSize)
2878 salvinfo->VolumeChanged = 1; /* For break call back */
/*
 * CheckVnodeNumber: map a vnode number to its entry in
 * salvinfo->vnodeInfo[class].vnodes.  Returns NULL when the vnode's bit
 * number is not below the nVnodes recorded for that class.
 */
2889 struct VnodeEssence *
2890 CheckVnodeNumber(struct SalvInfo *salvinfo, VnodeId vnodeNumber)
2893 struct VnodeInfo *vip;
/* The class picks the per-class table; the bit number is the index into it. */
2896 class = vnodeIdToClass(vnodeNumber);
2897 vip = &salvinfo->vnodeInfo[class];
2898 offset = vnodeIdToBitNumber(vnodeNumber);
2899 return (offset >= vip->nVnodes ? NULL : &vip->vnodes[offset]);
/*
 * CopyOnWrite: give "dir" a fresh copy of its directory inode before the
 * directory is modified.  The visible steps are: read the directory's
 * large-vnode record, create a new inode, copy the old inode's contents
 * into it, store the new inode number in the vnode record, write the
 * record back, and re-point dir->dirHandle at the new inode.
 * The guard on dir->copied / Testing presumably returns early; the
 * statement it controls is outside this view.
 */
2903 CopyOnWrite(struct SalvInfo *salvinfo, struct DirSummary *dir)
2905 /* Copy the directory unconditionally if we are going to change it:
2906 * not just if was cloned.
2908 struct VnodeDiskObject vnode;
2909 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2910 Inode oldinode, newinode;
2913 if (dir->copied || Testing)
2915 DFlush(); /* Well justified paranoia... */
/* Load the directory's vnode record so that its inode number can be replaced. */
2918 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2919 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2921 opr_Assert(code == sizeof(vnode));
2922 oldinode = VNDISK_GET_INO(&vnode);
2923 /* Increment the version number by a whole lot to avoid problems with
2924 * clients that were promised new version numbers--but the file server
2925 * crashed before the versions were written to disk.
2928 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2929 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2931 opr_Assert(VALID_INO(newinode));
2932 opr_Verify(CopyInode(salvinfo->fileSysDevice, oldinode, newinode,
2935 VNDISK_SET_INO(&vnode, newinode);
2937 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
2938 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2940 opr_Assert(code == sizeof(vnode));
2942 SetSalvageDirHandle(&dir->dirHandle, dir->dirHandle.dirh_handle->ih_vid,
2943 salvinfo->fileSysDevice, newinode,
2944 &salvinfo->VolumeChanged);
2945 /* Don't delete the original inode right away, because the directory is
2946 * still being scanned.
2952 * This function should either successfully create a new dir, or give up
2953 * and leave things the way they were. In particular, if it fails to write
2954 * the new dir properly, it should return w/o changing the reference to the
2958 CopyAndSalvage(struct SalvInfo *salvinfo, struct DirSummary *dir)
2960 struct VnodeDiskObject vnode;
2961 struct VnodeClassInfo *vcp = &VnodeClassInfo[vLarge];
2962 Inode oldinode, newinode;
2967 afs_int32 parentUnique = 1;
2968 struct VnodeEssence *vnodeEssence;
2973 Log("Salvaging directory %u...\n", dir->vnodeNumber);
2975 IH_IREAD(salvinfo->vnodeInfo[vLarge].handle,
2976 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
2978 opr_Assert(lcode == sizeof(vnode));
2979 oldinode = VNDISK_GET_INO(&vnode);
2980 /* Increment the version number by a whole lot to avoid problems with
2981 * clients that were promised new version numbers--but the file server
2982 * crashed before the versions were written to disk.
2985 IH_CREATE(dir->ds_linkH, salvinfo->fileSysDevice, salvinfo->fileSysPath, 0, dir->rwVid,
2986 dir->vnodeNumber, vnode.uniquifier, vnode.dataVersion +=
2988 opr_Assert(VALID_INO(newinode));
2989 SetSalvageDirHandle(&newdir, dir->rwVid, salvinfo->fileSysDevice, newinode,
2990 &salvinfo->VolumeChanged);
2992 /* Assign . and .. vnode numbers from dir and vnode.parent.
2993 * The uniquifier for . is in the vnode.
2994 * The uniquifier for .. might be set to a bogus value of 1 and
2995 * the salvager will later clean it up.
2997 if (vnode.parent && (vnodeEssence = CheckVnodeNumber(salvinfo, vnode.parent))) {
2998 parentUnique = (vnodeEssence->unique ? vnodeEssence->unique : 1);
3001 DirSalvage(&dir->dirHandle, &newdir, dir->vnodeNumber,
3003 (vnode.parent ? vnode.parent : dir->vnodeNumber),
3008 /* didn't really build the new directory properly, let's just give up. */
3009 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
3010 Log("Directory salvage returned code %d, continuing.\n", code);
3012 Log("also failed to decrement link count on new inode");
/* The rebuilt directory is verified with DirOK before it replaces the old one. */
3016 Log("Checking the results of the directory salvage...\n");
3017 if (!DirOK(&newdir)) {
3018 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
3019 code = IH_DEC(dir->ds_linkH, newinode, dir->rwVid);
3020 opr_Assert(code == 0);
/* Commit: store the new inode number and the new directory length in the vnode. */
3024 VNDISK_SET_INO(&vnode, newinode);
3025 length = afs_dir_Length(&newdir);
3026 VNDISK_SET_LEN(&vnode, length);
3028 IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3029 vnodeIndexOffset(vcp, dir->vnodeNumber), (char *)&vnode,
3031 opr_Assert(lcode == sizeof(vnode));
3032 IH_CONDSYNC(salvinfo->vnodeInfo[vLarge].handle);
3034 /* make sure old directory file is really closed */
3035 fdP = IH_OPEN(dir->dirHandle.dirh_handle);
3036 FDH_REALLYCLOSE(fdP);
/* Release the old directory inode, then adopt the new directory handle. */
3038 code = IH_DEC(dir->ds_linkH, oldinode, dir->rwVid);
3039 opr_Assert(code == 0);
3040 dir->dirHandle = newdir;
/* JudgeEntry receives a pointer to this struct through its void* "arock" argument. */
3044 * arguments for JudgeEntry.
3046 struct judgeEntry_params {
3047 struct DirSummary *dir; /**< directory we're examining entries in */
3048 struct SalvInfo *salvinfo; /**< SalvInfo for the current salvage job */
/*
 * JudgeEntry: examine one entry (name, vnodeNumber, unique) of the
 * directory params->dir and repair or delete it when it disagrees with the
 * vnode summary returned by CheckVnodeNumber.  arock carries a
 * struct judgeEntry_params.
 * "unique" is used throughout but is declared on a line outside this view
 * (presumably the last parameter).
 * Every directory modification visible here is preceded by
 * CopyOnWrite(salvinfo, dir).
 */
3052 JudgeEntry(void *arock, char *name, afs_int32 vnodeNumber,
3055 struct judgeEntry_params *params = arock;
3056 struct DirSummary *dir = params->dir;
3057 struct SalvInfo *salvinfo = params->salvinfo;
3058 struct VnodeEssence *vnodeEssence;
3059 afs_int32 dirOrphaned, todelete;
3061 dirOrphaned = IsVnodeOrphaned(salvinfo, dir->vnodeNumber);
/* Entries whose vnode number has no summary at all are invalid. */
3063 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3064 if (vnodeEssence == NULL) {
3066 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP "%s (vnode %u, unique %u)\n", dir->vnodeNumber, dir->name ? dir->name : "??", name, vnodeNumber, unique);
3069 CopyOnWrite(salvinfo, dir);
3070 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3075 #ifndef AFS_NAMEI_ENV
3076 /* On AIX machines, don't allow entries to point to inode 0. That is a special
3077 * mount inode for the partition. If this inode were deleted, it would crash
3080 if (vnodeEssence->InodeNumber == 0) {
3081 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP "%s has no inode (vnode %d, unique %d)%s\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "-- would have deleted" : " -- deleted"));
3083 CopyOnWrite(salvinfo, dir);
3084 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3091 if (!(vnodeNumber & 1) && !Showmode
3092 && !(vnodeEssence->count || vnodeEssence->unique
3093 || vnodeEssence->modeBits)) {
3094 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP "%s (vnode %u, unique %u)%s\n",
3095 dir->vnodeNumber, (dir->name ? dir->name : "??"), name,
3096 vnodeNumber, unique,
3097 ((!unique) ? (Testing ? "-- would have deleted" : " -- deleted") :
3101 CopyOnWrite(salvinfo, dir);
3102 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3108 /* Check if the Uniquifiers match. If not, change the directory entry
3109 * so its unique matches the vnode unique. Delete if the unique is zero
3110 * or if the directory is orphaned.
3112 if (!vnodeEssence->unique || (vnodeEssence->unique) != unique) {
3113 todelete = ((!vnodeEssence->unique || dirOrphaned) ? 1 : 0);
3116 && ((strcmp(name, "..") == 0) || (strcmp(name, ".") == 0))) {
3118 /* This is an orphaned directory. Don't delete the . or ..
3119 * entry. Otherwise, it will get created in the next
3120 * salvage and deleted again here. So Just skip it.
3124 /* (vnodeEssence->unique == 0 && ('.' || '..'));
3125 * Entries arriving here should be deleted, but the directory
3126 * is not orphaned. Therefore, the entry must be pointing at
3127 * the wrong vnode. Skip the 'else' clause and fall through;
3128 * the code below will repair the entry so it correctly points
3129 * at the vnode of the current directory (if '.') or the parent
3130 * directory (if '..'). */
3133 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u): unique changed from %u to %u %s\n",
3134 dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique,
3135 vnodeEssence->unique, (!todelete ? "" : (Testing ? "-- would have deleted" : "-- deleted")));
3139 fid.Vnode = vnodeNumber;
3140 fid.Unique = vnodeEssence->unique;
3141 CopyOnWrite(salvinfo, dir);
3142 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3144 opr_Verify(afs_dir_Create(&dir->dirHandle, name, &fid) == 0);
3147 return 0; /* no need to continue */
/* "." must name this directory itself (same vnode number and unique). */
3151 if (strcmp(name, ".") == 0) {
3152 if (dir->vnodeNumber != vnodeNumber || (dir->unique != unique)) {
3154 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3157 CopyOnWrite(salvinfo, dir);
3158 opr_Verify(afs_dir_Delete(&dir->dirHandle, ".") == 0);
3159 fid.Vnode = dir->vnodeNumber;
3160 fid.Unique = dir->unique;
3161 opr_Verify(afs_dir_Create(&dir->dirHandle, ".", &fid) == 0);
3162 vnodeNumber = fid.Vnode; /* Get the new Essence */
3163 unique = fid.Unique;
3164 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
/*
 * ".." is compared against either dir->parent or the directory itself; the
 * condition that chooses between the two is outside this view.
 */
3168 } else if (strcmp(name, "..") == 0) {
3171 struct VnodeEssence *dotdot;
3172 pa.Vnode = dir->parent;
3173 dotdot = CheckVnodeNumber(salvinfo, pa.Vnode);
3174 opr_Assert(dotdot != NULL); /* XXX Should not be assert */
3175 pa.Unique = dotdot->unique;
3177 pa.Vnode = dir->vnodeNumber;
3178 pa.Unique = dir->unique;
3180 if ((pa.Vnode != vnodeNumber) || (pa.Unique != unique)) {
3182 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir->vnodeNumber, dir->unique, vnodeNumber, unique);
3184 CopyOnWrite(salvinfo, dir);
3185 opr_Verify(afs_dir_Delete(&dir->dirHandle, "..") == 0);
3186 opr_Verify(afs_dir_Create(&dir->dirHandle, "..", &pa) == 0);
3189 vnodeNumber = pa.Vnode; /* Get the new Essence */
3191 vnodeEssence = CheckVnodeNumber(salvinfo, vnodeNumber);
3193 dir->haveDotDot = 1;
/* Names starting with ".__afs" are removed and their vnode is marked for deletion. */
3194 } else if (strncmp(name, ".__afs", 6) == 0) {
3196 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir->vnodeNumber, name, (Testing ? "would have been" : "is"), vnodeNumber);
3199 CopyOnWrite(salvinfo, dir);
3200 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3202 vnodeEssence->claimed = 0; /* Not claimed: Orphaned */
3203 vnodeEssence->todelete = 1; /* Will later delete vnode and decr inode */
3206 if (ShowSuid && (vnodeEssence->modeBits & 06000))
3207 Log("FOUND suid/sgid file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/*
 * A symlink with none of the 0111 mode bits set is inspected as a mount
 * point: its contents must start with '#' or '%' and end with '.'.
 * Otherwise the 0111 bits are set on the vnode summary.
 */
3208 if (/* ShowMounts && */ (vnodeEssence->type == vSymlink)
3209 && !(vnodeEssence->modeBits & 0111)) {
3210 afs_sfsize_t nBytes;
3216 IH_INIT(ihP, salvinfo->fileSysDevice, dir->dirHandle.dirh_handle->ih_vid,
3217 vnodeEssence->InodeNumber);
3220 Log("ERROR %s could not open mount point vnode %u\n", dir->vname, vnodeNumber);
3224 size = FDH_SIZE(fdP);
3226 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir->vname, (int)size, vnodeNumber);
3227 FDH_REALLYCLOSE(fdP);
3234 nBytes = FDH_PREAD(fdP, buf, size, 0);
3235 if (nBytes == size) {
3237 if ( (*buf != '#' && *buf != '%') || buf[strlen(buf)-1] != '.' ) {
3238 Log("Volume %" AFS_VOLID_FMT " (%s) mount point %s" OS_DIRSEP "%s to '%s' invalid, %s to symbolic link\n",
3239 afs_printable_VolumeId_lu(dir->dirHandle.dirh_handle->ih_vid), dir->vname, dir->name ? dir->name : "??", name, buf,
3240 Testing ? "would convert" : "converted");
3241 vnodeEssence->modeBits |= 0111;
3242 vnodeEssence->changed = 1;
3243 } else if (ShowMounts)
3244 Log("In volume %" AFS_VOLID_FMT " (%s) found mountpoint %s" OS_DIRSEP "%s to '%s'\n",
3245 afs_printable_VolumeId_lu(dir->dirHandle.dirh_handle->ih_vid),
3246 dir->vname, dir->name ? dir->name : "??", name, buf);
3248 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3249 dir->vname, vnodeNumber, (int)size, (int)nBytes);
3251 FDH_REALLYCLOSE(fdP);
3254 if (ShowRootFiles && vnodeEssence->owner == 0 && vnodeNumber != 1)
3255 Log("FOUND root file: %s" OS_DIRSEP "%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir->name ? dir->name : "??", name, vnodeEssence->owner, vnodeEssence->group, vnodeEssence->modeBits, vnodeEssence->author, vnodeNumber, dir->vnodeNumber);
/* The first name seen for a large-class vnode is kept in its summary. */
3256 if (vnodeIdToClass(vnodeNumber) == vLarge
3257 && vnodeEssence->name == NULL) {
3258 vnodeEssence->name = strdup(name);
3261 /* The directory entry points to the vnode. Check to see if the
3262 * vnode points back to the directory. If not, then let the
3263 * directory claim it (else it might end up orphaned). Vnodes
3264 * already claimed by another directory are deleted from this
3265 * directory: hardlinks to the same vnode are not allowed
3266 * from different directories.
3268 if (vnodeEssence->parent != dir->vnodeNumber) {
3269 if (!vnodeEssence->claimed && !dirOrphaned && vnodeNumber != 1) {
3270 /* Vnode does not point back to this directory.
3271 * Orphaned dirs cannot claim a file (it may belong to
3272 * another non-orphaned dir).
3275 Log("dir vnode %u: %s" OS_DIRSEP "%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""), vnodeEssence->parent, dir->vnodeNumber);
3277 vnodeEssence->parent = dir->vnodeNumber;
3278 vnodeEssence->changed = 1;
3280 /* Vnode was claimed by another directory */
3283 Log("dir vnode %u: %s" OS_DIRSEP "%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3284 } else if (vnodeNumber == 1) {
3285 Log("dir vnode %d: %s" OS_DIRSEP "%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeNumber, unique, (Testing ? "would have been " : ""));
3287 Log("dir vnode %u: %s" OS_DIRSEP "%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir->vnodeNumber, (dir->name ? dir->name : "??"), name, vnodeEssence->parent, vnodeNumber, unique, (Testing ? "would have been " : ""));
3291 CopyOnWrite(salvinfo, dir);
3292 opr_Verify(afs_dir_Delete(&dir->dirHandle, name) == 0);
3297 /* This directory claims the vnode */
3298 vnodeEssence->claimed = 1;
/* This entry accounts for one unit of the vnode summary's count. */
3300 vnodeEssence->count--;
/*
 * DistilVnodeEssence: read the vnode index stored in inode "ino" and build
 * the in-memory summary salvinfo->vnodeInfo[class]: one VnodeEssence per
 * index slot, plus (for the large class only) the per-slot inode number.
 * *maxu is raised to the largest uniquifier seen.
 *
 * A directory vnode found in a non-large index is zeroed and written back.
 * That write used to pass "&vnode, sizeof(vnode)", i.e. the address and
 * size of the local pointer variable, so the pointer's own bytes went to
 * disk instead of the zeroed record.  It now writes the record that the
 * memset on the previous line cleared.
 *
 * NOTE(review): the VnodeEssence fields and volumeBlockCount are filled in
 * before that zeroing and are not reset in the visible code -- confirm
 * whether that is intended.
 * Several lines of this function are outside this view.
 */
3305 DistilVnodeEssence(struct SalvInfo *salvinfo, VolumeId rwVId,
3306 VnodeClass class, Inode ino, Unique * maxu)
3308 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
3309 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
3310 char buf[SIZEOF_LARGEDISKVNODE];
3311 struct VnodeDiskObject *vnode = (struct VnodeDiskObject *)buf;
3313 StreamHandle_t *file;
3318 IH_INIT(vip->handle, salvinfo->fileSysDevice, rwVId, ino);
3319 fdP = IH_OPEN(vip->handle);
3320 opr_Assert(fdP != NULL);
3321 file = FDH_FDOPEN(fdP, "r+");
3322 opr_Assert(file != NULL);
/* The index is a whole number of diskSize records; the first is not a vnode. */
3323 size = OS_SIZE(fdP->fd_fd);
3324 opr_Assert(size != -1);
3325 vip->nVnodes = (size / vcp->diskSize) - 1;
3326 if (vip->nVnodes > 0) {
3327 opr_Assert((vip->nVnodes + 1) * vcp->diskSize == size);
3328 opr_Verify(STREAM_ASEEK(file, vcp->diskSize) == 0);
3329 opr_Verify((vip->vnodes = calloc(vip->nVnodes,
3330 sizeof(struct VnodeEssence)))
/* Only the large class keeps a parallel array of inode numbers. */
3332 if (class == vLarge) {
3333 opr_Verify((vip->inodes = calloc(vip->nVnodes, sizeof(Inode)))
3343 vip->volumeBlockCount = vip->nAllocatedVnodes = 0;
3344 for (vnodeIndex = 0, nVnodes = vip->nVnodes;
3345 nVnodes && STREAM_READ(vnode, vcp->diskSize, 1, file) == 1;
3346 nVnodes--, vnodeIndex++) {
3347 if (vnode->type != vNull) {
3348 struct VnodeEssence *vep = &vip->vnodes[vnodeIndex];
3349 afs_fsize_t vnodeLength;
3350 vip->nAllocatedVnodes++;
3351 vep->count = vnode->linkCount;
3352 VNDISK_GET_LEN(vnodeLength, vnode);
3353 vep->blockCount = nBlocks(vnodeLength);
3354 vip->volumeBlockCount += vep->blockCount;
3355 vep->parent = vnode->parent;
3356 vep->unique = vnode->uniquifier;
3357 if (*maxu < vnode->uniquifier)
3358 *maxu = vnode->uniquifier;
3359 vep->modeBits = vnode->modeBits;
3360 vep->InodeNumber = VNDISK_GET_INO(vnode);
3361 vep->type = vnode->type;
3362 vep->author = vnode->author;
3363 vep->owner = vnode->owner;
3364 vep->group = vnode->group;
3365 if (vnode->type == vDirectory) {
3366 if (class != vLarge) {
3367 VnodeId vnodeNumber = bitNumberToVnodeNumber(vnodeIndex, class);
3368 vip->nAllocatedVnodes--;
3369 memset(vnode, 0, sizeof(*vnode));
/* Write the zeroed record itself: vnode already points at the buffer. */
3370 IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3371 vnodeIndexOffset(vcp, vnodeNumber),
3372 (char *)vnode, sizeof(*vnode));
3373 salvinfo->VolumeChanged = 1;
3375 vip->inodes[vnodeIndex] = VNDISK_GET_INO(vnode);
/*
 * GetDirName: build the path name of vnode "vnode" (summary vp) into "path".
 * In the visible branch the parent's name is produced first by a recursive
 * call, then OS_DIRSEP and vp->name are appended.  The rest of the
 * parameter list and the other branches are outside this view.
 * NOTE(review): the strcat calls do not bound the write; path must be large
 * enough for the whole chain of names -- confirm at the callers.
 */
3384 GetDirName(struct SalvInfo *salvinfo, VnodeId vnode, struct VnodeEssence *vp,
3387 struct VnodeEssence *parentvp;
/* Recurse only when this vnode has a parent, a recorded name, and a parent summary. */
3393 if (vp->parent && vp->name && (parentvp = CheckVnodeNumber(salvinfo, vp->parent))
3394 && GetDirName(salvinfo, vp->parent, parentvp, path)) {
3395 strcat(path, OS_DIRSEP);
3396 strcat(path, vp->name);
3402 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3403 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
3406 IsVnodeOrphaned(struct SalvInfo *salvinfo, VnodeId vnode)
3408 struct VnodeEssence *vep;
3411 return (1); /* Vnode zero does not exist */
3413 return (0); /* The root dir vnode is always claimed */
/* Look up the in-memory summary for this vnode; a vnode with no summary is
 * treated the same as an unclaimed one. */
3414 vep = CheckVnodeNumber(salvinfo, vnode); /* Get the vnode essence */
3415 if (!vep || !vep->claimed)
3416 return (1); /* Vnode is not claimed - it is orphaned */
/* This vnode is claimed, so the answer is whatever holds for its parent.
 * NOTE(review): the recursion follows the parent chain and nothing visible
 * here guards against a cycle of claimed vnodes -- confirm that cannot
 * happen on a damaged volume. */
3418 return (IsVnodeOrphaned(salvinfo, vep->parent));
/*
 * SalvageDir: salvage the directory held in slot `i` of dirVnodeInfo (the
 * large-vnode table).  A slot is handled at most once (vnodes[i].salvaged);
 * slots with no inode are skipped.  The parent directory is salvaged first by
 * recursion, the directory is checked with DirOK() unless a rebuild is being
 * forced, and its entries are enumerated through JudgeEntry().  Once judged,
 * the summary of vnode 1 / unique 1 is copied into *rootdir.
 * NOTE: short source lines (braces, else, short statements) are elided from
 * this listing, so some conditions guarding the visible statements are not
 * shown.
 */
3422 SalvageDir(struct SalvInfo *salvinfo, char *name, VolumeId rwVid,
3423 struct VnodeInfo *dirVnodeInfo, IHandle_t * alinkH, int i,
3424 struct DirSummary *rootdir, int *rootdirfound)
/* dir, dirHandle and path are static, so they are shared by every recursion
 * level.  The recursive call below happens before any of them is filled in
 * for this directory. */
3426 static struct DirSummary dir;
3427 static struct DirHandle dirHandle;
3428 struct VnodeEssence *parent;
3429 static char path[MAXPATHLEN];
3432 if (dirVnodeInfo->vnodes[i].salvaged)
3433 return; /* already salvaged */
/* Mark the slot before recursing so a parent loop cannot revisit it. */
3436 dirVnodeInfo->vnodes[i].salvaged = 1;
3438 if (dirVnodeInfo->inodes[i] == 0)
3439 return; /* Not allocated to a directory */
/* Vnode 1 is the volume root; a non-zero parent recorded for it is logged
 * and reset to 0, and the vnode is flagged as changed. */
3441 if (bitNumberToVnodeNumber(i, vLarge) == 1) {
3442 if (dirVnodeInfo->vnodes[i].parent) {
3443 Log("Bad parent, vnode 1; %s...\n",
3444 (Testing ? "skipping" : "salvaging"));
3445 dirVnodeInfo->vnodes[i].parent = 0;
3446 dirVnodeInfo->vnodes[i].changed = 1;
/* Salvage the parent directory first when it exists and has not been
 * processed yet. */
3449 parent = CheckVnodeNumber(salvinfo, dirVnodeInfo->vnodes[i].parent);
3450 if (parent && parent->salvaged == 0)
3451 SalvageDir(salvinfo, name, rwVid, dirVnodeInfo, alinkH,
3452 vnodeIdToBitNumber(dirVnodeInfo->vnodes[i].parent),
3453 rootdir, rootdirfound);
/* Describe this directory in the shared DirSummary. */
3456 dir.vnodeNumber = bitNumberToVnodeNumber(i, vLarge);
3457 dir.unique = dirVnodeInfo->vnodes[i].unique;
3460 dir.parent = dirVnodeInfo->vnodes[i].parent;
3461 dir.haveDot = dir.haveDotDot = 0;
3462 dir.ds_linkH = alinkH;
3463 SetSalvageDirHandle(&dir.dirHandle, dir.rwVid, salvinfo->fileSysDevice,
3464 dirVnodeInfo->inodes[i], &salvinfo->VolumeChanged);
/* dirok is forced to 0, without calling DirOK(), when RebuildDirs is set and
 * this is not a Testing run. */
3466 dirok = ((RebuildDirs && !Testing) ? 0 : DirOK(&dir.dirHandle));
3469 Log("Directory bad, vnode %u; %s...\n", dir.vnodeNumber,
3470 (Testing ? "skipping" : "salvaging"));
3473 CopyAndSalvage(salvinfo, &dir);
/* Record the inode now held by dir.dirHandle for this slot. */
3475 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3478 dirHandle = dir.dirHandle;
3481 GetDirName(salvinfo, bitNumberToVnodeNumber(i, vLarge),
3482 &dirVnodeInfo->vnodes[i], path);
3485 /* If enumeration failed for random reasons, we will probably delete
3486 * too much stuff, so we guard against this instead.
3488 struct judgeEntry_params judge_params;
3489 judge_params.salvinfo = salvinfo;
3490 judge_params.dir = &dir;
3492 opr_Verify(afs_dir_EnumerateDir(&dirHandle, JudgeEntry,
3493 &judge_params) == 0);
3496 /* Delete the old directory if it was copied in order to salvage.
3497 * CopyOnWrite has written the new inode # to the disk, but we still
3498 * have the old one in our local structure here. Thus, we idec the
3502 if (dir.copied && !Testing) {
3503 code = IH_DEC(dir.ds_linkH, dirHandle.dirh_handle->ih_ino, rwVid);
3504 opr_Assert(code == 0);
3505 dirVnodeInfo->inodes[i] = dir.dirHandle.dirh_inode;
3508 /* Remember rootdir DirSummary _after_ it has been judged */
3509 if (dir.vnodeNumber == 1 && dir.unique == 1) {
3510 memcpy(rootdir, &dir, sizeof(struct DirSummary));
3518 * Get a new FID that can be used to create a new file.
3520 * @param[in] volHeader vol header for the volume
3521 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3522 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3523 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3524 * updated to the new max unique if we create a new
/*
 * GetNewFID: choose a vnode slot and uniquifier for a new vnode of the given
 * class.  The first slot whose type is vNull is reused; when none is free the
 * in-memory vnode table is grown by one entry.  Only afid->Vnode and
 * afid->Unique are filled in.
 * NOTE: short source lines are elided from this listing.
 */
3528 GetNewFID(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3529 VnodeClass class, AFSFid *afid, Unique *maxunique)
/* Look for the first unused slot in the table. */
3532 for (i = 0; i < salvinfo->vnodeInfo[class].nVnodes; i++) {
3533 if (salvinfo->vnodeInfo[class].vnodes[i].type == vNull) {
3537 if (i == salvinfo->vnodeInfo[class].nVnodes) {
3538 /* no free vnodes; make a new one */
3539 salvinfo->vnodeInfo[class].nVnodes++;
3540 salvinfo->vnodeInfo[class].vnodes =
3541 realloc(salvinfo->vnodeInfo[class].vnodes,
3542 sizeof(struct VnodeEssence) * (i+1));
/* Fail loudly on allocation failure instead of dereferencing NULL below;
 * this matches how the other allocations in this file are checked. */
opr_Assert(salvinfo->vnodeInfo[class].vnodes != NULL);
/* realloc() does not initialise the added entry; zero it so that fields the
 * caller does not set explicitly never hold indeterminate values. */
memset(&salvinfo->vnodeInfo[class].vnodes[i], 0, sizeof(struct VnodeEssence));
3544 salvinfo->vnodeInfo[class].vnodes[i].type = vNull;
3547 afid->Vnode = bitNumberToVnodeNumber(i, class);
/* Pick a uniquifier: if the header's value is not above the largest one seen
 * in the vnodes, use a value past the 2000 bump applied later; otherwise
 * consume the header's next uniquifier. */
3549 if (volHeader->uniquifier < (*maxunique + 1)) {
3550 /* header uniq is bad; it will get bumped by 2000 later */
3551 afid->Unique = *maxunique + 1 + 2000;
3554 /* header uniq seems okay; just use that */
3555 afid->Unique = *maxunique = volHeader->uniquifier++;
3560 * Create a vnode for a README file explaining not to use a recreated-root vol.
3562 * @param[in] volHeader vol header for the volume
3563 * @param[in] alinkH ihandle for i/o for the volume
3564 * @param[in] vid volume id
3565 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3566 * updated to the new max unique if we create a new
3568 * @param[out] afid FID for the new readme vnode
3569 * @param[out] ainode the inode for the new readme file
3571 * @return operation status
/*
 * CreateReadme: create the explanatory README file that accompanies a
 * recreated root directory.  Visible steps: obtain a small-vnode FID, create
 * an inode and write the note into it, build and write the small disk vnode,
 * update the in-memory VnodeEssence, and hand the new inode back through
 * *ainode.  The recovery path releases the inode again with IH_DEC().
 * NOTE: short source lines are elided from this listing.
 */
3576 CreateReadme(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3577 IHandle_t *alinkH, VolumeId vid, Unique *maxunique, AFSFid *afid,
3581 struct VnodeDiskObject *rvnode = NULL;
3583 IHandle_t *readmeH = NULL;
3584 struct VnodeEssence *vep;
3586 time_t now = time(NULL);
3588 /* Try to make the note brief, but informative. Only administrators should
3589 * be able to read this file at first, so we can hopefully assume they
3590 * know what AFS is, what a volume is, etc. */
3592 "This volume has been salvaged, but has lost its original root directory.\n"
3593 "The root directory that exists now has been recreated from orphan files\n"
3594 "from the rest of the volume. This recreated root directory may interfere\n"
3595 "with old cached data on clients, and there is no way the salvager can\n"
3596 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3597 "use this volume, but only copy the salvaged data to a new volume.\n"
3598 "Continuing to use this volume as it exists now may cause some clients to\n"
3599 "behave oddly when accessing this volume.\n"
3600 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3601 /* ^ the person reading this probably just lost some data, so they could
3602 * use some cheering up. */
3604 /* -1 for the trailing NUL */
3605 length = sizeof(readme) - 1;
/* Reserve a small-vnode slot and FID.  vep must be taken only after this
 * call, because GetNewFID() may realloc() the vnode table. */
3607 GetNewFID(salvinfo, volHeader, vSmall, afid, maxunique);
3609 vep = &salvinfo->vnodeInfo[vSmall].vnodes[vnodeIdToBitNumber(afid->Vnode)];
3611 /* create the inode and write the contents */
3612 readmeinode = IH_CREATE(alinkH, salvinfo->fileSysDevice,
3613 salvinfo->fileSysPath, 0, vid,
3614 afid->Vnode, afid->Unique, 1);
3615 if (!VALID_INO(readmeinode)) {
3616 Log("CreateReadme: readme IH_CREATE failed\n");
3620 IH_INIT(readmeH, salvinfo->fileSysDevice, vid, readmeinode);
3621 bytes = IH_IWRITE(readmeH, 0, readme, length);
3622 IH_RELEASE(readmeH);
/* Report the byte count that was actually requested (length), not
 * sizeof(readme), which also counts the trailing NUL. */
3624 if (bytes != length) {
3625 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes,
3626 (int)length);
3630 /* create the vnode and write it out */
3631 rvnode = calloc(1, SIZEOF_SMALLDISKVNODE);
3633 Log("CreateReadme: error alloc'ing memory\n");
3637 rvnode->type = vFile;
3639 rvnode->modeBits = 0777;
3640 rvnode->linkCount = 1;
3641 VNDISK_SET_LEN(rvnode, length);
3642 rvnode->uniquifier = afid->Unique;
3643 rvnode->dataVersion = 1;
3644 VNDISK_SET_INO(rvnode, readmeinode);
3645 rvnode->unixModifyTime = rvnode->serverModifyTime = now;
3650 rvnode->vnodeMagic = VnodeClassInfo[vSmall].magic;
3652 bytes = IH_IWRITE(salvinfo->vnodeInfo[vSmall].handle,
3653 vnodeIndexOffset(&VnodeClassInfo[vSmall], afid->Vnode),
3654 (char*)rvnode, SIZEOF_SMALLDISKVNODE);
3656 if (bytes != SIZEOF_SMALLDISKVNODE) {
3657 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3658 (int)SIZEOF_SMALLDISKVNODE);
3662 /* update VnodeEssence for new readme vnode */
3663 salvinfo->vnodeInfo[vSmall].nAllocatedVnodes++;
3665 vep->blockCount = nBlocks(length);
3666 salvinfo->vnodeInfo[vSmall].volumeBlockCount += vep->blockCount;
3667 vep->parent = rvnode->parent;
3668 vep->unique = rvnode->uniquifier;
3669 vep->modeBits = rvnode->modeBits;
3670 vep->InodeNumber = VNDISK_GET_INO(rvnode);
3671 vep->type = rvnode->type;
3672 vep->author = rvnode->author;
3673 vep->owner = rvnode->owner;
3674 vep->group = rvnode->group;
3684 *ainode = readmeinode;
/* Recovery: give back the inode that was created for the README. */
3689 if (IH_DEC(alinkH, readmeinode, vid)) {
3690 Log("CreateReadme (recovery): IH_DEC failed\n");
3702 * create a root dir for a volume that lacks one.
3704 * @param[in] volHeader vol header for the volume
3705 * @param[in] alinkH ihandle for disk access for this volume group
3706 * @param[in] vid volume id we're dealing with
3707 * @param[out] rootdir populated with info about the new root dir
3708 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3709 * updated to the new max unique if we create a new
3712 * @return operation status
/*
 * CreateRootDir: build a replacement root directory (vnode 1, uniquifier 1)
 * for a volume whose root is missing.  Visible steps: refuse when the volume
 * has no vnode tables at all or when vnode 1 is already in use; create the
 * README through CreateReadme(); create a directory inode that contains
 * "README.ROOTDIR"; write a large disk vnode whose ACL grants read and lookup
 * to id -204; then update the in-memory VnodeEssence and the caller's
 * DirSummary.
 * NOTE: short source lines (braces and other short statements) are elided
 * from this listing.
 */
3717 CreateRootDir(struct SalvInfo *salvinfo, VolumeDiskData *volHeader,
3718 IHandle_t *alinkH, VolumeId vid, struct DirSummary *rootdir,
3722 int decroot = 0, decreadme = 0;
3723 AFSFid did, readmeid;
3726 struct VnodeDiskObject *rootvnode = NULL;
3727 struct acl_accessList *ACL;
3730 struct VnodeEssence *vep;
3731 Inode readmeinode = 0;
3732 time_t now = time(NULL);
3734 if (!salvinfo->vnodeInfo[vLarge].vnodes && !salvinfo->vnodeInfo[vSmall].vnodes) {
3735 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3739 if (!salvinfo->vnodeInfo[vLarge].vnodes) {
3740 /* We don't have any large vnodes in the volume; allocate room
3741 * for one so we can recreate the root dir */
3742 salvinfo->vnodeInfo[vLarge].nVnodes = 1;
3743 salvinfo->vnodeInfo[vLarge].vnodes = calloc(1, sizeof(struct VnodeEssence));
3744 salvinfo->vnodeInfo[vLarge].inodes = calloc(1, sizeof(Inode));
3746 opr_Assert(salvinfo->vnodeInfo[vLarge].vnodes);
3747 opr_Assert(salvinfo->vnodeInfo[vLarge].inodes);
/* Locate the in-memory slots for vnode 1; an existing non-null vnode 1 is
 * never overwritten. */
3750 vep = &salvinfo->vnodeInfo[vLarge].vnodes[vnodeIdToBitNumber(1)];
3751 ip = &salvinfo->vnodeInfo[vLarge].inodes[vnodeIdToBitNumber(1)];
3752 if (vep->type != vNull) {
3753 Log("Not creating new root dir; existing vnode 1 is non-null\n");
3757 if (CreateReadme(salvinfo, volHeader, alinkH, vid, maxunique, &readmeid,
3758 &readmeinode) != 0) {
3763 /* set the DV to a very high number, so it is unlikely that we collide
3764 * with a cached DV */
3767 rootinode = IH_CREATE(alinkH, salvinfo->fileSysDevice, salvinfo->fileSysPath,
3769 if (!VALID_INO(rootinode)) {
3770 Log("CreateRootDir: IH_CREATE failed\n");
3775 SetSalvageDirHandle(&rootdir->dirHandle, vid, salvinfo->fileSysDevice,
3776 rootinode, &salvinfo->VolumeChanged);
/* NOTE(review): the same FID is passed for both FID arguments of
 * afs_dir_MakeDir() -- presumably "self" and "parent", since a root directory
 * is its own parent; `did` is initialised on a line elided from this listing. */
3780 if (afs_dir_MakeDir(&rootdir->dirHandle, (afs_int32*)&did, (afs_int32*)&did)) {
3781 Log("CreateRootDir: MakeDir failed\n");
3784 if (afs_dir_Create(&rootdir->dirHandle, "README.ROOTDIR", &readmeid)) {
3785 Log("CreateRootDir: Create failed\n");
/* Capture the directory length for the vnode before the directory buffers
 * are dropped with DZap(). */
3789 length = afs_dir_Length(&rootdir->dirHandle);
3790 DZap(&rootdir->dirHandle);
3792 /* create the new root dir vnode */
3793 rootvnode = calloc(1, SIZEOF_LARGEDISKVNODE);
3795 Log("CreateRootDir: malloc failed\n");
3799 /* only give 'rl' permissions to 'system:administrators'. We do this to
3800 * try to catch the attention of an administrator, that they should not
3801 * be writing to this directory or continue to use it. */
3802 ACL = VVnodeDiskACL(rootvnode);
3803 ACL->size = sizeof(struct acl_accessList);
3804 ACL->version = ACL_ACLVERSION;
3808 ACL->entries[0].id = -204; /* system:administrators */
3809 ACL->entries[0].rights = PRSFS_READ | PRSFS_LOOKUP;
3811 rootvnode->type = vDirectory;
3812 rootvnode->cloned = 0;
3813 rootvnode->modeBits = 0777;
3814 rootvnode->linkCount = 2;
3815 VNDISK_SET_LEN(rootvnode, length);
3816 rootvnode->uniquifier = 1;
3817 rootvnode->dataVersion = dv;
3818 VNDISK_SET_INO(rootvnode, rootinode);
3819 rootvnode->unixModifyTime = rootvnode->serverModifyTime = now;
3820 rootvnode->author = 0;
3821 rootvnode->owner = 0;
3822 rootvnode->parent = 0;
3823 rootvnode->group = 0;
3824 rootvnode->vnodeMagic = VnodeClassInfo[vLarge].magic;
3826 /* write it out to disk */
3827 bytes = IH_IWRITE(salvinfo->vnodeInfo[vLarge].handle,
3828 vnodeIndexOffset(&VnodeClassInfo[vLarge], 1),
3829 (char*)rootvnode, SIZEOF_LARGEDISKVNODE);
3831 if (bytes != SIZEOF_LARGEDISKVNODE) {
3832 /* just cast to int and don't worry about printing real 64-bit ints;
3833 * a large disk vnode isn't anywhere near the 32-bit limit */
3834 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes,
3835 (int)SIZEOF_LARGEDISKVNODE);
3839 /* update VnodeEssence for the new root vnode */
3840 salvinfo->vnodeInfo[vLarge].nAllocatedVnodes++;
3842 vep->blockCount = nBlocks(length);
3843 salvinfo->vnodeInfo[vLarge].volumeBlockCount += vep->blockCount;
3844 vep->parent = rootvnode->parent;
3845 vep->unique = rootvnode->uniquifier;
3846 vep->modeBits = rootvnode->modeBits;
3847 vep->InodeNumber = VNDISK_GET_INO(rootvnode);
3848 vep->type = rootvnode->type;
3849 vep->author = rootvnode->author;
3850 vep->owner = rootvnode->owner;
3851 vep->group = rootvnode->group;
3861 /* update DirSummary for the new root vnode */
3862 rootdir->vnodeNumber = 1;
3863 rootdir->unique = 1;
3864 rootdir->haveDot = 1;
3865 rootdir->haveDotDot = 1;
3866 rootdir->rwVid = vid;
3867 rootdir->copied = 0;
3868 rootdir->parent = 0;
/* NOTE(review): the strdup() result is stored without a NULL check. */
3869 rootdir->name = strdup(".");
3870 rootdir->vname = volHeader->name;
3871 rootdir->ds_linkH = alinkH;
/* Recovery: release the root and/or README inode when the matching flag
 * (decroot / decreadme) is set. */
3878 if (decroot && IH_DEC(alinkH, rootinode, vid)) {
3879 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3881 if (decreadme && IH_DEC(alinkH, readmeinode, vid)) {
3882 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3892 * salvage a volume group.
3894 * @param[in] salvinfo information for the current salvage job
3895 * @param[in] rwIsp inode summary for rw volume
3896 * @param[in] alinkH link table inode handle
3898 * @return operation status
/*
 * SalvageVolume: salvage the read-write volume described by rwIsp.
 * Visible phases, in order:
 *   1. read the volume header and assert that it is sane;
 *   2. build the in-memory vnode summaries for both vnode classes;
 *   3. salvage every directory in the large-vnode table;
 *   4. recreate the root directory if none was found and orphans are to be
 *      attached;
 *   5. find unclaimed vnodes and, if requested, attach them to the root
 *      directory under generated __ORPHANDIR__/__ORPHANFILE__ names;
 *   6. write back every changed vnode, correcting link counts and removing
 *      orphans when requested;
 *   7. fix up header statistics and the uniquifier, notify the fileserver,
 *      clear the in-use/needs-salvage flags and rewrite the header.
 * NOTE: short source lines (braces, else, short statements and
 * declarations) are elided from this listing, so several conditions that
 * guard the visible statements are not shown.
 */
3902 SalvageVolume(struct SalvInfo *salvinfo, struct InodeSummary *rwIsp, IHandle_t * alinkH)
3904 /* This routine, for now, will only be called for read-write volumes */
3906 int BlocksInVolume = 0, FilesInVolume = 0;
3908 struct DirSummary rootdir, oldrootdir;
3909 struct VnodeInfo *dirVnodeInfo;
3910 struct VnodeDiskObject vnode;
3911 VolumeDiskData volHeader;
3913 int orphaned, rootdirfound = 0;
3914 Unique maxunique = 0; /* the maxUniquifier from the vnodes */
3915 afs_int32 ofiles = 0, oblocks = 0; /* Number of orphaned files/blocks */
3916 struct VnodeEssence *vep;
3919 afs_sfsize_t nBytes;
3921 VnodeId LFVnode, ThisVnode;
3922 Unique LFUnique, ThisUnique;
/* Load the on-disk volume header; a short read, a bad magic number or a
 * pending DESTROY_ME flag trips an assertion. */
3926 vid = rwIsp->volSummary->header.id;
3927 IH_INIT(h, salvinfo->fileSysDevice, vid, rwIsp->volSummary->header.volumeInfo);
3928 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
3929 opr_Assert(nBytes == sizeof(volHeader));
3930 opr_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
3931 opr_Assert(volHeader.destroyMe != DESTROY_ME);
3932 /* (should not have gotten this far with DESTROY_ME flag still set!) */
/* Build the in-memory vnode summaries for both classes; maxunique is passed
 * by address so the largest uniquifier seen can be recorded. */
3934 DistilVnodeEssence(salvinfo, vid, vLarge,
3935 rwIsp->volSummary->header.largeVnodeIndex, &maxunique);
3936 DistilVnodeEssence(salvinfo, vid, vSmall,
3937 rwIsp->volSummary->header.smallVnodeIndex, &maxunique);
/* Salvage every slot of the large-vnode table; SalvageDir() itself skips
 * slots that are unallocated or already processed. */
3939 dirVnodeInfo = &salvinfo->vnodeInfo[vLarge];
3940 for (i = 0; i < dirVnodeInfo->nVnodes; i++) {
3941 SalvageDir(salvinfo, volHeader.name, vid, dirVnodeInfo, alinkH, i,
3942 &rootdir, &rootdirfound);
3945 nt_sync(salvinfo->fileSysDevice);
3947 sync(); /* This used to be done lower level, for every dir */
/* A missing root directory is only recreated when orphans are to be
 * attached and this is not a Testing (dry) run. */
3954 if (!rootdirfound && (orphans == ORPH_ATTACH) && !Testing) {
3956 Log("Cannot find root directory for volume %lu; attempting to create "
3957 "a new one\n", afs_printable_uint32_lu(vid));
3959 code = CreateRootDir(salvinfo, &volHeader, alinkH, vid, &rootdir,
3964 salvinfo->VolumeChanged = 1;
3968 /* Parse each vnode looking for orphaned vnodes and
3969 * connect them to the tree as orphaned (if requested).
3971 oldrootdir = rootdir;
3972 for (class = 0; class < nVNODECLASSES; class++) {
3973 for (v = 0; v < salvinfo->vnodeInfo[class].nVnodes; v++) {
3974 vep = &(salvinfo->vnodeInfo[class].vnodes[v]);
3975 ThisVnode = bitNumberToVnodeNumber(v, class);
3976 ThisUnique = vep->unique;
3978 if ((vep->type == 0) || vep->claimed || ThisVnode == 1)
3979 continue; /* Ignore unused, claimed, and root vnodes */
3981 /* This vnode is orphaned. If it is a directory vnode, then the '..'
3982 * entry in this vnode had incremented the parent link count (In
3983 * JudgeEntry()). We need to go to the parent and decrement that
3984 * link count. But if the parent's unique is zero, then the parent
3985 * link count was not incremented in JudgeEntry().
3987 if (class == vLarge) { /* directory vnode */
3988 pv = vnodeIdToBitNumber(vep->parent);
3989 if (salvinfo->vnodeInfo[vLarge].vnodes[pv].unique != 0) {
3990 if (vep->parent == 1 && newrootdir) {
3991 /* this vnode's parent was the volume root, and
3992 * we just created the volume root. So, the parent
3993 * dir didn't exist during JudgeEntry, so the link
3994 * count was not inc'd there, so don't dec it here.
4000 salvinfo->vnodeInfo[vLarge].vnodes[pv].count++;
4006 continue; /* If no rootdir, can't attach orphaned files */
4008 /* Here we attach orphaned files and directories into the
4009 * root directory, LFVnode, making sure link counts stay correct.
4011 if ((orphans == ORPH_ATTACH) && !vep->todelete && !Testing) {
4012 LFVnode = rootdir.vnodeNumber; /* Lost+Found vnode number */
4013 LFUnique = rootdir.unique; /* Lost+Found uniquifier */
4015 /* Update this orphaned vnode's info. Its parent info and
4016 * link count (do for orphaned directories and files).
4018 vep->parent = LFVnode; /* Parent is the root dir */
/* NOTE(review): this replaces the orphan's own in-memory uniquifier with the
 * root directory's.  The directory entry created below uses ThisUnique, which
 * was captured before this assignment -- confirm the overwrite is intended. */
4019 vep->unique = LFUnique;
/* `count` is a correction that the write-out loop later subtracts from the
 * on-disk link count, so decrementing it raises the final link count. */
4022 vep->count--; /* Inc link count (root dir will pt to it) */
4024 /* If this orphaned vnode is a directory, change '..'.
4025 * The name of the orphaned dir/file is unknown, so we
4026 * build a unique name. No need to CopyOnWrite the directory
4027 * since it is not connected to tree in BK or RO volume and
4028 * won't be visible there.
4030 if (class == vLarge) {
4034 /* Remove and recreate the ".." entry in this orphaned directory */
4035 SetSalvageDirHandle(&dh, vid, salvinfo->fileSysDevice,
4036 salvinfo->vnodeInfo[class].inodes[v],
4037 &salvinfo->VolumeChanged);
4039 pa.Unique = LFUnique;
4040 opr_Verify(afs_dir_Delete(&dh, "..") == 0);
4041 opr_Verify(afs_dir_Create(&dh, "..", &pa) == 0);
4043 /* The original parent's link count was decremented above.
4044 * Here we increment the new parent's link count.
4046 pv = vnodeIdToBitNumber(LFVnode);
4047 salvinfo->vnodeInfo[vLarge].vnodes[pv].count--;
4051 /* Go to the root dir and add this entry. The link count of the
4052 * root dir was incremented when ".." was created. Try 10 times.
4054 for (j = 0; j < 10; j++) {
4055 pa.Vnode = ThisVnode;
4056 pa.Unique = ThisUnique;
4058 snprintf(npath, sizeof npath, "%s.%u.%u",
4059 ((class == vLarge) ? "__ORPHANDIR__"
4060 : "__ORPHANFILE__"),
4061 ThisVnode, ThisUnique);
4063 CopyOnWrite(salvinfo, &rootdir);
4064 code = afs_dir_Create(&rootdir.dirHandle, npath, &pa);
4068 ThisUnique += 50; /* Try creating a different file */
4070 opr_Assert(code == 0);
4071 Log("Attaching orphaned %s to volume's root dir as %s\n",
4072 ((class == vLarge) ? "directory" : "file"), npath);
4074 } /* for each vnode in the class */
4075 } /* for each class of vnode */
4077 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
/* oldrootdir holds the summary as it was before the attach loop, so a change
 * in `copied` shows that CopyOnWrite() replaced the root inode meanwhile. */
4079 if (rootdirfound && !oldrootdir.copied && rootdir.copied) {
4081 IH_DEC(oldrootdir.ds_linkH, oldrootdir.dirHandle.dirh_inode,
4083 opr_Assert(code == 0);
4084 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
4087 DFlush(); /* Flush the changes */
4088 if (!rootdirfound && (orphans == ORPH_ATTACH)) {
4089 Log("Cannot attach orphaned files and directories: Root directory not found\n");
4090 orphans = ORPH_IGNORE;
4093 /* Write out all changed vnodes. Orphaned files and directories
4094 * will get removed here also (if requested).
4096 for (class = 0; class < nVNODECLASSES; class++) {
4097 afs_sfsize_t nVnodes = salvinfo->vnodeInfo[class].nVnodes;
4098 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
4099 struct VnodeEssence *vnodes = salvinfo->vnodeInfo[class].vnodes;
4100 FilesInVolume += salvinfo->vnodeInfo[class].nAllocatedVnodes;
4101 BlocksInVolume += salvinfo->vnodeInfo[class].volumeBlockCount;
4102 for (i = 0; i < nVnodes; i++) {
4103 struct VnodeEssence *vnp = &vnodes[i];
4104 VnodeId vnodeNumber = bitNumberToVnodeNumber(i, class);
4106 /* If the vnode is good but is unclaimed (not listed in
4107 * any directory entries), then it is orphaned.
4110 if ((vnp->type != 0) && (orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber))) {
4111 vnp->claimed = 0; /* Makes IsVnodeOrphaned calls faster */
4115 if (vnp->changed || vnp->count) {
4118 IH_IREAD(salvinfo->vnodeInfo[class].handle,
4119 vnodeIndexOffset(vcp, vnodeNumber),
4120 (char *)&vnode, sizeof(vnode));
4121 opr_Assert(nBytes == sizeof(vnode));
4123 vnode.parent = vnp->parent;
/* vnp->count is the accumulated link-count error; subtracting it gives the
 * corrected on-disk link count. */
4124 oldCount = vnode.linkCount;
4125 vnode.linkCount = vnode.linkCount - vnp->count;
4128 orphaned = IsVnodeOrphaned(salvinfo, vnodeNumber);
4130 if (!vnp->todelete) {
4131 /* Orphans should have already been attached (if requested) */
4132 opr_Assert(orphans != ORPH_ATTACH);
4133 oblocks += vnp->blockCount;
4136 if (((orphans == ORPH_REMOVE) || vnp->todelete)
4138 BlocksInVolume -= vnp->blockCount;
4140 if (VNDISK_GET_INO(&vnode)) {
4142 IH_DEC(alinkH, VNDISK_GET_INO(&vnode), vid);
4143 opr_Assert(code == 0);
4145 memset(&vnode, 0, sizeof(vnode));
4147 } else if (vnp->count) {
4149 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber, oldCount, (Testing ? "would have changed to" : "now"), vnode.linkCount);
4152 vnode.modeBits = vnp->modeBits;
4155 vnode.dataVersion++;
4158 IH_IWRITE(salvinfo->vnodeInfo[class].handle,
4159 vnodeIndexOffset(vcp, vnodeNumber),
4160 (char *)&vnode, sizeof(vnode));
4161 opr_Assert(nBytes == sizeof(vnode));
4163 salvinfo->VolumeChanged = 1;
4167 if (!Showmode && ofiles) {
4168 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4170 && (orphans == ORPH_REMOVE)) ? "Removed" : "Found", ofiles,
/* Free the name string attached to each vnode summary. */
4174 for (class = 0; class < nVNODECLASSES; class++) {
4175 struct VnodeInfo *vip = &salvinfo->vnodeInfo[class];
4176 for (i = 0; i < vip->nVnodes; i++)
4177 if (vip->vnodes[i].name)
4178 free(vip->vnodes[i].name);
4185 /* Set correct resource utilization statistics */
4186 volHeader.filecount = FilesInVolume;
4187 volHeader.diskused = BlocksInVolume;
4189 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
4190 if (volHeader.uniquifier < (maxunique + 1)) {
4192 Log("Volume uniquifier %u is too low (max uniq %u); fixed\n", volHeader.uniquifier, maxunique);
4193 /* Plus 2,000 in case there are workstations out there with
4194 * cached vnodes that have since been deleted
4196 volHeader.uniquifier = (maxunique + 1 + 2000);
4200 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4201 "Only use this salvaged volume to copy data to another volume; "
4202 "do not continue to use this volume (%lu) as-is.\n",
4203 afs_printable_uint32_lu(vid));
4206 if (!Testing && salvinfo->VolumeChanged) {
4207 #ifdef FSSYNC_BUILD_CLIENT
4208 if (salvinfo->useFSYNC) {
4209 afs_int32 fsync_code;
4211 fsync_code = FSYNC_VolOp(vid, NULL, FSYNC_VOL_BREAKCBKS, FSYNC_SALVAGE, NULL);
4213 Log("Error trying to tell the fileserver to break callbacks for "
4214 "changed volume %lu; error code %ld\n",
4215 afs_printable_uint32_lu(vid),
4216 afs_printable_int32_ld(fsync_code));
4218 salvinfo->VolumeChanged = 0;
4221 #endif /* FSSYNC_BUILD_CLIENT */
4223 #ifdef AFS_DEMAND_ATTACH_FS
4224 if (!salvinfo->useFSYNC) {
4225 /* A volume's contents have changed, but the fileserver will not
4226 * break callbacks on the volume until it tries to load the vol
4227 * header. So, to reduce the amount of time a client could have
4228 * stale data, remove fsstate.dat, so the fileserver will init
4229 * callback state with all clients. This is a very coarse hammer,
4230 * and in the future we should just record which volumes have
4232 code = unlink(AFSDIR_SERVER_FSSTATE_FILEPATH);
4233 if (code && errno != ENOENT) {
4234 Log("Error %d when trying to unlink FS state file %s\n", errno,
4235 AFSDIR_SERVER_FSSTATE_FILEPATH);
4241 /* Turn off the inUse bit; the volume's been salvaged! */
4242 volHeader.inUse = 0; /* clear flag indicating inUse@last crash */
4243 volHeader.needsSalvaged = 0; /* clear 'damaged' flag */
4244 volHeader.inService = 1; /* allow service again */
/* needsCallback records whether changes are still pending notification;
 * VolumeChanged was already reset above if the fileserver was told. */
4245 volHeader.needsCallback = (salvinfo->VolumeChanged != 0);
4246 volHeader.dontSalvage = DONT_SALVAGE;
4247 salvinfo->VolumeChanged = 0;
/* Write the updated header back; a short write trips an assertion. */
4249 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4250 opr_Assert(nBytes == sizeof(volHeader));
4253 Log("%sSalvaged %s (%" AFS_VOLID_FMT "): %d files, %d blocks\n",
4254 (Testing ? "It would have " : ""), volHeader.name, afs_printable_VolumeId_lu(volHeader.id),
4255 FilesInVolume, BlocksInVolume);
/* Drop the vnode index handles of both classes. */
4258 IH_RELEASE(salvinfo->vnodeInfo[vSmall].handle);
4259 IH_RELEASE(salvinfo->vnodeInfo[vLarge].handle);
/*
 * ClearROInUseBit: read the volume header through summary->volumeInfoHandle,
 * clear inUse and needsSalvaged, set inService and dontSalvage, and write the
 * header back.  Both transfer sizes and the header magic are asserted.
 * NOTE: short source lines are elided from this listing.
 */
4265 ClearROInUseBit(struct VolumeSummary *summary)
4267 IHandle_t *h = summary->volumeInfoHandle;
4268 afs_sfsize_t nBytes;
4270 VolumeDiskData volHeader;
4272 nBytes = IH_IREAD(h, 0, (char *)&volHeader, sizeof(volHeader));
4273 opr_Assert(nBytes == sizeof(volHeader));
4274 opr_Assert(volHeader.stamp.magic == VOLUMEINFOMAGIC);
/* Same flag settings SalvageVolume() applies to the read-write header,
 * except that needsCallback is left untouched here. */
4275 volHeader.inUse = 0;
4276 volHeader.needsSalvaged = 0;
4277 volHeader.inService = 1;
4278 volHeader.dontSalvage = DONT_SALVAGE;
4280 nBytes = IH_IWRITE(h, 0, (char *)&volHeader, sizeof(volHeader));
4281 opr_Assert(nBytes == sizeof(volHeader));
4286 * Possibly delete the volume.
4288 * deleteMe - Always do so, only a partial volume.
/*
 * MaybeZapVolume: when the volume is read-only or deleteMe is set, delete it
 * (destroy the disk header, unlink the header file, tell the fileserver when
 * FSYNC is in use, and mark the summary as deleted).  Otherwise, when `check`
 * is zero, log the failed salvage and Abort().
 * NOTE: short source lines are elided from this listing, so the conditions
 * selecting between the two pairs of log messages are not shown.
 */
4291 MaybeZapVolume(struct SalvInfo *salvinfo, struct InodeSummary *isp,
4292 char *message, int deleteMe, int check)
4294 if (readOnly(isp) || deleteMe) {
/* Nothing is done for a volume without a summary or one already deleted. */
4295 if (isp->volSummary && !isp->volSummary->deleted) {
4298 Log("Volume %" AFS_VOLID_FMT " (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", afs_printable_VolumeId_lu(isp->volumeId));
4300 Log("It will be deleted on this server (you may find it elsewhere)\n");
4303 Log("Volume %" AFS_VOLID_FMT " needs to be salvaged. Since it is read-only, however,\n", afs_printable_VolumeId_lu(isp->volumeId));
4305 Log("it will be deleted instead. It should be recloned.\n");
/* NOTE(review): the sprintf() below is unbounded and the declaration of
 * `path` is not visible in this listing -- confirm the buffer can hold
 * fileSysPath plus the separator and the header file name. */
4310 char filename[VMAXPATHLEN];
4311 VolumeExternalName_r(isp->volumeId, filename, sizeof(filename));
4312 sprintf(path, "%s" OS_DIRSEP "%s", salvinfo->fileSysPath, filename);
4314 code = VDestroyVolumeDiskHeader(salvinfo->fileSysPartition, isp->volumeId, isp->RWvolumeId);
4316 Log("Error %ld destroying volume disk header for volume %" AFS_VOLID_FMT "\n",
4317 afs_printable_int32_ld(code),
4318 afs_printable_VolumeId_lu(isp->volumeId));
4321 /* make sure we actually delete the header file; ENOENT
4322 * is fine, since VDestroyVolumeDiskHeader probably already
4324 if (unlink(path) && errno != ENOENT) {
4325 Log("Unable to unlink %s (errno = %d)\n", path, errno);
4327 if (salvinfo->useFSYNC) {
4328 AskDelete(salvinfo, isp->volumeId);
4330 isp->volSummary->deleted = 1;
4333 } else if (!check) {
4334 Log("%s salvage was unsuccessful: read-write volume %" AFS_VOLID_FMT "\n", message,
4335 afs_printable_VolumeId_lu(isp->volumeId));
4336 Abort("Salvage of volume %" AFS_VOLID_FMT " aborted\n", afs_printable_VolumeId_lu(isp->volumeId));
4340 #ifdef AFS_DEMAND_ATTACH_FS
4342 * Locks a volume on disk for salvaging.
4344 * @param[in] volumeId volume ID to lock
4346 * @return operation status
4348 * @retval -1 volume lock raced with a fileserver restart; all volumes must
4349 * be checked out and locked again
4354 LockVolume(struct SalvInfo *salvinfo, VolumeId volumeId)
4359 /* should always be WRITE_LOCK, but keep the lock-type logic all
4360 * in one place, in VVolLockType. Params will be ignored, but
4361 * try to provide what we're logically doing. */
4362 locktype = VVolLockType(V_VOLUPD, 1);
/* Non-blocking lock attempt on the volume within this partition. */
4364 code = VLockVolumeByIdNB(volumeId, salvinfo->fileSysPartition, locktype);
/* A locking failure is fatal: EBUSY gets a dedicated message, and the generic
 * message presumably covers every other non-zero code (the enclosing
 * conditionals are elided from this listing). */
4366 if (code == EBUSY) {
4367 Abort("Someone else appears to be using volume %lu; Aborted\n",
4368 afs_printable_uint32_lu(volumeId));
4370 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4371 afs_printable_int32_ld(code),
4372 afs_printable_uint32_lu(volumeId));
/* Re-check the checkout with the fileserver now that the lock is held;
 * SYNC_DENIED means the checkout has to be retried, any other failure is
 * fatal. */
4375 code = FSYNC_VerifyCheckout(volumeId, salvinfo->fileSysPartition->name, FSYNC_VOL_OFF, FSYNC_SALVAGE);
4376 if (code == SYNC_DENIED) {
4377 /* need to retry checking out volumes */
4380 if (code != SYNC_OK) {
4381 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4382 afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
4385 /* set inUse = programType in the volume header to ensure that nobody
4386 * tries to use this volume again without salvaging, if we somehow crash
4387 * or otherwise exit before finishing the salvage.
4391 struct VolumeHeader header;
4392 struct VolumeDiskHeader diskHeader;
4393 struct VolumeDiskData volHeader;
4395 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHeader);
4400 DiskToVolumeHeader(&header, &diskHeader);
4402 IH_INIT(h, salvinfo->fileSysDevice, header.parent, header.volumeInfo);
4403 if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
4404 volHeader.stamp.magic != VOLUMEINFOMAGIC) {
4410 volHeader.inUse = programType;
4412 /* If we can't re-write the header, bail out and error. We don't
4413 * assert when reading the header, since it's possible the
4414 * header isn't really there (when there's no data associated
4415 * with the volume; we just delete the vol header file in that
4416 * case). But if it's there enough that we can read it, but
4417 * somehow we cannot write to it to signify we're salvaging it,
4418 * we've got a big problem and we cannot continue. */
4419 opr_Verify(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader))
4420 == sizeof(volHeader));
4427 #endif /* AFS_DEMAND_ATTACH_FS */
/*
 * AskError: ask the fileserver, through FSYNC, to force the volume into the
 * error state.  A failure is only logged.  The request is compiled in only
 * when AFS_DEMAND_ATTACH_FS or AFS_DEMAND_ATTACH_UTIL is defined.
 * NOTE: short source lines are elided from this listing.
 */
4430 AskError(struct SalvInfo *salvinfo, VolumeId volumeId)
4432 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4434 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4435 FSYNC_VOL_FORCE_ERROR, FSYNC_WHATEVER, NULL);
4436 if (code != SYNC_OK) {
4437 Log("AskError: failed to force volume %lu into error state; "
4438 "SYNC error code %ld (%s)\n", (long unsigned)volumeId,
4439 (long)code, SYNC_res2string(code));
4441 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
/*
 * AskOffline: ask the fileserver, through FSYNC, to take the volume offline
 * for salvaging, making up to three attempts.  A denial or a protocol
 * mismatch is logged and can abort the salvage; if the final result is not
 * SYNC_OK the salvage is aborted.
 * NOTE: short source lines are elided from this listing, so the conditions
 * choosing between the alternative log messages are not shown.
 */
4445 AskOffline(struct SalvInfo *salvinfo, VolumeId volumeId)
4450 memset(&res, 0, sizeof(res));
4452 for (i = 0; i < 3; i++) {
4453 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4454 FSYNC_VOL_OFF, FSYNC_SALVAGE, &res);
4456 if (code == SYNC_OK) {
4458 } else if (code == SYNC_DENIED) {
4460 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4462 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4463 Abort("Salvage aborted\n");
4464 } else if (code == SYNC_BAD_COMMAND) {
4465 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
4468 #ifdef AFS_DEMAND_ATTACH_FS
4469 Log("AskOffline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4471 Log("AskOffline: fileserver is DAFS but we are not.\n");
4474 #ifdef AFS_DEMAND_ATTACH_FS
4475 Log("AskOffline: fileserver is not DAFS but we are.\n");
4477 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4480 Abort("Salvage aborted\n");
/* Any other failure: drop the FSYNC client connection before the next
 * attempt. */
4483 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4484 FSYNC_clientFinis();
4488 if (code != SYNC_OK) {
4489 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4490 Abort("Salvage aborted\n");
4494 /* don't want to pass around state; remember it here */
4495 static int isDAFS = -1;
4500 afs_int32 code = 1, i;
4502 /* we don't care if we race. the answer shouldn't change */
4506 memset(&res, 0, sizeof(res));
4508 for (i = 0; code && i < 3; i++) {
4509 code = FSYNC_VolOp(0, NULL, FSYNC_VOL_LISTVOLUMES, FSYNC_SALVAGE, &res);
4511 Log("AskDAFS: FSYNC_VOL_LISTVOLUMES failed with code %ld reason "
4512 "%ld (%s); trying again...\n", (long)code, (long)res.hdr.reason,
4513 FSYNC_reason2string(res.hdr.reason));
4514 FSYNC_clientFinis();
4520 Log("AskDAFS: could not determine DAFS-ness, assuming not DAFS\n");
4524 if ((res.hdr.flags & SYNC_FLAG_DAFS_EXTENSIONS)) {
4534 MaybeAskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4536 struct VolumeDiskHeader diskHdr;
4538 code = VReadVolumeDiskHeader(volumeId, salvinfo->fileSysPartition, &diskHdr);
4540 /* volume probably does not exist; no need to bring back online */
4543 AskOnline(salvinfo, volumeId);
4547 AskOnline(struct SalvInfo *salvinfo, VolumeId volumeId)
4551 for (i = 0; i < 3; i++) {
4552 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4553 FSYNC_VOL_ON, FSYNC_WHATEVER, NULL);
4555 if (code == SYNC_OK) {
4557 } else if (code == SYNC_DENIED) {
4558 Log("AskOnline: file server denied online request to volume %" AFS_VOLID_FMT " partition %s; trying again...\n", afs_printable_VolumeId_lu(volumeId), salvinfo->fileSysPartition->name);
4559 } else if (code == SYNC_BAD_COMMAND) {
4560 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4562 Log("AskOnline: please make sure file server binaries are same version.\n");
4566 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
4567 FSYNC_clientFinis();
4574 AskDelete(struct SalvInfo *salvinfo, VolumeId volumeId)
4579 for (i = 0; i < 3; i++) {
4580 memset(&res, 0, sizeof(res));
4581 code = FSYNC_VolOp(volumeId, salvinfo->fileSysPartition->name,
4582 FSYNC_VOL_DONE, FSYNC_SALVAGE, &res);
4584 if (code == SYNC_OK) {
4586 } else if (code == SYNC_DENIED) {
4587 Log("AskOnline: file server denied DONE request to volume %" AFS_VOLID_FMT " partition %s; trying again...\n", afs_printable_VolumeId_lu(volumeId), salvinfo->fileSysPartition->name);
4588 } else if (code == SYNC_BAD_COMMAND) {
4589 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4592 #ifdef AFS_DEMAND_ATTACH_FS
4593 Log("AskOnline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4595 Log("AskOnline: fileserver is DAFS but we are not.\n");
4598 #ifdef AFS_DEMAND_ATTACH_FS
4599 Log("AskOnline: fileserver is not DAFS but we are.\n");
4601 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4605 } else if (code == SYNC_FAILED &&
4606 (res.hdr.reason == FSYNC_UNKNOWN_VOLID ||
4607 res.hdr.reason == FSYNC_WRONG_PART)) {
4608 /* volume is already effectively 'deleted' */
4612 Log("AskOnline: request for fileserver to delete volume failed; trying again...\n");
4613 FSYNC_clientFinis();
4620 CopyInode(Device device, Inode inode1, Inode inode2, int rwvolume)
4622 /* Volume parameter is passed in case iopen is upgraded in future to
4623 * require a volume Id to be passed
4626 IHandle_t *srcH, *destH;
4627 FdHandle_t *srcFdP, *destFdP;
4629 afs_foff_t size = 0;
4631 IH_INIT(srcH, device, rwvolume, inode1);
4632 srcFdP = IH_OPEN(srcH);
4633 opr_Assert(srcFdP != NULL);
4634 IH_INIT(destH, device, rwvolume, inode2);
4635 destFdP = IH_OPEN(destH);
4636 while ((nBytes = FDH_PREAD(srcFdP, buf, sizeof(buf), size)) > 0) {
4637 opr_Verify(FDH_PWRITE(destFdP, buf, nBytes, size) == nBytes);
4640 opr_Assert(nBytes == 0);
4641 FDH_REALLYCLOSE(srcFdP);
4642 FDH_REALLYCLOSE(destFdP);
4649 PrintInodeList(struct SalvInfo *salvinfo)
4651 struct ViceInodeInfo *ip;
4652 struct ViceInodeInfo *buf;
4655 afs_sfsize_t st_size;
4657 st_size = OS_SIZE(salvinfo->inodeFd);
4658 opr_Assert(st_size >= 0);
4659 buf = malloc(st_size);
4660 opr_Assert(buf != NULL);
4661 nInodes = st_size / sizeof(struct ViceInodeInfo);
4662 opr_Verify(OS_READ(salvinfo->inodeFd, buf, st_size) == st_size);
4663 for (ip = buf; nInodes--; ip++) {
4664 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%" AFS_VOLID_FMT ",%u,%u,%u)\n", /* VolumeId in param */
4665 PrintInode(stmp, ip->inodeNumber), ip->linkCount,
4666 (afs_uintmax_t) ip->byteCount,
4667 afs_printable_VolumeId_lu(ip->u.param[0]), ip->u.param[1],
4668 ip->u.param[2], ip->u.param[3]);
4674 PrintInodeSummary(struct SalvInfo *salvinfo)
4677 struct InodeSummary *isp;
4679 for (i = 0; i < salvinfo->nVolumesInInodeFile; i++) {
4680 isp = &salvinfo->inodeSummary[i];
4681 Log("VID:%" AFS_VOLID_FMT ", RW:%" AFS_VOLID_FMT ", index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", afs_printable_VolumeId_lu(isp->volumeId), afs_printable_VolumeId_lu(isp->RWvolumeId), isp->index, isp->nInodes, isp->nSpecialInodes, isp->maxUniquifier);
4691 opr_Assert(0); /* Fork is never executed in the NT code path */
4695 #ifdef AFS_DEMAND_ATTACH_FS
4696 if ((f == 0) && (programType == salvageServer)) {
4697 /* we are a salvageserver child */
4698 #ifdef FSSYNC_BUILD_CLIENT
4699 VChildProcReconnectFS_r();
4701 #ifdef SALVSYNC_BUILD_CLIENT
4705 #endif /* AFS_DEMAND_ATTACH_FS */
4706 #endif /* !AFS_NT40_ENV */
4716 #ifdef AFS_DEMAND_ATTACH_FS
4717 if (programType == salvageServer) {
4718 /* release all volume locks before closing down our SYNC channels.
4719 * the fileserver may try to online volumes we have checked out when
4720 * we close down FSSYNC, so we should make sure we don't have those
4721 * volumes locked when it does */
4722 struct DiskPartition64 *dp;
4724 for (i = 0; i <= VOLMAXPARTS; i++) {
4725 dp = VGetPartitionById(i, 0);
4727 VLockFileReinit(&dp->volLockFile);
4730 # ifdef SALVSYNC_BUILD_CLIENT
4733 # ifdef FSSYNC_BUILD_CLIENT
4737 #endif /* AFS_DEMAND_ATTACH_FS */
4740 if (main_thread != pthread_self())
4741 pthread_exit((void *)code);
4754 pid = wait(&status);
4755 opr_Assert(pid != -1);
4756 if (WCOREDUMP(status))
4757 Log("\"%s\" core dumped!\n", prog);
4758 if (WIFSIGNALED(status) != 0 || WEXITSTATUS(status) != 0)
/*
 * Format a time as local time for log output.
 *
 * clock     - the time to format
 * precision - non-zero: "MM/DD/YYYY HH:MM:SS"; zero: "MM/DD/YYYY HH:MM"
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call, so the result must be used or copied before calling again.  If
 * the time cannot be converted or does not fit the buffer, an empty
 * string is returned.
 */
char *
TimeStamp(time_t clock, int precision)
{
    static char timestamp[20];
    struct tm *lt;
    size_t written = 0;

    lt = localtime(&clock);
    if (lt != NULL) {
        if (precision)
            written = strftime(timestamp, sizeof(timestamp),
                               "%m/%d/%Y %H:%M:%S", lt);
        else
            written = strftime(timestamp, sizeof(timestamp),
                               "%m/%d/%Y %H:%M", lt);
    }
    if (written == 0) {
        /* strftime leaves the buffer unspecified when the result does not
         * fit; never hand that (or stale data) back to the caller */
        timestamp[0] = '\0';
    }
    return timestamp;
}
4777 CheckLogFile(char * log_path)
4779 char oldSlvgLog[AFSDIR_PATH_MAX];
4781 #ifndef AFS_NT40_ENV
4788 strcpy(oldSlvgLog, log_path);
4789 strcat(oldSlvgLog, ".old");
4791 rk_rename(log_path, oldSlvgLog);
4792 logFile = afs_fopen(log_path, "a");
4794 if (!logFile) { /* still nothing, use stdout */
4798 #ifndef AFS_NAMEI_ENV
4799 AFS_DEBUG_IOPS_LOG(logFile);
4804 #ifndef AFS_NT40_ENV
4806 TimeStampLogFile(char * log_path)
4808 char stampSlvgLog[AFSDIR_PATH_MAX];
4813 lt = localtime(&now);
4814 snprintf(stampSlvgLog, sizeof stampSlvgLog,
4815 "%s.%04d-%02d-%02d.%02d:%02d:%02d", log_path,
4816 lt->tm_year + 1900, lt->tm_mon + 1, lt->tm_mday, lt->tm_hour,
4817 lt->tm_min, lt->tm_sec);
4819 /* try to link the logfile to a timestamped filename */
4820 /* if it fails, oh well, nothing we can do */
4821 link(log_path, stampSlvgLog);
4830 #ifndef AFS_NT40_ENV
4832 printf("Can't show log since using syslog.\n");
4843 logFile = afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH, "r");
4846 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH);
4849 while (fgets(line, sizeof(line), logFile))
4856 Log(const char *format, ...)
4862 va_start(args, format);
4863 vsnprintf(tmp, sizeof tmp, format, args);
4865 #ifndef AFS_NT40_ENV
4867 syslog(LOG_INFO, "%s", tmp);
4871 gettimeofday(&now, NULL);
4872 fprintf(logFile, "%s %s", TimeStamp(now.tv_sec, 1), tmp);
4878 Abort(const char *format, ...)
4883 va_start(args, format);
4884 vsnprintf(tmp, sizeof tmp, format, args);
4886 #ifndef AFS_NT40_ENV
4888 syslog(LOG_INFO, "%s", tmp);
4892 fprintf(logFile, "%s", tmp);
4904 ToString(const char *s)
4908 opr_Assert(p != NULL);
4912 /* Remove the FORCESALVAGE file */
4914 RemoveTheForce(char *path)
4917 struct afs_stat_st force; /* so we can use afs_stat to find it */
4918 strcpy(target,path);
4919 strcat(target,"/FORCESALVAGE");
4920 if (!Testing && ForceSalvage) {
4921 if (afs_stat(target,&force) == 0) unlink(target);
4925 #ifndef AFS_AIX32_ENV
4927 * UseTheForceLuke - see if we can use the force
4930 UseTheForceLuke(char *path)
4932 struct afs_stat_st force;
4934 strcpy(target,path);
4935 strcat(target,"/FORCESALVAGE");
4937 return (afs_stat(target, &force) == 0);
4941 * UseTheForceLuke - see if we can use the force
4944 * The VRMIX fsck will not muck with the filesystem it is supposedly
4945 * fixing and create a "FORCESALVAGE" file (by design). Instead, we
4946 * muck directly with the root inode, which is within the normal
4948 * ListViceInodes() has a side effect of setting ForceSalvage if
4949 * it detects a need, based on root inode examination.
4952 UseTheForceLuke(char *path)
4955 return 0; /* sorry OB1 */
4960 /* NT support routines */
4962 static char execpathname[MAX_PATH];
4964 nt_SalvagePartition(char *partName, int jobn)
4969 if (!*execpathname) {
4970 n = GetModuleFileName(NULL, execpathname, MAX_PATH - 1);
4971 if (!n || n == 1023)
4974 job.cj_magic = SALVAGER_MAGIC;
4975 job.cj_number = jobn;
4976 (void)strcpy(job.cj_part, partName);
4977 pid = (int)spawnprocveb(execpathname, save_args, NULL, &job, sizeof(job));
4982 nt_SetupPartitionSalvage(void *datap, int len)
4984 childJob_t *jobp = (childJob_t *) datap;
4985 char logname[AFSDIR_PATH_MAX];
4987 if (len != sizeof(childJob_t))
4989 if (jobp->cj_magic != SALVAGER_MAGIC)
4994 (void)sprintf(logname, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH,
4996 logFile = afs_fopen(logname, "w");
5004 #endif /* AFS_NT40_ENV */