Note that destroying vol headers does not require any locks, since
unlink()s are atomic and don't cause any races for us here.
+
+ - partition and volume locking
+
+Previously, whenever the volserver would attach a volume or the salvager
+would salvage anything, the partition would be locked
+(VLockPartition_r). This unnecessarily serializes part of most volserver
+operations. It also makes it so only one salvage can run on a partition
+at a time, and that a volserver operation cannot occur at the same time
+as a salvage. With the addition of the VGC (previous section), the
+salvager partition lock is unnecessary on namei, since the salvager does
+not need to scan all volume headers.
+
+Instead of the rather heavyweight partition lock, in DAFS we now lock
+individual volumes. Locking an individual volume is done by locking a
+certain byte in the file /vicepX/.volume.lock. To lock volume with ID
+1234, you lock 1 byte at offset 1234 (with VLockFile: fcntl on unix,
+LockFileEx on windows as of the time of this writing). To read-lock the
+volume, acquire a read lock; to write-lock the volume, acquire a write
+lock.
+
+Due to the potentially very large number of volumes attached by the
+fileserver at once, the fileserver does not keep volumes locked the
+entire time they are attached (which would make volume locking
+potentially very slow). Rather, it locks the volume before attaching,
+and unlocks it when the volume has been attached. However, all other
+programs are expected to acquire a volume lock for the entire duration
+they interact with the volume. Whether a read or write lock is obtained
+is determined by the attachment mode, and whether or not the volume in
+question is an RW volume (see VVolLockType()).
+
+These locks are all acquired non-blocking, so we can just fail if we
+fail to acquire a lock. That is, an errant process holding a file-level
+lock cannot cause any process to just hang, waiting for a lock.
+
+ -- re-reading volume headers
+
+Since we cannot know whether a volume is writeable or not until the
+volume header is read, and we cannot atomically upgrade file-level
+locks, part of attachment can now occur twice (see attach2 and
+attach_volume_header). What occurs is we read the vol header, assuming
+the volume is readonly (acquiring a read or write lock as necessary).
+If, after reading the vol header, we discover that the volume is
+writable and that means we need to acquire a write lock, we read the vol
+header again while acquiring a write lock on the header.
+
+ -- verifying checkouts
+
+Since the fileserver does not hold volume locks for the entire time a
+volume is attached, there could have been a potential race between the
+fileserver and other programs. Consider when a non-fileserver program
+checks out a volume from the fileserver via FSSYNC, then locks the
+volume. Before the program locked the volume, the fileserver could have
+restarted and attached the volume. Since the fileserver releases the
+volume lock after attachment, the fileserver and the other program could
+both think they have control over the volume, which is a problem.
+
+To prevent this non-fileserver programs are expected to verify that
+their volume is checked out after locking it (FSYNC_VerifyCheckout).
+What this does is ask the fileserver for the current volume operation on
+the specific volume, and verifies that it matches how the program
+checked out the volume.
+
+For example, programType X checks out volume V from the fileserver, and
+then locks it. We then ask the fileserver for the current volume
+operation on volume V. If the programType on the vol operation does not
+match (or the PID, or the checkout mode, or other things), we know the
+fileserver must have restarted or something similar, and we do not have
+the volume checked out like we thought we did.
+
+If the program determines that the fileserver may have restarted, it
+then must retry checking out and locking the volume (or return an
+error).
Inode nearInode = 0;
memset(&specinos, 0, sizeof(specinos));
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/* DAFS currently doesn't really work with inode, so don't bother putting the
+ * locking code here right now. But in case someday someone makes DAFS work
+ * with the inode backend, make sure they remember to add the volume locking
+ * code here (make the build fail until that happens). If that is what you're
+ * trying to do, take a look at VLockVolumeByIdNB, and
+ * namei_ConvertROtoRWvolume.
+ */
+# error must lock volumes before ConvertROtoRW is usable on DAFS inode
+#endif
/* now do the work */
#include "voldefs.h"
#include "partition.h"
#include "fssync.h"
+#include "volume_inline.h"
#include <afs/errors.h>
/*@+fcnmacros +macrofcndecl@*/
int
namei_ConvertROtoRWvolume(char *pname, afs_uint32 volumeId)
{
+ int code = 0;
#ifdef FSSYNC_BUILD_CLIENT
namei_t n;
char dir_name[512], oldpath[512], newpath[512];
char smallSeen = 0;
char largeSeen = 0;
char linkSeen = 0;
- int code, fd, fd2;
+ int fd, fd2;
char *p;
DIR *dirp;
Inode ino;
struct DiskPartition64 *partP;
struct ViceInodeInfo info;
struct VolumeDiskHeader h;
+# ifdef AFS_DEMAND_ATTACH_FS
+ int locktype = 0;
+# endif /* AFS_DEMAND_ATTACH_FS */
for (partP = DiskPartitionList; partP && strcmp(partP->name, pname);
partP = partP->next);
if (!partP) {
Log("1 namei_ConvertROtoRWvolume: Couldn't find DiskPartition for %s\n", pname);
- return EIO;
+ code = EIO;
+ goto done;
+ }
+
+# ifdef AFS_DEMAND_ATTACH_FS
+ locktype = VVolLockType(V_VOLUPD, 1);
+ code = VLockVolumeByIdNB(volumeId, partP, locktype);
+ if (code) {
+ locktype = 0;
+ code = EIO;
+ goto done;
}
+# endif /* AFS_DEMAND_ATTACH_FS */
if (VReadVolumeDiskHeader(volumeId, partP, &h)) {
Log("1 namei_ConvertROtoRWvolume: Couldn't read header for RO-volume %lu.\n",
afs_printable_uint32_lu(volumeId));
- return EIO;
+ code = EIO;
+ goto done;
}
FSYNC_VolOp(volumeId, pname, FSYNC_VOL_BREAKCBKS, 0, NULL);
dirp = opendir(dir_name);
if (!dirp) {
Log("1 namei_ConvertROtoRWvolume: Could not opendir(%s)\n", dir_name);
- return EIO;
+ code = EIO;
+ goto done;
}
while ((dp = readdir(dirp))) {
Log("1 namei_ConvertROtoRWvolume: DecodeInode failed for %s/%s\n",
dir_name, dp->d_name);
closedir(dirp);
- return -1;
+ code = -1;
+ goto done;
}
if (info.u.param[1] != -1) {
Log("1 namei_ConvertROtoRWvolume: found other than volume special file %s/%s\n", dir_name, dp->d_name);
closedir(dirp);
- return -1;
+ code = -1;
+ goto done;
}
if (info.u.param[0] != volumeId) {
if (info.u.param[0] == ih->ih_vid) {
}
Log("1 namei_ConvertROtoRWvolume: found special file %s/%s for volume %lu\n", dir_name, dp->d_name, info.u.param[0]);
closedir(dirp);
- return VVOLEXISTS;
+ code = VVOLEXISTS;
+ goto done;
}
if (info.u.param[2] == VI_VOLINFO) { /* volume info file */
strlcpy(infoName, dp->d_name, sizeof(infoName));
} else {
closedir(dirp);
Log("1 namei_ConvertROtoRWvolume: unknown type %d of special file found : %s/%s\n", info.u.param[2], dir_name, dp->d_name);
- return -1;
+ code = -1;
+ goto done;
}
}
closedir(dirp);
if (!infoSeen || !smallSeen || !largeSeen || !linkSeen) {
Log("1 namei_ConvertROtoRWvolume: not all special files found in %s\n", dir_name);
- return -1;
+ code = -1;
+ goto done;
}
/*
if (fd < 0) {
Log("1 namei_ConvertROtoRWvolume: could not open RO info file: %s\n",
oldpath);
- return -1;
+ code = -1;
+ goto done;
}
t_ih.ih_ino = namei_MakeSpecIno(ih->ih_vid, VI_VOLINFO);
namei_HandleToName(&n, &t_ih);
if (fd2 < 0) {
Log("1 namei_ConvertROtoRWvolume: could not create RW info file: %s\n", n.n_path);
close(fd);
- return -1;
+ code = -1;
+ goto done;
}
code = convertVolumeInfo(fd, fd2, ih->ih_vid);
close(fd);
if (code) {
close(fd2);
unlink(n.n_path);
- return -1;
+ code = -1;
+ goto done;
}
SetOGM(fd2, ih->ih_vid, 1);
close(fd2);
fd = afs_open(newpath, O_RDWR, 0);
if (fd < 0) {
Log("1 namei_ConvertROtoRWvolume: could not open SmallIndex file: %s\n", newpath);
- return -1;
+ code = -1;
+ goto done;
}
SetOGM(fd, ih->ih_vid, 2);
close(fd);
fd = afs_open(newpath, O_RDWR, 0);
if (fd < 0) {
Log("1 namei_ConvertROtoRWvolume: could not open LargeIndex file: %s\n", newpath);
- return -1;
+ code = -1;
+ goto done;
}
SetOGM(fd, ih->ih_vid, 3);
close(fd);
if (VCreateVolumeDiskHeader(&h, partP)) {
Log("1 namei_ConvertROtoRWvolume: Couldn't write header for RW-volume %lu\n",
afs_printable_uint32_lu(h.id));
- return EIO;
+ code = EIO;
+ goto done;
}
if (VDestroyVolumeDiskHeader(partP, volumeId, h.parent)) {
FSYNC_VolOp(volumeId, pname, FSYNC_VOL_DONE, 0, NULL);
FSYNC_VolOp(h.id, pname, FSYNC_VOL_ON, 0, NULL);
+
+ done:
+# ifdef AFS_DEMAND_ATTACH_FS
+ if (locktype) {
+ VUnlockVolumeById(volumeId, partP);
+ }
+# endif /* AFS_DEMAND_ATTACH_FS */
#endif
- return 0;
+
+ return code;
}
/* PrintInode
* still needs this because we don't want
* a stand-alone salvager to conflict with
* the salvager daemon */
- ObtainSalvageLock();
+ ObtainSharedSalvageLock();
child_slot = (int *) malloc(Parallel * sizeof(int));
assert(child_slot != NULL);
return 1;
}
+ /* obtain a shared salvage lock in the child worker, so if the
+ * salvageserver restarts (and we continue), we will still hold a lock and
+ * prevent standalone salvagers from interfering */
+ ObtainSharedSalvageLock();
+
/* Salvage individual volume; don't notify fs */
SalvageFileSys1(partP, node->command.sop.parent);
#include "partition.h"
#include "daemon_com.h"
#include "fssync.h"
+#include "volume_inline.h"
#include "salvsync.h"
#include "viceinode.h"
#include "salvage.h"
static int IsVnodeOrphaned(VnodeId vnode);
static int AskVolumeSummary(VolumeId singleVolumeNumber);
+#ifdef AFS_DEMAND_ATTACH_FS
+static int LockVolume(VolumeId volumeId);
+#endif /* AFS_DEMAND_ATTACH_FS */
+
/* Uniquifier stored in the Inode */
static Unique
IUnique(Unique u)
{
char *name, *tdir;
char inodeListPath[256];
- FILE *inodeFile;
+ FILE *inodeFile = NULL;
static char tmpDevName[100];
static char wpath[100];
struct VolumeSummary *vsp, *esp;
int i, j;
int code;
+ int tries = 0;
+
+ retry:
+ tries++;
+ if (inodeFile) {
+ fclose(inodeFile);
+ inodeFile = NULL;
+ }
+ if (tries > VOL_MAX_CHECKOUT_RETRIES) {
+ Abort("Raced too many times with fileserver restarts while trying to "
+ "checkout/lock volumes; Aborted\n");
+ }
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (tries > 1) {
+ /* unlock all previous volume locks, since we're about to lock them
+ * again */
+ VLockFileReinit(&partP->volLockFile);
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
fileSysPartition = partP;
fileSysDevice = fileSysPartition->device;
filesysfulldev = wpath;
#endif
- VLockPartition(partP->name);
- if (singleVolumeNumber || ForceSalvage)
+ if (singleVolumeNumber) {
+#ifndef AFS_DEMAND_ATTACH_FS
+ /* only non-DAFS locks the partition when salvaging a single volume;
+ * DAFS will lock the individual volumes in the VG */
+ VLockPartition(partP->name);
+#endif /* !AFS_DEMAND_ATTACH_FS */
+
ForceSalvage = 1;
- else
- ForceSalvage = UseTheForceLuke(fileSysPath);
- if (singleVolumeNumber) {
/* salvageserver already setup fssync conn for us */
if ((programType != salvageServer) && !VConnectFS()) {
Abort("Couldn't connect to file server\n");
}
+
AskOffline(singleVolumeNumber, partP->name);
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (LockVolume(singleVolumeNumber)) {
+ goto retry;
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
} else {
+ VLockPartition(partP->name);
+ if (ForceSalvage) {
+ ForceSalvage = 1;
+ } else {
+ ForceSalvage = UseTheForceLuke(fileSysPath);
+ }
if (!Showmode)
Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
partP->name, name, (Testing ? "(READONLY mode)" : ""));
* Fix up inodes on last volume in set (whether it is read-write
* or read-only).
*/
- GetVolumeSummary(singleVolumeNumber);
+ if (GetVolumeSummary(singleVolumeNumber)) {
+ goto retry;
+ }
for (i = j = 0, vsp = volumeSummaryp, esp = vsp + nVolumes;
i < nVolumesInInodeFile; i = j) {
RemoveTheForce(fileSysPath);
if (!Testing && singleVolumeNumber) {
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* unlock vol headers so the fs can attach them when we AskOnline */
+ VLockFileReinit(&fileSysPartition->volLockFile);
+#endif /* AFS_DEMAND_ATTACH_FS */
+
AskOnline(singleVolumeNumber, fileSysPartition->name);
/* Step through the volumeSummary list and set all volumes on-line.
* salvaging a whole partition
*
* @return whether we obtained the volume summary information or not
- * @retval 0 success; we obtained the volume summary information
- * @retval nonzero we did not get the volume summary information; either the
- * fileserver responded with an error, or we are not supposed to
- * ask the fileserver for the information (e.g. we are salvaging
- * the entire partition or we are not the salvageserver)
+ * @retval 0 success; we obtained the volume summary information
+ * @retval -1 we raced with a fileserver restart; volume locks and checkout
+ * must be retried
+ * @retval 1 we did not get the volume summary information; either the
+ * fileserver responded with an error, or we are not supposed to
+ * ask the fileserver for the information (e.g. we are salvaging
+ * the entire partition or we are not the salvageserver)
*
* @note for non-DAFS, always returns 1
*/
AskVolumeSummary(VolumeId singleVolumeNumber)
{
afs_int32 code = 1;
-#ifdef FSSYNC_BUILD_CLIENT
+#if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
if (programType == salvageServer) {
if (singleVolumeNumber) {
FSSYNC_VGQry_response_t q_res;
continue;
}
+ /* AskOffline for singleVolumeNumber was called much earlier */
if (q_res.children[i] != singleVolumeNumber) {
AskOffline(q_res.children[i], fileSysPartition->name);
+ if (LockVolume(q_res.children[i])) {
+ /* need to retry */
+ return -1;
+ }
}
+
code = VReadVolumeDiskHeader(q_res.children[i], fileSysPartition, &diskHdr);
if (code) {
Log("Cannot read header for %lu; trying to salvage group anyway\n",
"entire partition\n");
}
}
-#endif /* FSSYNC_BUILD_CLIENT */
+#endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
return code;
}
afs_int32 nVolumes; /**< # of vols we've encountered */
afs_int32 totalVolumes; /**< max # of vols we should encounter (the
* # of vols we've alloc'd memory for) */
+ int retry; /**< do we need to retry vol lock/checkout? */
};
/**
* information needed to record the volume summary data
*
* @return operation status
- * @retval 0 success
- * @retval 1 volume header is mis-named and should be deleted
+ * @retval 0 success
+ * @retval -1 volume locking raced with fileserver restart; checking out
+ * and locking volumes needs to be retried
+ * @retval 1 volume header is mis-named and should be deleted
*/
static int
RecordHeader(struct DiskPartition64 *dp, const char *name,
* earlier */
AskOffline(summary.header.id, fileSysPartition->name);
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (!badname) {
+ /* don't lock the volume if the header is bad, since we're
+ * about to delete it anyway. */
+ if (LockVolume(summary.header.id)) {
+ params->retry = 1;
+ return -1;
+ }
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
}
}
if (badname) {
}
}
-void
+/**
+ * Populates volumeSummaryp with volume summary information, either by asking
+ * the fileserver for VG information, or by scanning the /vicepX partition.
+ *
+ * @param[in] singleVolumeNumber the volume ID of the single volume group we
+ * are salvaging, or 0 if this is a partition
+ * salvage
+ *
+ * @return operation status
+ * @retval 0 success
+ * @retval -1 we raced with a fileserver restart; checking out and locking
+ * volumes must be retried
+ */
+int
GetVolumeSummary(VolumeId singleVolumeNumber)
{
afs_int32 nvols = 0;
struct SalvageScanParams params;
int code;
- if (AskVolumeSummary(singleVolumeNumber) == 0) {
+ code = AskVolumeSummary(singleVolumeNumber);
+ if (code == 0) {
/* we successfully got the vol information from the fileserver; no
* need to scan the partition */
- return;
+ return 0;
+ }
+ if (code < 0) {
+ /* we need to retry volume checkout */
+ return code;
}
if (!singleVolumeNumber) {
params.vsp = volumeSummaryp;
params.nVolumes = 0;
params.totalVolumes = nvols;
+ params.retry = 0;
/* walk the partition directory of volume headers and record the info
* about them; unlinking invalid headers */
code = VWalkVolumeHeaders(fileSysPartition, fileSysPath, RecordHeader,
UnlinkHeader, ¶ms);
+ if (params.retry) {
+ /* we apparently need to retry checking-out/locking volumes */
+ return -1;
+ }
if (code < 0) {
Abort("Failed to get volume header summary\n");
}
qsort(volumeSummaryp, nVolumes, sizeof(struct VolumeSummary),
CompareVolumes);
+
+ return 0;
}
/* Find the link table. This should be associated with the RW volume or, if
}
}
+#ifdef AFS_DEMAND_ATTACH_FS
+/**
+ * Locks a volume on disk for salvaging.
+ *
+ * @param[in] volumeId volume ID to lock
+ *
+ * @return operation status
+ * @retval 0 success
+ * @retval -1 volume lock raced with a fileserver restart; all volumes must
+ * checked out and locked again
+ *
+ * @note DAFS only
+ */
+static int
+LockVolume(VolumeId volumeId)
+{
+ afs_int32 code;
+ int locktype;
+
+ /* should always be WRITE_LOCK, but keep the lock-type logic all
+ * in one place, in VVolLockType. Params will be ignored, but
+ * try to provide what we're logically doing. */
+ locktype = VVolLockType(V_VOLUPD, 1);
+
+ code = VLockVolumeByIdNB(volumeId, fileSysPartition, locktype);
+ if (code) {
+ if (code == EBUSY) {
+ Abort("Someone else appears to be using volume %lu; Aborted\n",
+ afs_printable_uint32_lu(volumeId));
+ }
+ Abort("Error %ld trying to lock volume %lu; Aborted\n",
+ afs_printable_int32_ld(code),
+ afs_printable_uint32_lu(volumeId));
+ }
+
+ code = FSYNC_VerifyCheckout(volumeId, fileSysPathName, FSYNC_VOL_OFF, FSYNC_SALVAGE);
+ if (code == SYNC_DENIED) {
+ /* need to retry checking out volumes */
+ return -1;
+ }
+ if (code != SYNC_OK) {
+ Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
+ afs_printable_uint32_lu(volumeId), afs_printable_int32_ld(code));
+ }
+
+ /* set inUse = programType in the volume header to ensure that nobody
+ * tries to use this volume again without salvaging, if we somehow crash
+ * or otherwise exit before finishing the salvage.
+ */
+ if (!Testing) {
+ IHandle_t *h;
+ struct VolumeHeader header;
+ struct VolumeDiskHeader diskHeader;
+ struct VolumeDiskData volHeader;
+
+ code = VReadVolumeDiskHeader(volumeId, fileSysPartition, &diskHeader);
+ if (code) {
+ return 0;
+ }
+
+ DiskToVolumeHeader(&header, &diskHeader);
+
+ IH_INIT(h, fileSysDevice, header.parent, header.volumeInfo);
+ if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
+ volHeader.stamp.magic != VOLUMEINFOMAGIC) {
+
+ IH_RELEASE(h);
+ return 0;
+ }
+
+ volHeader.inUse = programType;
+
+ /* If we can't re-write the header, bail out and error. We don't
+ * assert when reading the header, since it's possible the
+ * header isn't really there (when there's no data associated
+ * with the volume; we just delete the vol header file in that
+ * case). But if it's there enough that we can read it, but
+ * somehow we cannot write to it to signify we're salvaging it,
+ * we've got a big problem and we cannot continue. */
+ assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
+
+ IH_RELEASE(h);
+ }
+
+ return 0;
+}
+#endif /* AFS_DEMAND_ATTACH_FS */
void
AskOffline(VolumeId volumeId, char * partition)
Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
Abort("Salvage aborted\n");
}
-
-#ifdef AFS_DEMAND_ATTACH_FS
- /* set inUse = programType in the volume header. We do this in case
- * the fileserver restarts/crashes while we are salvaging.
- * Otherwise, the fileserver could attach the volume again on
- * startup while we are salvaging, which would be very bad, or
- * schedule another salvage while we are salvaging, which would be
- * annoying. */
- if (!Testing) {
- IHandle_t *h;
- struct VolumeHeader header;
- struct VolumeDiskHeader diskHeader;
- struct VolumeDiskData volHeader;
-
- code = VReadVolumeDiskHeader(volumeId, fileSysPartition, &diskHeader);
- if (code) {
- return;
- }
-
- DiskToVolumeHeader(&header, &diskHeader);
-
- IH_INIT(h, fileSysDevice, header.parent, header.volumeInfo);
- if (IH_IREAD(h, 0, (char*)&volHeader, sizeof(volHeader)) != sizeof(volHeader) ||
- volHeader.stamp.magic != VOLUMEINFOMAGIC) {
-
- IH_RELEASE(h);
- return;
- }
-
- volHeader.inUse = programType;
-
- /* If we can't re-write the header, bail out and error. We don't
- * assert when reading the header, since it's possible the
- * header isn't really there (when there's no data associated
- * with the volume; we just delete the vol header file in that
- * case). But if it's there enough that we can read it, but
- * somehow we cannot write to it to signify we're salvaging it,
- * we've got a big problem and we cannot continue. */
- assert(IH_IWRITE(h, 0, (char*)&volHeader, sizeof(volHeader)) == sizeof(volHeader));
-
- IH_RELEASE(h);
- }
-#endif /* AFS_DEMAND_ATTACH_FS */
}
void
extern void DistilVnodeEssence(VolumeId vid, VnodeClass class, Inode ino,
Unique * maxu);
extern int GetInodeSummary(FILE *inodeFile, VolumeId singleVolumeNumber);
-extern void GetVolumeSummary(VolumeId singleVolumeNumber);
+extern int GetVolumeSummary(VolumeId singleVolumeNumber);
extern int JudgeEntry(void *dirVal, char *name, afs_int32 vnodeNumber,
afs_int32 unique);
extern void MaybeZapVolume(register struct InodeSummary *isp, char *message,
/* the maximum number of volumes in a volume group that we can handle */
#define VOL_VG_MAX_VOLS 20
+/* how many times to retry if we raced the fileserver restarting, when trying
+ * to checkout/lock a volume */
+#define VOL_MAX_CHECKOUT_RETRIES 10
+
/* maximum numbe of Vice partitions */
#define VOLMAXPARTS 255
/*@printflike@*/ extern void Log(const char *format, ...);
/* Forward declarations */
-static Volume *attach2(Error * ec, VolId vid, char *path,
- register struct VolumeHeader *header,
+static Volume *attach2(Error * ec, VolId volumeId, char *path,
struct DiskPartition64 *partp, Volume * vp,
int isbusy, int mode);
static void ReallyFreeVolume(Volume * vp);
VAttachVolumeByName_r(Error * ec, char *partition, char *name, int mode)
{
register Volume *vp = NULL;
- int fd, n;
- struct afs_stat status;
- struct VolumeDiskHeader diskHeader;
- struct VolumeHeader iheader;
struct DiskPartition64 *partp;
char path[64];
int isbusy = 0;
strcat(path, "/");
strcat(path, name);
- if ((fd = afs_open(path, O_RDONLY)) == -1 || afs_fstat(fd, &status) == -1) {
- Log("VAttachVolume: Failed to open %s (errno %d)\n", path, errno);
- if (fd > -1)
- close(fd);
- *ec = VNOVOL;
- VOL_LOCK;
- goto done;
- }
- n = read(fd, &diskHeader, sizeof(diskHeader));
- close(fd);
- if (n != sizeof(diskHeader)
- || diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
- Log("VAttachVolume: Error reading volume header %s\n", path);
- *ec = VSALVAGE;
- VOL_LOCK;
- goto done;
- }
- if (diskHeader.stamp.version != VOLUMEHEADERVERSION) {
- Log("VAttachVolume: Volume %s, version number is incorrect; volume needs salvaged\n", path);
- *ec = VSALVAGE;
- VOL_LOCK;
- goto done;
- }
-
- DiskToVolumeHeader(&iheader, &diskHeader);
-#ifdef FSSYNC_BUILD_CLIENT
- if (VCanUseFSSYNC() && mode != V_SECRETLY && mode != V_PEEK) {
- SYNC_response res;
- memset(&res, 0, sizeof(res));
-
- VOL_LOCK;
- if (FSYNC_VolOp(iheader.id, partition, FSYNC_VOL_NEEDVOLUME, mode, &res)
- != SYNC_OK) {
-
- if (res.hdr.reason == FSYNC_SALVAGE) {
- Log("VAttachVolume: file server says volume %u is salvaging\n",
- iheader.id);
- *ec = VSALVAGING;
- } else {
- Log("VAttachVolume: attach of volume %u apparently denied by file server\n",
- iheader.id);
- *ec = VNOVOL; /* XXXX */
- }
-
- goto done;
- }
- VOL_UNLOCK;
- }
-#endif
if (!vp) {
vp = (Volume *) calloc(1, sizeof(Volume));
/* attach2 is entered without any locks, and returns
* with vol_glock_mutex held */
- vp = attach2(ec, volumeId, path, &iheader, partp, vp, isbusy, mode);
+ vp = attach2(ec, volumeId, path, partp, vp, isbusy, mode);
if (VCanUseFSSYNC() && vp) {
if ((mode == V_VOLUPD) || (VolumeWriteable(vp) && (mode == V_CLONE))) {
* notified the fileserver; don't online it now */
if (*ec != VSALVAGING)
#endif /* AFS_DEMAND_ATTACH_FS */
- FSYNC_VolOp(iheader.id, partition, FSYNC_VOL_ON, 0, NULL);
+ FSYNC_VolOp(volumeId, partition, FSYNC_VOL_ON, 0, NULL);
} else
#endif
if (programType == fileServer && vp) {
VAttachVolumeByVp_r(Error * ec, Volume * vp, int mode)
{
char name[VMAXPATHLEN];
- int fd, n, reserve = 0;
- struct afs_stat status;
- struct VolumeDiskHeader diskHeader;
- struct VolumeHeader iheader;
+ int reserve = 0;
struct DiskPartition64 *partp;
char path[64];
int isbusy = 0;
*ec = 0;
-
- /* compute path to disk header,
- * read in header,
- * and verify magic and version stamps */
+ /* compute path to disk header */
strcpy(path, VPartitionPath(partp));
VOL_UNLOCK;
strcat(path, "/");
strcat(path, name);
- if ((fd = afs_open(path, O_RDONLY)) == -1 || afs_fstat(fd, &status) == -1) {
- Log("VAttachVolume: Failed to open %s (errno %d)\n", path, errno);
- if (fd > -1)
- close(fd);
- *ec = VNOVOL;
- VOL_LOCK;
- goto done;
- }
- n = read(fd, &diskHeader, sizeof(diskHeader));
- close(fd);
- if (n != sizeof(diskHeader)
- || diskHeader.stamp.magic != VOLUMEHEADERMAGIC) {
- Log("VAttachVolume: Error reading volume header %s\n", path);
- *ec = VSALVAGE;
- VOL_LOCK;
- goto done;
- }
- if (diskHeader.stamp.version != VOLUMEHEADERVERSION) {
- Log("VAttachVolume: Volume %s, version number is incorrect; volume needs salvaged\n", path);
- *ec = VSALVAGE;
- VOL_LOCK;
- goto done;
- }
-
- /* convert on-disk header format to in-memory header format */
- DiskToVolumeHeader(&iheader, &diskHeader);
/* do volume attach
*
* NOTE: attach2 is entered without any locks, and returns
* with vol_glock_mutex held */
- vp = attach2(ec, volumeId, path, &iheader, partp, vp, isbusy, mode);
+ vp = attach2(ec, volumeId, path, partp, vp, isbusy, mode);
/*
* the event that an error was encountered, or
}
#endif /* AFS_DEMAND_ATTACH_FS */
-/*
- * called without any locks held
- * returns with vol_glock_mutex held
+/**
+ * read in a vol header, possibly lock the vol header, and possibly check out
+ * the vol header from the fileserver, as part of volume attachment.
+ *
+ * @param[out] ec error code
+ * @param[in] vp volume pointer object
+ * @param[in] partp disk partition object of the attaching partition
+ * @param[in] mode attachment mode such as V_VOLUPD, V_DUMP, etc (see
+ * volume.h)
+ * @param[in] peek 1 to just try to read in the volume header and make sure
+ * we don't try to lock the vol, or check it out from
+ * FSSYNC or anything like that; 0 otherwise, for 'normal'
+ * operation
+ *
+ * @note As part of DAFS volume attachment, the volume header may be either
+ * read- or write-locked to ensure mutual exclusion of certain volume
+ * operations. In some cases in order to determine whether we need to
+ * read- or write-lock the header, we need to read in the header to see
+ * if the volume is RW or not. So, if we read in the header under a
+ * read-lock and determine that we actually need a write-lock on the
+ * volume header, this function will drop the read lock, acquire a write
+ * lock, and read the header in again.
*/
-private Volume *
-attach2(Error * ec, VolId volumeId, char *path, register struct VolumeHeader * header,
- struct DiskPartition64 * partp, register Volume * vp, int isbusy, int mode)
-{
- vp->specialStatus = (byte) (isbusy ? VBUSY : 0);
- IH_INIT(vp->vnodeIndex[vLarge].handle, partp->device, header->parent,
- header->largeVnodeIndex);
- IH_INIT(vp->vnodeIndex[vSmall].handle, partp->device, header->parent,
- header->smallVnodeIndex);
- IH_INIT(vp->diskDataHandle, partp->device, header->parent,
- header->volumeInfo);
- IH_INIT(vp->linkHandle, partp->device, header->parent, header->linkTable);
- vp->shuttingDown = 0;
- vp->goingOffline = 0;
- vp->nUsers = 1;
+static void
+attach_volume_header(Error *ec, Volume *vp, struct DiskPartition64 *partp,
+ int mode, int peek)
+{
+ struct VolumeDiskHeader diskHeader;
+ struct VolumeHeader header;
+ int code;
+ int first_try = 1;
+ int lock_tries = 0, checkout_tries = 0;
+ int retry;
+ VolumeId volid = vp->hashid;
+#ifdef FSSYNC_BUILD_CLIENT
+ int checkout, done_checkout = 0;
+#endif /* FSSYNC_BUILD_CLIENT */
#ifdef AFS_DEMAND_ATTACH_FS
- vp->stats.last_attach = FT_ApproxTime();
- vp->stats.attaches++;
+ int locktype = 0, use_locktype = -1;
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ retry:
+ retry = 0;
+ *ec = 0;
+
+ if (lock_tries > VOL_MAX_CHECKOUT_RETRIES) {
+ Log("VAttachVolume: retried too many times trying to lock header for "
+ "vol %lu part %s; giving up\n", afs_printable_uint32_lu(volid),
+ VPartitionPath(partp));
+ *ec = VNOVOL;
+ goto done;
+ }
+ if (checkout_tries > VOL_MAX_CHECKOUT_RETRIES) {
+ Log("VAttachVolume: retried too many times trying to checkout "
+ "vol %lu part %s; giving up\n", afs_printable_uint32_lu(volid),
+ VPartitionPath(partp));
+ *ec = VNOVOL;
+ goto done;
+ }
+
+ if (VReadVolumeDiskHeader(volid, partp, NULL)) {
+ /* short-circuit the 'volume does not exist' case */
+ *ec = VNOVOL;
+ goto done;
+ }
+
+#ifdef FSSYNC_BUILD_CLIENT
+ checkout = !done_checkout;
+ done_checkout = 1;
+ if (!peek && checkout && VMustCheckoutVolume(mode)) {
+ SYNC_response res;
+ memset(&res, 0, sizeof(res));
+
+ if (FSYNC_VolOp(volid, VPartitionPath(partp), FSYNC_VOL_NEEDVOLUME, mode, &res)
+ != SYNC_OK) {
+
+ if (res.hdr.reason == FSYNC_SALVAGE) {
+ Log("VAttachVolume: file server says volume %lu is salvaging\n",
+ afs_printable_uint32_lu(volid));
+ *ec = VSALVAGING;
+ } else {
+ Log("VAttachVolume: attach of volume %lu apparently denied by file server\n",
+ afs_printable_uint32_lu(volid));
+ *ec = VNOVOL; /* XXXX */
+ }
+ goto done;
+ }
+ }
#endif
- VOL_LOCK;
- IncUInt64(&VStats.attaches);
- vp->cacheCheck = ++VolumeCacheCheck;
- /* just in case this ever rolls over */
- if (!vp->cacheCheck)
- vp->cacheCheck = ++VolumeCacheCheck;
- GetVolumeHeader(vp);
- VOL_UNLOCK;
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (use_locktype < 0) {
+ /* don't know whether vol is RO or RW; assume it's RO and we can retry
+ * if it turns out to be RW */
+ locktype = VVolLockType(mode, 0);
+
+ } else {
+ /* a previous try says we should use use_locktype to lock the volume,
+ * so use that */
+ locktype = use_locktype;
+ }
+
+ if (!peek && locktype) {
+ code = VLockVolumeNB(vp, locktype);
+ if (code) {
+ if (code == EBUSY) {
+ Log("VAttachVolume: another program has vol %lu locked\n",
+ afs_printable_uint32_lu(volid));
+ } else {
+ Log("VAttachVolume: error %d trying to lock vol %lu\n",
+ code, afs_printable_uint32_lu(volid));
+ }
+
+ *ec = VNOVOL;
+ goto done;
+ }
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ code = VReadVolumeDiskHeader(volid, partp, &diskHeader);
+ if (code) {
+ if (code == EIO) {
+ *ec = VSALVAGE;
+ } else {
+ *ec = VNOVOL;
+ }
+ goto done;
+ }
+
+ DiskToVolumeHeader(&header, &diskHeader);
+
+ IH_INIT(vp->vnodeIndex[vLarge].handle, partp->device, header.parent,
+ header.largeVnodeIndex);
+ IH_INIT(vp->vnodeIndex[vSmall].handle, partp->device, header.parent,
+ header.smallVnodeIndex);
+ IH_INIT(vp->diskDataHandle, partp->device, header.parent,
+ header.volumeInfo);
+ IH_INIT(vp->linkHandle, partp->device, header.parent, header.linkTable);
+
+ if (first_try) {
+ /* only need to do this once */
+ VOL_LOCK;
+ GetVolumeHeader(vp);
+ VOL_UNLOCK;
+ }
#if defined(AFS_DEMAND_ATTACH_FS) && defined(FSSYNC_BUILD_CLIENT)
/* demand attach changes the V_PEEK mechanism
* to demand attach fileservers. However, I'm trying
* to limit the number of common code changes)
*/
- if (programType != fileServer && mode == V_PEEK) {
+ if (VCanUseFSSYNC() && (mode == V_PEEK || peek)) {
SYNC_response res;
res.payload.len = sizeof(VolumeDiskData);
res.payload.buf = &vp->header->diskstuff;
- if (FSYNC_VolOp(volumeId,
+ if (FSYNC_VolOp(vp->hashid,
partp->name,
FSYNC_VOL_QUERY_HDR,
FSYNC_WHATEVER,
#endif /* AFS_DEMAND_ATTACH_FS */
if (*ec) {
- Log("VAttachVolume: Error reading diskDataHandle vol header %s; error=%u\n", path, *ec);
+ Log("VAttachVolume: Error reading diskDataHandle header for vol %lu; "
+ "error=%u\n", afs_printable_uint32_lu(volid), *ec);
+ goto done;
}
#ifdef AFS_DEMAND_ATTACH_FS
# ifdef FSSYNC_BUILD_CLIENT
disk_header_loaded:
-#endif
- if (!*ec) {
+# endif /* FSSYNC_BUILD_CLIENT */
- /* check for pending volume operations */
- if (vp->pending_vol_op) {
- /* see if the pending volume op requires exclusive access */
- switch (vp->pending_vol_op->vol_op_state) {
- case FSSYNC_VolOpPending:
- /* this should never happen */
- assert(vp->pending_vol_op->vol_op_state != FSSYNC_VolOpPending);
- break;
+ /* if the lock type we actually used to lock the volume is different than
+ * the lock type we should have used, retry with the lock type we should
+ * use */
+ use_locktype = VVolLockType(mode, VolumeWriteable(vp));
+ if (locktype != use_locktype) {
+ retry = 1;
+ lock_tries++;
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ *ec = 0;
+
+ done:
+#if defined(AFS_DEMAND_ATTACH_FS) && defined(FSSYNC_BUILD_CLIENT)
+ if (!peek && *ec == 0 && retry == 0 && VMustCheckoutVolume(mode)) {
+
+ code = FSYNC_VerifyCheckout(volid, VPartitionPath(partp), FSYNC_VOL_NEEDVOLUME, mode);
+
+ if (code == SYNC_DENIED) {
+ /* must retry checkout; fileserver no longer thinks we have
+ * the volume */
+ retry = 1;
+ checkout_tries++;
+ done_checkout = 0;
+
+ } else if (code != SYNC_OK) {
+ *ec = VNOVOL;
+ }
+ }
+#endif /* AFS_DEMAND_ATTACH_FS && FSSYNC_BUILD_CLIENT */
+
+ if (*ec || retry) {
+ /* either we are going to be called again for a second pass, or we
+ * encountered an error; clean up in either case */
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ if ((V_attachFlags(vp) & VOL_LOCKED)) {
+ VUnlockVolume(vp);
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+ if (vp->linkHandle) {
+ IH_RELEASE(vp->vnodeIndex[vLarge].handle);
+ IH_RELEASE(vp->vnodeIndex[vSmall].handle);
+ IH_RELEASE(vp->diskDataHandle);
+ IH_RELEASE(vp->linkHandle);
+ }
+ }
+
+ if (*ec) {
+ return;
+ }
+ if (retry) {
+ first_try = 0;
+ goto retry;
+ }
+}
+
+#ifdef AFS_DEMAND_ATTACH_FS
+static void
+attach_check_vop(Error *ec, VolumeId volid, struct DiskPartition64 *partp,
+ Volume *vp)
+{
+ *ec = 0;
+
+ if (vp->pending_vol_op) {
+
+ VOL_LOCK;
+
+ if (vp->pending_vol_op->vol_op_state == FSSYNC_VolOpRunningUnknown) {
+ int code;
+ code = VVolOpLeaveOnlineNoHeader_r(vp, vp->pending_vol_op);
+ if (code == 1) {
+ vp->pending_vol_op->vol_op_state = FSSYNC_VolOpRunningOnline;
+ } else if (code == 0) {
+ vp->pending_vol_op->vol_op_state = FSSYNC_VolOpRunningOffline;
+
+ } else {
+ /* we need the vol header to determine if the volume can be
+ * left online for the vop, so... get the header */
+
+ VOL_UNLOCK;
+
+ /* attach header with peek=1 to avoid checking out the volume
+ * or locking it; we just want the header info, we're not
+ * messing with the volume itself at all */
+ attach_volume_header(ec, vp, partp, V_PEEK, 1);
+ if (*ec) {
+ return;
+ }
+
+ VOL_LOCK;
- case FSSYNC_VolOpRunningUnknown:
if (VVolOpLeaveOnline_r(vp, vp->pending_vol_op)) {
vp->pending_vol_op->vol_op_state = FSSYNC_VolOpRunningOnline;
- break;
} else {
vp->pending_vol_op->vol_op_state = FSSYNC_VolOpRunningOffline;
- /* fall through to take volume offline */
}
- case FSSYNC_VolOpRunningOffline:
- /* mark the volume down */
- *ec = VOFFLINE;
- VChangeState_r(vp, VOL_STATE_UNATTACHED);
- if (V_offlineMessage(vp)[0] == '\0')
- strlcpy(V_offlineMessage(vp),
- "A volume utility is running.",
- sizeof(V_offlineMessage(vp)));
- V_offlineMessage(vp)[sizeof(V_offlineMessage(vp)) - 1] = '\0';
-
- /* check to see if we should set the specialStatus flag */
- if (VVolOpSetVBusy_r(vp, vp->pending_vol_op)) {
- vp->specialStatus = VBUSY;
- }
- default:
- break;
+ /* make sure we grab a new vol header and re-open stuff on
+ * actual attachment; we can't keep the data we grabbed, since
+ * it was not done under a lock and thus not safe */
+ FreeVolumeHeader(vp);
+ VReleaseVolumeHandles_r(vp);
+ }
+ }
+ /* see if the pending volume op requires exclusive access */
+ switch (vp->pending_vol_op->vol_op_state) {
+ case FSSYNC_VolOpPending:
+ /* this should never happen */
+ assert(vp->pending_vol_op->vol_op_state != FSSYNC_VolOpPending);
+ break;
+
+ case FSSYNC_VolOpRunningUnknown:
+ /* this should never happen; we resolved 'unknown' above */
+ assert(vp->pending_vol_op->vol_op_state != FSSYNC_VolOpRunningUnknown);
+ break;
+
+ case FSSYNC_VolOpRunningOffline:
+ /* mark the volume down */
+ *ec = VOFFLINE;
+ VChangeState_r(vp, VOL_STATE_UNATTACHED);
+
+ /* do not set V_offlineMessage here; we don't have ownership of
+ * the volume (and probably do not have the header loaded), so we
+ * can't alter the disk header */
+
+ /* check to see if we should set the specialStatus flag */
+ if (VVolOpSetVBusy_r(vp, vp->pending_vol_op)) {
+ vp->specialStatus = VBUSY;
}
+ break;
+
+ default:
+ break;
}
+ VOL_UNLOCK;
+ }
+}
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+/**
+ * volume attachment helper function.
+ *
+ * @param[out] ec error code
+ * @param[in] volumeId volume ID of the attaching volume
+ * @param[in] path full path to the volume header .vol file
+ * @param[in] partp disk partition object for the attaching partition
+ * @param[in] vp volume object; vp->hashid, vp->device, vp->partition,
+ * vp->vnode_list, and V_attachCV (for DAFS) should already
+ * be initialized
+ * @param[in] isbusy 1 if vp->specialStatus should be set to VBUSY; that is,
+ * if there is a volume operation running for this volume
+ * that should set the volume to VBUSY during its run. 0
+ * otherwise. (see VVolOpSetVBusy_r)
+ * @param[in] mode attachment mode such as V_VOLUPD, V_DUMP, etc (see
+ * volume.h)
+ *
+ * @return pointer to the semi-attached volume pointer
+ * @retval NULL an error occurred (check value of *ec)
+ * @retval vp volume successfully attaching
+ *
+ * @pre no locks held
+ *
+ * @post VOL_LOCK held
+ */
+static Volume *
+attach2(Error * ec, VolId volumeId, char *path, struct DiskPartition64 *partp,
+ Volume * vp, int isbusy, int mode)
+{
+ /* have we read in the header successfully? */
+ int read_header = 0;
+
+ /* should we FreeVolume(vp) instead of VCheckFree(vp) in the error
+ * cleanup? */
+ int forcefree = 0;
+
+ *ec = 0;
+
+ vp->vnodeIndex[vLarge].handle = NULL;
+ vp->vnodeIndex[vSmall].handle = NULL;
+ vp->diskDataHandle = NULL;
+ vp->linkHandle = NULL;
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ attach_check_vop(ec, volumeId, partp, vp);
+ if (!*ec) {
+ attach_volume_header(ec, vp, partp, mode, 0);
+ }
+#else
+ attach_volume_header(ec, vp, partp, mode, 0);
+#endif /* !AFS_DEMAND_ATTACH_FS */
+
+ if (*ec == VNOVOL) {
+ /* if the volume doesn't exist, skip straight to 'error' so we don't
+ * request a salvage */
+ goto error;
+ }
+
+ if (!*ec) {
+ read_header = 1;
+
+ vp->specialStatus = (byte) (isbusy ? VBUSY : 0);
+ vp->shuttingDown = 0;
+ vp->goingOffline = 0;
+ vp->nUsers = 1;
+#ifdef AFS_DEMAND_ATTACH_FS
+ vp->stats.last_attach = FT_ApproxTime();
+ vp->stats.attaches++;
+#endif
+
+ VOL_LOCK;
+ IncUInt64(&VStats.attaches);
+ vp->cacheCheck = ++VolumeCacheCheck;
+ /* just in case this ever rolls over */
+ if (!vp->cacheCheck)
+ vp->cacheCheck = ++VolumeCacheCheck;
+ VOL_UNLOCK;
+
+#ifdef AFS_DEMAND_ATTACH_FS
V_attachFlags(vp) |= VOL_HDR_LOADED;
vp->stats.last_hdr_load = vp->stats.last_attach;
- }
#endif /* AFS_DEMAND_ATTACH_FS */
+ }
if (!*ec) {
struct IndexFileHeader iHead;
#else /* AFS_DEMAND_ATTACH_FS */
*ec = VSALVAGE;
#endif /* AFS_DEMAND_ATTACH_FS */
+
goto error;
}
VChangeState_r(vp, VOL_STATE_ERROR);
vp->nUsers = 0;
#endif /* AFS_DEMAND_ATTACH_FS */
- FreeVolume(vp);
Log("VAttachVolume: volume %s is junk; it should be destroyed at next salvage\n", path);
*ec = VNOVOL;
- return NULL;
+ forcefree = 1;
+ goto error;
}
}
AddVolumeToHashTable(vp, V_id(vp));
#ifdef AFS_DEMAND_ATTACH_FS
+ if (VCanUnlockAttached() && (V_attachFlags(vp) & VOL_LOCKED)) {
+ VUnlockVolume(vp);
+ }
if ((programType != fileServer) ||
(V_inUse(vp) == fileServer)) {
AddVolumeToVByPList_r(vp);
VChangeState_r(vp, VOL_STATE_UNATTACHED);
}
#endif
+
return vp;
error:
}
#endif /* AFS_DEMAND_ATTACH_FS */
- VReleaseVolumeHandles_r(vp);
+ if (read_header) {
+ VReleaseVolumeHandles_r(vp);
+ }
#ifdef AFS_DEMAND_ATTACH_FS
VCheckSalvage(vp);
- VCheckFree(vp);
+ if (forcefree) {
+ FreeVolume(vp);
+ } else {
+ VCheckFree(vp);
+ }
#else /* !AFS_DEMAND_ATTACH_FS */
FreeVolume(vp);
#endif /* !AFS_DEMAND_ATTACH_FS */
}
#endif
- LoadVolumeHeader(ec, vp);
- if (*ec) {
- VGET_CTR_INC(V6);
- /* Only log the error if it was a totally unexpected error. Simply
- * a missing inode is likely to be caused by the volume being deleted */
- if (errno != ENXIO || LogLevel)
- Log("Volume %u: couldn't reread volume header\n",
- vp->hashid);
-#ifdef AFS_DEMAND_ATTACH_FS
- if (VCanScheduleSalvage()) {
- VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
- } else {
- FreeVolume(vp);
- vp = NULL;
- }
-#else /* AFS_DEMAND_ATTACH_FS */
- FreeVolume(vp);
- vp = NULL;
-#endif /* AFS_DEMAND_ATTACH_FS */
- break;
- }
-
#ifdef AFS_DEMAND_ATTACH_FS
/*
- * this test MUST happen after the volume header is loaded
+ * this test MUST happen after VAttachVolymeByVp, so vol_op_state is
+ * not VolOpRunningUnknown (attach2 would have converted it to Online
+ * or Offline)
*/
/* only valid before/during demand attachment */
break;
}
#endif /* AFS_DEMAND_ATTACH_FS */
+
+ LoadVolumeHeader(ec, vp);
+ if (*ec) {
+ VGET_CTR_INC(V6);
+ /* Only log the error if it was a totally unexpected error. Simply
+ * a missing inode is likely to be caused by the volume being deleted */
+ if (errno != ENXIO || LogLevel)
+ Log("Volume %u: couldn't reread volume header\n",
+ vp->hashid);
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (VCanScheduleSalvage()) {
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
+ } else {
+ FreeVolume(vp);
+ vp = NULL;
+ }
+#else /* AFS_DEMAND_ATTACH_FS */
+ FreeVolume(vp);
+ vp = NULL;
+#endif /* AFS_DEMAND_ATTACH_FS */
+ break;
+ }
VGET_CTR_INC(V7);
if (vp->shuttingDown) {
IH_REALLYCLOSE(vp->linkHandle);
#ifdef AFS_DEMAND_ATTACH_FS
+ if ((V_attachFlags(vp) & VOL_LOCKED)) {
+ VUnlockVolume(vp);
+ }
+
VOL_LOCK;
VChangeState_r(vp, state_save);
#endif
IH_RELEASE(vp->linkHandle);
#ifdef AFS_DEMAND_ATTACH_FS
+ if ((V_attachFlags(vp) & VOL_LOCKED)) {
+ VUnlockVolume(vp);
+ }
+
VOL_LOCK;
VChangeState_r(vp, state_save);
#endif
}
/**
+ * same as VVolOpLeaveOnline_r, but does not require a volume with an attached
+ * header.
+ *
+ * @param[in] vp volume object
+ * @param[in] vopinfo volume operation info object
+ *
+ * @return whether it is safe to leave volume online
+ * @retval 0 it is NOT SAFE to leave the volume online
+ * @retval 1 it is safe to leave the volume online during the operation
+ * @retval -1 unsure; volume header is required in order to know whether or
+ * not is is safe to leave the volume online
+ *
+ * @pre VOL_LOCK is held
+ *
+ * @internal volume package internal use only
+ */
+int
+VVolOpLeaveOnlineNoHeader_r(Volume * vp, FSSYNC_VolOp_info * vopinfo)
+{
+ /* follow the logic in VVolOpLeaveOnline_r; this is the same, except
+ * assume that we don't know VolumeWriteable; return -1 if the answer
+ * depends on VolumeWriteable */
+
+ if (vopinfo->vol_op_state == FSSYNC_VolOpRunningOnline) {
+ return 1;
+ }
+ if (vopinfo->com.command == FSYNC_VOL_NEEDVOLUME &&
+ vopinfo->com.reason == V_READONLY) {
+
+ return 1;
+ }
+ if (vopinfo->com.command == FSYNC_VOL_NEEDVOLUME &&
+ (vopinfo->com.reason == V_CLONE ||
+ vopinfo->com.reason == V_DUMP)) {
+
+ /* must know VolumeWriteable */
+ return -1;
+ }
+ return 0;
+}
+
+/**
* determine whether VBUSY should be set during this volume operation.
*
* @param[in] vp volume object
* @retval 0 volume salvage will occur
* @retval 1 volume salvage could not be scheduled
*
- * @note DAFS fileserver only
+ * @note DAFS only
*
- * @note this call does not synchronously schedule a volume salvage. rather,
- * it sets volume state so that when volume refcounts reach zero, a
- * volume salvage will occur. by "refcounts", we mean both nUsers and
- * nWaiters must be zero.
+ * @note in the fileserver, this call does not synchronously schedule a volume
+ * salvage. rather, it sets volume state so that when volume refcounts
+ * reach zero, a volume salvage will occur. by "refcounts", we mean both
+ * nUsers and nWaiters must be zero.
*
* @internal volume package internal use only.
*/
{
int code = 0;
/*
- * for DAFS volume utilities, transition to error state
- * (at some point in the future, we should consider
- * making volser talk to salsrv)
+ * for DAFS volume utilities that are not supposed to schedule salvages,
+ * just transition to error state instead
*/
if (!VCanScheduleSalvage()) {
VChangeState_r(vp, VOL_STATE_ERROR);
vp->salvage.reason = reason;
vp->stats.last_salvage = FT_ApproxTime();
- if (programType == fileServer && vp->header && VIsSalvager(V_inUse(vp))) {
- /* Right now we can't tell for sure if this indicates a
- * salvage is running, or if a running salvage crashed, so
- * we always ERROR the volume in case a salvage is running.
- * Once we get rid of the partition lock and instead lock
- * individual volume header files for salvages, we will
- * probably be able to tell if a salvage is running, and we
- * can do away with this behavior. */
- /* Note that we can avoid this check for non-fileserver programs,
- * since they must lock the partition in order to attach a volume.
- * Since the salvager also locks the partition to salvage, we
- * could not have reached this point for non-fileservers if this
- * volume was being salvaged; so we assume it is not. */
- Log("VRequestSalvage: volume %u appears to be salvaging, but we\n", vp->hashid);
- Log(" didn't request a salvage. Forcing it offline waiting for the\n");
- Log(" salvage to finish; if you are sure no salvage is running,\n");
- Log(" run a salvage manually.\n");
-
- /* make sure neither VScheduleSalvage_r nor
- * VUpdateSalvagePriority_r try to schedule another salvage */
- vp->salvage.requested = vp->salvage.scheduled = 0;
-
- /* these stats aren't correct, but doing this makes them
- * slightly closer to being correct */
- vp->stats.salvages++;
- vp->stats.last_salvage_req = FT_ApproxTime();
- IncUInt64(&VStats.salvages);
-
- VChangeState_r(vp, VOL_STATE_ERROR);
- *ec = VSALVAGE;
- code = 1;
+ /* Note that it is not possible for us to reach this point if a
+ * salvage is already running on this volume (even if the fileserver
+ * was restarted during the salvage). If a salvage were running, the
+ * salvager would have write-locked the volume header file, so when
+ * we tried to lock the volume header, the lock would have failed,
+ * and we would have failed during attachment prior to calling
+ * VRequestSalvage. So we know that we can schedule salvages without
+ * fear of a salvage already running for this volume. */
- } else if (vp->stats.salvages < SALVAGE_COUNT_MAX) {
+ if (vp->stats.salvages < SALVAGE_COUNT_MAX) {
VChangeState_r(vp, VOL_STATE_SALVAGING);
*ec = VSALVAGING;
} else {
extern void VReleaseDiskLock(struct VDiskLock *dl, int locktype);
#endif /* AFS_DEMAND_ATTACH_FS */
extern int VVolOpLeaveOnline_r(Volume * vp, FSSYNC_VolOp_info * vopinfo);
+extern int VVolOpLeaveOnlineNoHeader_r(Volume * vp, FSSYNC_VolOp_info * vopinfo);
extern int VVolOpSetVBusy_r(Volume * vp, FSSYNC_VolOp_info * vopinfo);
extern void VPurgeVolume(Error * ec, Volume * vp);
* directory or online at http://www.openafs.org/dl/license10.html
*/
+#include <sys/file.h>
+
#ifndef _AFS_VOL_VOLUME_INLINE_H
#define _AFS_VOL_VOLUME_INLINE_H 1
#include "volume.h"
+#include "partition.h"
+
/**
* tell caller whether the given program type represents a salvaging
* program.
* @return whether or not we need to lock the partition
* @retval 0 no, we do not
* @retval 1 yes, we do
+ *
+ * @note for DAFS, always returns 0, since we use per-header locks instead
*/
static_inline int
VRequiresPartLock(void)
{
+#ifdef AFS_DEMAND_ATTACH_FS
+ return 0;
+#else
switch (programType) {
case volumeServer:
case volumeUtility:
default:
return 0;
}
+#endif /* AFS_DEMAND_ATTACH_FS */
+}
+
+/**
+ * tells caller whether or not we need to check out a volume from the
+ * fileserver before we can use it.
+ *
+ * @param[in] mode the mode of attachment for the volume
+ *
+ * @return whether or not we need to check out the volume from the fileserver
+ * @retval 0 no, we can just use the volume
+ * @retval 1 yes, we must check out the volume before use
+ */
+static_inline int
+VMustCheckoutVolume(int mode)
+{
+ if (VCanUseFSSYNC() && mode != V_SECRETLY && mode != V_PEEK) {
+ return 1;
+ }
+ return 0;
}
/**
if (programType == fileServer) {
return 1;
}
- if (VCanUseFSSYNC() && mode != V_SECRETLY && mode != V_PEEK) {
- /* If we can FSSYNC, we assume we checked out the volume from
- * the fileserver, so inUse should not be set. If we checked out
- * with V_SECRETLY or V_PEEK, though, we didn't ask the
- * fileserver, so don't check inUse. */
- return 1;
+ if (VMustCheckoutVolume(mode)) {
+ /* assume we checked out the volume from the fileserver, so inUse
+ * should not be set */
+ return 1;
}
return 0;
}
#endif
#include "vnode.h"
#include "volume.h"
+#include "volume_inline.h"
#include "partition.h"
#include "viceinode.h"
Inode nearInode = 0;
char *part, *name;
struct stat st;
+# ifdef AFS_DEMAND_ATTACH_FS
+ int locktype = 0;
+# endif /* AFS_DEMAND_ATTACH_FS */
*ec = 0;
memset(&vol, 0, sizeof(vol));
}
}
*ec = 0;
+
+# ifdef AFS_DEMAND_ATTACH_FS
+ /* volume doesn't exist yet, but we must lock it to try to prevent something
+ * else from reading it when we're e.g. half way through creating it (or
+ * something tries to create the same volume at the same time) */
+ locktype = VVolLockType(V_VOLUPD, 1);
+ rc = VLockVolumeByIdNB(volumeId, partition, locktype);
+ if (rc) {
+ Log("VCreateVolume: vol %lu already locked by someone else\n",
+ afs_printable_uint32_lu(volumeId));
+ *ec = VNOVOL;
+ return NULL;
+ }
+# else /* AFS_DEMAND_ATTACH_FS */
VLockPartition_r(partname);
+# endif /* !AFS_DEMAND_ATTACH_FS */
+
memset(&tempHeader, 0, sizeof(tempHeader));
tempHeader.stamp.magic = VOLUMEHEADERMAGIC;
tempHeader.stamp.version = VOLUMEHEADERVERSION;
errno, volumePath);
*ec = VNOVOL;
}
- return NULL;
+ goto bad_noheader;
}
device = partition->device;
*ec = VNOVOL;
}
VDestroyVolumeDiskHeader(partition, volumeId, parentId);
+ bad_noheader:
+# ifdef AFS_DEMAND_ATTACH_FS
+ if (locktype) {
+ VUnlockVolumeById(volumeId, partition);
+ }
+# endif /* AFS_DEMAND_ATTACH_FS */
return NULL;
}
IH_INIT(handle, device, vol.parentId, *(p->inode));
goto bad;
}
+# ifdef AFS_DEMAND_ATTACH_FS
+ if (locktype) {
+ VUnlockVolumeById(volumeId, partition);
+ }
+# endif /* AFS_DEMAND_ATTACH_FS */
return (VAttachVolumeByName_r(ec, partname, headerName, V_SECRETLY));
}
#endif /* FSSYNC_BUILD_CLIENT */
*
* @param[in] volid volume id
* @param[in] dp disk partition object
- * @param[out] hdr volume disk header
+ * @param[out] hdr volume disk header or NULL
+ *
+ * @note if hdr is NULL, this is essentially an existence test for the vol
+ * header
*
* @return operation status
* @retval 0 success
VPartitionPath(dp), afs_printable_uint32_lu(volid));
fd = open(path, O_RDONLY);
if (fd < 0) {
- Log("VReadVolumeDiskHeader: Couldn't open header for volume %lu.\n",
- afs_printable_uint32_lu(volid));
+ Log("VReadVolumeDiskHeader: Couldn't open header for volume %lu (errno %d).\n",
+ afs_printable_uint32_lu(volid), errno);
code = -1;
- } else if (read(fd, hdr, sizeof(*hdr)) != sizeof(*hdr)) {
+
+ } else if (hdr && read(fd, hdr, sizeof(*hdr)) != sizeof(*hdr)) {
Log("VReadVolumeDiskHeader: Couldn't read header for volume %lu.\n",
afs_printable_uint32_lu(volid));
code = EIO;
if (fcntl(fd, cmd, &sf)) {
if (nonblock && (errno == EACCES || errno == EAGAIN)) {
/* We asked for a nonblocking lock, and it was already locked */
+ sf.l_pid = 0;
+ if (fcntl(fd, F_GETLK, &sf) != 0 || sf.l_pid == 0) {
+ Log("_VLockFd: fcntl failed with error %d when trying to "
+ "query the conflicting lock for fd %d (locktype=%d, "
+ "offset=%lu)\n", errno, fd, locktype,
+ afs_printable_uint32_lu(offset));
+ } else {
+ Log("_VLockFd: conflicting lock held on fd %d, offset %lu by "
+ "pid %ld (locktype=%d)\n", fd,
+ afs_printable_uint32_lu(offset), (long int)sf.l_pid,
+ locktype);
+ }
return EBUSY;
}
Log("_VLockFd: fcntl failed with error %d when trying to lock "
- "fd %d (locktype=%d)\n", errno, fd, locktype);
+ "fd %d (locktype=%d, offset=%lu)\n", errno, fd, locktype,
+ afs_printable_uint32_lu(offset));
return EIO;
}