#include <afsconfig.h>
#include <afs/param.h>
+#include <roken.h>
+
+#include <ctype.h>
+#include <stddef.h>
+
+#ifdef HAVE_SYS_FILE_H
+#include <sys/file.h>
+#endif
#include <rx/xdr.h>
#include <afs/afsint.h>
-#include <ctype.h>
-#include <signal.h>
+
#ifndef AFS_NT40_ENV
-#include <sys/param.h>
#if !defined(AFS_SGI_ENV)
#ifdef AFS_OSF_ENV
#include <ufs/fs.h>
#endif
#endif
#else /* AFS_VFSINCL_ENV */
-#if !defined(AFS_AIX_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
+#if !defined(AFS_AIX_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_ARM_DARWIN_ENV)
#include <sys/fs.h>
#endif
#endif /* AFS_VFSINCL_ENV */
#endif /* AFS_OSF_ENV */
#endif /* AFS_SGI_ENV */
-#endif /* AFS_NT40_ENV */
-#include <errno.h>
-#include <sys/stat.h>
-#include <stdio.h>
-#ifdef AFS_NT40_ENV
-#include <fcntl.h>
-#else
-#include <sys/file.h>
-#endif
-#include <dirent.h>
+#endif /* !AFS_NT40_ENV */
+
#ifdef AFS_AIX_ENV
#include <sys/vfs.h>
-#include <fcntl.h>
#else
#ifdef AFS_HPUX_ENV
-#include <fcntl.h>
#include <mntent.h>
#else
#if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
#else
#ifndef AFS_NT40_ENV
#if defined(AFS_SGI_ENV)
-#include <fcntl.h>
#include <mntent.h>
-
#else
#ifndef AFS_LINUX20_ENV
#include <fstab.h> /* Need to find in libc 5, present in libc 6 */
#endif
#endif /* AFS_HPUX_ENV */
#endif
-#ifndef AFS_NT40_ENV
-#include <netdb.h>
-#include <netinet/in.h>
-#include <sys/wait.h>
-#include <setjmp.h>
-#ifndef ITIMER_REAL
-#include <sys/time.h>
-#endif /* ITIMER_REAL */
-#endif /* AFS_NT40_ENV */
-#if defined(AFS_SUN5_ENV) || defined(AFS_NT40_ENV) || defined(AFS_LINUX20_ENV)
-#include <string.h>
-#else
-#include <strings.h>
-#endif
#include "nfs.h"
#include <afs/errors.h>
#include <afs/afssyscalls.h>
#include "ihandle.h"
#include <afs/afsutil.h>
-#ifdef AFS_NT40_ENV
-#include <io.h>
-#endif
#include "daemon_com.h"
#include "fssync.h"
#include "salvsync.h"
#include "common.h"
#include "afs/afs_assert.h"
#include "vutils.h"
-#ifndef AFS_NT40_ENV
#include <afs/dir.h>
-#include <unistd.h>
-#endif
-
-#if !defined(offsetof)
-#include <stddef.h>
-#endif
-
-#ifdef O_LARGEFILE
-#define afs_stat stat64
-#define afs_fstat fstat64
-#define afs_open open64
-#else /* !O_LARGEFILE */
-#define afs_stat stat
-#define afs_fstat fstat
-#define afs_open open
-#endif /* !O_LARGEFILE */
#ifdef AFS_PTHREAD_ENV
pthread_mutex_t vol_glock_mutex;
static volatile sig_atomic_t vol_disallow_salvsync = 0;
#endif /* AFS_DEMAND_ATTACH_FS */
+/**
+ * has VShutdown_r been called / is VShutdown_r running?
+ */
+static int vol_shutting_down = 0;
+
#ifdef AFS_OSF_ENV
extern void *calloc(), *realloc();
#endif
/* Forward declarations */
static Volume *attach2(Error * ec, VolId volumeId, char *path,
struct DiskPartition64 *partp, Volume * vp,
- int isbusy, int mode);
+ int isbusy, int mode, int *acheckedOut);
static void ReallyFreeVolume(Volume * vp);
#ifdef AFS_DEMAND_ATTACH_FS
static void FreeVolume(Volume * vp);
static void LoadVolumeHeader(Error * ec, Volume * vp);
static int VCheckOffline(Volume * vp);
static int VCheckDetach(Volume * vp);
-static Volume * GetVolume(Error * ec, Error * client_ec, VolId volumeId, Volume * hint, int flags);
+static Volume * GetVolume(Error * ec, Error * client_ec, VolId volumeId,
+ Volume * hint, const struct timespec *ts);
int LogLevel; /* Vice loglevel--not defined as extern so that it will be
* defined when not linked with vice, XXXX */
opts->canUseFSSYNC = 0;
opts->canUseSALVSYNC = 0;
+ opts->interrupt_rxcall = NULL;
+ opts->offline_timeout = -1;
+ opts->offline_shutdown_timeout = -1;
+
#ifdef FAST_RESTART
opts->unsafe_attach = 1;
#else /* !FAST_RESTART */
CV_BROADCAST(&vol_vinit_cond);
}
+/**
+ * Log how long clients accessing an offlining volume are given before their
+ * calls get interrupted.
+ *
+ * @param[in] type     text describing the situation; inserted into the message
+ * @param[in] timeout  timeout in seconds. A negative value logs nothing, 0 is
+ *                     reported as "immediately".
+ */
+static_inline void
+VLogOfflineTimeout(const char *type, afs_int32 timeout)
+{
+    const char *plural;
+
+    if (timeout > 0) {
+        /* "1 second" vs. "N seconds" */
+        plural = (timeout == 1) ? "" : "s";
+        Log("VInitVolumePackage: Interrupting clients accessing %s "
+            "after %ld second%s\n", type, (long)timeout, plural);
+    } else if (timeout == 0) {
+        Log("VInitVolumePackage: Interrupting clients accessing %s "
+            "immediately\n", type);
+    }
+    /* timeout < 0: nothing to report */
+}
+
int
VInitVolumePackage2(ProgramType pt, VolumePackageOptions * opts)
{
programType = pt;
vol_opts = *opts;
+#ifndef AFS_PTHREAD_ENV
+ if (opts->offline_timeout != -1 || opts->offline_shutdown_timeout != -1) {
+ Log("VInitVolumePackage: offline_timeout and/or "
+ "offline_shutdown_timeout was specified, but the volume package "
+ "does not support these for LWP builds\n");
+ return -1;
+ }
+#endif
+ VLogOfflineTimeout("volumes going offline", opts->offline_timeout);
+ VLogOfflineTimeout("volumes going offline during shutdown",
+ opts->offline_shutdown_timeout);
+
memset(&VStats, 0, sizeof(VStats));
VStats.hdr_cache_size = 200;
vp->partition = partition;
vp->hashid = vid;
queue_Init(&vp->vnode_list);
+ queue_Init(&vp->rx_call_list);
CV_INIT(&V_attachCV(vp), "partattach", CV_DEFAULT, 0);
vb->batch[vb->size++] = vp;
Log("VShutdown: shutting down on-line volumes on %d partition%s...\n",
params.n_parts, params.n_parts > 1 ? "s" : "");
+ vol_shutting_down = 1;
+
if (vol_attach_threads > 1) {
/* prepare for parallel shutdown */
params.n_threads = vol_attach_threads;
}
Log("VShutdown: shutting down on-line volumes...\n");
+ vol_shutting_down = 1;
for (i = 0; i < VolumeHashTable.Size; i++) {
/* try to hold first volume in the hash table */
for (queue_Scan(&VolumeHashTable.Table[i],vp,np,Volume)) {
osi_Assert(vp != NULL);
memset(vp, 0, sizeof(Volume));
queue_Init(&vp->vnode_list);
+ queue_Init(&vp->rx_call_list);
CV_INIT(&V_attachCV(vp), "vp attach", CV_DEFAULT, 0);
}
char path[64];
int isbusy = 0;
VolId volumeId;
+ int checkedOut;
#ifdef AFS_DEMAND_ATTACH_FS
VolumeStats stats_save;
Volume *svp = NULL;
VOL_UNLOCK;
- strcat(path, "/");
+ strcat(path, OS_DIRSEP);
strcat(path, name);
if (!vp) {
vp->device = partp->device;
vp->partition = partp;
queue_Init(&vp->vnode_list);
+ queue_Init(&vp->rx_call_list);
#ifdef AFS_DEMAND_ATTACH_FS
CV_INIT(&V_attachCV(vp), "vp attach", CV_DEFAULT, 0);
#endif /* AFS_DEMAND_ATTACH_FS */
/* attach2 is entered without any locks, and returns
* with vol_glock_mutex held */
- vp = attach2(ec, volumeId, path, partp, vp, isbusy, mode);
+ vp = attach2(ec, volumeId, path, partp, vp, isbusy, mode, &checkedOut);
if (VCanUseFSSYNC() && vp) {
#ifdef AFS_DEMAND_ATTACH_FS
if (mode == V_PEEK) {
vp->needsPutBack = 0;
} else {
- vp->needsPutBack = 1;
+ vp->needsPutBack = VOL_PUTBACK;
}
#else /* !AFS_DEMAND_ATTACH_FS */
/* duplicate computation in fssync.c about whether the server
|| (!VolumeWriteable(vp) && (mode == V_CLONE || mode == V_DUMP)))
vp->needsPutBack = 0;
else
- vp->needsPutBack = 1;
+ vp->needsPutBack = VOL_PUTBACK;
#endif /* !AFS_DEMAND_ATTACH_FS */
}
- /* OK, there's a problem here, but one that I don't know how to
- * fix right now, and that I don't think should arise often.
- * Basically, we should only put back this volume to the server if
- * it was given to us by the server, but since we don't have a vp,
- * we can't run the VolumeWriteable function to find out as we do
- * above when computing vp->needsPutBack. So we send it back, but
- * there's a path in VAttachVolume on the server which may abort
- * if this volume doesn't have a header. Should be pretty rare
- * for all of that to happen, but if it does, probably the right
- * fix is for the server to allow the return of readonly volumes
- * that it doesn't think are really checked out. */
#ifdef FSSYNC_BUILD_CLIENT
- if (VCanUseFSSYNC() && vp == NULL &&
- mode != V_SECRETLY && mode != V_PEEK) {
+ /* Only give back the vol to the fileserver if we checked it out; attach2
+ * will set checkedOut only if we successfully checked it out from the
+ * fileserver. */
+ if (VCanUseFSSYNC() && vp == NULL && checkedOut) {
#ifdef AFS_DEMAND_ATTACH_FS
/* If we couldn't attach but we scheduled a salvage, we already
VolId volumeId;
Volume * nvp = NULL;
VolumeStats stats_save;
+ int checkedOut;
*ec = 0;
/* volume utility should never call AttachByVp */
VOL_UNLOCK;
- strcat(path, "/");
+ strcat(path, OS_DIRSEP);
strcat(path, name);
/* do volume attach
*
* NOTE: attach2 is entered without any locks, and returns
* with vol_glock_mutex held */
- vp = attach2(ec, volumeId, path, partp, vp, isbusy, mode);
+ vp = attach2(ec, volumeId, path, partp, vp, isbusy, mode, &checkedOut);
/*
* the event that an error was encountered, or
* we don't try to lock the vol, or check it out from
* FSSYNC or anything like that; 0 otherwise, for 'normal'
* operation
+ * @param[out] acheckedOut If we successfully checked-out the volume from
+ * the fileserver (if we needed to), this is set
+ * to 1, otherwise it is untouched.
*
* @note As part of DAFS volume attachment, the volume header may be either
* read- or write-locked to ensure mutual exclusion of certain volume
*/
static void
attach_volume_header(Error *ec, Volume *vp, struct DiskPartition64 *partp,
- int mode, int peek)
+ int mode, int peek, int *acheckedOut)
{
struct VolumeDiskHeader diskHeader;
struct VolumeHeader header;
}
goto done;
}
+ *acheckedOut = 1;
}
#endif
#ifdef AFS_DEMAND_ATTACH_FS
static void
attach_check_vop(Error *ec, VolumeId volid, struct DiskPartition64 *partp,
- Volume *vp)
+ Volume *vp, int *acheckedOut)
{
*ec = 0;
/* attach header with peek=1 to avoid checking out the volume
* or locking it; we just want the header info, we're not
* messing with the volume itself at all */
- attach_volume_header(ec, vp, partp, V_PEEK, 1);
+ attach_volume_header(ec, vp, partp, V_PEEK, 1, acheckedOut);
if (*ec) {
return;
}
* @param[in] path full path to the volume header .vol file
* @param[in] partp disk partition object for the attaching partition
* @param[in] vp volume object; vp->hashid, vp->device, vp->partition,
- * vp->vnode_list, and V_attachCV (for DAFS) should already
- * be initialized
+ * vp->vnode_list, vp->rx_call_list, and V_attachCV (for
+ * DAFS) should already be initialized
* @param[in] isbusy 1 if vp->specialStatus should be set to VBUSY; that is,
* if there is a volume operation running for this volume
* that should set the volume to VBUSY during its run. 0
* otherwise. (see VVolOpSetVBusy_r)
* @param[in] mode attachment mode such as V_VOLUPD, V_DUMP, etc (see
* volume.h)
+ * @param[out] acheckedOut If we successfully checked-out the volume from
+ * the fileserver (if we needed to), this is set
+ * to 1, otherwise it is 0.
*
* @return pointer to the semi-attached volume pointer
* @retval NULL an error occurred (check value of *ec)
*/
static Volume *
attach2(Error * ec, VolId volumeId, char *path, struct DiskPartition64 *partp,
- Volume * vp, int isbusy, int mode)
+ Volume * vp, int isbusy, int mode, int *acheckedOut)
{
/* have we read in the header successfully? */
int read_header = 0;
vp->diskDataHandle = NULL;
vp->linkHandle = NULL;
+ *acheckedOut = 0;
+
#ifdef AFS_DEMAND_ATTACH_FS
- attach_check_vop(ec, volumeId, partp, vp);
+ attach_check_vop(ec, volumeId, partp, vp, acheckedOut);
if (!*ec) {
- attach_volume_header(ec, vp, partp, mode, 0);
+ attach_volume_header(ec, vp, partp, mode, 0, acheckedOut);
}
#else
- attach_volume_header(ec, vp, partp, mode, 0);
+ attach_volume_header(ec, vp, partp, mode, 0, acheckedOut);
#endif /* !AFS_DEMAND_ATTACH_FS */
if (*ec == VNOVOL) {
}
#endif /* AFS_DEMAND_ATTACH_FS */
+/**** volume timeout-related stuff ****/
+
+#ifdef AFS_PTHREAD_ENV
+
+static struct timespec *shutdown_timeout;
+static pthread_once_t shutdown_timeout_once = PTHREAD_ONCE_INIT;
+
+/**
+ * Check whether an absolute deadline has been reached.
+ *
+ * @param[in] ts  absolute deadline; must not be NULL. A deadline whose
+ *                tv_sec is 0 is always treated as already expired.
+ *
+ * @retval 0 the deadline has not been reached yet, or the current time could
+ *           not be determined
+ * @retval 1 the deadline has been reached
+ */
+static_inline int
+VTimedOut(const struct timespec *ts)
+{
+    struct timeval now;
+
+    if (ts->tv_sec == 0) {
+        /* the "immediate" deadline; no need to look at the clock */
+        return 1;
+    }
+
+    if (gettimeofday(&now, NULL) != 0) {
+        /* assume no timeout; failure mode is we just wait longer than normal
+         * instead of returning errors when we shouldn't */
+        Log("Error %d from gettimeofday, assuming we have not timed out\n", errno);
+        return 0;
+    }
+
+    if (now.tv_sec != ts->tv_sec) {
+        /* the seconds alone decide it */
+        return now.tv_sec > ts->tv_sec;
+    }
+
+    /* same second: compare the sub-second parts, both in nanoseconds */
+    return now.tv_usec*1000 >= ts->tv_nsec;
+}
+
+/**
+ * Turn a relative timeout into an absolute deadline.
+ *
+ * @param[out] ts       receives the deadline that lies "timeout" seconds in
+ *                      the future; left untouched whenever NULL is returned
+ * @param[in]  timeout  length of the timeout in seconds. Negative means "no
+ *                      timeout"; 0 produces the all-zero deadline.
+ *
+ * @return deadline to use
+ *   @retval NULL      no timeout; wait forever (also used when the current
+ *                     time cannot be read)
+ *   @retval non-NULL  the given value for "ts"
+ *
+ * @internal
+ */
+static struct timespec *
+VCalcTimeout(struct timespec *ts, afs_int32 timeout)
+{
+    struct timeval tv;
+
+    if (timeout < 0) {
+        return NULL;
+    }
+
+    if (timeout > 0) {
+        if (gettimeofday(&tv, NULL) != 0) {
+            Log("Error %d from gettimeofday, falling back to 'forever' timeout\n", errno);
+            return NULL;
+        }
+        ts->tv_sec = tv.tv_sec + timeout;
+        ts->tv_nsec = tv.tv_usec * 1000;
+    } else {
+        /* timeout == 0: the special "already expired" deadline */
+        ts->tv_sec = 0;
+        ts->tv_nsec = 0;
+    }
+
+    return ts;
+}
+
+/**
+ * Initialize the shutdown_timeout global.
+ *
+ * Computes the absolute deadline for vol_opts.offline_shutdown_timeout and
+ * stores it in shutdown_timeout. shutdown_timeout is left NULL ("wait
+ * forever") when VCalcTimeout yields no deadline, or when the storage for the
+ * deadline cannot be allocated.
+ *
+ * @note used as the pthread_once init routine by VOfflineTimeout
+ */
+static void
+VShutdownTimeoutInit(void)
+{
+    struct timespec *ts;
+
+    ts = malloc(sizeof(*ts));
+    if (ts == NULL) {
+        /* VCalcTimeout writes through ts, so it must never be handed NULL.
+         * Degrade to "no deadline", the same fallback VCalcTimeout uses when
+         * it cannot read the clock. */
+        Log("VShutdownTimeoutInit: out of memory, falling back to 'forever' "
+            "timeout\n");
+        shutdown_timeout = NULL;
+        return;
+    }
+
+    shutdown_timeout = VCalcTimeout(ts, vol_opts.offline_shutdown_timeout);
+
+    if (!shutdown_timeout) {
+        /* no deadline in effect, so the storage is not needed */
+        free(ts);
+    }
+}
+
+/**
+ * Figure out the timeout that should be used for waiting for offline volumes.
+ *
+ * While the volume package is shutting down (vol_shutting_down is set), the
+ * single shared shutdown deadline is used; it is computed once, on first use.
+ * Otherwise a fresh deadline based on vol_opts.offline_timeout is calculated
+ * into "ats".
+ *
+ * @param[out] ats Storage space for a local timeout value if needed
+ *
+ * @return The timeout value that should be used
+ *   @retval NULL      No timeout; wait forever for offlining volumes
+ *   @retval non-NULL  A pointer to the absolute time that should be used as
+ *                     the deadline for waiting for offlining volumes.
+ *
+ * @note If we return non-NULL, the pointer we return may or may not be the
+ *       same as "ats"
+ */
+static const struct timespec *
+VOfflineTimeout(struct timespec *ats)
+{
+    int code;
+
+    if (!vol_shutting_down) {
+        return VCalcTimeout(ats, vol_opts.offline_timeout);
+    }
+
+    /* Keep the call out of the assertion expression: the initialization must
+     * happen regardless of how the assertion macro is compiled. */
+    code = pthread_once(&shutdown_timeout_once, VShutdownTimeoutInit);
+    osi_Assert(code == 0);
+
+    return shutdown_timeout;
+}
+
+#else /* AFS_PTHREAD_ENV */
+
+/* Waiting a certain amount of time for offlining volumes is not supported
+ * for LWP due to a lack of primitives. So, we never time out */
+# define VTimedOut(x) (0)
+# define VOfflineTimeout(x) (NULL)
+
+#endif /* !AFS_PTHREAD_ENV */
+
#if 0
static int
VHold(Volume * vp)
}
#endif
+/**
+ * Work out which error, if any, should be given to users of a volume that
+ * is waiting to go offline.
+ *
+ * @param[in] vp The volume we want to know about
+ *
+ * @return volume status
+ *   @retval 0        vp->goingOffline is not set
+ *   @retval nonzero  vp->specialStatus if it is set; otherwise VNOVOL when
+ *                    the volume is not in service or not blessed; otherwise
+ *                    VOFFLINE
+ *
+ * @note VIsGoingOffline is the variant that wraps this call in VOL_LOCK
+ */
+static afs_int32
+VIsGoingOffline_r(struct Volume *vp)
+{
+    if (!vp->goingOffline) {
+        return 0;
+    }
+
+    if (vp->specialStatus) {
+        return vp->specialStatus;
+    }
+
+    if (V_inService(vp) == 0 || V_blessed(vp) == 0) {
+        return VNOVOL;
+    }
+
+    return VOFFLINE;
+}
+
+/**
+ * Tell the caller if a volume is waiting to go offline.
+ *
+ * This is the wrapper around VIsGoingOffline_r for callers that do not hold
+ * VOL_LOCK: the call is bracketed with VOL_LOCK / VOL_UNLOCK and the result
+ * of VIsGoingOffline_r is returned unchanged.
+ *
+ * @param[in] vp The volume we want to know about
+ *
+ * @return volume status
+ * @retval 0 volume is not waiting to go offline, go ahead and use it
+ * @retval nonzero volume is waiting to offline, and give the returned code
+ * as an error to anyone accessing the volume
+ *
+ * @pre VOL_LOCK is NOT held
+ * @pre caller holds a heavyweight reference on vp
+ */
+afs_int32
+VIsGoingOffline(struct Volume *vp)
+{
+ afs_int32 code;
+
+ VOL_LOCK;
+ code = VIsGoingOffline_r(vp);
+ VOL_UNLOCK;
+
+ return code;
+}
+
+/**
+ * Register an RX call with a volume.
+ *
+ * Does nothing unless both a volume and a VCallByVol are given.
+ *
+ * @param[inout] ec        Error code; if unset when passed in, may be set if
+ *                         the volume starts going offline
+ * @param[out]   client_ec @see GetVolume
+ * @param[in]    vp        Volume struct
+ * @param[in]    cbv       VCallByVol struct containing the RX call to register
+ *
+ * @pre VOL_LOCK held
+ * @pre caller holds heavy ref on vp
+ *
+ * @internal
+ */
+static void
+VRegisterCall_r(Error *ec, Error *client_ec, Volume *vp, struct VCallByVol *cbv)
+{
+    if (!vp || !cbv) {
+        /* no volume or no call: nothing to register */
+        return;
+    }
+
+#ifdef AFS_DEMAND_ATTACH_FS
+    if (*ec == 0) {
+        /* The volume may have started going offline after the reference to
+         * it was obtained. Without this re-check we could race with the RX
+         * call scanner: report success, and then put our cbv on the
+         * rx_call_list only _after_ the scanner has been through the
+         * list. */
+        *ec = VIsGoingOffline_r(vp);
+        if (client_ec != NULL) {
+            *client_ec = *ec;
+        }
+    }
+
+    /* do not touch the list while the volume is in the call-scanning state */
+    while (V_attachState(vp) == VOL_STATE_SCANNING_RXCALLS) {
+        VWaitStateChange_r(vp);
+    }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+    queue_Prepend(&vp->rx_call_list, cbv);
+}
+
+/**
+ * Deregister an RX call with a volume.
+ *
+ * Does nothing when no VCallByVol is given, or when the given one is not
+ * currently on a queue.
+ *
+ * @param[in] vp  Volume struct
+ * @param[in] cbv VCallByVol struct containing the RX call to deregister
+ *
+ * @pre VOL_LOCK held
+ * @pre caller holds heavy ref on vp
+ *
+ * @internal
+ */
+static void
+VDeregisterCall_r(Volume *vp, struct VCallByVol *cbv)
+{
+    if (!cbv || !(queue_IsOnQueue(cbv))) {
+        /* never registered; nothing to undo */
+        return;
+    }
+
+#ifdef AFS_DEMAND_ATTACH_FS
+    /* do not touch the list while the volume is in the call-scanning state */
+    while (V_attachState(vp) == VOL_STATE_SCANNING_RXCALLS) {
+        VWaitStateChange_r(vp);
+    }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+    queue_Remove(cbv);
+}
/***************************************************/
/* get and put volume routines */
VOL_UNLOCK;
}
+/**
+ * Puts a volume reference obtained with VGetVolumeWithCall.
+ *
+ * The call is bracketed with VOL_LOCK / VOL_UNLOCK. The RX call is
+ * deregistered from the volume first (VDeregisterCall_r), and only then is
+ * the volume reference itself put back (VPutVolume_r).
+ *
+ * @param[in] vp Volume struct
+ * @param[in] cbv VCallByVol struct given to VGetVolumeWithCall, or NULL if none
+ *
+ * @pre VOL_LOCK is NOT held
+ */
+void
+VPutVolumeWithCall(Volume *vp, struct VCallByVol *cbv)
+{
+ VOL_LOCK;
+ VDeregisterCall_r(vp, cbv);
+ VPutVolume_r(vp);
+ VOL_UNLOCK;
+}
/* Get a pointer to an attached volume. The pointer is returned regardless
of whether or not the volume is in service or on/off line. An error
return retVal;
}
-/* same as VGetVolume, but if a volume is waiting to go offline, we return
- * that it is actually offline, instead of waiting for it to go offline */
+/**
+ * Get a volume reference associated with an RX call.
+ *
+ * @param[out] ec @see GetVolume
+ * @param[out] client_ec @see GetVolume
+ * @param[in] volumeId @see GetVolume
+ * @param[in] ts How long to wait for going-offline volumes (absolute time).
+ * If NULL, wait forever. If ts->tv_sec == 0, return immediately
+ * with an error if the volume is going offline.
+ * @param[in] cbv Contains an RX call to be associated with this volume
+ * reference. This call may be interrupted if the volume is
+ * requested to go offline while we hold a ref on it. Give NULL
+ * to not associate an RX call with this reference.
+ *
+ * @return @see GetVolume
+ *
+ * @note for LWP builds, ts must be NULL
+ *
+ * @note A reference obtained with this function MUST be put back with
+ * VPutVolumeWithCall
+ *
+ * @note The RX call is registered (VRegisterCall_r) after the volume has been
+ * obtained. In DAFS builds, when cbv is given, that registration step
+ * re-checks whether the volume is going offline, so *ec (and
+ * *client_ec) can be set even though a non-NULL volume is returned.
+ */
Volume *
-VGetVolumeNoWait(Error * ec, Error * client_ec, VolId volumeId)
+VGetVolumeWithCall(Error * ec, Error * client_ec, VolId volumeId,
+ const struct timespec *ts, struct VCallByVol *cbv)
{
Volume *retVal;
VOL_LOCK;
- retVal = GetVolume(ec, client_ec, volumeId, NULL, 1);
+ retVal = GetVolume(ec, client_ec, volumeId, NULL, ts);
+ VRegisterCall_r(ec, client_ec, retVal, cbv);
VOL_UNLOCK;
return retVal;
}
Volume *
VGetVolume_r(Error * ec, VolId volumeId)
{
- return GetVolume(ec, NULL, volumeId, NULL, 0);
+ return GetVolume(ec, NULL, volumeId, NULL, NULL);
}
/* try to get a volume we've previously looked up */
Volume *
VGetVolumeByVp_r(Error * ec, Volume * vp)
{
- return GetVolume(ec, NULL, vp->hashid, vp, 0);
+ return GetVolume(ec, NULL, vp->hashid, vp, NULL);
}
/**
* @param[out] client_ec wire error code to be given to clients
* @param[in] volumeId ID of the volume we want
* @param[in] hint optional hint for hash lookups, or NULL
- * @param[in] nowait 0 to wait for a 'goingOffline' volume to go offline
- * before returning, 1 to return immediately
+ * @param[in] timeout absolute deadline for waiting for the volume to go
+ * offline, if it is going offline. NULL to wait forever.
*
* @return a volume handle for the specified volume
* @retval NULL an error occurred, or the volume is in such a state that
* we cannot load a header or return any volume struct
*
* @note for DAFS, caller must NOT hold a ref count on 'hint'
+ *
+ * @note 'timeout' is only checked if the volume is actually going offline; so
+ * if you pass timeout->tv_sec = 0, this will exhibit typical
+ * nonblocking behavior.
+ *
+ * @note for LWP builds, 'timeout' must be NULL
*/
static Volume *
-GetVolume(Error * ec, Error * client_ec, VolId volumeId, Volume * hint, int nowait)
+GetVolume(Error * ec, Error * client_ec, VolId volumeId, Volume * hint,
+ const struct timespec *timeout)
{
Volume *vp = hint;
/* pull this profiling/debugging code out of regular builds */
if (programType == fileServer) {
VGET_CTR_INC(V9);
- if (vp->goingOffline && !nowait) {
- VGET_CTR_INC(V10);
+ if (vp->goingOffline) {
+ if (timeout && VTimedOut(timeout)) {
+ /* we've timed out; don't wait for the vol */
+ } else {
+ VGET_CTR_INC(V10);
#ifdef AFS_DEMAND_ATTACH_FS
- /* wait for the volume to go offline */
- if (V_attachState(vp) == VOL_STATE_GOING_OFFLINE) {
- VWaitStateChange_r(vp);
- }
+ /* wait for the volume to go offline */
+ if (V_attachState(vp) == VOL_STATE_GOING_OFFLINE) {
+ VTimedWaitStateChange_r(vp, timeout, NULL);
+ }
#elif defined(AFS_PTHREAD_ENV)
- VOL_CV_WAIT(&vol_put_volume_cond);
+ VOL_CV_TIMEDWAIT(&vol_put_volume_cond, timeout, NULL);
#else /* AFS_PTHREAD_ENV */
- LWP_WaitProcess(VPutVolume);
+ /* LWP has no timed wait, so the caller better not be
+ * expecting one */
+ osi_Assert(!timeout);
+ LWP_WaitProcess(VPutVolume);
#endif /* AFS_PTHREAD_ENV */
- continue;
+ continue;
+ }
}
if (vp->specialStatus) {
VGET_CTR_INC(V11);
VOL_UNLOCK;
}
+/**
+ * Iterate over the RX calls associated with a volume, and interrupt them.
+ *
+ * Returns without doing anything when the volume has no registered calls,
+ * when no vol_opts.interrupt_rxcall callback is configured, or when
+ * VIsGoingOffline_r reports that the volume is not going offline. Otherwise
+ * every call on vp->rx_call_list is handed to vol_opts.interrupt_rxcall
+ * together with the error code obtained from VIsGoingOffline_r.
+ *
+ * For DAFS, the volume is first moved to VOL_STATE_SCANNING_RXCALLS (after
+ * waiting for any exclusive state to end) and VOL_LOCK is dropped while the
+ * list is walked; afterwards VOL_LOCK is re-taken and the saved state is
+ * restored. VRegisterCall_r and VDeregisterCall_r wait for the volume to
+ * leave VOL_STATE_SCANNING_RXCALLS before they modify the list.
+ *
+ * @param[in] vp The volume whose RX calls we want to scan
+ *
+ * @pre VOL_LOCK held
+ */
+static void
+VScanCalls_r(struct Volume *vp)
+{
+ struct VCallByVol *cbv, *ncbv;
+ afs_int32 err;
+#ifdef AFS_DEMAND_ATTACH_FS
+ VolState state_save;
+#endif
+
+ if (queue_IsEmpty(&vp->rx_call_list))
+ return; /* no calls to interrupt */
+ if (!vol_opts.interrupt_rxcall)
+ return; /* we have no function with which to interrupt calls */
+ err = VIsGoingOffline_r(vp);
+ if (!err)
+ return; /* we're not going offline anymore */
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ VWaitExclusiveState_r(vp);
+ state_save = VChangeState_r(vp, VOL_STATE_SCANNING_RXCALLS);
+ VOL_UNLOCK;
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ for(queue_Scan(&vp->rx_call_list, cbv, ncbv, VCallByVol)) {
+ if (LogLevel > 0) {
+ struct rx_peer *peer;
+ char hoststr[16];
+ peer = rx_PeerOf(rx_ConnectionOf(cbv->call));
+
+ Log("Offlining volume %lu while client %s:%u is trying to read "
+ "from it; kicking client off with error %ld\n",
+ (long unsigned) vp->hashid,
+ afs_inet_ntoa_r(rx_HostOf(peer), hoststr),
+ (unsigned) ntohs(rx_PortOf(peer)),
+ (long) err);
+ }
+ (*vol_opts.interrupt_rxcall) (cbv->call, err);
+ }
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ VOL_LOCK;
+ VChangeState_r(vp, state_save);
+#endif /* AFS_DEMAND_ATTACH_FS */
+}
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/**
+ * Wait for a vp to go offline.
+ *
+ * The wait happens in two phases. First we wait, bounded by the deadline
+ * from VOfflineTimeout, for the volume to reach an offline state. If that
+ * wait times out, the RX calls registered with the volume are interrupted
+ * (VScanCalls_r) and we wait again, this time without a deadline.
+ *
+ * @param[out] ec 1 if a salvage on the volume has been requested and
+ * salvok == 0, 0 otherwise
+ * @param[in] vp The volume to wait for
+ * @param[in] salvok If 0, we return immediately with *ec = 1 if the volume
+ * has been requested to salvage. Otherwise we keep waiting
+ * until the volume has gone offline.
+ *
+ * @pre VOL_LOCK held
+ * @pre caller holds a lightweight ref on vp
+ *
+ * @note DAFS only
+ */
+static void
+VWaitForOfflineByVp_r(Error *ec, struct Volume *vp, int salvok)
+{
+ struct timespec timeout_ts;
+ const struct timespec *ts;
+ int timedout = 0;
+
+ ts = VOfflineTimeout(&timeout_ts);
+
+ *ec = 0;
+
+ while (!VIsOfflineState(V_attachState(vp)) && !timedout) {
+ if (!salvok && vp->salvage.requested) {
+ *ec = 1;
+ return;
+ }
+ VTimedWaitStateChange_r(vp, ts, &timedout);
+ }
+ if (!timedout) {
+ /* we didn't time out, so the volume must be offline, so we're done */
+ return;
+ }
+
+ /* If we got here, we timed out waiting for the volume to go offline.
+ * Kick off the accessing RX calls and wait again */
+
+ VScanCalls_r(vp);
+
+ while (!VIsOfflineState(V_attachState(vp))) {
+ if (!salvok && vp->salvage.requested) {
+ *ec = 1;
+ return;
+ }
+
+ VWaitStateChange_r(vp);
+ }
+}
+
+#else /* AFS_DEMAND_ATTACH_FS */
+
+/**
+ * Wait for a volume to go offline.
+ *
+ * The volume is first looked up with the deadline from VOfflineTimeout. If
+ * it is still going offline once that lookup returns, the RX calls
+ * registered with it are interrupted (VScanCalls_r) and the volume is looked
+ * up a second time with VGetVolume_r, which passes no deadline to GetVolume.
+ *
+ * @param[out] ec error code; handed to GetVolume and VGetVolume_r
+ * @param[in] volid ID of the volume to wait for
+ *
+ * @pre VOL_LOCK held
+ *
+ * @note non-DAFS only (for DAFS, use @see VWaitForOfflineByVp_r)
+ */
+static void
+VWaitForOffline_r(Error *ec, VolumeId volid)
+{
+ struct Volume *vp;
+ const struct timespec *ts;
+#ifdef AFS_PTHREAD_ENV
+ struct timespec timeout_ts;
+#endif
+
+ ts = VOfflineTimeout(&timeout_ts);
+
+ vp = GetVolume(ec, NULL, volid, NULL, ts);
+ if (!vp) {
+ /* error occurred so bad that we can't even get a vp; we have no
+ * information on the vol so we don't know whether to wait, so just
+ * return */
+ return;
+ }
+ if (!VIsGoingOffline_r(vp)) {
+ /* volume is no longer going offline, so we're done */
+ VPutVolume_r(vp);
+ return;
+ }
+
+ /* If we got here, we timed out waiting for the volume to go offline.
+ * Kick off the accessing RX calls and wait again */
+
+ VScanCalls_r(vp);
+ VPutVolume_r(vp);
+ vp = NULL;
+
+ vp = VGetVolume_r(ec, volid);
+ if (vp) {
+ /* In case it was reattached... */
+ VPutVolume_r(vp);
+ }
+}
+#endif /* !AFS_DEMAND_ATTACH_FS */
+
/* The opposite of VAttachVolume. The volume header is written to disk, with
the inUse bit turned off. A copy of the header is maintained in memory,
however (which is why this is VOffline, not VDetach).
void
VOffline_r(Volume * vp, char *message)
{
-#ifndef AFS_DEMAND_ATTACH_FS
Error error;
+#ifndef AFS_DEMAND_ATTACH_FS
VolumeId vid = V_id(vp);
#endif
VChangeState_r(vp, VOL_STATE_GOING_OFFLINE);
VCreateReservation_r(vp);
VPutVolume_r(vp);
-
- /* wait for the volume to go offline */
- if (V_attachState(vp) == VOL_STATE_GOING_OFFLINE) {
- VWaitStateChange_r(vp);
- }
+ VWaitForOfflineByVp_r(&error, vp, 1);
VCancelReservation_r(vp);
#else /* AFS_DEMAND_ATTACH_FS */
VPutVolume_r(vp);
- vp = VGetVolume_r(&error, vid); /* Wait for it to go offline */
- if (vp) /* In case it was reattached... */
- VPutVolume_r(vp);
+ VWaitForOffline_r(&error, vid);
#endif /* AFS_DEMAND_ATTACH_FS */
}
void
VOfflineForVolOp_r(Error *ec, Volume *vp, char *message)
{
+ int salvok = 1;
osi_Assert(vp->pending_vol_op);
if (!V_inUse(vp)) {
VPutVolume_r(vp);
VCreateReservation_r(vp);
VPutVolume_r(vp);
- /* Wait for the volume to go offline */
- while (!VIsOfflineState(V_attachState(vp))) {
+ if (vp->pending_vol_op->com.programType != salvageServer) {
/* do not give corrupted volumes to the volserver */
- if (vp->salvage.requested && vp->pending_vol_op->com.programType != salvageServer) {
- *ec = 1;
- goto error;
- }
- VWaitStateChange_r(vp);
+ salvok = 0;
}
+
*ec = 0;
- error:
+ VWaitForOfflineByVp_r(ec, vp, salvok);
+
VCancelReservation_r(vp);
}
#endif /* AFS_DEMAND_ATTACH_FS */
if (VCanUseFSSYNC()) {
notifyServer = vp->needsPutBack;
if (V_destroyMe(vp) == DESTROY_ME)
- useDone = FSYNC_VOL_DONE;
+ useDone = FSYNC_VOL_LEAVE_OFF;
#ifdef AFS_DEMAND_ATTACH_FS
else if (!V_blessed(vp) || !V_inService(vp))
useDone = FSYNC_VOL_LEAVE_OFF;
*/
#ifdef FSSYNC_BUILD_CLIENT
if (VCanUseFSSYNC() && notifyServer) {
+ if (notifyServer == VOL_PUTBACK_DELETE) {
+ /* Only send FSYNC_VOL_DONE if the volume was actually deleted.
+ * volserver code will set needsPutBack to VOL_PUTBACK_DELETE
+ * to signify a deleted volume. */
+ useDone = FSYNC_VOL_DONE;
+ }
/*
* Note: The server is not notified in the case of a bogus volume
* explicitly to make it possible to create a volume, do a partial
VolState state_save;
state_save = VChangeState_r(vp, VOL_STATE_OFFLINING);
+
+ VOL_UNLOCK;
#endif
- /* demand attach fs
- *
- * XXX need to investigate whether we can perform
- * DFlushVolume outside of vol_glock_mutex...
- *
- * VCloseVnodeFiles_r drops the glock internally */
DFlushVolume(vp->hashid);
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ VOL_LOCK;
+#endif
+
+ /* DAFS: VCloseVnodeFiles_r drops the glock internally */
VCloseVnodeFiles_r(vp);
#ifdef AFS_DEMAND_ATTACH_FS
VolState state_save;
state_save = VChangeState_r(vp, VOL_STATE_DETACHING);
+
+ VOL_UNLOCK;
#endif
- /* XXX need to investigate whether we can perform
- * DFlushVolume outside of vol_glock_mutex... */
DFlushVolume(vp->hashid);
- VReleaseVnodeFiles_r(vp); /* releases the glock internally */
+#ifdef AFS_DEMAND_ATTACH_FS
+ VOL_LOCK;
+#endif
+
+ VReleaseVnodeFiles_r(vp); /* DAFS: releases the glock internally */
#ifdef AFS_DEMAND_ATTACH_FS
VOL_UNLOCK;
*
* @note this is one of the event handlers called by VCancelReservation_r
*
+ * @note the caller must check if the volume needs to be freed after calling
+ * this; the volume may not have any references or be on any lists after
+ * we return, and we do not free it
+ *
* @see VCancelReservation_r
*
* @internal volume package internal use only.
VOfflineForSalvage_r(vp);
}
}
+ /* If we are non-fileserver, we're telling the fileserver to
+ * salvage the vol, so we don't need to give it back separately. */
+ vp->needsPutBack = 0;
+
*ec = VSALVAGING;
} else {
Log("VRequestSalvage: volume %u online salvaged too many times; forced offline.\n", vp->hashid);
* server over SALVSYNC. If we are not the fileserver, the request will be
* sent to the fileserver over FSSYNC (FSYNC_VOL_FORCE_ERROR/FSYNC_SALVAGE).
*
+ * @note the caller must check if the volume needs to be freed after calling
+ * this; the volume may not have any references or be on any lists after
+ * we return, and we do not free it
+ *
* @note DAFS only
*
* @internal volume package internal use only.
VScheduleSalvage_r(Volume * vp)
{
int ret=0;
- int code;
+ int code = 0;
VolState state_save;
VThreadOptions_t * thread_opts;
char partName[16];
}
}
}
- VCancelReservation_r(vp);
+
+ /* NB: this is cancelling the reservation we obtained above, but we do
+ * not call VCancelReservation_r, since that may trigger the vp dtor,
+ * possibly free'ing the vp. We need to keep the vp around after
+ * this, as the caller may reference vp without any refs. Instead, it
+ * is the duty of the caller to inspect 'vp' after we return to see if
+ * it needs to be freed. */
+ osi_Assert(--vp->nWaiters >= 0);
return ret;
}
#endif /* SALVSYNC_BUILD_CLIENT || FSSYNC_BUILD_CLIENT */
}
+/**
+ * Clear the bit for a vnode in a vnode index bitmap.
+ *
+ * @param[out] ec        0 on success; VNOVNODE if bitNumber lies beyond the
+ *                       end of the bitmap
+ * @param[in]  vp        volume the index belongs to (only used for DAFS)
+ * @param[in]  index     vnode index whose bitmap is updated;
+ *                       index->bitmapOffset is lowered if the cleared bit
+ *                       lies before it
+ * @param[in]  bitNumber number of the bit to clear
+ * @param[in]  flags     if VOL_FREE_BITMAP_WAIT is set (DAFS only), take a
+ *                       reservation on vp and wait for the volume to leave
+ *                       any exclusive state before touching the bitmap; the
+ *                       reservation is dropped again before returning
+ */
void
-VFreeBitMapEntry_r(Error * ec, struct vnodeIndex *index,
- unsigned bitNumber)
+VFreeBitMapEntry_r(Error * ec, Volume *vp, struct vnodeIndex *index,
+ unsigned bitNumber, int flags)
{
unsigned int offset;
*ec = 0;
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (flags & VOL_FREE_BITMAP_WAIT) {
+ /* VAllocBitmapEntry_r allocs bitmap entries under an exclusive volume
+ * state, so ensure we're not in an exclusive volume state when we update
+ * the bitmap */
+ VCreateReservation_r(vp);
+ VWaitExclusiveState_r(vp);
+ }
+#endif
+
#ifdef BITMAP_LATER
if (!index->bitmap)
- return;
+ goto done;
#endif /* BITMAP_LATER */
+
offset = bitNumber >> 3;
if (offset >= index->bitmapSize) {
*ec = VNOVNODE;
- return;
+ goto done;
}
if (offset < index->bitmapOffset)
index->bitmapOffset = offset & ~3; /* Truncate to nearest bit32 */
*(index->bitmap + offset) &= ~(1 << (bitNumber & 0x7));
+
+ done:
+#ifdef AFS_DEMAND_ATTACH_FS
+    if (flags & VOL_FREE_BITMAP_WAIT) {
+        /* Only drop the reservation if we took one above; callers that do
+         * not pass VOL_FREE_BITMAP_WAIT never created a reservation, and
+         * cancelling one here would unbalance the volume's waiter count. */
+        VCancelReservation_r(vp);
+    }
+#endif
+ return; /* make the compiler happy for non-DAFS */
}
void
-VFreeBitMapEntry(Error * ec, struct vnodeIndex *index,
+VFreeBitMapEntry(Error * ec, Volume *vp, struct vnodeIndex *index,
unsigned bitNumber)
{
VOL_LOCK;
- VFreeBitMapEntry_r(ec, index, bitNumber);
+ VFreeBitMapEntry_r(ec, vp, index, bitNumber, VOL_FREE_BITMAP_WAIT);
VOL_UNLOCK;
}
struct DiskPartition64 *dp;
*ec = 0;
- name[0] = '/';
- (void)afs_snprintf(&name[1], (sizeof name) - 1, VFORMAT, afs_printable_uint32_lu(volumeId));
+ name[0] = OS_DIRSEPC;
+ snprintf(&name[1], (sizeof name) - 1, VFORMAT,
+ afs_printable_uint32_lu(volumeId));
for (dp = DiskPartitionList; dp; dp = dp->next) {
- struct afs_stat status;
+ struct afs_stat_st status;
strcpy(path, VPartitionPath(dp));
strcat(path, name);
if (afs_stat(path, &status) == 0) {
* @return volume number
*
* @note the string must be of the form VFORMAT. the only permissible
- * deviation is a leading '/' character.
+ * deviation is a leading OS_DIRSEPC character.
*
* @see VFORMAT
*/
int
VolumeNumber(char *name)
{
- if (*name == '/')
+ if (*name == OS_DIRSEPC)
name++;
- return atoi(name + 1);
+ return strtoul(name + 1, NULL, 10);
}
/**
VolumeExternalName(VolumeId volumeId)
{
static char name[VMAXPATHLEN];
- (void)afs_snprintf(name, sizeof name, VFORMAT, afs_printable_uint32_lu(volumeId));
+ snprintf(name, sizeof name, VFORMAT, afs_printable_uint32_lu(volumeId));
return name;
}
int
VolumeExternalName_r(VolumeId volumeId, char * name, size_t len)
{
- return afs_snprintf(name, len, VFORMAT, afs_printable_uint32_lu(volumeId));
+ return snprintf(name, len, VFORMAT, afs_printable_uint32_lu(volumeId));
}
#define ENUMTOSTRING(en) #en
#define ENUMCASE(en) \
- case en: \
- return ENUMTOSTRING(en); \
- break
+ case en: return ENUMTOSTRING(en)
static char *
vlru_idx_to_string(int idx)