#include <afsconfig.h>
#include <afs/param.h>
+#include <roken.h>
+#include <afs/opr.h>
+
+#include <ctype.h>
+#include <stddef.h>
+
+#ifdef HAVE_SYS_FILE_H
+#include <sys/file.h>
+#endif
#include <rx/xdr.h>
#include <afs/afsint.h>
-#include <ctype.h>
-#include <signal.h>
+
#ifndef AFS_NT40_ENV
-#include <sys/param.h>
#if !defined(AFS_SGI_ENV)
#ifdef AFS_OSF_ENV
#include <ufs/fs.h>
#endif
#endif
#else /* AFS_VFSINCL_ENV */
-#if !defined(AFS_AIX_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV)
+#if !defined(AFS_AIX_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_DARWIN_ENV)
#include <sys/fs.h>
#endif
#endif /* AFS_VFSINCL_ENV */
#endif /* AFS_OSF_ENV */
#endif /* AFS_SGI_ENV */
-#endif /* AFS_NT40_ENV */
-#include <errno.h>
-#include <sys/stat.h>
-#include <stdio.h>
-#ifdef AFS_NT40_ENV
-#include <fcntl.h>
-#else
-#include <sys/file.h>
-#endif
-#include <dirent.h>
+#endif /* !AFS_NT40_ENV */
+
#ifdef AFS_AIX_ENV
#include <sys/vfs.h>
-#include <fcntl.h>
#else
#ifdef AFS_HPUX_ENV
-#include <fcntl.h>
#include <mntent.h>
#else
#if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
#else
#ifndef AFS_NT40_ENV
#if defined(AFS_SGI_ENV)
-#include <fcntl.h>
#include <mntent.h>
-
#else
#ifndef AFS_LINUX20_ENV
#include <fstab.h> /* Need to find in libc 5, present in libc 6 */
#endif
#endif /* AFS_HPUX_ENV */
#endif
-#ifndef AFS_NT40_ENV
-#include <netdb.h>
-#include <netinet/in.h>
-#include <sys/wait.h>
-#include <setjmp.h>
-#ifndef ITIMER_REAL
-#include <sys/time.h>
-#endif /* ITIMER_REAL */
-#endif /* AFS_NT40_ENV */
-#if defined(AFS_SUN5_ENV) || defined(AFS_NT40_ENV) || defined(AFS_LINUX20_ENV)
-#include <string.h>
-#else
-#include <strings.h>
-#endif
#include "nfs.h"
#include <afs/errors.h>
#include <afs/afssyscalls.h>
#include "ihandle.h"
#include <afs/afsutil.h>
-#ifdef AFS_NT40_ENV
-#include <io.h>
-#endif
#include "daemon_com.h"
#include "fssync.h"
#include "salvsync.h"
#include "partition.h"
#include "volume_inline.h"
#include "common.h"
-#include "afs/afs_assert.h"
#include "vutils.h"
-#ifndef AFS_NT40_ENV
#include <afs/dir.h>
-#include <unistd.h>
-#endif
-
-#if !defined(offsetof)
-#include <stddef.h>
-#endif
-
-#ifdef O_LARGEFILE
-#define afs_stat stat64
-#define afs_fstat fstat64
-#define afs_open open64
-#else /* !O_LARGEFILE */
-#define afs_stat stat
-#define afs_fstat fstat
-#define afs_open open
-#endif /* !O_LARGEFILE */
#ifdef AFS_PTHREAD_ENV
pthread_mutex_t vol_glock_mutex;
static volatile sig_atomic_t vol_disallow_salvsync = 0;
#endif /* AFS_DEMAND_ATTACH_FS */
+/**
+ * has VShutdown_r been called / is VShutdown_r running?
+ */
+static int vol_shutting_down = 0;
+
#ifdef AFS_OSF_ENV
extern void *calloc(), *realloc();
#endif
opts->canUseSALVSYNC = 0;
opts->interrupt_rxcall = NULL;
+ opts->offline_timeout = -1;
+ opts->offline_shutdown_timeout = -1;
+ opts->usage_threshold = 128;
+ opts->usage_rate_limit = 5;
#ifdef FAST_RESTART
opts->unsafe_attach = 1;
CV_BROADCAST(&vol_vinit_cond);
}
+/**
+ * Log how long clients may keep accessing offlining volumes before they
+ * are interrupted.
+ *
+ * @param[in] type     description of the affected volumes; substituted into
+ *                     the log message
+ * @param[in] timeout  timeout in seconds; a negative value logs nothing, 0
+ *                     logs that clients are interrupted immediately
+ */
+static_inline void
+VLogOfflineTimeout(const char *type, afs_int32 timeout)
+{
+    if (timeout > 0) {
+        /* singular "second" only for exactly one second */
+        const char *plural = (timeout == 1) ? "" : "s";
+
+        Log("VInitVolumePackage: Interrupting clients accessing %s "
+            "after %ld second%s\n", type, (long)timeout, plural);
+    } else if (timeout == 0) {
+        Log("VInitVolumePackage: Interrupting clients accessing %s "
+            "immediately\n", type);
+    }
+    /* negative timeout: nothing is logged */
+}
+
int
VInitVolumePackage2(ProgramType pt, VolumePackageOptions * opts)
{
programType = pt;
vol_opts = *opts;
+#ifndef AFS_PTHREAD_ENV
+ if (opts->offline_timeout != -1 || opts->offline_shutdown_timeout != -1) {
+ Log("VInitVolumePackage: offline_timeout and/or "
+ "offline_shutdown_timeout was specified, but the volume package "
+ "does not support these for LWP builds\n");
+ return -1;
+ }
+#endif
+ VLogOfflineTimeout("volumes going offline", opts->offline_timeout);
+ VLogOfflineTimeout("volumes going offline during shutdown",
+ opts->offline_shutdown_timeout);
+
memset(&VStats, 0, sizeof(VStats));
VStats.hdr_cache_size = 200;
Log("VShutdown: shutting down on-line volumes on %d partition%s...\n",
params.n_parts, params.n_parts > 1 ? "s" : "");
+ vol_shutting_down = 1;
+
if (vol_attach_threads > 1) {
/* prepare for parallel shutdown */
params.n_threads = vol_attach_threads;
}
Log("VShutdown: shutting down on-line volumes...\n");
+ vol_shutting_down = 1;
for (i = 0; i < VolumeHashTable.Size; i++) {
/* try to hold first volume in the hash table */
for (queue_Scan(&VolumeHashTable.Table[i],vp,np,Volume)) {
/* Header I/O routines */
/***************************************************/
+/**
+ * Map a header magic number to a human-readable name for log messages.
+ *
+ * @param[in] magic  magic number identifying the kind of header
+ *
+ * @return pointer to a string literal naming the header; "unknown" when the
+ *         magic number is not one of the four recognized values
+ */
+static const char *
+HeaderName(bit32 magic)
+{
+    const char *label;
+
+    switch (magic) {
+    case VOLUMEINFOMAGIC:
+        label = "volume info";
+        break;
+    case SMALLINDEXMAGIC:
+        label = "small index";
+        break;
+    case LARGEINDEXMAGIC:
+        label = "large index";
+        break;
+    case LINKTABLEMAGIC:
+        label = "link table";
+        break;
+    default:
+        label = "unknown";
+        break;
+    }
+
+    return label;
+}
+
/* open a descriptor for the inode (h),
* read in an on-disk structure into buffer (to) of size (size),
* verify versionstamp in structure has magic (magic) and
{
struct versionStamp *vsn;
FdHandle_t *fdP;
+ afs_sfsize_t nbytes;
+ afs_ino_str_t stmp;
*ec = 0;
if (h == NULL) {
+ Log("ReadHeader: Null inode handle argument for %s header file.\n",
+ HeaderName(magic));
*ec = VSALVAGE;
return;
}
fdP = IH_OPEN(h);
if (fdP == NULL) {
+ Log("ReadHeader: Failed to open %s header file "
+ "(volume=%u, inode=%s); errno=%d\n", HeaderName(magic), h->ih_vid,
+ PrintInode(stmp, h->ih_ino), errno);
*ec = VSALVAGE;
return;
}
vsn = (struct versionStamp *)to;
- if (FDH_PREAD(fdP, to, size, 0) != size || vsn->magic != magic) {
+ nbytes = FDH_PREAD(fdP, to, size, 0);
+ if (nbytes < 0) {
+ Log("ReadHeader: Failed to read %s header file "
+ "(volume=%u, inode=%s); errno=%d\n", HeaderName(magic), h->ih_vid,
+ PrintInode(stmp, h->ih_ino), errno);
+ *ec = VSALVAGE;
+ FDH_REALLYCLOSE(fdP);
+ return;
+ }
+ if (nbytes != size) {
+ Log("ReadHeader: Incorrect number of bytes read from %s header file "
+ "(volume=%u, inode=%s); expected=%d, read=%d\n",
+ HeaderName(magic), h->ih_vid, PrintInode(stmp, h->ih_ino), size,
+ (int)nbytes);
+ *ec = VSALVAGE;
+ FDH_REALLYCLOSE(fdP);
+ return;
+ }
+ if (vsn->magic != magic) {
+ Log("ReadHeader: Incorrect magic for %s header file "
+ "(volume=%u, inode=%s); expected=0x%x, read=0x%x\n",
+ HeaderName(magic), h->ih_vid, PrintInode(stmp, h->ih_ino), magic,
+ vsn->magic);
*ec = VSALVAGE;
FDH_REALLYCLOSE(fdP);
return;
}
+
FDH_CLOSE(fdP);
/* Check is conditional, in case caller wants to inspect version himself */
if (version && vsn->version != version) {
+ Log("ReadHeader: Incorrect version for %s header file "
+ "(volume=%u, inode=%s); expected=%x, read=%x\n",
+ HeaderName(magic), h->ih_vid, PrintInode(stmp, h->ih_ino),
+ version, vsn->version);
*ec = VSALVAGE;
}
}
* - volume is in an error state
* - volume is pre-attached
*/
- Log("VPreattachVolumeByVp_r: volume %u not in quiescent state\n", vid);
+ Log("VPreattachVolumeByVp_r: volume %u not in quiescent state (state %u flags 0x%x)\n",
+ vid, V_attachState(vp), V_attachFlags(vp));
goto done;
} else if (vp) {
/* we're re-attaching a volume; clear out some old state */
VOL_UNLOCK;
- strcat(path, "/");
+ strcat(path, OS_DIRSEP);
strcat(path, name);
if (!vp) {
VOL_UNLOCK;
- strcat(path, "/");
+ strcat(path, OS_DIRSEP);
strcat(path, name);
/* do volume attach
SYNC_response res;
memset(&res, 0, sizeof(res));
- if (FSYNC_VolOp(volid, VPartitionPath(partp), FSYNC_VOL_NEEDVOLUME, mode, &res)
+ if (FSYNC_VolOp(volid, partp->name, FSYNC_VOL_NEEDVOLUME, mode, &res)
!= SYNC_OK) {
if (res.hdr.reason == FSYNC_SALVAGE) {
#if defined(AFS_DEMAND_ATTACH_FS) && defined(FSSYNC_BUILD_CLIENT)
if (!peek && *ec == 0 && retry == 0 && VMustCheckoutVolume(mode)) {
- code = FSYNC_VerifyCheckout(volid, VPartitionPath(partp), FSYNC_VOL_NEEDVOLUME, mode);
+ code = FSYNC_VerifyCheckout(volid, partp->name, FSYNC_VOL_NEEDVOLUME, mode);
if (code == SYNC_DENIED) {
/* must retry checkout; fileserver no longer thinks we have
/* check to see if we should set the specialStatus flag */
if (VVolOpSetVBusy_r(vp, vp->pending_vol_op)) {
- vp->specialStatus = VBUSY;
+ /* don't overwrite specialStatus if it was already set to
+ * something else (e.g. VMOVED) */
+ if (!vp->specialStatus) {
+ vp->specialStatus = VBUSY;
+ }
}
break;
if (*ec == VNOVOL) {
/* if the volume doesn't exist, skip straight to 'error' so we don't
* request a salvage */
- goto unlocked_error;
+ VOL_LOCK;
+ goto error_notbroken;
}
if (!*ec) {
read_header = 1;
- vp->specialStatus = (byte) (isbusy ? VBUSY : 0);
+ /* ensure that we don't override specialStatus if it was set to
+ * something else (e.g. VMOVED) */
+ if (isbusy && !vp->specialStatus) {
+ vp->specialStatus = VBUSY;
+ }
vp->shuttingDown = 0;
vp->goingOffline = 0;
vp->nUsers = 1;
if (!VCanScheduleSalvage()) {
Log("VAttachVolume: Error attaching volume %s; volume needs salvage; error=%u\n", path, *ec);
}
- VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER |
- VOL_SALVAGE_NO_OFFLINE);
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_NO_OFFLINE);
vp->nUsers = 0;
goto locked_error;
} else if (*ec) {
/* volume operation in progress */
- goto unlocked_error;
+ VOL_LOCK;
+ goto error_notbroken;
}
#else /* AFS_DEMAND_ATTACH_FS */
if (*ec) {
if (!VCanScheduleSalvage()) {
Log("VAttachVolume: volume salvage flag is ON for %s; volume needs salvage\n", path);
}
- VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER |
- VOL_SALVAGE_NO_OFFLINE);
+ VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_NO_OFFLINE);
vp->nUsers = 0;
#else /* AFS_DEMAND_ATTACH_FS */
if (!VCanScheduleSalvage()) {
Log("VAttachVolume: volume %s needs to be salvaged; not attached.\n", path);
}
- VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER |
- VOL_SALVAGE_NO_OFFLINE);
+ VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_NO_OFFLINE);
vp->nUsers = 0;
#else /* AFS_DEMAND_ATTACH_FS */
#if defined(AFS_DEMAND_ATTACH_FS)
/* schedule a salvage so the volume goes away on disk */
- VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER |
- VOL_SALVAGE_NO_OFFLINE);
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_NO_OFFLINE);
VChangeState_r(vp, VOL_STATE_ERROR);
vp->nUsers = 0;
forcefree = 1;
VGetBitmap_r(ec, vp, i);
if (*ec) {
#ifdef AFS_DEMAND_ATTACH_FS
- VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER |
- VOL_SALVAGE_NO_OFFLINE);
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_NO_OFFLINE);
vp->nUsers = 0;
#endif /* AFS_DEMAND_ATTACH_FS */
Log("VAttachVolume: error getting bitmap for volume (%s)\n",
"%lu; needs salvage\n", (int)*ec,
afs_printable_uint32_lu(V_id(vp)));
#ifdef AFS_DEMAND_ATTACH_FS
- VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER |
- VOL_SALVAGE_NO_OFFLINE);
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_NO_OFFLINE);
vp->nUsers = 0;
#else /* !AFS_DEMAND_ATTACH_FS */
*ec = VSALVAGE;
#ifdef AFS_DEMAND_ATTACH_FS
error_state = VOL_STATE_ERROR;
/* see if we can recover */
- VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER);
+ VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, 0 /*flags*/);
#endif
}
#ifdef AFS_DEMAND_ATTACH_FS
return vp;
+#ifndef AFS_DEMAND_ATTACH_FS
unlocked_error:
+#endif
+
VOL_LOCK;
locked_error:
#ifdef AFS_DEMAND_ATTACH_FS
if (!VIsErrorState(V_attachState(vp))) {
+ if (VIsErrorState(error_state)) {
+ Log("attach2: forcing vol %u to error state (state %u flags 0x%x ec %d)\n",
+ vp->hashid, V_attachState(vp), V_attachFlags(vp), *ec);
+ }
VChangeState_r(vp, error_state);
}
#endif /* AFS_DEMAND_ATTACH_FS */
VReleaseVolumeHandles_r(vp);
}
+ error_notbroken:
#ifdef AFS_DEMAND_ATTACH_FS
VCheckSalvage(vp);
if (forcefree) {
#ifdef AFS_PTHREAD_ENV
+static struct timespec *shutdown_timeout;
+static pthread_once_t shutdown_timeout_once = PTHREAD_ONCE_INIT;
+
static_inline int
VTimedOut(const struct timespec *ts)
{
return 1;
}
+/**
+ * Turn a relative timeout into an absolute deadline.
+ *
+ * @param[out] ts       receives the deadline when one applies; left untouched
+ *                      whenever NULL is returned
+ * @param[in]  timeout  seconds from now until the deadline; negative means no
+ *                      deadline, 0 produces an all-zero timespec
+ *
+ * @return deadline to use
+ *   @retval NULL      no deadline; wait forever
+ *   @retval non-NULL  "ts", now holding the deadline
+ *
+ * @internal
+ */
+static struct timespec *
+VCalcTimeout(struct timespec *ts, afs_int32 timeout)
+{
+    struct timeval tv;
+
+    if (timeout < 0) {
+        return NULL;
+    }
+
+    if (timeout == 0) {
+        /* all-zero deadline; presumably treated as already expired by
+         * whoever waits on it -- confirm against the timed-wait callers */
+        ts->tv_sec = 0;
+        ts->tv_nsec = 0;
+        return ts;
+    }
+
+    if (gettimeofday(&tv, NULL) != 0) {
+        Log("Error %d from gettimeofday, falling back to 'forever' timeout\n", errno);
+        return NULL;
+    }
+
+    /* timeval carries microseconds, timespec wants nanoseconds */
+    ts->tv_nsec = tv.tv_usec * 1000;
+    ts->tv_sec = tv.tv_sec + timeout;
+
+    return ts;
+}
+
+/**
+ * Initialize the shutdown_timeout global.
+ *
+ * Computes the deadline derived from vol_opts.offline_shutdown_timeout and
+ * stores it in shutdown_timeout. shutdown_timeout is left NULL (meaning
+ * "wait forever") when the option is negative, when VCalcTimeout cannot
+ * produce a deadline, or when the storage for the deadline cannot be
+ * allocated.
+ *
+ * @note passed to pthread_once() by VOfflineTimeout
+ */
+static void
+VShutdownTimeoutInit(void)
+{
+    struct timespec *ts;
+
+    if (vol_opts.offline_shutdown_timeout < 0) {
+        /* no timeout configured; VCalcTimeout would return NULL anyway, so
+         * skip the allocation altogether */
+        shutdown_timeout = NULL;
+        return;
+    }
+
+    ts = malloc(sizeof(*ts));
+    if (ts == NULL) {
+        /* VCalcTimeout writes through its first argument for any timeout
+         * >= 0, so it must never be handed a NULL pointer */
+        Log("VShutdownTimeoutInit: out of memory, falling back to 'forever' "
+            "timeout\n");
+        shutdown_timeout = NULL;
+        return;
+    }
+
+    shutdown_timeout = VCalcTimeout(ts, vol_opts.offline_shutdown_timeout);
+
+    if (!shutdown_timeout) {
+        /* no deadline was produced, so the storage is not needed */
+        free(ts);
+    }
+}
+
+/**
+ * Figure out the timeout that should be used for waiting for offline volumes.
+ *
+ * While vol_shutting_down is set, the shared shutdown deadline is used; it is
+ * computed once, on first use, via pthread_once(). Otherwise a fresh deadline
+ * is computed from vol_opts.offline_timeout into "ats".
+ *
+ * @param[out] ats Storage space for a local timeout value if needed
+ *
+ * @return The timeout value that should be used
+ *  @retval NULL     No timeout; wait forever for offlining volumes
+ *  @retval non-NULL A pointer to the absolute time that should be used as
+ *                   the deadline for waiting for offlining volumes.
+ *
+ * @note If we return non-NULL, the pointer we return may or may not be the
+ *       same as "ats"
+ */
+static const struct timespec *
+VOfflineTimeout(struct timespec *ats)
+{
+    int code;
+
+    if (!vol_shutting_down) {
+        return VCalcTimeout(ats, vol_opts.offline_timeout);
+    }
+
+    /* Run the one-time initializer as its own statement: it has a required
+     * side effect, so it must not depend on the assertion macro evaluating
+     * its argument. */
+    code = pthread_once(&shutdown_timeout_once, VShutdownTimeoutInit);
+    osi_Assert(code == 0);
+
+    return shutdown_timeout;
+}
+
#else /* AFS_PTHREAD_ENV */
/* Waiting a certain amount of time for offlining volumes is not supported
* for LWP due to a lack of primitives. So, we never time out */
# define VTimedOut(x) (0)
+# define VOfflineTimeout(x) (NULL)
#endif /* !AFS_PTHREAD_ENV */
return retVal;
}
-/* same as VGetVolume, but if a volume is waiting to go offline, we only wait
- * until time ts. If we have waited longer than that, we return that it is
- * actually offline, instead of waiting for it to go offline */
-Volume *
-VGetVolumeTimed(Error * ec, Error * client_ec, VolId volumeId,
- const struct timespec *ts)
-{
- Volume *retVal;
- VOL_LOCK;
- retVal = GetVolume(ec, client_ec, volumeId, NULL, ts);
- VOL_UNLOCK;
- return retVal;
-}
-
/**
* Get a volume reference associated with an RX call.
*
}
if (V_attachState(vp) == VOL_STATE_PREATTACHED) {
+ if (vp->specialStatus) {
+ *ec = vp->specialStatus;
+ vp = NULL;
+ break;
+ }
avp = VAttachVolumeByVp_r(ec, vp, 0);
if (avp) {
if (vp != avp) {
vp = NULL;
break;
}
-#endif
-#ifdef AFS_DEMAND_ATTACH_FS
+ if (VIsErrorState(V_attachState(vp))) {
+ /* make sure we don't take a vp in VOL_STATE_ERROR state and use
+ * it, or transition it out of that state */
+ if (!*ec) {
+ *ec = VNOVOL;
+ }
+ vp = NULL;
+ break;
+ }
+
/*
* this test MUST happen after VAttachVolymeByVp, so vol_op_state is
* not VolOpRunningUnknown (attach2 would have converted it to Online
vp->hashid);
#ifdef AFS_DEMAND_ATTACH_FS
if (VCanScheduleSalvage()) {
- VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, 0 /*flags*/);
} else {
FreeVolume(vp);
vp = NULL;
}
#ifdef AFS_DEMAND_ATTACH_FS
- VRequestSalvage_r(&error, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
+ VRequestSalvage_r(&error, vp, SALVSYNC_ERROR, 0 /*flags*/);
#endif /* AFS_DEMAND_ATTACH_FS */
#ifdef AFS_PTHREAD_ENV
VOL_UNLOCK;
}
+/**
+ * Iterate over the RX calls associated with a volume, and interrupt them.
+ *
+ * Does nothing when the volume has no registered calls, when no
+ * interrupt_rxcall callback is configured in vol_opts, or when
+ * VIsGoingOffline_r reports 0 for the volume. Otherwise each call on
+ * vp->rx_call_list is passed to the callback together with the value
+ * VIsGoingOffline_r returned.
+ *
+ * @param[in] vp The volume whose RX calls we want to scan
+ *
+ * @pre VOL_LOCK held
+ *
+ * @note For DAFS, VOL_LOCK is dropped while the calls are scanned and
+ *       retaken before returning; the volume sits in
+ *       VOL_STATE_SCANNING_RXCALLS in between and is put back into its
+ *       previous state afterwards.
+ */
+static void
+VScanCalls_r(struct Volume *vp)
+{
+ struct VCallByVol *cbv, *ncbv;
+ afs_int32 err;
+#ifdef AFS_DEMAND_ATTACH_FS
+ VolState state_save;
+#endif
+
+ if (queue_IsEmpty(&vp->rx_call_list))
+ return; /* no calls to interrupt */
+ if (!vol_opts.interrupt_rxcall)
+ return; /* we have no function with which to interrupt calls */
+ /* a nonzero result doubles as the error code handed to each call below */
+ err = VIsGoingOffline_r(vp);
+ if (!err)
+ return; /* we're not going offline anymore */
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* wait out any exclusive state, then claim our own state before letting
+ * go of VOL_LOCK for the duration of the scan */
+ VWaitExclusiveState_r(vp);
+ state_save = VChangeState_r(vp, VOL_STATE_SCANNING_RXCALLS);
+ VOL_UNLOCK;
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+ for(queue_Scan(&vp->rx_call_list, cbv, ncbv, VCallByVol)) {
+ /* the peer lookup is only needed for the log line, so skip it unless
+ * logging is turned up */
+ if (LogLevel > 0) {
+ struct rx_peer *peer;
+ /* NOTE(review): 16 bytes presumably sized for a dotted-quad IPv4
+ * address plus terminator -- confirm against afs_inet_ntoa_r */
+ char hoststr[16];
+ peer = rx_PeerOf(rx_ConnectionOf(cbv->call));
+
+ Log("Offlining volume %lu while client %s:%u is trying to read "
+ "from it; kicking client off with error %ld\n",
+ (long unsigned) vp->hashid,
+ afs_inet_ntoa_r(rx_HostOf(peer), hoststr),
+ (unsigned) ntohs(rx_PortOf(peer)),
+ (long) err);
+ }
+ (*vol_opts.interrupt_rxcall) (cbv->call, err);
+ }
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* retake the lock and restore the state we displaced above */
+ VOL_LOCK;
+ VChangeState_r(vp, state_save);
+#endif /* AFS_DEMAND_ATTACH_FS */
+}
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/**
+ * Wait for a vp to go offline.
+ *
+ * Waits in two phases: first with the deadline supplied by VOfflineTimeout
+ * (which may be "no deadline"); if that wait times out, VScanCalls_r is
+ * called for the volume and the wait resumes without a deadline.
+ *
+ * @param[out] ec 1 if a salvage on the volume has been requested and
+ * salvok == 0, 0 otherwise
+ * @param[in] vp The volume to wait for
+ * @param[in] salvok If 0, we return immediately with *ec = 1 if the volume
+ * has been requested to salvage. Otherwise we keep waiting
+ * until the volume has gone offline.
+ *
+ * @pre VOL_LOCK held
+ * @pre caller holds a lightweight ref on vp
+ *
+ * @note DAFS only
+ */
+static void
+VWaitForOfflineByVp_r(Error *ec, struct Volume *vp, int salvok)
+{
+ struct timespec timeout_ts;
+ const struct timespec *ts;
+ int timedout = 0;
+
+ /* ts may point at timeout_ts, at shared storage, or be NULL */
+ ts = VOfflineTimeout(&timeout_ts);
+
+ *ec = 0;
+
+ /* phase 1: wait for an offline state, giving up once the timed wait
+ * reports a timeout */
+ while (!VIsOfflineState(V_attachState(vp)) && !timedout) {
+ if (!salvok && vp->salvage.requested) {
+ *ec = 1;
+ return;
+ }
+ VTimedWaitStateChange_r(vp, ts, &timedout);
+ }
+ if (!timedout) {
+ /* we didn't time out, so the volume must be offline, so we're done */
+ return;
+ }
+
+ /* If we got here, we timed out waiting for the volume to go offline.
+ * Kick off the accessing RX calls and wait again */
+
+ VScanCalls_r(vp);
+
+ /* phase 2: same wait, but with no deadline */
+ while (!VIsOfflineState(V_attachState(vp))) {
+ if (!salvok && vp->salvage.requested) {
+ *ec = 1;
+ return;
+ }
+
+ VWaitStateChange_r(vp);
+ }
+}
+
+#else /* AFS_DEMAND_ATTACH_FS */
+
+/**
+ * Wait for a volume to go offline.
+ *
+ * Looks the volume up with the deadline supplied by VOfflineTimeout. If the
+ * volume is still going offline afterwards, VScanCalls_r is called for it and
+ * the volume is looked up once more, this time through VGetVolume_r.
+ *
+ * @param[out] ec error code as set by the GetVolume / VGetVolume_r lookups
+ * @param[in] volid id of the volume to wait for
+ *
+ * @pre VOL_LOCK held
+ *
+ * @note non-DAFS only (for DAFS, use @see VWaitForOfflineByVp_r)
+ */
+static void
+VWaitForOffline_r(Error *ec, VolumeId volid)
+{
+ struct Volume *vp;
+ const struct timespec *ts;
+#ifdef AFS_PTHREAD_ENV
+ /* only declared for pthread builds; in LWP builds VOfflineTimeout is a
+ * macro that discards its argument, so the name is never evaluated */
+ struct timespec timeout_ts;
+#endif
+
+ ts = VOfflineTimeout(&timeout_ts);
+
+ vp = GetVolume(ec, NULL, volid, NULL, ts);
+ if (!vp) {
+ /* error occurred so bad that we can't even get a vp; we have no
+ * information on the vol so we don't know whether to wait, so just
+ * return */
+ return;
+ }
+ if (!VIsGoingOffline_r(vp)) {
+ /* volume is no longer going offline, so we're done */
+ VPutVolume_r(vp);
+ return;
+ }
+
+ /* If we got here, we timed out waiting for the volume to go offline.
+ * Kick off the accessing RX calls and wait again */
+
+ VScanCalls_r(vp);
+ VPutVolume_r(vp);
+ vp = NULL;
+
+ /* NOTE(review): presumably this lookup blocks until the volume has
+ * finished going offline -- confirm against VGetVolume_r */
+ vp = VGetVolume_r(ec, volid);
+ if (vp) {
+ /* In case it was reattached... */
+ VPutVolume_r(vp);
+ }
+}
+#endif /* !AFS_DEMAND_ATTACH_FS */
+
/* The opposite of VAttachVolume. The volume header is written to disk, with
the inUse bit turned off. A copy of the header is maintained in memory,
however (which is why this is VOffline, not VDetach).
void
VOffline_r(Volume * vp, char *message)
{
-#ifndef AFS_DEMAND_ATTACH_FS
Error error;
+#ifndef AFS_DEMAND_ATTACH_FS
VolumeId vid = V_id(vp);
#endif
VChangeState_r(vp, VOL_STATE_GOING_OFFLINE);
VCreateReservation_r(vp);
VPutVolume_r(vp);
-
- /* wait for the volume to go offline */
- if (V_attachState(vp) == VOL_STATE_GOING_OFFLINE) {
- VWaitStateChange_r(vp);
- }
+ VWaitForOfflineByVp_r(&error, vp, 1);
VCancelReservation_r(vp);
#else /* AFS_DEMAND_ATTACH_FS */
VPutVolume_r(vp);
- vp = VGetVolume_r(&error, vid); /* Wait for it to go offline */
- if (vp) /* In case it was reattached... */
- VPutVolume_r(vp);
+ VWaitForOffline_r(&error, vid);
#endif /* AFS_DEMAND_ATTACH_FS */
}
void
VOfflineForVolOp_r(Error *ec, Volume *vp, char *message)
{
+ int salvok = 1;
osi_Assert(vp->pending_vol_op);
if (!V_inUse(vp)) {
VPutVolume_r(vp);
VCreateReservation_r(vp);
VPutVolume_r(vp);
- /* Wait for the volume to go offline */
- while (!VIsOfflineState(V_attachState(vp))) {
+ if (vp->pending_vol_op->com.programType != salvageServer) {
/* do not give corrupted volumes to the volserver */
- if (vp->salvage.requested && vp->pending_vol_op->com.programType != salvageServer) {
- *ec = 1;
- goto error;
- }
- VWaitStateChange_r(vp);
+ salvok = 0;
}
+
*ec = 0;
- error:
+ VWaitForOfflineByVp_r(ec, vp, salvok);
+
VCancelReservation_r(vp);
}
#endif /* AFS_DEMAND_ATTACH_FS */
notifyServer = vp->needsPutBack;
if (V_destroyMe(vp) == DESTROY_ME)
useDone = FSYNC_VOL_LEAVE_OFF;
-#ifdef AFS_DEMAND_ATTACH_FS
+# ifdef AFS_DEMAND_ATTACH_FS
else if (!V_blessed(vp) || !V_inService(vp))
useDone = FSYNC_VOL_LEAVE_OFF;
-#endif
+# endif
+ }
+# ifdef AFS_DEMAND_ATTACH_FS
+ if (V_needsSalvaged(vp)) {
+ notifyServer = 0;
+ VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, 0);
}
+# endif
tpartp = vp->partition;
volume = V_id(vp);
#endif /* FSSYNC_BUILD_CLIENT */
VolState state_save;
state_save = VChangeState_r(vp, VOL_STATE_OFFLINING);
+
+ VOL_UNLOCK;
#endif
- /* demand attach fs
- *
- * XXX need to investigate whether we can perform
- * DFlushVolume outside of vol_glock_mutex...
- *
- * VCloseVnodeFiles_r drops the glock internally */
DFlushVolume(vp->hashid);
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ VOL_LOCK;
+#endif
+
+ /* DAFS: VCloseVnodeFiles_r drops the glock internally */
VCloseVnodeFiles_r(vp);
#ifdef AFS_DEMAND_ATTACH_FS
VolState state_save;
state_save = VChangeState_r(vp, VOL_STATE_DETACHING);
+
+ VOL_UNLOCK;
#endif
- /* XXX need to investigate whether we can perform
- * DFlushVolume outside of vol_glock_mutex... */
DFlushVolume(vp->hashid);
- VReleaseVnodeFiles_r(vp); /* releases the glock internally */
+#ifdef AFS_DEMAND_ATTACH_FS
+ VOL_LOCK;
+#endif
+
+ VReleaseVnodeFiles_r(vp); /* DAFS: releases the glock internally */
#ifdef AFS_DEMAND_ATTACH_FS
VOL_UNLOCK;
* @param[in] flags see flags note below
*
* @note flags:
- * VOL_SALVAGE_INVALIDATE_HEADER causes volume header cache entry
- * to be invalidated.
+ * VOL_SALVAGE_NO_OFFLINE do not need to wait to offline the volume; it has
+ * not been fully attached
*
* @pre VOL_LOCK is held.
*
VOfflineForSalvage_r(vp);
}
}
+ /* If we are non-fileserver, we're telling the fileserver to
+ * salvage the vol, so we don't need to give it back separately. */
+ vp->needsPutBack = 0;
+
*ec = VSALVAGING;
} else {
Log("VRequestSalvage: volume %u online salvaged too many times; forced offline.\n", vp->hashid);
*ec = VSALVAGE;
code = 1;
}
- if (flags & VOL_SALVAGE_INVALIDATE_HEADER) {
- /* Instead of ReleaseVolumeHeader, we do FreeVolumeHeader()
- so that the the next VAttachVolumeByVp_r() invocation
- of attach2() will pull in a cached header
- entry and fail, then load a fresh one from disk and attach
- it to the volume.
- */
- FreeVolumeHeader(vp);
- }
}
return code;
}
VScheduleSalvage_r(Volume * vp)
{
int ret=0;
- int code;
+ int code = 0;
VolState state_save;
VThreadOptions_t * thread_opts;
char partName[16];
* set the volume to an exclusive state and drop the lock
* around the SALVSYNC call
*/
- strlcpy(partName, VPartitionPath(vp->partition), sizeof(partName));
+ strlcpy(partName, vp->partition->name, sizeof(partName));
state_save = VChangeState_r(vp, VOL_STATE_SALVSYNC_REQ);
VOL_UNLOCK;
Log("VScheduleSalvage_r: Salvage request for volume %lu "
"denied\n", afs_printable_uint32_lu(vp->hashid));
break;
+ case SYNC_FAILED:
+ Log("VScheduleSalvage_r: Salvage request for volume %lu "
+ "failed\n", afs_printable_uint32_lu(vp->hashid));
+ break;
default:
Log("VScheduleSalvage_r: Salvage request for volume %lu "
"received unknown protocol error %d\n",
VGetBitmap_r(ec, vp, i);
if (*ec) {
#ifdef AFS_DEMAND_ATTACH_FS
- VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, 0 /*flags*/);
#else /* AFS_DEMAND_ATTACH_FS */
DeleteVolumeFromHashTable(vp);
vp->shuttingDown = 1; /* Let who has it free it. */
}
void
-VFreeBitMapEntry_r(Error * ec, struct vnodeIndex *index,
- unsigned bitNumber)
+VFreeBitMapEntry_r(Error * ec, Volume *vp, struct vnodeIndex *index,
+ unsigned bitNumber, int flags)
{
unsigned int offset;
*ec = 0;
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (flags & VOL_FREE_BITMAP_WAIT) {
+ /* VAllocBitmapEntry_r allocs bitmap entries under an exclusive volume
+ * state, so ensure we're not in an exclusive volume state when we update
+ * the bitmap */
+ VCreateReservation_r(vp);
+ VWaitExclusiveState_r(vp);
+ }
+#endif
+
#ifdef BITMAP_LATER
if (!index->bitmap)
- return;
+ goto done;
#endif /* BITMAP_LATER */
+
offset = bitNumber >> 3;
if (offset >= index->bitmapSize) {
*ec = VNOVNODE;
- return;
+ goto done;
}
if (offset < index->bitmapOffset)
index->bitmapOffset = offset & ~3; /* Truncate to nearest bit32 */
*(index->bitmap + offset) &= ~(1 << (bitNumber & 0x7));
+
+ done:
+#ifdef AFS_DEMAND_ATTACH_FS
+ VCancelReservation_r(vp);
+#endif
+ return; /* make the compiler happy for non-DAFS */
}
+/**
+ * Locking wrapper around VFreeBitMapEntry_r.
+ *
+ * Takes VOL_LOCK, calls VFreeBitMapEntry_r with the VOL_FREE_BITMAP_WAIT
+ * flag, and releases VOL_LOCK again. The arguments are passed through
+ * unchanged.
+ */
void
-VFreeBitMapEntry(Error * ec, struct vnodeIndex *index,
+VFreeBitMapEntry(Error * ec, Volume *vp, struct vnodeIndex *index,
 unsigned bitNumber)
{
 VOL_LOCK;
- VFreeBitMapEntry_r(ec, index, bitNumber);
+ VFreeBitMapEntry_r(ec, vp, index, bitNumber, VOL_FREE_BITMAP_WAIT);
 VOL_UNLOCK;
}
struct DiskPartition64 *dp;
*ec = 0;
- name[0] = '/';
- (void)afs_snprintf(&name[1], (sizeof name) - 1, VFORMAT, afs_printable_uint32_lu(volumeId));
+ name[0] = OS_DIRSEPC;
+ snprintf(&name[1], (sizeof name) - 1, VFORMAT,
+ afs_printable_uint32_lu(volumeId));
for (dp = DiskPartitionList; dp; dp = dp->next) {
- struct afs_stat status;
+ struct afs_stat_st status;
strcpy(path, VPartitionPath(dp));
strcat(path, name);
if (afs_stat(path, &status) == 0) {
* @return volume number
*
* @note the string must be of the form VFORMAT. the only permissible
- * deviation is a leading '/' character.
+ * deviation is a leading OS_DIRSEPC character.
*
* @see VFORMAT
*/
int
VolumeNumber(char *name)
{
- if (*name == '/')
+ /* tolerate a single leading directory separator in front of the name */
+ if (*name == OS_DIRSEPC)
 name++;
- return atoi(name + 1);
+ /* name + 1 steps over the first character of the name (presumably the
+ * prefix letter of VFORMAT -- confirm against VFORMAT) before the decimal
+ * digits are parsed; the unsigned long result is converted to int by the
+ * return */
+ return strtoul(name + 1, NULL, 10);
}
/**
VolumeExternalName(VolumeId volumeId)
{
static char name[VMAXPATHLEN];
- (void)afs_snprintf(name, sizeof name, VFORMAT, afs_printable_uint32_lu(volumeId));
+ snprintf(name, sizeof name, VFORMAT, afs_printable_uint32_lu(volumeId));
return name;
}
int
VolumeExternalName_r(VolumeId volumeId, char * name, size_t len)
{
- return afs_snprintf(name, len, VFORMAT, afs_printable_uint32_lu(volumeId));
+ /* formats the volume id with VFORMAT into the caller-supplied buffer of
+ * "len" bytes and passes snprintf's return value back unchanged */
+ return snprintf(name, len, VFORMAT, afs_printable_uint32_lu(volumeId));
}
if (now - V_dayUseDate(vp) > OneDay)
VAdjustVolumeStatistics_r(vp);
/*
- * Save the volume header image to disk after every 128 bumps to dayUse.
+ * Save the volume header image to disk after a threshold of bumps to dayUse,
+ * at most every usage_rate_limit seconds.
*/
- if ((V_dayUse(vp)++ & 127) == 0) {
+ V_dayUse(vp)++;
+ vp->usage_bumps_outstanding++;
+ if (vp->usage_bumps_outstanding >= vol_opts.usage_threshold
+ && vp->usage_bumps_next_write <= now) {
Error error;
+ vp->usage_bumps_outstanding = 0;
+ vp->usage_bumps_next_write = now + vol_opts.usage_rate_limit;
VUpdateVolume_r(&error, vp, VOL_UPDATE_WAIT);
}
}
ReleaseVolumeHeader(hp++);
}
+/**
+ * Take a volume header off of the volume header LRU.
+ *
+ * @return volume header, already removed from the LRU queue
+ *  @retval NULL no usable volume header is available on the LRU
+ *
+ * @pre VOL_LOCK held
+ *
+ * @post for DAFS, if the returned header is associated with a volume, that
+ *       volume is NOT in an exclusive state
+ *
+ * @internal volume package internal use only.
+ */
+#ifdef AFS_DEMAND_ATTACH_FS
+static struct volHeader*
+GetVolHeaderFromLRU(void)
+{
+    struct volHeader *cur, *next;
+
+    /* A header whose volume is in an exclusive state is not normally found
+     * on the LRU, but it can happen (e.g. when a salvage request arrives
+     * over FSSYNC, and possibly in other corner cases). Rather than waiting
+     * with VWaitExclusiveState_r, such headers are simply passed over, which
+     * is both faster and simpler. */
+    for (queue_Scan(&volume_hdr_LRU, cur, next, volHeader)) {
+        if (cur->back && VIsExclusiveState(V_attachState(cur->back))) {
+            continue;
+        }
+
+        /* first header that is unattached or whose volume is not in an
+         * exclusive state: claim it */
+        queue_Remove(cur);
+        return cur;
+    }
+
+    return NULL;
+}
+#else /* AFS_DEMAND_ATTACH_FS */
+static struct volHeader*
+GetVolHeaderFromLRU(void)
+{
+    struct volHeader *head;
+
+    if (queue_IsEmpty(&volume_hdr_LRU)) {
+        return NULL;
+    }
+
+    /* without DAFS there are no exclusive states; the head of the queue
+     * is always usable */
+    head = queue_First(&volume_hdr_LRU, volHeader);
+    queue_Remove(head);
+    return head;
+}
+#endif /* !AFS_DEMAND_ATTACH_FS */
+
/**
* get a volume header and attach it to the volume object.
*
V_attachFlags(vp) &= ~(VOL_HDR_IN_LRU);
#endif
} else {
- /* we need to grab a new element off the LRU */
- if (queue_IsNotEmpty(&volume_hdr_LRU)) {
- /* grab an element and pull off of LRU */
- hd = queue_First(&volume_hdr_LRU, volHeader);
- queue_Remove(hd);
- } else {
+ hd = GetVolHeaderFromLRU();
+ if (!hd) {
/* LRU is empty, so allocate a new volHeader
* this is probably indicative of a leak, so let the user know */
hd = (struct volHeader *)calloc(1, sizeof(struct volHeader));
* be sync'd out to disk */
#ifdef AFS_DEMAND_ATTACH_FS
- /* if hd->back were in an exclusive state, then
- * its volHeader would not be on the LRU... */
+ /* GetVolHeaderFromLRU had better not give us back a header
+ * with a volume in exclusive state... */
osi_Assert(!VIsExclusiveState(V_attachState(hd->back)));
#endif
+/**
+ * Log usage statistics for the large and small vnode caches and for the
+ * volume header cache.
+ */
void
VPrintCacheStats_r(void)
{
- afs_uint32 get_hi, get_lo, load_hi, load_lo;
 struct VnodeClassInfo *vcp;
 vcp = &VnodeClassInfo[vLarge];
 Log("Large vnode cache, %d entries, %d allocs, %d gets (%d reads), %d writes\n", vcp->cacheSize, vcp->allocs, vcp->gets, vcp->reads, vcp->writes);
 vcp = &VnodeClassInfo[vSmall];
 Log("Small vnode cache,%d entries, %d allocs, %d gets (%d reads), %d writes\n", vcp->cacheSize, vcp->allocs, vcp->gets, vcp->reads, vcp->writes);
- SplitInt64(VStats.hdr_gets, get_hi, get_lo);
- SplitInt64(VStats.hdr_loads, load_hi, load_lo);
- Log("Volume header cache, %d entries, %d gets, %d replacements\n",
- VStats.hdr_cache_size, get_lo, load_lo);
+ /* hdr_gets and hdr_loads are printed in full through AFS_INT64_FMT rather
+ * than being split so that only their low halves are logged */
+ Log("Volume header cache, %d entries, %"AFS_INT64_FMT" gets, "
+ "%"AFS_INT64_FMT" replacements\n",
+ VStats.hdr_cache_size, VStats.hdr_gets, VStats.hdr_loads);
}
void
#define ENUMTOSTRING(en) #en
#define ENUMCASE(en) \
- case en: \
- return ENUMTOSTRING(en); \
- break
+ case en: return ENUMTOSTRING(en)
static char *
vlru_idx_to_string(int idx)