#include <afs/errors.h>
#include "daemon_com.h"
#include "fssync.h"
+#include "salvsync.h"
#include "lwp.h"
#include "lock.h"
#include <afs/afssyscalls.h>
"FSSYNC", /* protocol name string */
};
+#ifdef AFS_DEMAND_ATTACH_FS
+/**
+ * one entry in the queue of volumes waiting to be salvaged in the
+ * background.
+ *
+ * Nodes are allocated and appended by FSYNC_backgroundSalvage, and are
+ * removed and freed by FSYNC_salvageThread.
+ */
+struct fsync_salv_node {
+ struct rx_queue q; /**< queue linkage; nodes are chained on fsync_salv.head */
+ Volume *vp; /**< volume to salvage */
+ unsigned char update_salv_prio; /**< whether we should update the salvage priority or not */
+};
+/**
+ * state shared between the FSSYNC handler code and FSYNC_salvageThread.
+ *
+ * FSYNC_backgroundSalvage appends nodes to head and broadcasts on cv;
+ * FSYNC_salvageThread waits on cv (via VOL_CV_WAIT) while head is empty.
+ */
+static struct {
+ struct rx_queue head; /**< queue of struct fsync_salv_node awaiting the salvage thread */
+ pthread_cond_t cv; /**< signalled whenever a node is appended to head */
+} fsync_salv;
+
+static void * FSYNC_salvageThread(void *);
+static void FSYNC_backgroundSalvage(Volume *vp);
+#endif /* AFS_DEMAND_ATTACH_FS */
/* Forward declarations */
static void * FSYNC_sync(void *);
(FSYNC_sync, USUAL_STACK_SIZE, USUAL_PRIORITY, (void *)0,
"FSYNC_sync", &pid) == LWP_SUCCESS);
#endif /* AFS_PTHREAD_ENV */
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ queue_Init(&fsync_salv.head);
+ assert(pthread_cond_init(&fsync_salv.cv, NULL) == 0);
+ assert(pthread_create(&tid, &tattr, FSYNC_salvageThread, NULL) == 0);
+#endif /* AFS_DEMAND_ATTACH_FS */
}
#if defined(HAVE_POLL) && defined(AFS_PTHREAD_ENV)
return NULL; /* hush now, little gcc */
}
+#ifdef AFS_DEMAND_ATTACH_FS
+/**
+ * worker thread that processes volumes queued for background salvage.
+ *
+ * FSSYNC handlers cannot issue SALVSYNC requests themselves without
+ * risking deadlock, so they hand volumes to this thread through the
+ * fsync_salv queue instead.
+ *
+ * The thread holds VOL_LOCK while it works and sleeps on fsync_salv.cv
+ * whenever the queue is empty.
+ *
+ * @param[in] args  unused
+ *
+ * @return the loop below has no exit, so the trailing unlock/return only
+ *         exist to satisfy the thread-function signature
+ *
+ * @note DEMAND_ATTACH_FS only
+ */
+static void *
+FSYNC_salvageThread(void * args)
+{
+    VOL_LOCK;
+
+    for (;;) {
+        struct fsync_salv_node *entry;
+        Volume *vol;
+
+        /* nothing queued: sleep until FSYNC_backgroundSalvage wakes us */
+        while (queue_IsEmpty(&fsync_salv.head)) {
+            VOL_CV_WAIT(&fsync_salv.cv);
+        }
+
+        /* take the oldest request off the queue */
+        entry = queue_First(&fsync_salv.head, fsync_salv_node);
+        queue_Remove(entry);
+        vol = entry->vp;
+
+        if (entry->update_salv_prio && VUpdateSalvagePriority_r(vol)) {
+            ViceLog(0, ("FSYNC_salvageThread: unable to raise salvage priority "
+                        "for volume %lu\n", afs_printable_uint32_lu(vol->hashid)));
+        }
+
+        free(entry);
+        entry = NULL;
+
+        /* drop the reservation taken in FSYNC_backgroundSalvage */
+        VCancelReservation_r(vol);
+    }
+
+    VOL_UNLOCK;
+
+    return NULL;
+}
+
+/**
+ * salvage a volume in the background.
+ *
+ * Salvages cannot be scheduled directly from the main FSYNC thread, so
+ * instead call this function to schedule a salvage asynchronously in the
+ * FSYNC_salvageThread thread.
+ *
+ * A reservation is taken on the volume here; FSYNC_salvageThread cancels
+ * it once it has processed the queued node.
+ *
+ * @param[in] vp  pointer to the volume to salvage
+ *
+ * @pre VOL_LOCK held
+ *
+ * @note DEMAND_ATTACH_FS only
+ */
+static void
+FSYNC_backgroundSalvage(Volume *vp)
+{
+    struct fsync_salv_node *node;
+    Error ec;
+
+    VCreateReservation_r(vp);
+
+    node = malloc(sizeof(*node));
+    /* fail loudly on allocation failure rather than dereferencing NULL */
+    assert(node != NULL);
+    node->vp = vp;
+
+    /* Save this value, to know if we should VUpdateSalvagePriority_r.
+     * We need to save it here, since VRequestSalvage_r will change it. */
+    node->update_salv_prio = vp->salvage.requested;
+
+    if (VRequestSalvage_r(&ec, vp, SALVSYNC_ERROR, 0)) {
+        ViceLog(0, ("FSYNC_backgroundSalvage: unable to request salvage for volume %lu\n",
+                    afs_printable_uint32_lu(vp->hashid)));
+    }
+
+    /* hand the node to FSYNC_salvageThread and wake it up */
+    queue_Append(&fsync_salv.head, node);
+    assert(pthread_cond_broadcast(&fsync_salv.cv) == 0);
+}
+#endif /* AFS_DEMAND_ATTACH_FS */
+
static void
FSYNC_newconnection(osi_socket afd)
{
/* enforce mutual exclusion for volume ops */
if (vp->pending_vol_op) {
if (vp->pending_vol_op->com.programType != type) {
+ if (vp->pending_vol_op->com.command == FSYNC_VOL_OFF &&
+ vp->pending_vol_op->com.reason == FSYNC_SALVAGE) {
+
+ Log("denying offline request for volume %lu; volume is salvaging\n",
+ afs_printable_uint32_lu(vp->hashid));
+
+ res->hdr.reason = FSYNC_SALVAGE;
+ goto deny;
+ }
Log("volume %u already checked out\n", vp->hashid);
/* XXX debug */
Log("vp->vop = { com = { ver=%u, prog=%d, com=%d, reason=%d, len=%u, flags=0x%x }, vop = { vol=%u, part='%s' } }\n",
/* filter based upon requestor
*
- * volume utilities are not allowed to check out volumes
- * which are in an error state
+ * volume utilities / volserver are not allowed to check out
+ * volumes which are in an error state
*
* unknown utility programs will be denied on principal
*/
break;
case volumeUtility:
+ case volumeServer:
+ if (V_attachState(vp) == VOL_STATE_SALVAGING ||
+ vp->salvage.requested) {
+
+ Log("denying offline request for volume %lu; volume is in salvaging state\n",
+ afs_printable_uint32_lu(vp->hashid));
+ res->hdr.reason = FSYNC_SALVAGE;
+
+ /* the volume hasn't been checked out yet by the salvager,
+ * but we think the volume is salvaging; schedule a
+ * salvage to update the salvage priority */
+ FSYNC_backgroundSalvage(vp);
+
+ goto deny;
+ }
if (VIsErrorState(V_attachState(vp))) {
goto deny;
}
- if (vp->salvage.requested) {
- goto deny;
- }
break;
default:
vcom->hdr->reason == V_CLONE ? "clone" :
vcom->hdr->reason == V_READONLY ? "readonly" :
vcom->hdr->reason == V_DUMP ? "dump" :
+ vcom->hdr->reason == FSYNC_SALVAGE ? "salvage" :
"UNKNOWN");
}
#ifdef AFS_DEMAND_ATTACH_FS
if (FSYNC_partMatch(vcom, vp, 0)) {
/* null out salvsync control state, as it's no longer relevant */
memset(&vp->salvage, 0, sizeof(vp->salvage));
- VChangeState_r(vp, VOL_STATE_ERROR);
+ VDeregisterVolOp_r(vp);
+
+ if (vcom->hdr->reason == FSYNC_SALVAGE) {
+ FSYNC_backgroundSalvage(vp);
+ } else {
+ VChangeState_r(vp, VOL_STATE_ERROR);
+ }
+
code = SYNC_OK;
} else {
res->hdr.reason = FSYNC_WRONG_PART;
/* online salvager */
static int VCheckSalvage(register Volume * vp);
-static int VUpdateSalvagePriority_r(Volume * vp);
#ifdef SALVSYNC_BUILD_CLIENT
static int VScheduleSalvage_r(Volume * vp);
#endif
opts->nLargeVnodes = 0;
opts->nSmallVnodes = 0;
+ opts->canScheduleSalvage = 1;
opts->canUseFSSYNC = 1;
break;
DiskToVolumeHeader(&iheader, &diskHeader);
#ifdef FSSYNC_BUILD_CLIENT
if (VCanUseFSSYNC() && mode != V_SECRETLY && mode != V_PEEK) {
+ SYNC_response res;
+ memset(&res, 0, sizeof(res));
+
VOL_LOCK;
- if (FSYNC_VolOp(iheader.id, partition, FSYNC_VOL_NEEDVOLUME, mode, NULL)
+ if (FSYNC_VolOp(iheader.id, partition, FSYNC_VOL_NEEDVOLUME, mode, &res)
!= SYNC_OK) {
- Log("VAttachVolume: attach of volume %u apparently denied by file server\n", iheader.id);
- *ec = VNOVOL; /* XXXX */
+
+ if (res.hdr.reason == FSYNC_SALVAGE) {
+ Log("VAttachVolume: file server says volume %u is salvaging\n",
+ iheader.id);
+ *ec = VSALVAGING;
+ } else {
+ Log("VAttachVolume: attach of volume %u apparently denied by file server\n",
+ iheader.id);
+ *ec = VNOVOL; /* XXXX */
+ }
+
goto done;
}
VOL_UNLOCK;
if (!vp) {
vp = (Volume *) calloc(1, sizeof(Volume));
assert(vp != NULL);
+ vp->hashid = volumeId;
vp->device = partp->device;
vp->partition = partp;
queue_Init(&vp->vnode_list);
#ifdef FSSYNC_BUILD_CLIENT
if (VCanUseFSSYNC() && vp == NULL &&
mode != V_SECRETLY && mode != V_PEEK) {
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* If we couldn't attach but we scheduled a salvage, we already
+ * notified the fileserver; don't online it now */
+ if (*ec != VSALVAGING)
+#endif /* AFS_DEMAND_ATTACH_FS */
FSYNC_VolOp(iheader.id, partition, FSYNC_VOL_ON, 0, NULL);
} else
#endif
#if defined(AFS_DEMAND_ATTACH_FS)
if (*ec && ((*ec != VOFFLINE) || (V_attachState(vp) != VOL_STATE_UNATTACHED))) {
VOL_LOCK;
- if (VCanScheduleSalvage()) {
- VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
- vp->nUsers = 0;
- } else {
+ if (!VCanScheduleSalvage()) {
Log("VAttachVolume: Error attaching volume %s; volume needs salvage; error=%u\n", path, *ec);
- FreeVolume(vp);
- *ec = VSALVAGE;
}
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
+ vp->nUsers = 0;
+
+ VCheckFree(vp);
return NULL;
} else if (*ec) {
/* volume operation in progress */
VOL_LOCK;
+ VCheckFree(vp);
return NULL;
}
#else /* AFS_DEMAND_ATTACH_FS */
vp->specialStatus = 0;
VOL_LOCK;
#if defined(AFS_DEMAND_ATTACH_FS)
- if (VCanScheduleSalvage()) {
- VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER);
- vp->nUsers = 0;
- } else {
+ if (!VCanScheduleSalvage()) {
Log("VAttachVolume: volume salvage flag is ON for %s; volume needs salvage\n", path);
- FreeVolume(vp);
- *ec = VSALVAGE;
}
+ VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER);
+ vp->nUsers = 0;
+
+ VCheckFree(vp);
#else /* AFS_DEMAND_ATTACH_FS */
FreeVolume(vp);
*ec = VSALVAGE;
VUpdateVolume_r(ec, vp, 0);
}
#if defined(AFS_DEMAND_ATTACH_FS)
+ if (!VCanScheduleSalvage()) {
+ Log("VAttachVolume: volume %s needs to be salvaged; not attached.\n", path);
+ }
VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER);
vp->nUsers = 0;
+
+ VCheckFree(vp);
#else /* AFS_DEMAND_ATTACH_FS */
Log("VAttachVolume: volume %s needs to be salvaged; not attached.\n", path);
FreeVolume(vp);
#ifdef AFS_DEMAND_ATTACH_FS
VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
vp->nUsers = 0;
+ VCheckFree(vp);
#else /* AFS_DEMAND_ATTACH_FS */
FreeVolume(vp);
#endif /* AFS_DEMAND_ATTACH_FS */
/* online salvager routines */
/***************************************************/
#if defined(AFS_DEMAND_ATTACH_FS)
-#define SALVAGE_PRIO_UPDATE_INTERVAL 3 /**< number of seconds between prio updates */
-#define SALVAGE_COUNT_MAX 16 /**< number of online salvages we
- * allow before moving the volume
- * into a permanent error state
- *
- * once this threshold is reached,
- * the operator will have to manually
- * issue a 'bos salvage' to bring
- * the volume back online
- */
-
/**
* check whether a salvage needs to be performed on this volume.
*
return 1;
}
+ if (programType != fileServer) {
+#ifdef FSSYNC_BUILD_CLIENT
+ if (VCanUseFSSYNC()) {
+ /*
+ * If we aren't the fileserver, tell the fileserver the volume
+ * needs to be salvaged. We could directly tell the
+ * salvageserver, but the fileserver keeps track of some stats
+ * related to salvages, and handles some other salvage-related
+ * complications for us.
+ */
+
+ /*
+ * You might wonder why we don't check for
+ * VIsSalvager(V_inUse(vp)) here, since we do check for that
+ * in the fileServer case (below). The reason is that the
+ * below check is done since the fileServer can't tell if a
+ * salvage is still running or not when V_inUse refers to a
+ * salvaging program. However, if we are a non-fileserver,
+ * to get here we must have checked out the volume from the
+ * fileserver and locked the partition, meaning there must
+ * be no salvager running; so we just always try to salvage
+ */
+
+ code = FSYNC_VolOp(vp->hashid, vp->partition->name,
+ FSYNC_VOL_FORCE_ERROR, FSYNC_SALVAGE, NULL);
+ if (code == SYNC_OK) {
+ *ec = VSALVAGING;
+ return 0;
+ }
+ Log("VRequestSalvage: force error salvage state of volume %u"
+ " denied by fileserver\n", vp->hashid);
+
+ /* fall through to error condition below */
+ }
+#endif /* FSSYNC_BUILD_CLIENT */
+ VChangeState_r(vp, VOL_STATE_ERROR);
+ *ec = VSALVAGE;
+ return 1;
+ }
+
if (!vp->salvage.requested) {
vp->salvage.requested = 1;
vp->salvage.reason = reason;
vp->stats.last_salvage = FT_ApproxTime();
- if (VIsSalvager(V_inUse(vp))) {
+
+ if (vp->header && VIsSalvager(V_inUse(vp))) {
+ /* Right now we can't tell for sure if this indicates a
+ * salvage is running, or if a running salvage crashed, so
+ * we always ERROR the volume in case a salvage is running.
+ * Once we get rid of the partition lock and instead lock
+ * individual volume header files for salvages, we will
+ * probably be able to tell if a salvage is running, and we
+ * can do away with this behavior. */
Log("VRequestSalvage: volume %u appears to be salvaging, but we\n", vp->hashid);
Log(" didn't request a salvage. Forcing it offline waiting for the\n");
Log(" salvage to finish; if you are sure no salvage is running,\n");
*ec = VSALVAGING;
} else {
Log("VRequestSalvage: volume %u online salvaged too many times; forced offline.\n", vp->hashid);
+
+ /* make sure neither VScheduleSalvage_r nor
+ * VUpdateSalvagePriority_r try to schedule another salvage */
+ vp->salvage.requested = vp->salvage.scheduled = 0;
+
VChangeState_r(vp, VOL_STATE_ERROR);
*ec = VSALVAGE;
code = 1;
*
* @internal volume package internal use only.
*/
-static int
+int
VUpdateSalvagePriority_r(Volume * vp)
{
int ret=0;
afs_uint32 last_vol_op; /**< unix timestamp of last volume operation */
} VolumeStats;
+
+#define SALVAGE_PRIO_UPDATE_INTERVAL 3 /**< number of seconds between prio updates */
+#define SALVAGE_COUNT_MAX 16 /**< number of online salvages we
+ * allow before moving the volume
+ * into a permanent error state
+ *
+ * once this threshold is reached,
+ * the operator will have to manually
+ * issue a 'bos salvage' to bring
+ * the volume back online
+ */
+
/**
* DAFS online salvager state.
*/
extern void VLRU_SetOptions(int option, afs_uint32 val);
extern int VSetVolHashSize(int logsize);
extern int VRequestSalvage_r(Error * ec, Volume * vp, int reason, int flags);
+extern int VUpdateSalvagePriority_r(Volume * vp);
extern int VRegisterVolOp_r(Volume * vp, FSSYNC_VolOp_info * vopinfo);
extern int VDeregisterVolOp_r(Volume * vp);
extern void VCancelReservation_r(Volume * vp);
#include <afs/dir.h>
#include <afs/afsutil.h>
#include <afs/vol_prototypes.h>
+#include <afs/errors.h>
#include "volser.h"
#include "voltrans_inline.h"
return 0;
}
+/**
+ * attach a volume by name, retrying while the volume is being salvaged.
+ *
+ * @param[out] ec         error code from the last attach attempt; a
+ *                        lingering VSALVAGING is reported as VSALVAGE
+ * @param[in]  partition  partition name passed to VAttachVolumeByName
+ * @param[in]  name       volume name passed to VAttachVolumeByName
+ * @param[in]  mode       attach mode passed to VAttachVolumeByName
+ *
+ * @return whatever the last VAttachVolumeByName call returned
+ */
+static struct Volume *
+VAttachVolumeByName_retry(Error *ec, char *partition, char *name, int mode)
+{
+    struct Volume *vol;
+#ifdef AFS_DEMAND_ATTACH_FS
+    int tries_left = SALVAGE_COUNT_MAX*2;
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+    *ec = 0;
+    vol = VAttachVolumeByName(ec, partition, name, mode);
+
+#ifdef AFS_DEMAND_ATTACH_FS
+    /*
+     * The fileserver keeps track of how many demand-salvages have been
+     * performed, and forces the volume to ERROR once there have been too
+     * many. The retry limit here is only a failsafe against retrying
+     * forever; it is set to twice SALVAGE_COUNT_MAX so that we make at
+     * least SALVAGE_COUNT_MAX attempts and do not give up prematurely.
+     */
+    while (tries_left > 0 && *ec == VSALVAGING) {
+        tries_left--;
+        sleep(SALVAGE_PRIO_UPDATE_INTERVAL);
+        vol = VAttachVolumeByName(ec, partition, name, mode);
+    }
+
+    /* still salvaging after every retry: report a plain salvage error */
+    if (*ec == VSALVAGING) {
+        *ec = VSALVAGE;
+    }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+    return vol;
+}
+
+/**
+ * attach a volume by id, retrying while the volume is being salvaged.
+ *
+ * @param[out] ec      error code from the last attach attempt; a
+ *                     lingering VSALVAGING is reported as VSALVAGE
+ * @param[in]  avolid  volume id passed to VAttachVolume
+ * @param[in]  amode   attach mode passed to VAttachVolume
+ *
+ * @return whatever the last VAttachVolume call returned
+ */
+static struct Volume *
+VAttachVolume_retry(Error *ec, afs_uint32 avolid, int amode)
+{
+    struct Volume *vol;
+#ifdef AFS_DEMAND_ATTACH_FS
+    int tries_left = SALVAGE_COUNT_MAX*2;
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+    *ec = 0;
+    vol = VAttachVolume(ec, avolid, amode);
+
+#ifdef AFS_DEMAND_ATTACH_FS
+    /*
+     * The retry limit is only a failsafe against retrying forever; it is
+     * twice SALVAGE_COUNT_MAX so that we make at least SALVAGE_COUNT_MAX
+     * attach attempts before giving up.
+     */
+    while (tries_left > 0 && *ec == VSALVAGING) {
+        tries_left--;
+        sleep(SALVAGE_PRIO_UPDATE_INTERVAL);
+        vol = VAttachVolume(ec, avolid, amode);
+    }
+
+    /* still salvaging after every retry: report a plain salvage error */
+    if (*ec == VSALVAGING) {
+        *ec = VSALVAGE;
+    }
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+    return vol;
+}
+
/* the only attach function that takes a partition is "...ByName", so we use it */
-struct Volume *
+static struct Volume *
XAttachVolume(afs_int32 *error, afs_uint32 avolid, afs_int32 apartid, int amode)
{
char pbuf[30], vbuf[20];
- register struct Volume *tv;
if (ConvertPartition(apartid, pbuf, sizeof(pbuf))) {
*error = EINVAL;
*error = EINVAL;
return NULL;
}
- tv = VAttachVolumeByName((Error *)error, pbuf, vbuf, amode);
- return tv;
+
+ return VAttachVolumeByName_retry((Error *)error, pbuf, vbuf, amode);
}
/* Adapted from the file server; create a root directory for this volume */
if (purgeId) {
- purgevp = VAttachVolume(&error, purgeId, V_VOLUPD);
+ purgevp = VAttachVolume_retry(&error, purgeId, V_VOLUPD);
if (error) {
Log("1 Volser: Clone: Could not attach 'purge' volume %u; clone aborted\n", purgeId);
goto fail;
goto fail;
}
- clonevp = VAttachVolume(&error, cloneId, V_VOLUPD);
+ clonevp = VAttachVolume_retry(&error, cloneId, V_VOLUPD);
if (error) {
Log("1 Volser: can't attach clone %d\n", cloneId);
goto fail;
}
/* Get volume from volserver */
- tv = VAttachVolumeByName(&error, pname, volname, V_PEEK);
+ tv = VAttachVolumeByName_retry(&error, pname, volname, V_PEEK);
if (error) {
Log("1 Volser: GetVolInfo: Could not attach volume %u (%s:%s) error=%d\n",
volumeId, pname, volname, error);