From fe07f017f75b2ce0f783bbd8f31eb5b1bcd7c61b Mon Sep 17 00:00:00 2001 From: Andrew Deason Date: Fri, 6 Nov 2009 14:05:16 -0600 Subject: [PATCH] DAFS: Allow non-fileserver to schedule salvages Allow non-fileserver programs to schedule salvages through the fileserver via FSSYNC (VOL_FORCE_ERROR with the FSYNC_SALVAGE reason code). Also make the volserver schedule salvages this way when it encounters the appropriate errors. FIXES 124484 Change-Id: I03ecf6302436c35fec705cd6c84a40b7cdbf6f97 Reviewed-on: http://gerrit.openafs.org/787 Reviewed-by: Andrew Deason Tested-by: Andrew Deason Reviewed-by: Derrick Brashear --- src/vol/fssync-server.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++-- src/vol/volume.c | 124 +++++++++++++++++++++++++++++---------- src/vol/volume.h | 13 +++++ src/volser/volprocs.c | 73 ++++++++++++++++++++--- 4 files changed, 318 insertions(+), 42 deletions(-) diff --git a/src/vol/fssync-server.c b/src/vol/fssync-server.c index c88b9f0..b8d3194 100644 --- a/src/vol/fssync-server.c +++ b/src/vol/fssync-server.c @@ -72,6 +72,7 @@ #include #include "daemon_com.h" #include "fssync.h" +#include "salvsync.h" #include "lwp.h" #include "lock.h" #include @@ -120,6 +121,23 @@ static SYNC_server_state_t fssync_server_state = "FSSYNC", /* protocol name string */ }; +#ifdef AFS_DEMAND_ATTACH_FS +/** + * a queue of volume pointers to salvage in the background. 
+ */ +struct fsync_salv_node { + struct rx_queue q; + Volume *vp; /**< volume to salvage */ + unsigned char update_salv_prio; /**< whether we should update the salvage priority or not */ +}; +static struct { + struct rx_queue head; + pthread_cond_t cv; +} fsync_salv; + +static void * FSYNC_salvageThread(void *); +static void FSYNC_backgroundSalvage(Volume *vp); +#endif /* AFS_DEMAND_ATTACH_FS */ /* Forward declarations */ static void * FSYNC_sync(void *); @@ -203,6 +221,12 @@ FSYNC_fsInit(void) (FSYNC_sync, USUAL_STACK_SIZE, USUAL_PRIORITY, (void *)0, "FSYNC_sync", &pid) == LWP_SUCCESS); #endif /* AFS_PTHREAD_ENV */ + +#ifdef AFS_DEMAND_ATTACH_FS + queue_Init(&fsync_salv.head); + assert(pthread_cond_init(&fsync_salv.cv, NULL) == 0); + assert(pthread_create(&tid, &tattr, FSYNC_salvageThread, NULL) == 0); +#endif /* AFS_DEMAND_ATTACH_FS */ } #if defined(HAVE_POLL) && defined(AFS_PTHREAD_ENV) @@ -306,6 +330,91 @@ FSYNC_sync(void * args) return NULL; /* hush now, little gcc */ } +#ifdef AFS_DEMAND_ATTACH_FS +/** + * thread for salvaging volumes in the background. + * + * Since FSSYNC handlers cannot issue SALVSYNC requests in order to avoid + * deadlock issues, this thread exists so code in the FSSYNC handler thread + * can hand off volumes to be salvaged in the background. 
+ * + * @param[in] args unused + * + * @note DEMAND_ATTACH_FS only + */ +static void * +FSYNC_salvageThread(void * args) +{ + Volume *vp; + struct fsync_salv_node *node; + + VOL_LOCK; + + for (;;) { + while (queue_IsEmpty(&fsync_salv.head)) { + VOL_CV_WAIT(&fsync_salv.cv); + } + + node = queue_First(&fsync_salv.head, fsync_salv_node); + queue_Remove(node); + + vp = node->vp; + if (node->update_salv_prio) { + if (VUpdateSalvagePriority_r(vp)) { + ViceLog(0, ("FSYNC_salvageThread: unable to raise salvage priority " + "for volume %lu\n", afs_printable_uint32_lu(vp->hashid))); + } + } + + free(node); + node = NULL; + + VCancelReservation_r(vp); + } + + VOL_UNLOCK; + + return NULL; +} + +/** + * salvage a volume in the background. + * + * Salvages cannot be scheduled directly from the main FSYNC thread, so + * instead call this function to schedule a salvage asynchronously in the + * FSYNC_salvageThread thread. + * + * @param[in] vp pointer to volume to salvage + * + * @pre VOL_LOCK held + * + * @note DEMAND_ATTACH_FS only + */ +static void +FSYNC_backgroundSalvage(Volume *vp) +{ + struct fsync_salv_node *node; + Error ec; + + VCreateReservation_r(vp); + + node = malloc(sizeof(struct fsync_salv_node)); + node->vp = vp; + + /* Save this value, to know if we should VUpdateSalvagePriority_r. + * We need to save it here, since VRequestSalvage_r will change it. 
*/ + node->update_salv_prio = vp->salvage.requested; + + if (VRequestSalvage_r(&ec, vp, SALVSYNC_ERROR, 0)) { + ViceLog(0, ("FSYNC_backgroundSalvage: unable to request salvage for volume %lu\n", + afs_printable_uint32_lu(vp->hashid))); + } + + queue_Append(&fsync_salv.head, node); + assert(pthread_cond_broadcast(&fsync_salv.cv) == 0); +} +#endif /* AFS_DEMAND_ATTACH_FS */ + static void FSYNC_newconnection(osi_socket afd) { @@ -699,6 +808,15 @@ FSYNC_com_VolOff(FSSYNC_VolOp_command * vcom, SYNC_response * res) /* enforce mutual exclusion for volume ops */ if (vp->pending_vol_op) { if (vp->pending_vol_op->com.programType != type) { + if (vp->pending_vol_op->com.command == FSYNC_VOL_OFF && + vp->pending_vol_op->com.reason == FSYNC_SALVAGE) { + + Log("denying offline request for volume %lu; volume is salvaging\n", + afs_printable_uint32_lu(vp->hashid)); + + res->hdr.reason = FSYNC_SALVAGE; + goto deny; + } Log("volume %u already checked out\n", vp->hashid); /* XXX debug */ Log("vp->vop = { com = { ver=%u, prog=%d, com=%d, reason=%d, len=%u, flags=0x%x }, vop = { vol=%u, part='%s' } }\n", @@ -735,8 +853,8 @@ FSYNC_com_VolOff(FSSYNC_VolOp_command * vcom, SYNC_response * res) /* filter based upon requestor * - * volume utilities are not allowed to check out volumes - * which are in an error state + * volume utilities / volserver are not allowed to check out + * volumes which are in an error state * * unknown utility programs will be denied on principal */ @@ -752,12 +870,24 @@ FSYNC_com_VolOff(FSSYNC_VolOp_command * vcom, SYNC_response * res) break; case volumeUtility: + case volumeServer: + if (V_attachState(vp) == VOL_STATE_SALVAGING || + vp->salvage.requested) { + + Log("denying offline request for volume %lu; volume is in salvaging state\n", + afs_printable_uint32_lu(vp->hashid)); + res->hdr.reason = FSYNC_SALVAGE; + + /* the volume hasn't been checked out yet by the salvager, + * but we think the volume is salvaging; schedule a + * salvage to update the salvage 
priority */ + FSYNC_backgroundSalvage(vp); + + goto deny; + } if (VIsErrorState(V_attachState(vp))) { goto deny; } - if (vp->salvage.requested) { - goto deny; - } break; default: @@ -839,6 +969,7 @@ FSYNC_com_VolOff(FSSYNC_VolOp_command * vcom, SYNC_response * res) vcom->hdr->reason == V_CLONE ? "clone" : vcom->hdr->reason == V_READONLY ? "readonly" : vcom->hdr->reason == V_DUMP ? "dump" : + vcom->hdr->reason == FSYNC_SALVAGE ? "salvage" : "UNKNOWN"); } #ifdef AFS_DEMAND_ATTACH_FS @@ -1087,7 +1218,14 @@ FSYNC_com_VolError(FSSYNC_VolOp_command * vcom, SYNC_response * res) if (FSYNC_partMatch(vcom, vp, 0)) { /* null out salvsync control state, as it's no longer relevant */ memset(&vp->salvage, 0, sizeof(vp->salvage)); - VChangeState_r(vp, VOL_STATE_ERROR); + VDeregisterVolOp_r(vp); + + if (vcom->hdr->reason == FSYNC_SALVAGE) { + FSYNC_backgroundSalvage(vp); + } else { + VChangeState_r(vp, VOL_STATE_ERROR); + } + code = SYNC_OK; } else { res->hdr.reason = FSYNC_WRONG_PART; diff --git a/src/vol/volume.c b/src/vol/volume.c index 46b238a..d966ad7 100644 --- a/src/vol/volume.c +++ b/src/vol/volume.c @@ -363,7 +363,6 @@ static void VVByPListWait_r(struct DiskPartition64 * dp); /* online salvager */ static int VCheckSalvage(register Volume * vp); -static int VUpdateSalvagePriority_r(Volume * vp); #ifdef SALVSYNC_BUILD_CLIENT static int VScheduleSalvage_r(Volume * vp); #endif @@ -470,6 +469,7 @@ VOptDefaults(ProgramType pt, VolumePackageOptions *opts) opts->nLargeVnodes = 0; opts->nSmallVnodes = 0; + opts->canScheduleSalvage = 1; opts->canUseFSSYNC = 1; break; @@ -2059,11 +2059,23 @@ VAttachVolumeByName_r(Error * ec, char *partition, char *name, int mode) DiskToVolumeHeader(&iheader, &diskHeader); #ifdef FSSYNC_BUILD_CLIENT if (VCanUseFSSYNC() && mode != V_SECRETLY && mode != V_PEEK) { + SYNC_response res; + memset(&res, 0, sizeof(res)); + VOL_LOCK; - if (FSYNC_VolOp(iheader.id, partition, FSYNC_VOL_NEEDVOLUME, mode, NULL) + if (FSYNC_VolOp(iheader.id, partition, 
FSYNC_VOL_NEEDVOLUME, mode, &res) != SYNC_OK) { - Log("VAttachVolume: attach of volume %u apparently denied by file server\n", iheader.id); - *ec = VNOVOL; /* XXXX */ + + if (res.hdr.reason == FSYNC_SALVAGE) { + Log("VAttachVolume: file server says volume %u is salvaging\n", + iheader.id); + *ec = VSALVAGING; + } else { + Log("VAttachVolume: attach of volume %u apparently denied by file server\n", + iheader.id); + *ec = VNOVOL; /* XXXX */ + } + goto done; } VOL_UNLOCK; @@ -2073,6 +2085,7 @@ VAttachVolumeByName_r(Error * ec, char *partition, char *name, int mode) if (!vp) { vp = (Volume *) calloc(1, sizeof(Volume)); assert(vp != NULL); + vp->hashid = volumeId; vp->device = partp->device; vp->partition = partp; queue_Init(&vp->vnode_list); @@ -2125,6 +2138,12 @@ VAttachVolumeByName_r(Error * ec, char *partition, char *name, int mode) #ifdef FSSYNC_BUILD_CLIENT if (VCanUseFSSYNC() && vp == NULL && mode != V_SECRETLY && mode != V_PEEK) { + +#ifdef AFS_DEMAND_ATTACH_FS + /* If we couldn't attach but we scheduled a salvage, we already + * notified the fileserver; don't online it now */ + if (*ec != VSALVAGING) +#endif /* AFS_DEMAND_ATTACH_FS */ FSYNC_VolOp(iheader.id, partition, FSYNC_VOL_ON, 0, NULL); } else #endif @@ -2543,18 +2562,18 @@ attach2(Error * ec, VolId volumeId, char *path, register struct VolumeHeader * h #if defined(AFS_DEMAND_ATTACH_FS) if (*ec && ((*ec != VOFFLINE) || (V_attachState(vp) != VOL_STATE_UNATTACHED))) { VOL_LOCK; - if (VCanScheduleSalvage()) { - VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER); - vp->nUsers = 0; - } else { + if (!VCanScheduleSalvage()) { Log("VAttachVolume: Error attaching volume %s; volume needs salvage; error=%u\n", path, *ec); - FreeVolume(vp); - *ec = VSALVAGE; } + VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER); + vp->nUsers = 0; + + VCheckFree(vp); return NULL; } else if (*ec) { /* volume operation in progress */ VOL_LOCK; + VCheckFree(vp); return NULL; } #else /* 
AFS_DEMAND_ATTACH_FS */ @@ -2571,14 +2590,13 @@ attach2(Error * ec, VolId volumeId, char *path, register struct VolumeHeader * h vp->specialStatus = 0; VOL_LOCK; #if defined(AFS_DEMAND_ATTACH_FS) - if (VCanScheduleSalvage()) { - VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER); - vp->nUsers = 0; - } else { + if (!VCanScheduleSalvage()) { Log("VAttachVolume: volume salvage flag is ON for %s; volume needs salvage\n", path); - FreeVolume(vp); - *ec = VSALVAGE; } + VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER); + vp->nUsers = 0; + + VCheckFree(vp); #else /* AFS_DEMAND_ATTACH_FS */ FreeVolume(vp); *ec = VSALVAGE; @@ -2595,8 +2613,13 @@ attach2(Error * ec, VolId volumeId, char *path, register struct VolumeHeader * h VUpdateVolume_r(ec, vp, 0); } #if defined(AFS_DEMAND_ATTACH_FS) + if (!VCanScheduleSalvage()) { + Log("VAttachVolume: volume %s needs to be salvaged; not attached.\n", path); + } VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER); vp->nUsers = 0; + + VCheckFree(vp); #else /* AFS_DEMAND_ATTACH_FS */ Log("VAttachVolume: volume %s needs to be salvaged; not attached.\n", path); FreeVolume(vp); @@ -2639,6 +2662,7 @@ attach2(Error * ec, VolId volumeId, char *path, register struct VolumeHeader * h #ifdef AFS_DEMAND_ATTACH_FS VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER); vp->nUsers = 0; + VCheckFree(vp); #else /* AFS_DEMAND_ATTACH_FS */ FreeVolume(vp); #endif /* AFS_DEMAND_ATTACH_FS */ @@ -4061,17 +4085,6 @@ VVolOpSetVBusy_r(Volume * vp, FSSYNC_VolOp_info * vopinfo) /* online salvager routines */ /***************************************************/ #if defined(AFS_DEMAND_ATTACH_FS) -#define SALVAGE_PRIO_UPDATE_INTERVAL 3 /**< number of seconds between prio updates */ -#define SALVAGE_COUNT_MAX 16 /**< number of online salvages we - * allow before moving the volume - * into a permanent error state - * - * once this threshold is reached, - * the operator will have 
to manually - * issue a 'bos salvage' to bring - * the volume back online - */ - /** * check whether a salvage needs to be performed on this volume. * @@ -4152,11 +4165,59 @@ VRequestSalvage_r(Error * ec, Volume * vp, int reason, int flags) return 1; } + if (programType != fileServer) { +#ifdef FSSYNC_BUILD_CLIENT + if (VCanUseFSSYNC()) { + /* + * If we aren't the fileserver, tell the fileserver the volume + * needs to be salvaged. We could directly tell the + * salvageserver, but the fileserver keeps track of some stats + * related to salvages, and handles some other salvage-related + * complications for us. + */ + + /* + * You might wonder why we don't check for + * VIsSalvager(V_inUse(vp)) here, since we do check for that + * in the fileServer case (below). The reason is that the + * below check is done since the fileServer can't tell if a + * salvage is still running or not when V_inUse refers to a + * salvaging program. However, if we are a non-fileserver, + * to get here we must have checked out the volume from the + * fileserver and locked the partition, meaning there must + * be no salvager running; so we just always try to salvage + */ + + code = FSYNC_VolOp(vp->hashid, vp->partition->name, + FSYNC_VOL_FORCE_ERROR, FSYNC_SALVAGE, NULL); + if (code == SYNC_OK) { + *ec = VSALVAGING; + return 0; + } + Log("VRequestSalvage: force error salvage state of volume %u" + " denied by fileserver\n", vp->hashid); + + /* fall through to error condition below */ + } +#endif /* FSSYNC_BUILD_CLIENT */ + VChangeState_r(vp, VOL_STATE_ERROR); + *ec = VSALVAGE; + return 1; + } + if (!vp->salvage.requested) { vp->salvage.requested = 1; vp->salvage.reason = reason; vp->stats.last_salvage = FT_ApproxTime(); - if (VIsSalvager(V_inUse(vp))) { + + if (vp->header && VIsSalvager(V_inUse(vp))) { + /* Right now we can't tell for sure if this indicates a + * salvage is running, or if a running salvage crashed, so + * we always ERROR the volume in case a salvage is running. 
+ * Once we get rid of the partition lock and instead lock + * individual volume header files for salvages, we will + * probably be able to tell if a salvage is running, and we + * can do away with this behavior. */ Log("VRequestSalvage: volume %u appears to be salvaging, but we\n", vp->hashid); Log(" didn't request a salvage. Forcing it offline waiting for the\n"); Log(" salvage to finish; if you are sure no salvage is running,\n"); @@ -4181,6 +4242,11 @@ VRequestSalvage_r(Error * ec, Volume * vp, int reason, int flags) *ec = VSALVAGING; } else { Log("VRequestSalvage: volume %u online salvaged too many times; forced offline.\n", vp->hashid); + + /* make sure neither VScheduleSalvage_r nor + * VUpdateSalvagePriority_r try to schedule another salvage */ + vp->salvage.requested = vp->salvage.scheduled = 0; + VChangeState_r(vp, VOL_STATE_ERROR); *ec = VSALVAGE; code = 1; @@ -4224,7 +4290,7 @@ VRequestSalvage_r(Error * ec, Volume * vp, int reason, int flags) * * @internal volume package internal use only. */ -static int +int VUpdateSalvagePriority_r(Volume * vp) { int ret=0; diff --git a/src/vol/volume.h b/src/vol/volume.h index f1b71ae..0c7861e 100644 --- a/src/vol/volume.h +++ b/src/vol/volume.h @@ -586,6 +586,18 @@ typedef struct VolumeStats { afs_uint32 last_vol_op; /**< unix timestamp of last volume operation */ } VolumeStats; + +#define SALVAGE_PRIO_UPDATE_INTERVAL 3 /**< number of seconds between prio updates */ +#define SALVAGE_COUNT_MAX 16 /**< number of online salvages we + * allow before moving the volume + * into a permanent error state + * + * once this threshold is reached, + * the operator will have to manually + * issue a 'bos salvage' to bring + * the volume back online + */ + /** * DAFS online salvager state. 
*/ @@ -827,6 +839,7 @@ extern void VPrintExtendedCacheStats_r(int flags); extern void VLRU_SetOptions(int option, afs_uint32 val); extern int VSetVolHashSize(int logsize); extern int VRequestSalvage_r(Error * ec, Volume * vp, int reason, int flags); +extern int VUpdateSalvagePriority_r(Volume * vp); extern int VRegisterVolOp_r(Volume * vp, FSSYNC_VolOp_info * vopinfo); extern int VDeregisterVolOp_r(Volume * vp); extern void VCancelReservation_r(Volume * vp); diff --git a/src/volser/volprocs.c b/src/volser/volprocs.c index 6ead0a5..87ead74 100644 --- a/src/volser/volprocs.c +++ b/src/volser/volprocs.c @@ -62,6 +62,7 @@ #include #include #include +#include #include "volser.h" #include "voltrans_inline.h" @@ -213,12 +214,70 @@ ConvertPartition(int apartno, char *aname, int asize) return 0; } +static struct Volume * +VAttachVolumeByName_retry(Error *ec, char *partition, char *name, int mode) +{ + struct Volume *vp; + + *ec = 0; + vp = VAttachVolumeByName(ec, partition, name, mode); + +#ifdef AFS_DEMAND_ATTACH_FS + { + int i; + /* + * The fileserver will take care of keeping track of how many + * demand-salvages have been performed, and will force the volume to + * ERROR if we've done too many. The limit on This loop is just a + * failsafe to prevent trying to salvage forever. We want to attempt + * attachment at least SALVAGE_COUNT_MAX times, since we want to + * avoid prematurely exiting this loop, if we can. 
+ */ + for (i = 0; i < SALVAGE_COUNT_MAX*2 && *ec == VSALVAGING; i++) { + sleep(SALVAGE_PRIO_UPDATE_INTERVAL); + vp = VAttachVolumeByName(ec, partition, name, mode); + } + + if (*ec == VSALVAGING) { + *ec = VSALVAGE; + } + } +#endif /* AFS_DEMAND_ATTACH_FS */ + + return vp; +} + +static struct Volume * +VAttachVolume_retry(Error *ec, afs_uint32 avolid, int amode) +{ + struct Volume *vp; + + *ec = 0; + vp = VAttachVolume(ec, avolid, amode); + +#ifdef AFS_DEMAND_ATTACH_FS + { + int i; + /* see comment above in VAttachVolumeByName_retry */ + for (i = 0; i < SALVAGE_COUNT_MAX*2 && *ec == VSALVAGING; i++) { + sleep(SALVAGE_PRIO_UPDATE_INTERVAL); + vp = VAttachVolume(ec, avolid, amode); + } + + if (*ec == VSALVAGING) { + *ec = VSALVAGE; + } + } +#endif /* AFS_DEMAND_ATTACH_FS */ + + return vp; +} + /* the only attach function that takes a partition is "...ByName", so we use it */ -struct Volume * +static struct Volume * XAttachVolume(afs_int32 *error, afs_uint32 avolid, afs_int32 apartid, int amode) { char pbuf[30], vbuf[20]; - register struct Volume *tv; if (ConvertPartition(apartid, pbuf, sizeof(pbuf))) { *error = EINVAL; @@ -228,8 +287,8 @@ XAttachVolume(afs_int32 *error, afs_uint32 avolid, afs_int32 apartid, int amode) *error = EINVAL; return NULL; } - tv = VAttachVolumeByName((Error *)error, pbuf, vbuf, amode); - return tv; + + return VAttachVolumeByName_retry((Error *)error, pbuf, vbuf, amode); } /* Adapted from the file server; create a root directory for this volume */ @@ -626,7 +685,7 @@ VolClone(struct rx_call *acid, afs_int32 atrans, afs_uint32 purgeId, if (purgeId) { - purgevp = VAttachVolume(&error, purgeId, V_VOLUPD); + purgevp = VAttachVolume_retry(&error, purgeId, V_VOLUPD); if (error) { Log("1 Volser: Clone: Could not attach 'purge' volume %u; clone aborted\n", purgeId); goto fail; @@ -809,7 +868,7 @@ VolReClone(struct rx_call *acid, afs_int32 atrans, afs_int32 cloneId) goto fail; } - clonevp = VAttachVolume(&error, cloneId, V_VOLUPD); + clonevp = 
VAttachVolume_retry(&error, cloneId, V_VOLUPD); if (error) { Log("1 Volser: can't attach clone %d\n", cloneId); goto fail; @@ -2115,7 +2174,7 @@ GetVolInfo(afs_uint32 partId, } /* Get volume from volserver */ - tv = VAttachVolumeByName(&error, pname, volname, V_PEEK); + tv = VAttachVolumeByName_retry(&error, pname, volname, V_PEEK); if (error) { Log("1 Volser: GetVolInfo: Could not attach volume %u (%s:%s) error=%d\n", volumeId, pname, volname, error); -- 1.9.4