From a7b33b55bb34027262fb5deb686a86fba1d2f832 Mon Sep 17 00:00:00 2001 From: Tom Keiser Date: Thu, 12 Jun 2008 20:12:06 +0000 Subject: [PATCH] dafs-updates-20080612 LICENSE IPL10 pending changes from tom to avoid a deadlock issue --- src/util/volparse.c | 119 ++++++++--- src/vol/daemon_com.c | 40 +++- src/vol/daemon_com.h | 54 +++-- src/vol/fssync-debug.c | 55 +++++ src/vol/fssync-server.c | 504 +++++++++++++++++++++++++++++++++++++--------- src/vol/fssync.h | 4 +- src/vol/partition.c | 36 +++- src/vol/purge.c | 4 + src/vol/salvsync-server.c | 6 +- src/vol/salvsync.h | 5 +- src/vol/vol-salvage.c | 8 +- src/vol/vol-salvage.h | 2 +- src/vol/volume.c | 151 ++++++++++---- src/vol/volume.h | 11 + src/volser/volprocs.c | 57 ++++-- 15 files changed, 837 insertions(+), 219 deletions(-) diff --git a/src/util/volparse.c b/src/util/volparse.c index a358da7..bfa0110 100644 --- a/src/util/volparse.c +++ b/src/util/volparse.c @@ -18,7 +18,18 @@ RCSID #include #endif -/* map a partition id from any partition-style name */ +/** + * map a partition id from any partition-style name. + * + * @param[in] aname partition name string + * + * @return partition index number + * @retval -1 invalid partition name + * + * @see volutil_PartitionName2_r + * @see volutil_PartitionName_r + * @see volutil_PartitionName + */ afs_int32 volutil_GetPartitionID(char *aname) { @@ -69,40 +80,102 @@ volutil_GetPartitionID(char *aname) } } -/* map a partition number back into a partition string */ -#define BAD_VID "BAD VOLUME ID" -#define BAD_VID_LEN (sizeof(BAD_VID)) -char * -volutil_PartitionName_r(int avalue, char *tbuffer, int buflen) +/** + * convert a partition index number into a partition name string (/vicepXX). 
+ * + * @param[in] part partition index number + * @param[out] tbuffer buffer in which to store name + * @param[in] buflen length of tbuffer + * + * @return operation status + * @retval 0 success + * @retval -1 buffer too short + * @retval -2 invalid partition id + * + * @see volutil_PartitionName_r + * @see volutil_PartitionName + * @see volutil_GetPartitionID + */ +afs_int32 +volutil_PartitionName2_r(afs_int32 part, char *tbuffer, size_t buflen) { char tempString[3]; register int i; - if (buflen < BAD_VID_LEN) { - if (buflen > 3) - (void)strcpy(tbuffer, "SPC"); - else - tbuffer[0] = '\0'; - return tbuffer; + if (part < 0 || part >= (26 * 26 + 26)) { + return -2; } - memset(tbuffer, 0, buflen); + tempString[1] = tempString[2] = 0; - strcpy(tbuffer, "/vicep"); - if (avalue < 0 || avalue >= (26 * 26 + 26)) { - strcpy(tbuffer, "BAD VOLUME ID"); - } else if (avalue <= 25) { - tempString[0] = 'a' + avalue; - strcat(tbuffer, tempString); + strncpy(tbuffer, "/vicep", buflen); + if (part <= 25) { + tempString[0] = 'a' + part; } else { - avalue -= 26; - i = (avalue / 26); + part -= 26; + i = (part / 26); tempString[0] = i + 'a'; - tempString[1] = (avalue % 26) + 'a'; - strcat(tbuffer, tempString); + tempString[1] = (part % 26) + 'a'; + } + if (strlcat(tbuffer, tempString, buflen) >= buflen) { + return -1; + } + return 0; +} + +#define BAD_VID "BAD VOLUME ID" +#define BAD_VID_LEN (sizeof(BAD_VID)) +/** + * convert a partition index number into a partition name string (/vicepXX). 
+ * + * @param[in] part partition index number + * @param[out] tbuffer buffer in which to store name + * @param[in] buflen length of tbuffer + * + * @return partition name string + * @retval "" buffer too short + * @retval "SPC" buffer too short + * @retval "BAD VOLUME ID" avalue contains an invalid partition index + * + * @note you may wish to consider using volutil_PartitionName2_r, as its + * error handling is more standard + * + * @see volutil_PartitionName2_r + * @see volutil_PartitionName + * @see volutil_GetPartitionID + */ +char * +volutil_PartitionName_r(int part, char *tbuffer, int buflen) +{ + afs_int32 code; + + if (buflen < BAD_VID_LEN) { + strlcpy(tbuffer, "SPC", buflen); + return tbuffer; } + + code = volutil_PartitionName2_r(part, tbuffer, buflen); + + if (code == -2) { + strlcpy(tbuffer, BAD_VID, buflen); + } + return tbuffer; } +/** + * convert a partition index number into a partition name string (/vicepXX). + * + * @param[in] avalue partition index number + * + * @return partition name string + * @retval "BAD VOLUME ID" avalue contains an invalid partition index + * + * @warning this interface is not re-entrant + * + * @see volutil_PartitionName2_r + * @see volutil_PartitionName_r + * @see volutil_GetPartitionID + */ char * volutil_PartitionName(int avalue) { diff --git a/src/vol/daemon_com.c b/src/vol/daemon_com.c index 1114547..764f234 100644 --- a/src/vol/daemon_com.c +++ b/src/vol/daemon_com.c @@ -369,7 +369,26 @@ SYNC_ask_internal(SYNC_client_state * state, SYNC_command * com, SYNC_response * goto done; } + /* + * fill in some common header fields + */ com->hdr.proto_version = state->proto_version; + com->hdr.pkt_seq = ++state->pkt_seq; + com->hdr.com_seq = ++state->com_seq; +#ifdef AFS_NT40_ENV + com->hdr.pid = 0; + com->hdr.tid = 0; +#else + com->hdr.pid = getpid(); +#ifdef AFS_PTHREAD_ENV + com->hdr.tid = (afs_int32)pthread_self(); +#else + { + PROCESS handle = LWP_ThreadId(); + com->hdr.tid = (handle) ? 
handle->index : 0; + } +#endif /* !AFS_PTHREAD_ENV */ +#endif /* !AFS_NT40_ENV */ memcpy(buf, &com->hdr, sizeof(com->hdr)); if (com->payload.len) { @@ -473,15 +492,18 @@ SYNC_ask_internal(SYNC_client_state * state, SYNC_command * com, SYNC_response * /** * receive a command structure off a sync socket. * - * @param[in] fd socket descriptor - * @param[out] com sync command object to be populated + * @param[in] state pointer to server-side state object + * @param[in] fd file descriptor on which to perform i/o + * @param[out] com sync command object to be populated * * @return operation status * @retval SYNC_OK command received * @retval SYNC_COM_ERROR there was a socket communications error */ afs_int32 -SYNC_getCom(int fd, SYNC_command * com) +SYNC_getCom(SYNC_server_state_t * state, + int fd, + SYNC_command * com) { int n; afs_int32 code = SYNC_OK; @@ -546,15 +568,18 @@ SYNC_getCom(int fd, SYNC_command * com) /** * write a response structure to a sync socket. * - * @param[in] fd - * @param[in] res + * @param[in] state handle to server-side state object + * @param[in] fd file descriptor on which to perform i/o + * @param[in] res handle to response packet * * @return operation status * @retval SYNC_OK * @retval SYNC_COM_ERROR */ afs_int32 -SYNC_putRes(int fd, SYNC_response * res) +SYNC_putRes(SYNC_server_state_t * state, + int fd, + SYNC_response * res) { int n; afs_int32 code = SYNC_OK; @@ -575,6 +600,9 @@ SYNC_putRes(int fd, SYNC_response * res) #ifdef AFS_DEMAND_ATTACH_FS res->hdr.flags |= SYNC_FLAG_DAFS_EXTENSIONS; #endif + res->hdr.proto_version = state->proto_version; + res->hdr.pkt_seq = ++state->pkt_seq; + res->hdr.res_seq = ++state->res_seq; memcpy(buf, &res->hdr, sizeof(res->hdr)); if (res->payload.len) { diff --git a/src/vol/daemon_com.h b/src/vol/daemon_com.h index 41ec3ca..fee96e8 100644 --- a/src/vol/daemon_com.h +++ b/src/vol/daemon_com.h @@ -8,7 +8,7 @@ */ #ifndef _AFS_VOL_DAEMON_COM_H -#define _AFS_VOL_DAEMON_COM_H +#define _AFS_VOL_DAEMON_COM_H 1 /* 
* SYNC protocol constants @@ -48,7 +48,7 @@ enum SYNCReasonCode { SYNC_COM_ERROR = 2, /**< sync protocol communicaions error */ SYNC_BAD_COMMAND = 3, /**< sync command code not implemented by server */ SYNC_FAILED = 4, /**< sync server-side procedure failed */ - SYNC_REASON_CODE_END + SYNC_RESPONSE_CODE_END }; /* SYNC protocol reason codes @@ -61,9 +61,10 @@ enum SYNCReasonCode { /* general reason codes */ #define SYNC_REASON_NONE 0 -#define SYNC_REASON_MALFORMED_PACKET 1 -#define SYNC_REASON_NOMEM 2 +#define SYNC_REASON_MALFORMED_PACKET 1 /**< command packet was malformed */ +#define SYNC_REASON_NOMEM 2 /**< sync server out of memory */ #define SYNC_REASON_ENCODING_ERROR 3 +#define SYNC_REASON_PAYLOAD_TOO_BIG 4 /**< payload too big for response packet buffer */ /* SYNC protocol flags * @@ -120,6 +121,8 @@ typedef struct SYNC_server_state { int listen_depth; /**< socket listen queue depth */ char * proto_name; /**< sync protocol associated with this conn */ SYNC_sockaddr_t addr; /**< server listen socket sockaddr */ + afs_uint32 pkt_seq; /**< packet xmit sequence counter */ + afs_uint32 res_seq; /**< response xmit sequence counter */ } SYNC_server_state_t; /** @@ -132,25 +135,40 @@ typedef struct SYNC_client_state { int retry_limit; /**< max number of times for SYNC_ask to retry */ afs_int32 hard_timeout; /**< upper limit on time to keep trying */ char * proto_name; /**< sync protocol associated with this conn */ - byte fatal_error; /**< nonzer if fatal error on this client conn */ + byte fatal_error; /**< nonzero if fatal error on this client conn */ + afs_uint32 pkt_seq; /**< packet xmit sequence counter */ + afs_uint32 com_seq; /**< command xmit sequence counter */ } SYNC_client_state; /* wire types */ +/** + * on-wire command packet header. 
+ */ typedef struct SYNC_command_hdr { - afs_uint32 proto_version; /* sync protocol version */ - afs_int32 programType; /* type of program issuing the request */ - afs_int32 command; /* request type */ - afs_int32 reason; /* reason for request */ - afs_uint32 command_len; /* entire length of command */ - afs_uint32 flags; + afs_uint32 proto_version; /**< sync protocol version */ + afs_uint32 pkt_seq; /**< packet sequence number */ + afs_uint32 com_seq; /**< command sequence number */ + afs_int32 programType; /**< type of program issuing the request */ + afs_int32 pid; /**< pid of requestor */ + afs_int32 tid; /**< thread id of requestor */ + afs_int32 command; /**< request type */ + afs_int32 reason; /**< reason for request */ + afs_uint32 command_len; /**< entire length of command */ + afs_uint32 flags; /**< miscellaneous control flags */ } SYNC_command_hdr; +/** + * on-wire response packet header. + */ typedef struct SYNC_response_hdr { - afs_uint32 proto_version; /* sync protocol version */ - afs_uint32 response_len; /* entire length of response */ - afs_int32 response; /* response code */ - afs_int32 reason; /* reason for response */ - afs_uint32 flags; + afs_uint32 proto_version; /**< sync protocol version */ + afs_uint32 pkt_seq; /**< packet sequence number */ + afs_uint32 com_seq; /**< in response to com_seq... 
*/ + afs_uint32 res_seq; /**< response sequence number */ + afs_uint32 response_len; /**< entire length of response */ + afs_int32 response; /**< response code */ + afs_int32 reason; /**< reason for response */ + afs_uint32 flags; /**< miscellaneous control flags */ } SYNC_response_hdr; @@ -185,8 +203,8 @@ extern afs_int32 SYNC_closeChannel(SYNC_client_state *); /* do a graceful chann extern int SYNC_reconnect(SYNC_client_state *); /* do a reconnect after a protocol error, or from a forked child */ /* server-side prototypes */ -extern int SYNC_getCom(int fd, SYNC_command * com); -extern int SYNC_putRes(int fd, SYNC_response * res); +extern int SYNC_getCom(SYNC_server_state_t *, int fd, SYNC_command * com); +extern int SYNC_putRes(SYNC_server_state_t *, int fd, SYNC_response * res); extern int SYNC_verifyProtocolString(char * buf, size_t len); extern void SYNC_cleanupSock(SYNC_server_state_t * state); extern int SYNC_bindSock(SYNC_server_state_t * state); diff --git a/src/vol/fssync-debug.c b/src/vol/fssync-debug.c index 47d2ce1..651c943 100644 --- a/src/vol/fssync-debug.c +++ b/src/vol/fssync-debug.c @@ -98,6 +98,9 @@ static int VolDetach(struct cmd_syndesc * as, void * rock); static int VolBreakCBKs(struct cmd_syndesc * as, void * rock); static int VolMove(struct cmd_syndesc * as, void * rock); static int VolList(struct cmd_syndesc * as, void * rock); +static int VolLeaveOff(struct cmd_syndesc * as, void * rock); +static int VolForceAttach(struct cmd_syndesc * as, void * rock); +static int VolForceError(struct cmd_syndesc * as, void * rock); static int VolQuery(struct cmd_syndesc * as, void * rock); static int VolHdrQuery(struct cmd_syndesc * as, void * rock); static int VolOpQuery(struct cmd_syndesc * as, void * rock); @@ -180,6 +183,15 @@ main(int argc, char **argv) VOLOP_PARMS_DECL(ts); cmd_CreateAlias(ts, "ls"); + ts = cmd_CreateSyntax("leaveoff", VolLeaveOff, 0, "leave volume offline (FSYNC_VOL_LEAVE_OFF opcode)"); + VOLOP_PARMS_DECL(ts); + + ts = 
cmd_CreateSyntax("attach", VolForceAttach, 0, "force full attachment (FSYNC_VOL_ATTACH opcode)"); + VOLOP_PARMS_DECL(ts); + + ts = cmd_CreateSyntax("error", VolForceError, 0, "force into hard error state (FSYNC_VOL_FORCE_ERROR opcode)"); + VOLOP_PARMS_DECL(ts); + ts = cmd_CreateSyntax("query", VolQuery, NULL, "get volume structure (FSYNC_VOL_QUERY opcode)"); VOLOP_PARMS_DECL(ts); cmd_CreateAlias(ts, "qry"); @@ -500,6 +512,45 @@ VolList(struct cmd_syndesc * as, void * rock) return 0; } +static int +VolLeaveOff(struct cmd_syndesc * as, void * rock) +{ + struct state state; + + common_prolog(as, &state); + common_volop_prolog(as, &state); + + do_volop(&state, FSYNC_VOL_LEAVE_OFF, NULL); + + return 0; +} + +static int +VolForceAttach(struct cmd_syndesc * as, void * rock) +{ + struct state state; + + common_prolog(as, &state); + common_volop_prolog(as, &state); + + do_volop(&state, FSYNC_VOL_ATTACH, NULL); + + return 0; +} + +static int +VolForceError(struct cmd_syndesc * as, void * rock) +{ + struct state state; + + common_prolog(as, &state); + common_volop_prolog(as, &state); + + do_volop(&state, FSYNC_VOL_FORCE_ERROR, NULL); + + return 0; +} + #ifdef AFS_DEMAND_ATTACH_FS static char * vol_state_to_string(VolState state) @@ -835,8 +886,12 @@ VolOpQuery(struct cmd_syndesc * as, void * rock) printf("\tcom = {\n"); printf("\t\tproto_version = %u\n", vop.com.proto_version); + printf("\t\tpkt_seq = %u\n", vop.com.pkt_seq); + printf("\t\tcom_seq = %u\n", vop.com.com_seq); printf("\t\tprogramType = %d (%s)\n", vop.com.programType, program_type_to_string(vop.com.programType)); + printf("\t\tpid = %d\n", vop.com.pid); + printf("\t\ttid = %d\n", vop.com.tid); printf("\t\tcommand = %d (%s)\n", vop.com.command, command_code_to_string(vop.com.command)); printf("\t\treason = %d (%s)\n", diff --git a/src/vol/fssync-server.c b/src/vol/fssync-server.c index 1d72556..eae04be 100644 --- a/src/vol/fssync-server.c +++ b/src/vol/fssync-server.c @@ -179,6 +179,8 @@ static afs_int32 
FSYNC_com_StatsOpVLRU(FSSYNC_StatsOp_command * scom, SYNC_respo static void FSYNC_com_to_info(FSSYNC_VolOp_command * vcom, FSSYNC_VolOp_info * info); +static int FSYNC_partMatch(FSSYNC_VolOp_command * vcom, Volume * vp, int match_anon); + /* * This lock controls access to the handler array. The overhead @@ -230,6 +232,9 @@ FSYNC_sync(void * args) int tid; #endif SYNC_server_state_t * state = &fssync_server_state; +#ifdef AFS_DEMAND_ATTACH_FS + VThreadOptions_t * thread_opts; +#endif SYNC_getAddr(&state->endpoint, &state->addr); SYNC_cleanupSock(state); @@ -256,11 +261,26 @@ FSYNC_sync(void * args) LWP_DispatchProcess(); #endif /* AFS_PTHREAD_ENV */ } - state->fd = SYNC_getSock(&state->endpoint); code = SYNC_bindSock(state); assert(!code); +#ifdef AFS_DEMAND_ATTACH_FS + /* + * make sure the volume package is incapable of recursively executing + * salvsync calls on this thread, since there is a possibility of + * deadlock. + */ + thread_opts = malloc(sizeof(VThreadOptions_t)); + if (thread_opts == NULL) { + Log("failed to allocate memory for thread-specific volume package options structure\n"); + return NULL; + } + memcpy(thread_opts, &VThread_defaults, sizeof(VThread_defaults)); + thread_opts->disallow_salvsync = 1; + assert(pthread_setspecific(VThread_key, thread_opts) == 0); +#endif + InitHandler(); AcceptOn(); @@ -321,12 +341,11 @@ FSYNC_com(int fd) com.payload.buf = (void *)com_buf; com.payload.len = SYNC_PROTO_MAX_LEN; res.hdr.response_len = sizeof(res.hdr); - res.hdr.proto_version = FSYNC_PROTO_VERSION; res.payload.len = SYNC_PROTO_MAX_LEN; res.payload.buf = (void *)res_buf; FS_cnt++; - if (SYNC_getCom(fd, &com)) { + if (SYNC_getCom(&fssync_server_state, fd, &com)) { Log("FSYNC_com: read failed; dropping connection (cnt=%d)\n", FS_cnt); FSYNC_Drop(fd); return; @@ -353,6 +372,7 @@ FSYNC_com(int fd) goto respond; } + res.hdr.com_seq = com.hdr.com_seq; VOL_LOCK; switch (com.hdr.command) { @@ -388,7 +408,7 @@ FSYNC_com(int fd) VOL_UNLOCK; respond: - 
SYNC_putRes(fd, &res); + SYNC_putRes(&fssync_server_state, fd, &res); if (res.hdr.flags & SYNC_FLAG_CHANNEL_SHUTDOWN) { FSYNC_Drop(fd); } @@ -464,6 +484,30 @@ FSYNC_com_VolOp(int fd, SYNC_command * com, SYNC_response * res) return code; } +/** + * service an FSYNC request to bring a volume online. + * + * @param[in] vcom pointer command object + * @param[out] res object in which to store response packet + * + * @return operation status + * @retval SYNC_OK volume transitioned online + * @retval SYNC_FAILED invalid command protocol message + * @retval SYNC_DENIED operation could not be completed + * + * @note this is an FSYNC RPC server stub + * + * @note this procedure handles the following FSSYNC command codes: + * - FSYNC_VOL_ON + * - FSYNC_VOL_ATTACH + * - FSYNC_VOL_LEAVE_OFF + * + * @note the supplementary reason code contains additional details. + * When SYNC_DENIED is returned, the specific reason is + * placed in the response packet reason field. + * + * @internal + */ static afs_int32 FSYNC_com_VolOn(FSSYNC_VolOp_command * vcom, SYNC_response * res) { @@ -478,49 +522,13 @@ FSYNC_com_VolOn(FSSYNC_VolOp_command * vcom, SYNC_response * res) goto done; } - /* - This is where a detatched volume gets reattached. However in the - special case where the volume is merely busy, it is already - attatched and it is only necessary to clear the busy flag. See - defect #2080 for details. - */ - - /* is the volume already attatched? */ -#ifdef notdef - /* - * XXX With the following enabled we had bizarre problems where the backup id would - * be reset to 0; that was due to the interaction between fileserver/volserver in that they - * both keep volumes in memory and the changes wouldn't be made to the fileserver. Some of - * the problems were due to refcnt changes as result of VGetVolume/VPutVolume which would call - * VOffline, etc. 
when we don't want to; someday the whole #2080 issue should be revisited to - * be done right XXX - */ - vp = VGetVolume_r(&error, vcom->vop->volume); - if (vp) { - /* yep, is the BUSY flag set? */ - if (vp->specialStatus == VBUSY) { - - /* yep, clear BUSY flag */ - - vp->specialStatus = 0; - /* make sure vol is online */ - if (vcom->v) { - vcom->v->volumeID = 0; - V_inUse(vp) = 1; /* online */ - } - VPutVolume_r(vp); - break; - } - VPutVolume_r(vp); - } -#endif /* notdef */ - /* so, we need to attach the volume */ #ifdef AFS_DEMAND_ATTACH_FS /* check DAFS permissions */ vp = VLookupVolume_r(&error, vcom->vop->volume, NULL); - if (vp && !strcmp(VPartitionPath(V_partition(vp)), vcom->vop->partName) && + if (vp && + FSYNC_partMatch(vcom, vp, 1) && vp->pending_vol_op && (vcom->hdr->programType != vp->pending_vol_op->com.programType)) { /* a different program has this volume checked out. deny. */ @@ -545,10 +553,23 @@ FSYNC_com_VolOn(FSSYNC_VolOp_command * vcom, SYNC_response * res) if (vcom->hdr->command == FSYNC_VOL_LEAVE_OFF) { /* nothing much to do if we're leaving the volume offline */ #ifdef AFS_DEMAND_ATTACH_FS - if (vp && - !strcmp(VPartitionPath(V_partition(vp)), vcom->vop->partName)) { - VDeregisterVolOp_r(vp); - VChangeState_r(vp, VOL_STATE_UNATTACHED); + if (vp) { + if (FSYNC_partMatch(vcom, vp, 1)) { + if ((V_attachState(vp) == VOL_STATE_UNATTACHED) || + (V_attachState(vp) == VOL_STATE_PREATTACHED)) { + VChangeState_r(vp, VOL_STATE_UNATTACHED); + VDeregisterVolOp_r(vp); + } else { + code = SYNC_DENIED; + res->hdr.reason = FSYNC_BAD_STATE; + } + } else { + code = SYNC_DENIED; + res->hdr.reason = FSYNC_WRONG_PART; + } + } else { + code = SYNC_DENIED; + res->hdr.reason = FSYNC_UNKNOWN_VOLID; } #endif goto done; @@ -581,6 +602,29 @@ FSYNC_com_VolOn(FSSYNC_VolOp_command * vcom, SYNC_response * res) return code; } +/** + * service an FSYNC request to take a volume offline. 
+ * + * @param[in] vcom pointer command object + * @param[out] res object in which to store response packet + * + * @return operation status + * @retval SYNC_OK volume transitioned offline + * @retval SYNC_FAILED invalid command protocol message + * @retval SYNC_DENIED operation could not be completed + * + * @note this is an FSYNC RPC server stub + * + * @note this procedure handles the following FSSYNC command codes: + * - FSYNC_VOL_OFF + * - FSYNC_VOL_NEEDVOLUME + * + * @note the supplementary reason code contains additional details. + * When SYNC_DENIED is returned, the specific reason is + * placed in the response packet reason field. + * + * @internal + */ static afs_int32 FSYNC_com_VolOff(FSSYNC_VolOp_command * vcom, SYNC_response * res) { @@ -589,6 +633,9 @@ FSYNC_com_VolOff(FSSYNC_VolOp_command * vcom, SYNC_response * res) int i; Volume * vp, * nvp; Error error; +#ifdef AFS_DEMAND_ATTACH_FS + int reserved = 0; +#endif if (SYNC_verifyProtocolString(vcom->vop->partName, sizeof(vcom->vop->partName))) { res->hdr.reason = SYNC_REASON_MALFORMED_PACKET; @@ -623,9 +670,7 @@ FSYNC_com_VolOff(FSSYNC_VolOp_command * vcom, SYNC_response * res) #endif if (vp) { - if ((vcom->vop->partName[0] != 0) && - (strncmp(vcom->vop->partName, vp->partition->name, - sizeof(vcom->vop->partName)) != 0)) { + if (!FSYNC_partMatch(vcom, vp, 1)) { /* volume on desired partition is not online, so we * should treat this as an offline volume. 
*/ @@ -683,15 +728,21 @@ FSYNC_com_VolOff(FSSYNC_VolOp_command * vcom, SYNC_response * res) */ switch (type) { case salvageServer: + /* it is possible for the salvageserver to checkout a + * volume for salvage before its scheduling request + * has been sent to the salvageserver */ + if (vp->salvage.requested && !vp->salvage.scheduled) { + vp->salvage.scheduled = 1; + } case debugUtility: - /* give the salvageserver lots of liberty */ break; + case volumeUtility: - if ((V_attachState(vp) == VOL_STATE_ERROR) || - (V_attachState(vp) == VOL_STATE_SALVAGING)) { + if (VIsErrorState(V_attachState(vp))) { goto deny; } break; + default: Log("bad program type passed to FSSYNC\n"); goto deny; @@ -718,16 +769,20 @@ FSYNC_com_VolOff(FSSYNC_VolOp_command * vcom, SYNC_response * res) /* convert to heavyweight ref */ nvp = VGetVolumeByVp_r(&error, vp); - /* register the volume operation metadata with the volume */ - VRegisterVolOp_r(vp, &info); - if (!nvp) { Log("FSYNC_com_VolOff: failed to get heavyweight reference to volume %u\n", vcom->vop->volume); res->hdr.reason = FSYNC_VOL_PKG_ERROR; goto deny; + } else if (nvp != vp) { + /* i don't think this should ever happen, but just in case... */ + Log("FSYNC_com_VolOff: warning: potentially dangerous race detected\n"); + vp = nvp; } - vp = nvp; + + /* register the volume operation metadata with the volume */ + VRegisterVolOp_r(vp, &info); + } #endif /* AFS_DEMAND_ATTACH_FS */ @@ -769,12 +824,44 @@ FSYNC_com_VolOff(FSSYNC_VolOp_command * vcom, SYNC_response * res) return SYNC_DENIED; } +/** + * service an FSYNC request to mark a volume as moved. 
+ * + * @param[in] vcom pointer command object + * @param[out] res object in which to store response packet + * + * @return operation status + * @retval SYNC_OK volume marked as moved to a remote server + * @retval SYNC_FAILED invalid command protocol message + * @retval SYNC_DENIED current volume state does not permit this operation + * + * @note this is an FSYNC RPC server stub + * + * @note this operation also breaks all callbacks for the given volume + * + * @note this procedure handles the following FSSYNC command codes: + * - FSYNC_VOL_MOVE + * + * @note the supplementary reason code contains additional details. For + * instance, SYNC_DENIED is returned when the partition specified + * does not match the one registered in the volume object -- reason + * will be FSYNC_WRONG_PART in this case. + * + * @internal + */ static afs_int32 FSYNC_com_VolMove(FSSYNC_VolOp_command * vcom, SYNC_response * res) { + afs_int32 code = SYNC_DENIED; Error error; Volume * vp; + if (SYNC_verifyProtocolString(vcom->vop->partName, sizeof(vcom->vop->partName))) { + res->hdr.reason = SYNC_REASON_MALFORMED_PACKET; + code = SYNC_FAILED; + goto done; + } + /* Yuch: the "reason" for the move is the site it got moved to... */ /* still set specialStatus so we stop sending back VBUSY. * also should still break callbacks. 
Note that I don't know @@ -787,13 +874,27 @@ FSYNC_com_VolMove(FSSYNC_VolOp_command * vcom, SYNC_response * res) vp = VGetVolume_r(&error, vcom->vop->volume); #endif if (vp) { - vp->specialStatus = VMOVED; -#ifndef AFS_DEMAND_ATTACH_FS - VPutVolume_r(vp); + if (FSYNC_partMatch(vcom, vp, 1)) { +#ifdef AFS_DEMAND_ATTACH_FS + if ((V_attachState(vp) == VOL_STATE_UNATTACHED) || + (V_attachState(vp) == VOL_STATE_PREATTACHED)) { #endif + code = SYNC_OK; + vp->specialStatus = VMOVED; +#ifdef AFS_DEMAND_ATTACH_FS + } else { + res->hdr.reason = FSYNC_BAD_STATE; + } +#endif + } else { + res->hdr.reason = FSYNC_WRONG_PART; + } + VPutVolume_r(vp); + } else { + res->hdr.reason = FSYNC_UNKNOWN_VOLID; } - if (V_BreakVolumeCallbacks) { + if ((code == SYNC_OK) && (V_BreakVolumeCallbacks != NULL)) { Log("fssync: volume %u moved to %x; breaking all call backs\n", vcom->vop->volume, vcom->hdr->reason); VOL_UNLOCK; @@ -801,17 +902,48 @@ FSYNC_com_VolMove(FSSYNC_VolOp_command * vcom, SYNC_response * res) VOL_LOCK; } - return SYNC_OK; + + done: + return code; } +/** + * service an FSYNC request to mark a volume as destroyed. + * + * @param[in] vcom pointer command object + * @param[out] res object in which to store response packet + * + * @return operation status + * @retval SYNC_OK volume marked as destroyed + * @retval SYNC_FAILED invalid command protocol message + * @retval SYNC_DENIED current volume state does not permit this operation + * + * @note this is an FSYNC RPC server stub + * + * @note this procedure handles the following FSSYNC command codes: + * - FSYNC_VOL_DONE + * + * @note the supplementary reason code contains additional details. For + * instance, SYNC_OK is still returned when the partition specified + * does not match the one registered in the volume object -- reason + * will be FSYNC_WRONG_PART in this case. 
+ * + * @internal + */ static afs_int32 FSYNC_com_VolDone(FSSYNC_VolOp_command * vcom, SYNC_response * res) { + afs_int32 code = SYNC_FAILED; #ifdef AFS_DEMAND_ATTACH_FS Error error; Volume * vp; #endif + if (SYNC_verifyProtocolString(vcom->vop->partName, sizeof(vcom->vop->partName))) { + res->hdr.reason = SYNC_REASON_MALFORMED_PACKET; + goto done; + } + /* don't try to put online, this call is made only after deleting * a volume, in which case we want to remove the vol # from the * OfflineVolumes array only */ @@ -821,38 +953,107 @@ FSYNC_com_VolDone(FSSYNC_VolOp_command * vcom, SYNC_response * res) #ifdef AFS_DEMAND_ATTACH_FS vp = VLookupVolume_r(&error, vcom->vop->volume, NULL); if (vp) { - VChangeState_r(vp, VOL_STATE_UNATTACHED); - VDeregisterVolOp_r(vp); + if (FSYNC_partMatch(vcom, vp, 1)) { + if ((V_attachState(vp) == VOL_STATE_UNATTACHED) || + (V_attachState(vp) == VOL_STATE_PREATTACHED)) { + VChangeState_r(vp, VOL_STATE_UNATTACHED); + VDeregisterVolOp_r(vp); + code = SYNC_OK; + } else { + code = SYNC_DENIED; + res->hdr.reason = FSYNC_BAD_STATE; + } + } else { + code = SYNC_OK; /* XXX is this really a good idea? */ + res->hdr.reason = FSYNC_WRONG_PART; + } + } else { + res->hdr.reason = FSYNC_UNKNOWN_VOLID; } #endif - return SYNC_OK; + done: + return code; } #ifdef AFS_DEMAND_ATTACH_FS /** - * force a volume into the hard error state. + * service an FSYNC request to transition a volume to the hard error state. 
+ * + * @param[in] vcom pointer command object + * @param[out] res object in which to store response packet + * + * @return operation status + * @retval SYNC_OK volume transitioned to hard error state + * @retval SYNC_FAILED invalid command protocol message + * @retval SYNC_DENIED (see note) + * + * @note this is an FSYNC RPC server stub + * + * @note this procedure handles the following FSSYNC command codes: + * - FSYNC_VOL_FORCE_ERROR + * + * @note SYNC_DENIED is returned in the following cases: + * - no partition name is specified (reason field set to + * FSYNC_WRONG_PART). + * - volume id not known to fileserver (reason field set + * to FSYNC_UNKNOWN_VOLID). + * + * @note demand attach fileserver only + * + * @internal */ static afs_int32 FSYNC_com_VolError(FSSYNC_VolOp_command * vcom, SYNC_response * res) { Error error; Volume * vp; - afs_int32 code = SYNC_DENIED; + afs_int32 code = SYNC_FAILED; + + if (SYNC_verifyProtocolString(vcom->vop->partName, sizeof(vcom->vop->partName))) { + res->hdr.reason = SYNC_REASON_MALFORMED_PACKET; + goto done; + } vp = VLookupVolume_r(&error, vcom->vop->volume, NULL); - if (vp && !strcmp(VPartitionPath(V_partition(vp)), vcom->vop->partName)) { - memset(&vp->salvage, 0, sizeof(vp->salvage)); - VChangeState_r(vp, VOL_STATE_ERROR); - code = SYNC_OK; + if (vp) { + if (FSYNC_partMatch(vcom, vp, 0)) { + /* null out salvsync control state, as it's no longer relevant */ + memset(&vp->salvage, 0, sizeof(vp->salvage)); + VChangeState_r(vp, VOL_STATE_ERROR); + code = SYNC_OK; + } else { + res->hdr.reason = FSYNC_WRONG_PART; + } } else { res->hdr.reason = FSYNC_UNKNOWN_VOLID; } - + + done: return code; } -#endif +#endif /* AFS_DEMAND_ATTACH_FS */ +/** + * service an FSYNC request to break all callbacks for this volume. 
+ * + * @param[in] vcom pointer command object + * @param[out] res object in which to store response packet + * + * @return operation status + * @retval SYNC_OK callback breaks scheduled for volume + * + * @note this is an FSYNC RPC server stub + * + * @note this procedure handles the following FSSYNC command codes: + * - FSYNC_VOL_BREAKCBKS + * + * @note demand attach fileserver only + * + * @todo should do partition matching + * + * @internal + */ static afs_int32 FSYNC_com_VolBreakCBKs(FSSYNC_VolOp_command * vcom, SYNC_response * res) { @@ -867,13 +1068,35 @@ FSYNC_com_VolBreakCBKs(FSSYNC_VolOp_command * vcom, SYNC_response * res) return SYNC_OK; } +/** + * service an FSYNC request to return the Volume object. + * + * @param[in] vcom pointer command object + * @param[out] res object in which to store response packet + * + * @return operation status + * @retval SYNC_OK volume object returned to caller + * @retval SYNC_FAILED bad command packet, or failed to locate volume object + * + * @note this is an FSYNC RPC server stub + * + * @note this procedure handles the following FSSYNC command codes: + * - FSYNC_VOL_QUERY + * + * @internal + */ static afs_int32 FSYNC_com_VolQuery(FSSYNC_VolOp_command * vcom, SYNC_response * res) { - afs_int32 code = SYNC_OK; + afs_int32 code = SYNC_FAILED; Error error; Volume * vp; + if (SYNC_verifyProtocolString(vcom->vop->partName, sizeof(vcom->vop->partName))) { + res->hdr.reason = SYNC_REASON_MALFORMED_PACKET; + goto done; + } + #ifdef AFS_DEMAND_ATTACH_FS vp = VLookupVolume_r(&error, vcom->vop->volume, NULL); #else /* !AFS_DEMAND_ATTACH_FS */ @@ -881,58 +1104,101 @@ FSYNC_com_VolQuery(FSSYNC_VolOp_command * vcom, SYNC_response * res) #endif /* !AFS_DEMAND_ATTACH_FS */ if (vp) { - assert(sizeof(Volume) <= res->payload.len); - memcpy(res->payload.buf, vp, sizeof(Volume)); - res->hdr.response_len += sizeof(Volume); + if (FSYNC_partMatch(vcom, vp, 1)) { + if (res->payload.len >= sizeof(Volume)) { + memcpy(res->payload.buf, vp, 
sizeof(Volume)); + res->hdr.response_len += sizeof(Volume); + code = SYNC_OK; + } else { + res->hdr.reason = SYNC_REASON_PAYLOAD_TOO_BIG; + } + } else { + res->hdr.reason = FSYNC_WRONG_PART; + } #ifndef AFS_DEMAND_ATTACH_FS VPutVolume_r(vp); #endif } else { res->hdr.reason = FSYNC_UNKNOWN_VOLID; - code = SYNC_FAILED; } + + done: return code; } +/** + * service an FSYNC request to return the Volume header. + * + * @param[in] vcom pointer to command object + * @param[out] res object in which to store response packet + * + * @return operation status + * @retval SYNC_OK volume header returned to caller + * @retval SYNC_FAILED bad command packet, response buffer too small, wrong partition, header not attached, or unknown volume id + * + * @note this is an FSYNC RPC server stub + * + * @note this procedure handles the following FSSYNC command codes: + * - FSYNC_VOL_QUERY_HDR + * + * @internal + */ static afs_int32 FSYNC_com_VolHdrQuery(FSSYNC_VolOp_command * vcom, SYNC_response * res) { - afs_int32 code = SYNC_OK; + afs_int32 code = SYNC_FAILED; Error error; Volume * vp; - int hdr_ok = 0; + if (SYNC_verifyProtocolString(vcom->vop->partName, sizeof(vcom->vop->partName))) { + res->hdr.reason = SYNC_REASON_MALFORMED_PACKET; + goto done; + } + if (res->payload.len < sizeof(VolumeDiskData)) { + res->hdr.reason = SYNC_REASON_PAYLOAD_TOO_BIG; + goto done; + } + #ifdef AFS_DEMAND_ATTACH_FS vp = VLookupVolume_r(&error, vcom->vop->volume, NULL); - if (vp && - (vp->header != NULL) && - (V_attachFlags(vp) & VOL_HDR_ATTACHED) && - (V_attachFlags(vp) & VOL_HDR_LOADED)) { - hdr_ok = 1; - } #else /* !AFS_DEMAND_ATTACH_FS */ vp = VGetVolume_r(&error, vcom->vop->volume); - if (vp && vp->header) { - hdr_ok = 1; - } +#endif + + if (vp) { + if (FSYNC_partMatch(vcom, vp, 1)) { +#ifdef AFS_DEMAND_ATTACH_FS + if ((vp->header == NULL) || + !(V_attachFlags(vp) & VOL_HDR_ATTACHED) || + !(V_attachFlags(vp) & VOL_HDR_LOADED)) { + res->hdr.reason = FSYNC_HDR_NOT_ATTACHED; + goto put_vol; + } +#else /* !AFS_DEMAND_ATTACH_FS */ + if (!vp->header) { +
res->hdr.reason = FSYNC_HDR_NOT_ATTACHED; + goto put_vol; + } #endif /* !AFS_DEMAND_ATTACH_FS */ + } else { + res->hdr.reason = FSYNC_WRONG_PART; + goto put_vol; + } + } else { + res->hdr.reason = FSYNC_UNKNOWN_VOLID; + goto done; + } - load_done: - if (hdr_ok) { - assert(sizeof(VolumeDiskData) <= res->payload.len); - memcpy(res->payload.buf, &V_disk(vp), sizeof(VolumeDiskData)); - res->hdr.response_len += sizeof(VolumeDiskData); + memcpy(res->payload.buf, &V_disk(vp), sizeof(VolumeDiskData)); + res->hdr.response_len += sizeof(VolumeDiskData); + code = SYNC_OK; + + /* non-DAFS: release the volume obtained via VGetVolume_r on every path that found one */ + put_vol: #ifndef AFS_DEMAND_ATTACH_FS - VPutVolume_r(vp); + VPutVolume_r(vp); #endif - } else { - if (vp) { - res->hdr.reason = FSYNC_HDR_NOT_ATTACHED; - } else { - res->hdr.reason = FSYNC_UNKNOWN_VOLID; - } - code = SYNC_FAILED; - } + done: return code; } @@ -1148,6 +1414,17 @@ FSYNC_com_StatsOpVLRU(FSSYNC_StatsOp_command * scom, SYNC_response * res) } #endif /* AFS_DEMAND_ATTACH_FS */ +/** + * populate an FSSYNC_VolOp_info object from a command packet object. + * + * @param[in] vcom pointer to command packet + * @param[out] info pointer to info object which will be populated + * + * @note FSSYNC_VolOp_info objects are attached to Volume objects when + * a volume operation is commenced. + * + * @internal + */ static void FSYNC_com_to_info(FSSYNC_VolOp_command * vcom, FSSYNC_VolOp_info * info) { @@ -1155,6 +1432,33 @@ FSYNC_com_to_info(FSSYNC_VolOp_command * vcom, FSSYNC_VolOp_info * info) memcpy(&info->vop, vcom->vop, sizeof(FSSYNC_VolOp_hdr)); } +/** + * check whether command packet partition name matches volume + * object's partition name.
+ * + * @param[in] vcom pointer to command packet + * @param[in] vp pointer to volume object + * @param[in] match_anon anon matching control flag (see note below) + * + * @return whether partitions match + * @retval 0 partitions do NOT match + * @retval 1 partitions match + * + * @note if match_anon is non-zero, then this function will return a + * positive match for a zero-length partition string in the + * command packet. + * + * @internal + */ +static int +FSYNC_partMatch(FSSYNC_VolOp_command * vcom, Volume * vp, int match_anon) +{ + return ((match_anon && vcom->vop->partName[0] == 0) || + (strncmp(vcom->vop->partName, V_partition(vp)->name, + sizeof(vcom->vop->partName)) == 0)); +} + + static void FSYNC_Drop(int fd) { diff --git a/src/vol/fssync.h b/src/vol/fssync.h index b506b88..2596cbe 100644 --- a/src/vol/fssync.h +++ b/src/vol/fssync.h @@ -20,7 +20,7 @@ #define __fssync_h_ -#define FSYNC_PROTO_VERSION 2 +#define FSYNC_PROTO_VERSION 3 /** @@ -67,6 +67,8 @@ enum FSYNCReasonCode { FSYNC_NO_PENDING_VOL_OP = SYNC_REASON_CODE_DECL(7), /**< no volume operation pending */ FSYNC_VOL_PKG_ERROR = SYNC_REASON_CODE_DECL(8), /**< error in the volume package */ FSYNC_UNKNOWN_VNID = SYNC_REASON_CODE_DECL(9), /**< vnode id not known by fileserver */ + FSYNC_WRONG_PART = SYNC_REASON_CODE_DECL(10),/**< volume attached on different partition */ + FSYNC_BAD_STATE = SYNC_REASON_CODE_DECL(11),/**< current volume state does not allow this operation */ FSYNC_REASON_CODE_END }; diff --git a/src/vol/partition.c b/src/vol/partition.c index 6874522..6db056e 100644 --- a/src/vol/partition.c +++ b/src/vol/partition.c @@ -281,8 +281,10 @@ VInitPartition_r(char *path, char *devname, Device dev) VSetPartitionDiskUsage_r(dp); #ifdef AFS_DEMAND_ATTACH_FS AddPartitionToTable_r(dp); - queue_Init(&dp->vol_list); + queue_Init(&dp->vol_list.head); assert(pthread_cond_init(&dp->vol_list.cv, NULL) == 0); + dp->vol_list.len = 0; + dp->vol_list.busy = 0; #endif /* AFS_DEMAND_ATTACH_FS */ } @@ -1264,9 
+1266,28 @@ VUnlockPartition(char *name) } #ifdef AFS_DEMAND_ATTACH_FS + /* XXX not sure this will work on AFS_NT40_ENV * needs to be tested! */ + +/** + * lookup a disk partition object by its index number. + * + * @param[in] id partition index number + * @param[in] abortp see abortp usage note below + * + * @return disk partition object + * @retval NULL no such disk partition + * + * @note when abortp is non-zero, lookups which would return + * NULL will result in an assertion failure + * + * @pre VOL_LOCK must be held + * + * @internal volume package internal use only + */ + struct DiskPartition64 * VGetPartitionById_r(afs_int32 id, int abortp) { @@ -1282,6 +1303,19 @@ VGetPartitionById_r(afs_int32 id, int abortp) return dp; } +/** + * lookup a disk partition object by its index number. + * + * @param[in] id partition index number + * @param[in] abortp see abortp usage note below + * + * @return disk partition object + * @retval NULL no such disk partition + * + * @note when abortp is non-zero, lookups which would return + * NULL will result in an assertion failure + */ + struct DiskPartition64 * VGetPartitionById(afs_int32 id, int abortp) { diff --git a/src/vol/purge.c b/src/vol/purge.c index a617270..8c6c630 100644 --- a/src/vol/purge.c +++ b/src/vol/purge.c @@ -66,6 +66,10 @@ VPurgeVolume(Error * ec, Volume * vp) struct DiskPartition64 *tpartp = vp->partition; char purgePath[MAXPATHLEN]; + /* so VCheckDetach doesn't try to update the volume header and + * dump spurious errors into the logs */ + V_inUse(vp) = 0; + /* N.B. it's important here to use the partition pointed to by the * volume header. This routine can, under some circumstances, be called * when two volumes with the same id exist on different partitions. 
diff --git a/src/vol/salvsync-server.c b/src/vol/salvsync-server.c index 21125df..7b0a3bd 100644 --- a/src/vol/salvsync-server.c +++ b/src/vol/salvsync-server.c @@ -380,7 +380,7 @@ SALVSYNC_com(int fd) sres.res = &res; SALV_cnt++; - if (SYNC_getCom(fd, &com)) { + if (SYNC_getCom(&salvsync_server_state, fd, &com)) { Log("SALVSYNC_com: read failed; dropping connection (cnt=%d)\n", SALV_cnt); SALVSYNC_Drop(fd); return; @@ -415,6 +415,8 @@ SALVSYNC_com(int fd) goto respond; } + res.hdr.com_seq = com.hdr.com_seq; + VOL_LOCK; switch (com.hdr.command) { case SALVSYNC_NOP: @@ -449,7 +451,7 @@ SALVSYNC_com(int fd) VOL_UNLOCK; respond: - SYNC_putRes(fd, &res); + SYNC_putRes(&salvsync_server_state, fd, &res); if (res.hdr.flags & SYNC_FLAG_CHANNEL_SHUTDOWN) { SALVSYNC_Drop(fd); } diff --git a/src/vol/salvsync.h b/src/vol/salvsync.h index 58c772a..748b629 100644 --- a/src/vol/salvsync.h +++ b/src/vol/salvsync.h @@ -12,7 +12,7 @@ * salvage server interface */ #ifndef _AFS_VOL_SALVSYNC_H -#define _AFS_VOL_SALVSYNC_H +#define _AFS_VOL_SALVSYNC_H 1 #define SALSRV_EXIT_VOLGROUP_LINK 10 @@ -24,7 +24,8 @@ #define SALVSYNC_PROTO_VERSION_V1 1 #define SALVSYNC_PROTO_VERSION_V2 2 -#define SALVSYNC_PROTO_VERSION SALVSYNC_PROTO_VERSION_V2 +#define SALVSYNC_PROTO_VERSION_V3 3 +#define SALVSYNC_PROTO_VERSION SALVSYNC_PROTO_VERSION_V3 /** diff --git a/src/vol/vol-salvage.c b/src/vol/vol-salvage.c index 38a2bd7..a01a4d4 100644 --- a/src/vol/vol-salvage.c +++ b/src/vol/vol-salvage.c @@ -739,7 +739,7 @@ SalvageFileSys1(struct DiskPartition64 *partP, VolumeId singleVolumeNumber) if ((programType != salvageServer) && !VConnectFS()) { Abort("Couldn't connect to file server\n"); } - AskOffline(singleVolumeNumber); + AskOffline(singleVolumeNumber, partP->name); } else { if (!Showmode) Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n", @@ -1289,7 +1289,7 @@ GetVolumeSummary(VolumeId singleVolumeNumber) (void)afs_snprintf(nameShouldBe, sizeof nameShouldBe, VFORMAT, vsp->header.id); if 
(singleVolumeNumber) - AskOffline(vsp->header.id); + AskOffline(vsp->header.id, fileSysPartition->name); if (strcmp(nameShouldBe, dp->d_name)) { if (!Showmode) Log("Volume header file %s is incorrectly named; %sdeleted (it will be recreated later, if necessary)\n", dp->d_name, (Testing ? "it would have been " : "")); @@ -3180,12 +3180,12 @@ MaybeZapVolume(register struct InodeSummary *isp, char *message, int deleteMe, void -AskOffline(VolumeId volumeId) +AskOffline(VolumeId volumeId, char * partition) { afs_int32 code, i; for (i = 0; i < 3; i++) { - code = FSYNC_VolOp(volumeId, NULL, FSYNC_VOL_OFF, FSYNC_SALVAGE, NULL); + code = FSYNC_VolOp(volumeId, partition, FSYNC_VOL_OFF, FSYNC_SALVAGE, NULL); if (code == SYNC_OK) { break; diff --git a/src/vol/vol-salvage.h b/src/vol/vol-salvage.h index ad5b5f9..3eafb9a 100644 --- a/src/vol/vol-salvage.h +++ b/src/vol/vol-salvage.h @@ -221,7 +221,7 @@ extern void Exit(int code); extern int Fork(void); extern int Wait(char *prog); extern char *ToString(char *s); -extern void AskOffline(VolumeId volumeId); +extern void AskOffline(VolumeId volumeId, char * partition); extern void AskOnline(VolumeId volumeId, char *partition); extern void CheckLogFile(char * log_path); #ifndef AFS_NT40_ENV diff --git a/src/vol/volume.c b/src/vol/volume.c index e5227be..617e134 100644 --- a/src/vol/volume.c +++ b/src/vol/volume.c @@ -383,6 +383,12 @@ static void VLRU_SwitchQueues(volatile Volume * vp, int new_idx, int append); static int VCheckSoftDetach(volatile Volume * vp, afs_uint32 thresh); static int VCheckSoftDetachCandidate(volatile Volume * vp, afs_uint32 thresh); static int VSoftDetachVolume_r(volatile Volume * vp, afs_uint32 thresh); + + +pthread_key_t VThread_key; +VThreadOptions_t VThread_defaults = { + 0 /**< allow salvsync */ +}; #endif /* AFS_DEMAND_ATTACH_FS */ @@ -439,6 +445,7 @@ VInitVolumePackage(ProgramType pt, afs_uint32 nLargeVnodes, afs_uint32 nSmallVno } else { VLRU_SetOptions(VLRU_SET_ENABLED, 0); } + 
assert(pthread_key_create(&VThread_key, NULL) == 0); #endif #ifdef AFS_PTHREAD_ENV @@ -577,8 +584,8 @@ VInitVolumePackage(ProgramType pt, afs_uint32 nLargeVnodes, afs_uint32 nSmallVno #ifdef FSSYNC_BUILD_CLIENT if (programType == volumeUtility && connect) { if (!VConnectFS()) { - Log("Unable to connect to file server; aborted\n"); - exit(1); + Log("Unable to connect to file server; will retry at need\n"); + /*exit(1);*/ } } #ifdef AFS_DEMAND_ATTACH_FS @@ -1681,7 +1688,9 @@ VPreAttachVolumeByVp_r(Error * ec, /* link the volume with its associated vice partition */ vp->device = partp->device; vp->partition = partp; + vp->hashid = vid; + vp->specialStatus = 0; /* if we dropped the lock, reacquire the lock, * check for pre-attach races, and then add @@ -1790,12 +1799,12 @@ VAttachVolumeByName_r(Error * ec, char *partition, char *name, int mode) VWaitExclusiveState_r(vp); /* at this point state must be one of: - * UNATTACHED, - * ATTACHED, - * SHUTTING_DOWN, - * GOING_OFFLINE, - * SALVAGING, - * ERROR + * - UNATTACHED + * - ATTACHED + * - SHUTTING_DOWN + * - GOING_OFFLINE + * - SALVAGING + * - ERROR */ if (vp->specialStatus == VBUSY) @@ -2282,7 +2291,7 @@ attach2(Error * ec, VolId volumeId, char *path, register struct VolumeHeader * h res.payload.buf = &vp->header->diskstuff; if (FSYNC_VolOp(volumeId, - VPartitionPath(partp), + partp->name, FSYNC_VOL_QUERY_HDR, FSYNC_WHATEVER, &res) == SYNC_OK) { @@ -2500,10 +2509,10 @@ attach2(Error * ec, VolId volumeId, char *path, register struct VolumeHeader * h AddVolumeToHashTable(vp, V_id(vp)); #ifdef AFS_DEMAND_ATTACH_FS - AddVolumeToVByPList_r(vp); - VLRU_Add_r(vp); if ((programType != fileServer) || (V_inUse(vp) == fileServer)) { + AddVolumeToVByPList_r(vp); + VLRU_Add_r(vp); VChangeState_r(vp, VOL_STATE_ATTACHED); } else { VChangeState_r(vp, VOL_STATE_UNATTACHED); @@ -2692,6 +2701,20 @@ GetVolume(Error * ec, Error * client_ec, VolId volumeId, Volume * hint, int flag Volume *avp, * rvp = hint; #endif + /* + * if VInit is zero, 
the volume package dynamic + * data structures have not been initialized yet, + * and we must immediately return an error + */ + if (VInit == 0) { + vp = NULL; + *ec = VOFFLINE; + if (client_ec) { + *client_ec = VOFFLINE; + } + goto not_inited; + } + #ifdef AFS_DEMAND_ATTACH_FS if (rvp) { VCreateReservation_r(rvp); @@ -2749,8 +2772,8 @@ GetVolume(Error * ec, Error * client_ec, VolId volumeId, Volume * hint, int flag /* short circuit with VNOVOL in the following circumstances: * - * VOL_STATE_ERROR - * VOL_STATE_SHUTTING_DOWN + * - VOL_STATE_ERROR + * - VOL_STATE_SHUTTING_DOWN */ if ((V_attachState(vp) == VOL_STATE_ERROR) || (V_attachState(vp) == VOL_STATE_SHUTTING_DOWN)) { @@ -2762,20 +2785,23 @@ GetVolume(Error * ec, Error * client_ec, VolId volumeId, Volume * hint, int flag /* * short circuit with VOFFLINE in the following circumstances: * - * VOL_STATE_UNATTACHED + * - VOL_STATE_UNATTACHED */ if (V_attachState(vp) == VOL_STATE_UNATTACHED) { - *ec = VOFFLINE; + if (vp->specialStatus) { + *ec = vp->specialStatus; + } else { + *ec = VOFFLINE; + } vp = NULL; break; } /* allowable states: - * UNATTACHED - * PREATTACHED - * ATTACHED - * GOING_OFFLINE - * SALVAGING + * - PREATTACHED + * - ATTACHED + * - GOING_OFFLINE + * - SALVAGING */ if (vp->salvage.requested) { @@ -2863,20 +2889,34 @@ GetVolume(Error * ec, Error * client_ec, VolId volumeId, Volume * hint, int flag * this test MUST happen after the volume header is loaded */ if (vp->pending_vol_op && !VVolOpLeaveOnline_r(vp, vp->pending_vol_op)) { - if (client_ec) { - /* see CheckVnode() in afsfileprocs.c for an explanation - * of this error code logic */ - afs_uint32 now = FT_ApproxTime(); - if ((vp->stats.last_vol_op + (10 * 60)) >= now) { - *client_ec = VBUSY; - } else { - *client_ec = VRESTARTING; - } - } - *ec = VOFFLINE; - ReleaseVolumeHeader(vp->header); - vp = NULL; - break; + /* + * volume cannot remain online during this volume operation. + * notify client. 
+ */ + if (vp->specialStatus) { + /* + * special status codes outrank normal VOFFLINE code + */ + *ec = vp->specialStatus; + if (client_ec) { + *client_ec = vp->specialStatus; + } + } else { + if (client_ec) { + /* see CheckVnode() in afsfileprocs.c for an explanation + * of this error code logic */ + afs_uint32 now = FT_ApproxTime(); + if ((vp->stats.last_vol_op + (10 * 60)) >= now) { + *client_ec = VBUSY; + } else { + *client_ec = VRESTARTING; + } + } + *ec = VOFFLINE; + } + ReleaseVolumeHeader(vp->header); + vp = NULL; + break; } #endif /* AFS_DEMAND_ATTACH_FS */ @@ -2944,6 +2984,7 @@ GetVolume(Error * ec, Error * client_ec, VolId volumeId, Volume * hint, int flag } #endif /* AFS_DEMAND_ATTACH_FS */ + not_inited: assert(vp || *ec); return vp; } @@ -3450,8 +3491,8 @@ VCheckDetach(register Volume * vp) V_inUse(vp) = 0; VUpdateVolume_r(&ec, vp, VOL_UPDATE_NOFORCEOFF); if (ec) { - Log("VCheckDetach: failed to clear inUse failed during detachment of volid %u\n", - vp->hashid); + Log("VCheckDetach: volume header update for volume %u " + "failed with errno %d\n", vp->hashid, errno); } } VReleaseVolumeHandles_r(vp); @@ -3483,8 +3524,8 @@ VCheckDetach(register Volume * vp) V_inUse(vp) = 0; VUpdateVolume_r(&ec, vp, VOL_UPDATE_NOFORCEOFF); if (ec) { - Log("VCheckDetach: failed to clear inUse failed during detachment of volid %u\n", - vp->hashid); + Log("VCheckDetach: volume header update for volume %u failed with errno %d\n", + vp->hashid, errno); } } VReleaseVolumeHandles_r(vp); @@ -3977,6 +4018,7 @@ VScheduleSalvage_r(Volume * vp) int code, ret=0; #ifdef SALVSYNC_BUILD_CLIENT VolState state_save; + VThreadOptions_t * thread_opts; char partName[16]; if (vp->nWaiters || vp->nUsers) { @@ -3987,6 +4029,21 @@ VScheduleSalvage_r(Volume * vp) if (vp->stats.salvages >= SALVAGE_COUNT_MAX) return 1; + /* + * don't perform salvsync ops on certain threads + */ + thread_opts = pthread_getspecific(VThread_key); + if (thread_opts == NULL) { + thread_opts = &VThread_defaults; + } + if 
(thread_opts->disallow_salvsync) { + return 1; + } + + /* + * XXX the scheduling process should really be done asynchronously + * to avoid fssync deadlocks + */ if (!vp->salvage.scheduled) { /* if we haven't previously scheduled a salvage, do so now * @@ -3998,7 +4055,6 @@ VScheduleSalvage_r(Volume * vp) */ strlcpy(partName, VPartitionPath(vp->partition), sizeof(partName)); state_save = VChangeState_r(vp, VOL_STATE_SALVSYNC_REQ); - V_attachFlags(vp) |= VOL_IS_BUSY; VOL_UNLOCK; /* can't use V_id() since there's no guarantee @@ -4011,7 +4067,6 @@ VScheduleSalvage_r(Volume * vp) NULL); VOL_LOCK; VChangeState_r(vp, state_save); - V_attachFlags(vp) &= ~(VOL_IS_BUSY); if (code == SYNC_OK) { vp->salvage.scheduled = 1; @@ -4049,9 +4104,8 @@ VScheduleSalvage_r(Volume * vp) * * @pre VOL_LOCK is held. * - * @post salvageserver is sent a request to cancel the volume salvage - * - * @todo should set exclusive state and drop glock around salvsync call + * @post salvageserver is sent a request to cancel the volume salvage. + * volume is transitioned to a hard error state. * * @internal volume package internal use only. 
*/ @@ -4062,14 +4116,24 @@ VCancelSalvage_r(Volume * vp, int reason) #ifdef SALVSYNC_BUILD_CLIENT if (vp->salvage.scheduled) { + VChangeState_r(vp, VOL_STATE_SALVSYNC_REQ); + VOL_UNLOCK; + + /* can't use V_id() since there's no guarantee + * we have the disk data header at this point */ code = SALVSYNC_SalvageVolume(vp->hashid, VPartitionPath(vp->partition), SALVSYNC_CANCEL, reason, 0, NULL); + + VOL_LOCK; + VChangeState_r(vp, VOL_STATE_ERROR); + if (code == SYNC_OK) { vp->salvage.scheduled = 0; + vp->salvage.requested = 0; } else { ret = 1; } @@ -5614,7 +5678,6 @@ VLRU_ScannerThread(void * args) min_delay = 0; min_idx = i; overdue = 1; - break; } } diff --git a/src/vol/volume.h b/src/vol/volume.h index 779ec00..079d9a2 100644 --- a/src/vol/volume.h +++ b/src/vol/volume.h @@ -222,6 +222,17 @@ typedef enum { #define VLRU_DEFAULT_OFFLINE_INTERVAL (60*2) /* 2 minutes */ #define VLRU_DEFAULT_OFFLINE_MAX 8 /* 8 volumes */ + +/** + * DAFS thread-specific options structure + */ +typedef struct VThreadOptions { + int disallow_salvsync; /**< whether or not salvsync calls are allowed + * on this thread (deadlock prevention). 
*/ +} VThreadOptions_t; +extern pthread_key_t VThread_key; +extern VThreadOptions_t VThread_defaults; + #endif /* AFS_DEMAND_ATTACH_FS */ diff --git a/src/volser/volprocs.c b/src/volser/volprocs.c index 9162a05..34c7fef 100644 --- a/src/volser/volprocs.c +++ b/src/volser/volprocs.c @@ -352,7 +352,6 @@ SAFSVolNukeVolume(struct rx_call *acid, afs_int32 apartID, afs_int32 avolID) afs_int32 VolNukeVolume(struct rx_call *acid, afs_int32 apartID, afs_int32 avolID) { - register char *tp; char partName[50]; afs_int32 error; register afs_int32 code; @@ -365,10 +364,8 @@ VolNukeVolume(struct rx_call *acid, afs_int32 apartID, afs_int32 avolID) if (DoLogging) Log("%s is executing VolNukeVolume %u\n", caller, avolID); - tp = volutil_PartitionName(apartID); - if (!tp) + if (volutil_PartitionName2_r(apartID, partName, sizeof(partName)) != 0) return VOLSERNOVOL; - strcpy(partName, tp); /* remember it for later */ /* we first try to attach the volume in update mode, so that the file * server doesn't try to use it (and abort) while (or after) we delete it. * If we don't get the volume, that's fine, too. We just won't put it back. 
@@ -1448,6 +1445,7 @@ VolSetForwarding(struct rx_call *acid, afs_int32 atid, afs_int32 anewsite) { register struct volser_trans *tt; char caller[MAXKTCNAMELEN]; + char partName[16]; if (!afsconf_SuperUser(tdir, acid, caller)) return VOLSERBAD_ACCESS; /*not a super user */ @@ -1462,7 +1460,10 @@ VolSetForwarding(struct rx_call *acid, afs_int32 atid, afs_int32 anewsite) } strcpy(tt->lastProcName, "SetForwarding"); tt->rxCallPtr = acid; - FSYNC_VolOp(tt->volid, NULL, FSYNC_VOL_MOVE, anewsite, NULL); + if (volutil_PartitionName2_r(tt->partition, partName, sizeof(partName)) != 0) { + partName[0] = '\0'; + } + FSYNC_VolOp(tt->volid, partName, FSYNC_VOL_MOVE, anewsite, NULL); tt->rxCallPtr = (struct rx_call *)0; if (TRELE(tt)) return VOLSERTRELE_ERROR; @@ -1830,6 +1831,9 @@ typedef struct { * * @pre handle object must have a valid pointer and enumeration value * + * @note passing a NULL value for vp means that the fileserver doesn't + * know about this particular volume, thus implying it is offline. + * * @return operation status * @retval 0 success * @retval 1 failure @@ -1871,7 +1875,8 @@ FillVolInfo(Volume * vp, VolumeDiskData * hdr, volint_info_handle_t * handle) * along with the blessed and inService flags from the header. 
* -- tkeiser 11/27/2007 */ - if ((V_attachState(vp) == VOL_STATE_UNATTACHED) || + if (!vp || + (V_attachState(vp) == VOL_STATE_UNATTACHED) || VIsErrorState(V_attachState(vp)) || !hdr->inService || !hdr->blessed) { @@ -1889,7 +1894,8 @@ FillVolInfo(Volume * vp, VolumeDiskData * hdr, volint_info_handle_t * handle) #ifdef AFS_DEMAND_ATTACH_FS /* see comment above where we set inUse bit */ - if (hdr->needsSalvaged || VIsErrorState(V_attachState(vp))) { + if (hdr->needsSalvaged || + (vp && VIsErrorState(V_attachState(vp)))) { handle->volinfo_ptr.base->needsSalvaged = 1; } else { handle->volinfo_ptr.base->needsSalvaged = 0; @@ -1939,28 +1945,44 @@ FillVolInfo(Volume * vp, VolumeDiskData * hdr, volint_info_handle_t * handle) * get struct Volume out of the fileserver. * * @param[in] volumeId volumeId for which we want state information - * @param[out] vp pointer to Volume object + * @param[in] pname partition name string + * @param[inout] vp pointer to pointer to Volume object which + * will be populated (see note) * * @return operation status - * @retval 0 success - * @retval nonzero failure + * @retval 0 success + * @retval non-zero failure + * + * @note if FSYNC_VolOp fails in certain ways, *vp will be set to NULL + * + * @internal */ static int -GetVolObject(afs_uint32 volumeId, Volume * vp) +GetVolObject(afs_uint32 volumeId, char * pname, Volume ** vp) { int code; SYNC_response res; res.hdr.response_len = sizeof(res.hdr); - res.payload.buf = vp; - res.payload.len = sizeof(*vp); + res.payload.buf = *vp; + res.payload.len = sizeof(Volume); code = FSYNC_VolOp(volumeId, - "", + pname, FSYNC_VOL_QUERY, 0, &res); + if (code != SYNC_OK) { + switch (res.hdr.reason) { + case FSYNC_WRONG_PART: + case FSYNC_UNKNOWN_VOLID: + *vp = NULL; + code = SYNC_OK; + break; + } + } + return code; } @@ -1997,9 +2019,10 @@ GetVolInfo(afs_uint32 partId, vol_info_list_mode_t mode) { int code = -1; + int reason; afs_int32 error; struct volser_trans *ttc = NULL; - struct Volume fs_tv, *tv = NULL; 
+ struct Volume fs_tv_buf, *fs_tv = &fs_tv_buf, *tv = NULL; ttc = NewTrans(volumeId, partId); if (!ttc) { @@ -2046,13 +2069,13 @@ GetVolInfo(afs_uint32 partId, } #ifdef AFS_DEMAND_ATTACH_FS - if (GetVolObject(volumeId, &fs_tv)) { + if (GetVolObject(volumeId, pname, &fs_tv) != SYNC_OK) { goto drop; } #endif /* ok, we have all the data we need; fill in the on-wire struct */ - code = FillVolInfo(&fs_tv, &tv->header->diskstuff, handle); + code = FillVolInfo(fs_tv, &tv->header->diskstuff, handle); drop: -- 1.9.4