extern struct host *h_Alloc_r(register struct rx_connection *r_con);
extern struct host *h_Lookup_r(afs_uint32 hostaddr, afs_uint16 hport,
int *heldp);
-extern void hashInsert_r(afs_uint32 addr, afs_uint16 port,
- struct host* host);
extern struct host *h_LookupUuid_r(afsUUID * uuidp);
extern void h_Enumerate(int (*proc) (), char *param);
extern void h_Enumerate_r(int (*proc) (), struct host *enumstart, char *param);
CFLAGS = ${COMMON_CFLAGS} -D${SYS_NAME} ${FSINCLUDES} ${XCFLAGS} ${ARCHFLAGS} -DFSSYNC_BUILD_SERVER -DFSSYNC_BUILD_CLIENT
-PUBLICHEADERS=nfs.h vnode.h viceinode.h volume.h voldefs.h partition.h \
- fssync.h ihandle.h namei_ops.h salvsync.h daemon_com.h
+PUBLICHEADERS=nfs.h vnode.h viceinode.h volume.h volume_inline.h voldefs.h partition.h \
+ fssync.h ihandle.h namei_ops.h salvsync.h daemon_com.h vnode_inline.h
VLIBOBJS=vnode.o volume.o vutil.o partition.o fssync-server.o fssync-client.o \
clone.o nuke.o devname.o listinodes.o common.o ihandle.o purge.o \
$(XFS_SIZE_CHECK) \
${TOP_INCDIR}/afs/nfs.h \
${TOP_INCDIR}/afs/vnode.h \
+ ${TOP_INCDIR}/afs/vnode_inline.h \
${TOP_INCDIR}/afs/viceinode.h \
${TOP_INCDIR}/afs/volume.h \
+ ${TOP_INCDIR}/afs/volume_inline.h \
${TOP_INCDIR}/afs/voldefs.h \
${TOP_INCDIR}/afs/partition.h \
${TOP_INCDIR}/afs/fssync.h \
${TOP_INCDIR}/afs/vnode.h: vnode.h
${INSTALL_DATA} $? $@
+${TOP_INCDIR}/afs/vnode_inline.h: vnode_inline.h
+ ${INSTALL_DATA} $? $@
+
${TOP_INCDIR}/afs/viceinode.h: viceinode.h
${INSTALL_DATA} $? $@
${TOP_INCDIR}/afs/volume.h: volume.h
${INSTALL_DATA} $? $@
+${TOP_INCDIR}/afs/volume_inline.h: volume_inline.h
+ ${INSTALL_DATA} $? $@
+
${TOP_INCDIR}/afs/voldefs.h: voldefs.h
${INSTALL_DATA} $? $@
${INSTALL_PROGRAM} volinfo ${DESTDIR}${afssrvsbindir}/volinfo
${INSTALL_DATA} nfs.h ${DESTDIR}${includedir}/afs/nfs.h
${INSTALL_DATA} vnode.h ${DESTDIR}${includedir}/afs/vnode.h
+ ${INSTALL_DATA} vnode_inline.h ${DESTDIR}${includedir}/afs/vnode_inline.h
${INSTALL_DATA} viceinode.h ${DESTDIR}${includedir}/afs/viceinode.h
${INSTALL_DATA} volume.h ${DESTDIR}${includedir}/afs/volume.h
+ ${INSTALL_DATA} volume_inline.h ${DESTDIR}${includedir}/afs/volume_inline.h
${INSTALL_DATA} voldefs.h ${DESTDIR}${includedir}/afs/voldefs.h
${INSTALL_DATA} partition.h ${DESTDIR}${includedir}/afs/partition.h
${INSTALL_DATA} fssync.h ${DESTDIR}${includedir}/afs/fssync.h
${INSTALL_PROGRAM} volinfo ${DEST}/root.server/usr/afs/bin/volinfo
${INSTALL_DATA} nfs.h ${DEST}/include/afs/nfs.h
${INSTALL_DATA} vnode.h ${DEST}/include/afs/vnode.h
+ ${INSTALL_DATA} vnode_inline.h ${DEST}/include/afs/vnode_inline.h
${INSTALL_DATA} viceinode.h ${DEST}/include/afs/viceinode.h
${INSTALL_DATA} volume.h ${DEST}/include/afs/volume.h
+ ${INSTALL_DATA} volume_inline.h ${DEST}/include/afs/volume_inline.h
${INSTALL_DATA} voldefs.h ${DEST}/include/afs/voldefs.h
${INSTALL_DATA} partition.h ${DEST}/include/afs/partition.h
${INSTALL_DATA} fssync.h ${DEST}/include/afs/fssync.h
/*
- * Copyright 2006-2007, Sine Nomine Associates and others.
+ * Copyright 2006-2008, Sine Nomine Associates and others.
* All Rights Reserved.
*
* This software has been released under the terms of the IBM Public
#define MAX_BIND_TRIES 5 /* Number of times to retry socket bind */
+static int SYNC_ask_internal(SYNC_client_state * state, SYNC_command * com, SYNC_response * res);
+
+/* daemon com SYNC general interfaces */
+
+/**
+ * fill in sockaddr structure.
+ *
+ * @param[in] endpoint pointer to sync endpoint object
+ * @param[out] addr pointer to sockaddr structure
+ *
+ * @post sockaddr structure populated using information from
+ * endpoint structure.
+ */
+void
+SYNC_getAddr(SYNC_endpoint_t * endpoint, SYNC_sockaddr_t * addr)
+{
#ifdef USE_UNIX_SOCKETS
-static getport(SYNC_client_state * state, struct sockaddr_un *addr);
-#else /* USE_UNIX_SOCKETS */
-static getport(SYNC_client_state * state, struct sockaddr_in *addr);
+ char tbuffer[AFSDIR_PATH_MAX];
#endif /* USE_UNIX_SOCKETS */
-static int SYNC_ask_internal(SYNC_client_state * state, SYNC_command * com, SYNC_response * res);
+ memset(addr, 0, sizeof(*addr));
+
+#ifdef USE_UNIX_SOCKETS
+ strcompose(tbuffer, AFSDIR_PATH_MAX, AFSDIR_SERVER_LOCAL_DIRPATH, "/",
+ endpoint->un, NULL);
+ addr->sun_family = AF_UNIX;
+ strncpy(addr->sun_path, tbuffer, (sizeof(struct sockaddr_un) - sizeof(short)));
+#else /* !USE_UNIX_SOCKETS */
+#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
+ addr->sin_len = sizeof(struct sockaddr_in);
+#endif
+ addr->sin_addr.s_addr = htonl(0x7f000001);
+ addr->sin_family = AF_INET; /* was localhost->h_addrtype */
+    addr->sin_port = htons(endpoint->in);      /* XXXX htons not _really_ necessary */
+#endif /* !USE_UNIX_SOCKETS */
+}
+
+/**
+ * get a socket descriptor of the appropriate domain.
+ *
+ * @param[in] endpoint pointer to sync endpoint object
+ *
+ * @return socket descriptor
+ *
+ * @post socket of domain specified in endpoint structure is created and
+ * returned to caller.
+ */
+int
+SYNC_getSock(SYNC_endpoint_t * endpoint)
+{
+ int sd;
+ assert((sd = socket(endpoint->domain, SOCK_STREAM, 0)) >= 0);
+ return sd;
+}
/* daemon com SYNC client interface */
+/**
+ * open a client connection to a sync server
+ *
+ * @param[in] state pointer to sync client handle
+ *
+ * @return operation status
+ * @retval 1 success
+ *
+ * @note at present, this routine aborts rather than returning an error code
+ */
int
SYNC_connect(SYNC_client_state * state)
{
-#ifdef USE_UNIX_SOCKETS
- struct sockaddr_un addr;
-#else /* USE_UNIX_SOCKETS */
- struct sockaddr_in addr;
-#endif /* USE_UNIX_SOCKETS */
+ SYNC_sockaddr_t addr;
/* I can't believe the following is needed for localhost connections!! */
static time_t backoff[] =
{ 3, 3, 3, 5, 5, 5, 7, 15, 16, 24, 32, 40, 48, 0 };
return 1;
}
+ SYNC_getAddr(&state->endpoint, &addr);
+
for (;;) {
- state->fd = getport(state, &addr);
+ state->fd = SYNC_getSock(&state->endpoint);
if (connect(state->fd, (struct sockaddr *)&addr, sizeof(addr)) >= 0)
return 1;
if (!*timeout)
return 0;
}
+/**
+ * forcibly disconnect a sync client handle.
+ *
+ * @param[in] state pointer to sync client handle
+ *
+ * @retval operation status
+ * @retval 0 success
+ */
int
SYNC_disconnect(SYNC_client_state * state)
{
return 0;
}
+/**
+ * gracefully disconnect a sync client handle.
+ *
+ * @param[in] state pointer to sync client handle
+ *
+ * @return operation status
+ * @retval SYNC_OK success
+ */
afs_int32
SYNC_closeChannel(SYNC_client_state * state)
{
return SYNC_OK;
}
+/**
+ * forcibly break a client connection, and then create a new connection.
+ *
+ * @param[in] state pointer to sync client handle
+ *
+ * @post old connection dropped; new connection established
+ *
+ * @return @see SYNC_connect()
+ */
int
SYNC_reconnect(SYNC_client_state * state)
{
return SYNC_connect(state);
}
-/* private function to fill in the sockaddr struct for us */
-#ifdef USE_UNIX_SOCKETS
-static int
-getport(SYNC_client_state * state, struct sockaddr_un *addr)
-{
- int sd;
- char tbuffer[AFSDIR_PATH_MAX];
-
- strcompose(tbuffer, AFSDIR_PATH_MAX, AFSDIR_SERVER_LOCAL_DIRPATH, "/",
- "fssync.sock", NULL);
- memset(addr, 0, sizeof(*addr));
- addr->sun_family = AF_UNIX;
- strncpy(addr->sun_path, tbuffer, (sizeof(struct sockaddr_un) - sizeof(short)));
- assert((sd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0);
- return sd;
-}
-#else /* USE_UNIX_SOCKETS */
-static int
-getport(SYNC_client_state * state, struct sockaddr_in *addr)
-{
- int sd;
- memset(addr, 0, sizeof(*addr));
- assert((sd = socket(AF_INET, SOCK_STREAM, 0)) >= 0);
-#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
- addr->sin_len = sizeof(struct sockaddr_in);
-#endif
- addr->sin_addr.s_addr = htonl(0x7f000001);
- addr->sin_family = AF_INET; /* was localhost->h_addrtype */
- addr->sin_port = htons(state->port); /* XXXX htons not _really_ neccessary */
- return sd;
-}
-#endif /* USE_UNIX_SOCKETS */
-
+/**
+ * send a command to a sync server and wait for a response.
+ *
+ * @param[in] state pointer to sync client handle
+ * @param[in] com command object
+ * @param[out] res response object
+ *
+ * @return operation status
+ * @retval SYNC_OK success
+ * @retval SYNC_COM_ERROR communications error
+ * @retval SYNC_BAD_COMMAND server did not recognize command code
+ *
+ * @note this routine merely handles error processing; SYNC_ask_internal()
+ * handles the low-level details of communicating with the SYNC server.
+ *
+ * @see SYNC_ask_internal
+ */
afs_int32
SYNC_ask(SYNC_client_state * state, SYNC_command * com, SYNC_response * res)
{
if (code == SYNC_COM_ERROR) {
Log("SYNC_ask: fatal protocol error on circuit '%s'; disabling sync "
- "protocol to server running on port %d until next server restart\n",
- state->proto_name, state->port);
+ "protocol until next server restart\n",
+ state->proto_name);
state->fatal_error = 1;
}
return code;
}
+/**
+ * send a command to a sync server and wait for a response.
+ *
+ * @param[in] state pointer to sync client handle
+ * @param[in] com command object
+ * @param[out] res response object
+ *
+ * @return operation status
+ * @retval SYNC_OK success
+ * @retval SYNC_COM_ERROR communications error
+ *
+ * @internal
+ */
static afs_int32
SYNC_ask_internal(SYNC_client_state * state, SYNC_command * com, SYNC_response * res)
{
* daemon com SYNC server-side interfaces
*/
-/* get a command */
+/**
+ * receive a command structure off a sync socket.
+ *
+ * @param[in] fd socket descriptor
+ * @param[out] com sync command object to be populated
+ *
+ * @return operation status
+ * @retval SYNC_OK command received
+ * @retval SYNC_COM_ERROR there was a socket communications error
+ */
afs_int32
SYNC_getCom(int fd, SYNC_command * com)
{
return code;
}
-/* put a response */
+/**
+ * write a response structure to a sync socket.
+ *
+ * @param[in] fd
+ * @param[in] res
+ *
+ * @return operation status
+ * @retval SYNC_OK
+ * @retval SYNC_COM_ERROR
+ */
afs_int32
SYNC_putRes(int fd, SYNC_response * res)
{
return (s_len == len) ? 1 : 0;
}
+
+/**
+ * clean up old sockets.
+ *
+ * @param[in] state server state object
+ *
+ * @post unix domain sockets are cleaned up
+ */
+void
+SYNC_cleanupSock(SYNC_server_state_t * state)
+{
+#ifdef USE_UNIX_SOCKETS
+ remove(state->addr.sun_path);
+#endif
+}
+
+/**
+ * bind socket and set it to listen state.
+ *
+ * @param[in] state server state object
+ *
+ * @return operation status
+ * @retval 0 success
+ * @retval nonzero failure
+ *
+ * @post socket bound and set to listen state
+ */
+int
+SYNC_bindSock(SYNC_server_state_t * state)
+{
+    int code;
+    int on = 1;
+    int numTries;
+
+    /* Reuseaddr needed because system inexplicably leaves crud lying around */
+    code =
+	setsockopt(state->fd, SOL_SOCKET, SO_REUSEADDR, (char *)&on,
+		   sizeof(on));
+    if (code)
+	Log("SYNC_bindSock: setsockopt failed with (%d)\n", errno);
+
+    for (numTries = 0; numTries < state->bind_retry_limit; numTries++) {
+	code = bind(state->fd,
+		    (struct sockaddr *)&state->addr,
+		    sizeof(state->addr));
+	if (code == 0)
+	    break;
+	Log("SYNC_bindSock: bind failed with (%d), will sleep and retry\n",
+	    errno);
+	sleep(5);
+    }
+
+    /* if bind never succeeded, there is no point calling listen();
+     * propagate the bind failure to the caller */
+    if (code)
+	return code;
+
+    /* check listen() too; previously its return value was discarded,
+     * silently reporting success on a socket nobody can connect to */
+    code = listen(state->fd, state->listen_depth);
+    if (code)
+	Log("SYNC_bindSock: listen failed with (%d)\n", errno);
+
+    return code;
+}
/*
- * Copyright 2006-2007, Sine Nomine Associates and others.
+ * Copyright 2006-2008, Sine Nomine Associates and others.
* All Rights Reserved.
*
* This software has been released under the terms of the IBM Public
#define SYNC_COM_CODE_USER_BASE 65536
#define SYNC_COM_CODE_DECL(code) (SYNC_COM_CODE_USER_BASE+(code))
-/* general command codes */
-#define SYNC_COM_CHANNEL_CLOSE 0
+/**
+ * general command codes.
+ */
+enum SYNCOpCode {
+ SYNC_COM_CHANNEL_CLOSE = 0, /**< request sync channel shutdown */
+};
/* SYNC protocol response codes
#define SYNC_RES_CODE_USER_BASE 65536
#define SYNC_RES_CODE_DECL(code) (SYNC_RES_CODE_USER_BASE+(code))
-/* general response codes */
-#define SYNC_OK 0 /* sync call returned ok */
-#define SYNC_DENIED 1 /* sync request denied by server */
-#define SYNC_COM_ERROR 2 /* sync protocol communicaions error */
-#define SYNC_BAD_COMMAND 3 /* sync command code not implemented by server */
-#define SYNC_FAILED 4 /* sync server-side procedure failed */
-
+/**
+ * general response codes.
+ */
+enum SYNCReasonCode {
+ SYNC_OK = 0, /**< sync call returned ok */
+ SYNC_DENIED = 1, /**< sync request denied by server */
+    SYNC_COM_ERROR = 2, /**< sync protocol communications error */
+ SYNC_BAD_COMMAND = 3, /**< sync command code not implemented by server */
+ SYNC_FAILED = 4, /**< sync server-side procedure failed */
+};
/* SYNC protocol reason codes
*
#define SYNC_REASON_NONE 0
#define SYNC_REASON_MALFORMED_PACKET 1
#define SYNC_REASON_NOMEM 2
+#define SYNC_REASON_ENCODING_ERROR 3
/* SYNC protocol flags
*
afs_int64 _##buf##_l[SYNC_PROTO_MAX_LEN/sizeof(afs_int64)]; \
char * buf = (char *)(_##buf##_l)
+#ifdef USE_UNIX_SOCKETS
+#include <afs/afsutil.h>
+#include <sys/un.h>
+#define SYNC_SOCK_DOMAIN AF_UNIX
+typedef struct sockaddr_un SYNC_sockaddr_t;
+#else /* USE_UNIX_SOCKETS */
+#define SYNC_SOCK_DOMAIN AF_INET
+typedef struct sockaddr_in SYNC_sockaddr_t;
+#endif /* USE_UNIX_SOCKETS */
+
+/**
+ * sync server endpoint address.
+ */
+typedef struct SYNC_endpoint {
+ int domain; /**< socket domain */
+ afs_uint16 in; /**< localhost ipv4 tcp port number */
+ char * un; /**< unix domain socket filename (not a full path) */
+} SYNC_endpoint_t;
+
+#define SYNC_ENDPOINT_DECL(in_port, un_path) \
+ { SYNC_SOCK_DOMAIN, in_port, un_path }
-/* client-side state object */
+
+/**
+ * SYNC server state structure.
+ */
+typedef struct SYNC_server_state {
+ int fd; /**< listening socket descriptor */
+ SYNC_endpoint_t endpoint; /**< server endpoint address */
+ afs_uint32 proto_version; /**< our protocol version */
+ int bind_retry_limit; /**< upper limit on times to retry socket bind() */
+ int listen_depth; /**< socket listen queue depth */
+ char * proto_name; /**< sync protocol associated with this conn */
+ SYNC_sockaddr_t addr; /**< server listen socket sockaddr */
+} SYNC_server_state_t;
+
+/**
+ * SYNC client state structure.
+ */
typedef struct SYNC_client_state {
- int fd;
- afs_uint16 port;
- afs_uint32 proto_version;
- int retry_limit; /* max number of times for SYNC_ask to retry */
- afs_int32 hard_timeout; /* upper limit on time to keep trying */
+ int fd; /**< client socket descriptor */
+ SYNC_endpoint_t endpoint; /**< address of sync server */
+ afs_uint32 proto_version; /**< our protocol version */
+ int retry_limit; /**< max number of times for SYNC_ask to retry */
+ afs_int32 hard_timeout; /**< upper limit on time to keep trying */
char * proto_name; /**< sync protocol associated with this conn */
- byte fatal_error; /* fatal error on this client conn */
+    byte fatal_error; /**< nonzero if fatal error on this client conn */
} SYNC_client_state;
/* wire types */
afs_int32 recv_len;
} SYNC_response;
+/* general prototypes */
+extern int SYNC_getSock(SYNC_endpoint_t * endpoint);
+extern void SYNC_getAddr(SYNC_endpoint_t * endpoint, SYNC_sockaddr_t * addr);
/* client-side prototypes */
extern afs_int32 SYNC_ask(SYNC_client_state *, SYNC_command * com, SYNC_response * res);
extern int SYNC_getCom(int fd, SYNC_command * com);
extern int SYNC_putRes(int fd, SYNC_response * res);
extern int SYNC_verifyProtocolString(char * buf, size_t len);
+extern void SYNC_cleanupSock(SYNC_server_state_t * state);
+extern int SYNC_bindSock(SYNC_server_state_t * state);
#endif /* _AFS_VOL_DAEMON_COM_H */
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
*
- * Portions Copyright (c) 2006 Sine Nomine Associates
+ * Portions Copyright (c) 2006,2008 Sine Nomine Associates
*/
/*
static SYNC_client_state fssync_state =
{ -1, /* file descriptor */
- 2040, /* port number */
+ FSSYNC_ENDPOINT_DECL, /* server endpoint */
FSYNC_PROTO_VERSION, /* protocol version */
5, /* connect retry limit */
120, /* hard timeout */
/*
- * Copyright 2006, Sine Nomine Associates and others.
+ * Copyright 2006-2008, Sine Nomine Associates and others.
* All Rights Reserved.
*
* This software has been released under the terms of the IBM Public
struct volop_state {
afs_uint32 volume;
+ afs_uint32 vnode;
+ afs_uint32 unique;
char partName[16];
};
static int VolHdrQuery(struct cmd_syndesc * as, void * rock);
static int VolOpQuery(struct cmd_syndesc * as, void * rock);
static int StatsQuery(struct cmd_syndesc * as, void * rock);
-
+static int VnQuery(struct cmd_syndesc * as, void * rock);
static void print_vol_stats_general(VolPkgStats * stats);
static void print_vol_stats_viceP(struct DiskPartitionStats * stats);
VOLOP_PARMS_DECL(ts);
cmd_CreateAlias(ts, "vop");
+ ts = cmd_CreateSyntax("vnode", VnQuery, NULL, "get vnode structure (FSYNC_VOL_QUERY_VNODE opcode)");
+ cmd_Seek(ts, CUSTOM_PARMS_OFFSET);
+ cmd_AddParm(ts, "-volumeid", CMD_SINGLE, 0, "volume id");
+ cmd_AddParm(ts, "-vnodeid", CMD_SINGLE, 0, "vnode id");
+ cmd_AddParm(ts, "-unique", CMD_SINGLE, 0, "uniquifier");
+    cmd_AddParm(ts, "-partition", CMD_SINGLE, 0, "partition name");
+ COMMON_PARMS_DECL(ts);
+
ts = cmd_CreateSyntax("stats", StatsQuery, NULL, "see 'stats help' for more information");
cmd_Seek(ts, CUSTOM_PARMS_OFFSET);
cmd_AddParm(ts, "-cmd", CMD_SINGLE, 0, "subcommand");
VDisconnectFS();
}
+
+#define ENUMTOSTRING(en) #en
+#define ENUMCASE(en) \
+ case en: \
+ return ENUMTOSTRING(en); \
+ break
+
+#define FLAGTOSTRING(fl) #fl
+#define FLAGCASE(bitstr, fl, str, count) \
+ do { \
+ if ((bitstr) & (fl)) { \
+ if (count) \
+ strlcat((str), " | ", sizeof(str)); \
+ strlcat((str), FLAGTOSTRING(fl), sizeof(str)); \
+ (count)++; \
+ } \
+ } while (0)
+
static char *
response_code_to_string(afs_int32 response)
{
switch (response) {
- case SYNC_OK:
- return "SYNC_OK";
- case SYNC_DENIED:
- return "SYNC_DENIED";
- case SYNC_COM_ERROR:
- return "SYNC_COM_ERROR";
- case SYNC_BAD_COMMAND:
- return "SYNC_BAD_COMMAND";
- case SYNC_FAILED:
- return "SYNC_FAILED";
+ ENUMCASE(SYNC_OK);
+ ENUMCASE(SYNC_DENIED);
+ ENUMCASE(SYNC_COM_ERROR);
+ ENUMCASE(SYNC_BAD_COMMAND);
+ ENUMCASE(SYNC_FAILED);
default:
return "**UNKNOWN**";
}
command_code_to_string(afs_int32 command)
{
switch (command) {
- case SYNC_COM_CHANNEL_CLOSE:
- return "SYNC_COM_CHANNEL_CLOSE";
- case FSYNC_VOL_ON:
- return "FSYNC_VOL_ON";
- case FSYNC_VOL_OFF:
- return "FSYNC_VOL_OFF";
- case FSYNC_VOL_LISTVOLUMES:
- return "FSYNC_VOL_LISTVOLUMES";
- case FSYNC_VOL_NEEDVOLUME:
- return "FSYNC_VOL_NEEDVOLUME";
- case FSYNC_VOL_MOVE:
- return "FSYNC_VOL_MOVE";
- case FSYNC_VOL_BREAKCBKS:
- return "FSYNC_VOL_BREAKCBKS";
- case FSYNC_VOL_DONE:
- return "FSYNC_VOL_DONE";
- case FSYNC_VOL_QUERY:
- return "FSYNC_VOL_QUERY";
- case FSYNC_VOL_QUERY_HDR:
- return "FSYNC_VOL_QUERY_HDR";
- case FSYNC_VOL_QUERY_VOP:
- return "FSYNC_VOL_QUERY_VOP";
- case FSYNC_VOL_STATS_GENERAL:
- return "FSYNC_VOL_STATS_GENERAL";
- case FSYNC_VOL_STATS_VICEP:
- return "FSYNC_VOL_STATS_VICEP";
- case FSYNC_VOL_STATS_HASH:
- return "FSYNC_VOL_STATS_HASH";
- case FSYNC_VOL_STATS_HDR:
- return "FSYNC_VOL_STATS_HDR";
- case FSYNC_VOL_STATS_VLRU:
- return "FSYNC_VOL_STATS_VLRU";
+ ENUMCASE(SYNC_COM_CHANNEL_CLOSE);
+ ENUMCASE(FSYNC_VOL_ON);
+ ENUMCASE(FSYNC_VOL_OFF);
+ ENUMCASE(FSYNC_VOL_LISTVOLUMES);
+ ENUMCASE(FSYNC_VOL_NEEDVOLUME);
+ ENUMCASE(FSYNC_VOL_MOVE);
+ ENUMCASE(FSYNC_VOL_BREAKCBKS);
+ ENUMCASE(FSYNC_VOL_DONE);
+ ENUMCASE(FSYNC_VOL_QUERY);
+ ENUMCASE(FSYNC_VOL_QUERY_HDR);
+ ENUMCASE(FSYNC_VOL_QUERY_VOP);
+ ENUMCASE(FSYNC_VOL_STATS_GENERAL);
+ ENUMCASE(FSYNC_VOL_STATS_VICEP);
+ ENUMCASE(FSYNC_VOL_STATS_HASH);
+ ENUMCASE(FSYNC_VOL_STATS_HDR);
+ ENUMCASE(FSYNC_VOL_STATS_VLRU);
+ ENUMCASE(FSYNC_VOL_ATTACH);
+ ENUMCASE(FSYNC_VOL_FORCE_ERROR);
+ ENUMCASE(FSYNC_VOL_LEAVE_OFF);
+ ENUMCASE(FSYNC_VOL_QUERY_VNODE);
default:
return "**UNKNOWN**";
}
reason_code_to_string(afs_int32 reason)
{
switch (reason) {
- case SYNC_REASON_NONE:
- return "SYNC_REASON_NONE";
- case SYNC_REASON_MALFORMED_PACKET:
- return "SYNC_REASON_MALFORMED_PACKET";
- case FSYNC_WHATEVER:
- return "FSYNC_WHATEVER";
- case FSYNC_SALVAGE:
- return "FSYNC_SALVAGE";
- case FSYNC_MOVE:
- return "FSYNC_MOVE";
- case FSYNC_OPERATOR:
- return "FSYNC_OPERATOR";
- case FSYNC_EXCLUSIVE:
- return "FSYNC_EXCLUSIVE";
- case FSYNC_UNKNOWN_VOLID:
- return "FSYNC_UNKNOWN_VOLID";
- case FSYNC_HDR_NOT_ATTACHED:
- return "FSYNC_HDR_NOT_ATTACHED";
- case FSYNC_NO_PENDING_VOL_OP:
- return "FSYNC_NO_PENDING_VOL_OP";
- case FSYNC_VOL_PKG_ERROR:
- return "FSYNC_VOL_PKG_ERROR";
+ ENUMCASE(SYNC_REASON_NONE);
+ ENUMCASE(SYNC_REASON_MALFORMED_PACKET);
+ ENUMCASE(SYNC_REASON_NOMEM);
+ ENUMCASE(SYNC_REASON_ENCODING_ERROR);
+ ENUMCASE(FSYNC_WHATEVER);
+ ENUMCASE(FSYNC_SALVAGE);
+ ENUMCASE(FSYNC_MOVE);
+ ENUMCASE(FSYNC_OPERATOR);
+ ENUMCASE(FSYNC_EXCLUSIVE);
+ ENUMCASE(FSYNC_UNKNOWN_VOLID);
+ ENUMCASE(FSYNC_HDR_NOT_ATTACHED);
+ ENUMCASE(FSYNC_NO_PENDING_VOL_OP);
+ ENUMCASE(FSYNC_VOL_PKG_ERROR);
+ ENUMCASE(FSYNC_UNKNOWN_VNID);
default:
return "**UNKNOWN**";
}
program_type_to_string(afs_int32 type)
{
switch ((ProgramType)type) {
- case fileServer:
- return "fileServer";
- case volumeUtility:
- return "volumeUtility";
- case salvager:
- return "salvager";
- case salvageServer:
- return "salvageServer";
- case debugUtility:
- return "debugUtility";
+ ENUMCASE(fileServer);
+ ENUMCASE(volumeUtility);
+ ENUMCASE(salvager);
+ ENUMCASE(salvageServer);
+ ENUMCASE(debugUtility);
default:
return "**UNKNOWN**";
}
vol_state_to_string(VolState state)
{
switch (state) {
- case VOL_STATE_UNATTACHED:
- return "VOL_STATE_UNATTACHED";
- case VOL_STATE_PREATTACHED:
- return "VOL_STATE_PREATTACHED";
- case VOL_STATE_ATTACHING:
- return "VOL_STATE_ATTACHING";
- case VOL_STATE_ATTACHED:
- return "VOL_STATE_ATTACHED";
- case VOL_STATE_UPDATING:
- return "VOL_STATE_UPDATING";
- case VOL_STATE_GET_BITMAP:
- return "VOL_STATE_GET_BITMAP";
- case VOL_STATE_HDR_LOADING:
- return "VOL_STATE_HDR_LOADING";
- case VOL_STATE_HDR_ATTACHING:
- return "VOL_STATE_HDR_ATTACHING";
- case VOL_STATE_SHUTTING_DOWN:
- return "VOL_STATE_SHUTTING_DOWN";
- case VOL_STATE_GOING_OFFLINE:
- return "VOL_STATE_GOING_OFFLINE";
- case VOL_STATE_OFFLINING:
- return "VOL_STATE_OFFLINING";
- case VOL_STATE_DETACHING:
- return "VOL_STATE_DETACHING";
- case VOL_STATE_SALVSYNC_REQ:
- return "VOL_STATE_SALVSYNC_REQ";
- case VOL_STATE_SALVAGING:
- return "VOL_STATE_SALVAGING";
- case VOL_STATE_ERROR:
- return "VOL_STATE_ERROR";
- case VOL_STATE_FREED:
- return "VOL_STATE_FREED";
+ ENUMCASE(VOL_STATE_UNATTACHED);
+ ENUMCASE(VOL_STATE_PREATTACHED);
+ ENUMCASE(VOL_STATE_ATTACHING);
+ ENUMCASE(VOL_STATE_ATTACHED);
+ ENUMCASE(VOL_STATE_UPDATING);
+ ENUMCASE(VOL_STATE_GET_BITMAP);
+ ENUMCASE(VOL_STATE_HDR_LOADING);
+ ENUMCASE(VOL_STATE_HDR_ATTACHING);
+ ENUMCASE(VOL_STATE_SHUTTING_DOWN);
+ ENUMCASE(VOL_STATE_GOING_OFFLINE);
+ ENUMCASE(VOL_STATE_OFFLINING);
+ ENUMCASE(VOL_STATE_DETACHING);
+ ENUMCASE(VOL_STATE_SALVSYNC_REQ);
+ ENUMCASE(VOL_STATE_SALVAGING);
+ ENUMCASE(VOL_STATE_ERROR);
+ ENUMCASE(VOL_STATE_VNODE_ALLOC);
+ ENUMCASE(VOL_STATE_VNODE_GET);
+ ENUMCASE(VOL_STATE_VNODE_CLOSE);
+ ENUMCASE(VOL_STATE_VNODE_RELEASE);
+ ENUMCASE(VOL_STATE_VLRU_ADD);
+ ENUMCASE(VOL_STATE_FREED);
default:
return "**UNKNOWN**";
}
int count = 0;
str[0]='\0';
- if (flags & VOL_HDR_ATTACHED) {
- strlcat(str, "VOL_HDR_ATTACHED", sizeof(str));
- count++;
- }
-
- if (flags & VOL_HDR_LOADED) {
- if (count) {
- strlcat(str, " | ", sizeof(str));
- }
- strlcat(str, "VOL_HDR_LOADED", sizeof(str));
- count++;
- }
-
- if (flags & VOL_HDR_IN_LRU) {
- if (count) {
- strlcat(str, " | ", sizeof(str));
- }
- strlcat(str, "VOL_HDR_IN_LRU", sizeof(str));
- count++;
- }
-
- if (flags & VOL_IN_HASH) {
- if (count) {
- strlcat(str, " | ", sizeof(str));
- }
- strlcat(str, "VOL_IN_HASH", sizeof(str));
- count++;
- }
-
- if (flags & VOL_ON_VBYP_LIST) {
- if (count) {
- strlcat(str, " | ", sizeof(str));
- }
- strlcat(str, "VOL_ON_VBYP_LIST", sizeof(str));
- count++;
- }
-
- if (flags & VOL_IS_BUSY) {
- if (count) {
- strlcat(str, " | ", sizeof(str));
- }
- strlcat(str, "VOL_IS_BUSY", sizeof(str));
- count++;
- }
-
- if (flags & VOL_ON_VLRU) {
- if (count) {
- strlcat(str, " | ", sizeof(str));
- }
- strlcat(str, "VOL_ON_VLRU", sizeof(str));
- }
-
- if (flags & VOL_HDR_DONTSALV) {
- if (count) {
- strlcat(str, " | ", sizeof(str));
- }
- strlcat(str, "VOL_HDR_DONTSALV", sizeof(str));
- }
+ FLAGCASE(flags, VOL_HDR_ATTACHED, str, count);
+ FLAGCASE(flags, VOL_HDR_LOADED, str, count);
+ FLAGCASE(flags, VOL_HDR_IN_LRU, str, count);
+ FLAGCASE(flags, VOL_IN_HASH, str, count);
+ FLAGCASE(flags, VOL_ON_VBYP_LIST, str, count);
+ FLAGCASE(flags, VOL_IS_BUSY, str, count);
+ FLAGCASE(flags, VOL_ON_VLRU, str, count);
+ FLAGCASE(flags, VOL_HDR_DONTSALV, str, count);
return str;
}
vlru_idx_to_string(int idx)
{
switch (idx) {
- case VLRU_QUEUE_NEW:
- return "VLRU_QUEUE_NEW";
- case VLRU_QUEUE_MID:
- return "VLRU_QUEUE_MID";
- case VLRU_QUEUE_OLD:
- return "VLRU_QUEUE_OLD";
- case VLRU_QUEUE_CANDIDATE:
- return "VLRU_QUEUE_CANDIDATE";
- case VLRU_QUEUE_HELD:
- return "VLRU_QUEUE_HELD";
- case VLRU_QUEUE_INVALID:
- return "VLRU_QUEUE_INVALID";
+ ENUMCASE(VLRU_QUEUE_NEW);
+ ENUMCASE(VLRU_QUEUE_MID);
+ ENUMCASE(VLRU_QUEUE_OLD);
+ ENUMCASE(VLRU_QUEUE_CANDIDATE);
+ ENUMCASE(VLRU_QUEUE_HELD);
+ ENUMCASE(VLRU_QUEUE_INVALID);
default:
return "**UNKNOWN**";
}
}
+
+
+static char *
+vn_state_to_string(VnState state)
+{
+ switch (state) {
+ ENUMCASE(VN_STATE_INVALID);
+ ENUMCASE(VN_STATE_RELEASING);
+ ENUMCASE(VN_STATE_CLOSING);
+ ENUMCASE(VN_STATE_ALLOC);
+ ENUMCASE(VN_STATE_ONLINE);
+ ENUMCASE(VN_STATE_LOAD);
+ ENUMCASE(VN_STATE_EXCLUSIVE);
+ ENUMCASE(VN_STATE_STORE);
+ ENUMCASE(VN_STATE_READ);
+ ENUMCASE(VN_STATE_ERROR);
+ default:
+ return "**UNKNOWN**";
+ }
+}
+
+static char *
+vn_flags_to_string(afs_uint32 flags)
+{
+ static char str[128];
+ int count = 0;
+ str[0]='\0';
+
+ FLAGCASE(flags, VN_ON_HASH, str, count);
+ FLAGCASE(flags, VN_ON_LRU, str, count);
+ FLAGCASE(flags, VN_ON_VVN, str, count);
+
+ return str;
+}
#endif
static int
printf("\t\tlast_get = %u\n", v.stats.last_get);
printf("\t\tlast_promote = %u\n", v.stats.last_promote);
printf("\t\tlast_hdr_get = %u\n", v.stats.last_hdr_get);
+ printf("\t\tlast_hdr_load = %u\n", v.stats.last_hdr_load);
printf("\t\tlast_salvage = %u\n", v.stats.last_salvage);
printf("\t\tlast_salvage_req = %u\n", v.stats.last_salvage_req);
printf("\t\tlast_vol_op = %u\n", v.stats.last_vol_op);
}
static int
+vn_prolog(struct cmd_syndesc * as, struct state * state)
+{
+    register struct cmd_item *ti;
+
+    state->vop = (struct volop_state *) calloc(1, sizeof(struct volop_state));
+    assert(state->vop != NULL);
+
+    if ((ti = as->parms[CUSTOM_PARMS_OFFSET].items)) {	/* -volumeid */
+	state->vop->volume = atoi(ti->data);
+    } else {
+	fprintf(stderr, "required argument -volumeid not given\n");
+    }
+
+    if ((ti = as->parms[CUSTOM_PARMS_OFFSET+1].items)) {	/* -vnodeid */
+	state->vop->vnode = atoi(ti->data);
+    } else {
+	fprintf(stderr, "required argument -vnodeid not given\n");
+    }
+
+    if ((ti = as->parms[CUSTOM_PARMS_OFFSET+2].items)) {	/* -unique */
+	state->vop->unique = atoi(ti->data);
+    } else {
+	state->vop->unique = 0;
+    }
+
+    /* the vnode command registers all of its parms starting at
+     * CUSTOM_PARMS_OFFSET, so -partition must be indexed from that same
+     * base (COMMON_VOLOP_PARMS_OFFSET+3 read the wrong slot) */
+    if ((ti = as->parms[CUSTOM_PARMS_OFFSET+3].items)) {	/* -partition */
+	strlcpy(state->vop->partName, ti->data, sizeof(state->vop->partName));
+    } else {
+	memset(state->vop->partName, 0, sizeof(state->vop->partName));
+    }
+
+    return 0;
+}
+
+static int
+do_vnqry(struct state * state, SYNC_response * res)
+{
+ afs_int32 code;
+ int command = FSYNC_VOL_QUERY_VNODE;
+ FSSYNC_VnQry_hdr qry;
+
+ qry.volume = state->vop->volume;
+ qry.vnode = state->vop->vnode;
+ qry.unique = state->vop->unique;
+ qry.spare = 0;
+ strlcpy(qry.partName, state->vop->partName, sizeof(qry.partName));
+
+ fprintf(stderr, "calling FSYNC_GenericOp with command code %d (%s)\n",
+ command, command_code_to_string(command));
+
+ code = FSYNC_GenericOp(&qry, sizeof(qry), command, FSYNC_OPERATOR, res);
+
+ switch (code) {
+ case SYNC_OK:
+ case SYNC_DENIED:
+ break;
+ default:
+ fprintf(stderr, "possible sync protocol error. return code was %d\n", code);
+ }
+
+ fprintf(stderr, "FSYNC_GenericOp returned %d (%s)\n", code, response_code_to_string(code));
+ fprintf(stderr, "protocol response code was %d (%s)\n",
+ res->hdr.response, response_code_to_string(res->hdr.response));
+ fprintf(stderr, "protocol reason code was %d (%s)\n",
+ res->hdr.reason, reason_code_to_string(res->hdr.reason));
+
+ VDisconnectFS();
+
+ return 0;
+}
+
+static int
+VnQuery(struct cmd_syndesc * as, void * rock)
+{
+    struct state state;
+    SYNC_PROTO_BUF_DECL(res_buf);
+    SYNC_response res;
+    Vnode v;
+
+    res.hdr.response_len = sizeof(res.hdr);
+    res.payload.buf = res_buf;
+    res.payload.len = SYNC_PROTO_MAX_LEN;
+
+    common_prolog(as, &state);
+    vn_prolog(as, &state);
+
+    do_vnqry(&state, &res);
+
+    if (res.hdr.response == SYNC_OK) {
+	/* copy exactly one Vnode out of the payload; the previous
+	 * sizeof(Volume) copied the wrong structure's size and overran v */
+	memcpy(&v, res.payload.buf, sizeof(v));
+
+	printf("vnode = {\n");
+
+	printf("\tvid_hash = {\n");
+	printf("\t\tnext = 0x%lx\n", (unsigned long) v.vid_hash.next);
+	printf("\t\tprev = 0x%lx\n", (unsigned long) v.vid_hash.prev);
+	printf("\t}\n");
+
+	printf("\thashNext = 0x%lx\n", (unsigned long) v.hashNext);
+	printf("\tlruNext = 0x%lx\n", (unsigned long) v.lruNext);
+	printf("\tlruPrev = 0x%lx\n", (unsigned long) v.lruPrev);
+	printf("\thashIndex = %hu\n", v.hashIndex);
+	printf("\tchanged_newTime = %u\n", (unsigned int) v.changed_newTime);
+	printf("\tchanged_oldTime = %u\n", (unsigned int) v.changed_oldTime);
+	printf("\tdelete = %u\n", (unsigned int) v.delete);
+	printf("\tvnodeNumber = %u\n", v.vnodeNumber);
+	printf("\tvolumePtr = 0x%lx\n", (unsigned long) v.volumePtr);
+	printf("\tnUsers = %u\n", v.nUsers);
+	printf("\tcacheCheck = %u\n", v.cacheCheck);
+
+#ifdef AFS_DEMAND_ATTACH_FS
+	if (!(res.hdr.flags & SYNC_FLAG_DAFS_EXTENSIONS)) {
+	    printf("*** fssync-debug built to expect demand attach extensions. server asserted\n");
+	    printf("*** that it was not compiled with demand attach turned on. please recompile\n");
+	    printf("*** fssync-debug to match your server\n");
+	    goto done;
+	}
+
+	printf("\tnReaders = %u\n", v.nReaders);
+	printf("\tvn_state_flags = %s\n", vn_flags_to_string(v.vn_state_flags));
+	printf("\tvn_state = %s\n", vn_state_to_string(v.vn_state));
+#else
+	if (res.hdr.flags & SYNC_FLAG_DAFS_EXTENSIONS) {
+	    printf("*** server asserted demand attach extensions. fssync-debug not built to\n");
+	    printf("*** recognize those extensions. please recompile fssync-debug if you need\n");
+	    printf("*** to dump dafs extended state\n");
+	    goto done;
+	}
+#endif /* !AFS_DEMAND_ATTACH_FS */
+
+	printf("\twriter = %u\n", v.writer);
+	printf("\tvcp = 0x%lx\n", (unsigned long) v.vcp);
+	printf("\thandle = 0x%lx\n", (unsigned long) v.handle);
+
+	printf("\tdisk = {\n");
+	printf("\t\ttype = %u\n", v.disk.type);
+	printf("\t\tcloned = %u\n", v.disk.cloned);
+	printf("\t\tmodeBits = %u\n", v.disk.modeBits);
+	printf("\t\tlinkCount = %d\n", v.disk.linkCount);
+	printf("\t\tlength = %u\n", v.disk.length);
+	printf("\t\tuniquifier = %u\n", v.disk.uniquifier);
+	printf("\t\tdataVersion = %u\n", v.disk.dataVersion);
+	printf("\t\tvn_ino_lo = %u\n", v.disk.vn_ino_lo);
+	printf("\t\tunixModifyTime = %u\n", v.disk.unixModifyTime);
+	printf("\t\tauthor = %u\n", v.disk.author);
+	printf("\t\towner = %u\n", v.disk.owner);
+	printf("\t\tparent = %u\n", v.disk.parent);
+	printf("\t\tvnodeMagic = %u\n", v.disk.vnodeMagic);
+
+	printf("\t\tlock = {\n");
+	printf("\t\t\tlockCount = %d\n", v.disk.lock.lockCount);
+	printf("\t\t\tlockTime = %d\n", v.disk.lock.lockTime);
+	printf("\t\t}\n");
+
+	printf("\t\tserverModifyTime = %u\n", v.disk.serverModifyTime);
+	printf("\t\tgroup = %d\n", v.disk.group);
+	printf("\t\tvn_ino_hi = %d\n", v.disk.vn_ino_hi);
+	printf("\t\treserved6 = %u\n", v.disk.reserved6);
+	printf("\t}\n");
+
+	printf("}\n");
+    }
+
+ done:
+    return 0;
+}
+
+
+static int
StatsQuery(struct cmd_syndesc * as, void * rock)
{
afs_int32 code;
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
*
- * Portions Copyright (c) 2006-2007 Sine Nomine Associates
+ * Portions Copyright (c) 2006-2008 Sine Nomine Associates
*/
/*
#include "ihandle.h"
#include "vnode.h"
#include "volume.h"
+#include "volume_inline.h"
#include "partition.h"
#ifdef HAVE_POLL
* cloned read-only copies offline when salvaging
* a single read-write volume */
-#define MAX_BIND_TRIES 5 /* Number of times to retry socket bind */
-
static struct offlineInfo OfflineVolumes[MAXHANDLERS][MAXOFFLINEVOLUMES];
-static int AcceptSd = -1; /* Socket used by server for accepting connections */
+/**
+ * fssync server socket handle.
+ */
+static SYNC_server_state_t fssync_server_state =
+ { -1, /* file descriptor */
+ FSSYNC_ENDPOINT_DECL, /* server endpoint */
+ FSYNC_PROTO_VERSION, /* protocol version */
+ 5, /* bind() retry limit */
+ 100, /* listen() queue depth */
+ "FSSYNC", /* protocol name string */
+ };
-static int getport();
/* Forward declarations */
-static void FSYNC_sync();
+static void * FSYNC_sync(void *);
static void FSYNC_newconnection();
static void FSYNC_com();
static void FSYNC_Drop();
static afs_int32 FSYNC_com_VolOp(int fd, SYNC_command * com, SYNC_response * res);
+static afs_int32 FSYNC_com_VolError(FSSYNC_VolOp_command * com, SYNC_response * res);
static afs_int32 FSYNC_com_VolOn(FSSYNC_VolOp_command * com, SYNC_response * res);
static afs_int32 FSYNC_com_VolOff(FSSYNC_VolOp_command * com, SYNC_response * res);
static afs_int32 FSYNC_com_VolMove(FSSYNC_VolOp_command * com, SYNC_response * res);
static afs_int32 FSYNC_com_VolOpQuery(FSSYNC_VolOp_command * com, SYNC_response * res);
#endif /* AFS_DEMAND_ATTACH_FS */
+static afs_int32 FSYNC_com_VnQry(int fd, SYNC_command * com, SYNC_response * res);
+
static afs_int32 FSYNC_com_StatsOp(int fd, SYNC_command * com, SYNC_response * res);
static afs_int32 FSYNC_com_StatsOpGeneral(FSSYNC_StatsOp_command * scom, SYNC_response * res);
static fd_set FSYNC_readfds;
#endif
-#ifdef USE_UNIX_SOCKETS
-static int
-getport(struct sockaddr_un *addr)
-{
- int sd;
- char tbuffer[AFSDIR_PATH_MAX];
-
- strcompose(tbuffer, AFSDIR_PATH_MAX, AFSDIR_SERVER_LOCAL_DIRPATH, "/",
- "fssync.sock", NULL);
-
- memset(addr, 0, sizeof(*addr));
- addr->sun_family = AF_UNIX;
- strncpy(addr->sun_path, tbuffer, (sizeof(struct sockaddr_un) - sizeof(short)));
- assert((sd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0);
- return sd;
-}
-#else
-static int
-getport(struct sockaddr_in *addr)
-{
- int sd;
-
- memset(addr, 0, sizeof(*addr));
- assert((sd = socket(AF_INET, SOCK_STREAM, 0)) >= 0);
-#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
- addr->sin_len = sizeof(struct sockaddr_in);
-#endif
- addr->sin_addr.s_addr = htonl(0x7f000001);
- addr->sin_family = AF_INET; /* was localhost->h_addrtype */
- addr->sin_port = htons(2040); /* XXXX htons not _really_ neccessary */
-
- return sd;
-}
-#endif
-
-static void
-FSYNC_sync()
+static void *
+FSYNC_sync(void * args)
{
#ifdef USE_UNIX_SOCKETS
- struct sockaddr_un addr;
char tbuffer[AFSDIR_PATH_MAX];
-#else /* USE_UNIX_SOCKETS */
- struct sockaddr_in addr;
#endif /* USE_UNIX_SOCKETS */
int on = 1;
extern int VInit;
#ifdef AFS_PTHREAD_ENV
int tid;
#endif
+ SYNC_server_state_t * state = &fssync_server_state;
+
+ SYNC_getAddr(&state->endpoint, &state->addr);
+ SYNC_cleanupSock(state);
#ifndef AFS_NT40_ENV
(void)signal(SIGPIPE, SIG_IGN);
Log("Set thread id %d for FSYNC_sync\n", tid);
#endif /* AFS_PTHREAD_ENV */
-#ifdef USE_UNIX_SOCKETS
- /* ignore errors */
- strcompose(tbuffer, AFSDIR_PATH_MAX, AFSDIR_SERVER_LOCAL_DIRPATH, "/",
- "fssync.sock", NULL);
-
- remove(tbuffer);
-#endif /* USE_UNIX_SOCKETS */
-
while (!VInit) {
/* Let somebody else run until level > 0. That doesn't mean that
* all volumes have been attached. */
LWP_DispatchProcess();
#endif /* AFS_PTHREAD_ENV */
}
- AcceptSd = getport(&addr);
- /* Reuseaddr needed because system inexplicably leaves crud lying around */
- code =
- setsockopt(AcceptSd, SOL_SOCKET, SO_REUSEADDR, (char *)&on,
- sizeof(on));
- if (code)
- Log("FSYNC_sync: setsockopt failed with (%d)\n", errno);
-
- for (numTries = 0; numTries < MAX_BIND_TRIES; numTries++) {
- if ((code =
- bind(AcceptSd, (struct sockaddr *)&addr, sizeof(addr))) == 0)
- break;
- Log("FSYNC_sync: bind failed with (%d), will sleep and retry\n",
- errno);
- sleep(5);
- }
+
+ state->fd = SYNC_getSock(&state->endpoint);
+ code = SYNC_bindSock(state);
assert(!code);
- listen(AcceptSd, 100);
+
InitHandler();
AcceptOn();
+
for (;;) {
#if defined(HAVE_POLL) && defined(AFS_PTHREAD_ENV)
int nfds;
VOL_LOCK;
switch (com.hdr.command) {
case FSYNC_VOL_ON:
+ case FSYNC_VOL_ATTACH:
+ case FSYNC_VOL_LEAVE_OFF:
case FSYNC_VOL_OFF:
+ case FSYNC_VOL_FORCE_ERROR:
case FSYNC_VOL_LISTVOLUMES:
case FSYNC_VOL_NEEDVOLUME:
case FSYNC_VOL_MOVE:
case FSYNC_VOL_STATS_VLRU:
res.hdr.response = FSYNC_com_StatsOp(fd, &com, &res);
break;
+ case FSYNC_VOL_QUERY_VNODE:
+ res.hdr.response = FSYNC_com_VnQry(fd, &com, &res);
+ break;
default:
res.hdr.response = SYNC_BAD_COMMAND;
break;
switch (com->hdr.command) {
case FSYNC_VOL_ON:
+ case FSYNC_VOL_ATTACH:
+ case FSYNC_VOL_LEAVE_OFF:
code = FSYNC_com_VolOn(&vcom, res);
break;
case FSYNC_VOL_OFF:
code = FSYNC_com_VolHdrQuery(&vcom, res);
break;
#ifdef AFS_DEMAND_ATTACH_FS
+ case FSYNC_VOL_FORCE_ERROR:
+ code = FSYNC_com_VolError(&vcom, res);
+ break;
case FSYNC_VOL_QUERY_VOP:
code = FSYNC_com_VolOpQuery(&vcom, res);
break;
/* so, we need to attach the volume */
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* check DAFS permissions */
+ vp = VLookupVolume_r(&error, vcom->vop->volume, NULL);
+ if (vp && !strcmp(VPartitionPath(V_partition(vp)), vcom->vop->partName) &&
+ vp->pending_vol_op &&
+ (vcom->hdr->programType != vp->pending_vol_op->com.programType)) {
+ /* a different program has this volume checked out. deny. */
+ Log("FSYNC_VolOn: WARNING: program type %u has attempted to manipulate "
+ "state for volume %u using command code %u while the volume is "
+ "checked out by program type %u for command code %u.\n",
+ vcom->hdr->programType,
+ vcom->vop->volume,
+ vcom->hdr->command,
+ vp->pending_vol_op->com.programType,
+ vp->pending_vol_op->com.command);
+ code = SYNC_DENIED;
+ res->hdr.reason = FSYNC_EXCLUSIVE;
+ goto done;
+ }
+#endif
+
if (vcom->v)
vcom->v->volumeID = 0;
+
+
+ if (vcom->hdr->command == FSYNC_VOL_LEAVE_OFF) {
+ /* nothing much to do if we're leaving the volume offline */
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (vp &&
+ !strcmp(VPartitionPath(V_partition(vp)), vcom->vop->partName)) {
+ VDeregisterVolOp_r(vp);
+ VChangeState_r(vp, VOL_STATE_UNATTACHED);
+ }
+#endif
+ goto done;
+ }
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* first, check to see whether we have such a volume defined */
+ vp = VPreAttachVolumeById_r(&error,
+ vcom->vop->partName,
+ vcom->vop->volume);
+ if (vp) {
+ VDeregisterVolOp_r(vp);
+ }
+#else /* !AFS_DEMAND_ATTACH_FS */
tvolName[0] = '/';
snprintf(&tvolName[1], sizeof(tvolName)-1, VFORMAT, vcom->vop->volume);
tvolName[sizeof(tvolName)-1] = '\0';
-#ifdef AFS_DEMAND_ATTACH_FS
- vp = VPreAttachVolumeByName_r(&error, vcom->vop->partName, tvolName,
- V_VOLUPD);
- if (vp && vp->pending_vol_op) {
- VDeregisterVolOp_r(vp, vp->pending_vol_op);
- }
-#else /* AFS_DEMAND_ATTACH_FS */
vp = VAttachVolumeByName_r(&error, vcom->vop->partName, tvolName,
V_VOLUPD);
if (vp)
VPutVolume_r(vp);
-#endif /* AFS_DEMAND_ATTACH_FS */
-
if (error) {
code = SYNC_DENIED;
res->hdr.reason = error;
}
+#endif /* !AFS_DEMAND_ATTACH_FS */
done:
return code;
#ifdef AFS_DEMAND_ATTACH_FS
vp = VLookupVolume_r(&error, vcom->vop->volume, NULL);
- if (vp && vp->pending_vol_op) {
- VDeregisterVolOp_r(vp, vp->pending_vol_op);
+ if (vp) {
+ VDeregisterVolOp_r(vp);
}
#endif
return SYNC_OK;
}
+#ifdef AFS_DEMAND_ATTACH_FS
+/**
+ * force a volume into the hard error state.
+ */
+static afs_int32
+FSYNC_com_VolError(FSSYNC_VolOp_command * vcom, SYNC_response * res)
+{
+ Error error;
+ Volume * vp;
+ afs_int32 code = SYNC_DENIED;
+
+ vp = VLookupVolume_r(&error, vcom->vop->volume, NULL);
+ if (vp && !strcmp(VPartitionPath(V_partition(vp)), vcom->vop->partName)) {
+ memset(&vp->salvage, 0, sizeof(vp->salvage));
+ VChangeState_r(vp, VOL_STATE_ERROR);
+ code = SYNC_OK;
+ } else {
+ res->hdr.reason = FSYNC_UNKNOWN_VOLID;
+ }
+
+ return code;
+}
+#endif
+
static afs_int32
FSYNC_com_VolBreakCBKs(FSSYNC_VolOp_command * vcom, SYNC_response * res)
{
#endif /* AFS_DEMAND_ATTACH_FS */
static afs_int32
+FSYNC_com_VnQry(int fd, SYNC_command * com, SYNC_response * res)
+{
+ afs_int32 code = SYNC_OK;
+ FSSYNC_VnQry_hdr * qry = com->payload.buf;
+ Volume * vp;
+ Vnode * vnp;
+ Error error;
+
+ if (com->recv_len != (sizeof(com->hdr) + sizeof(FSSYNC_VnQry_hdr))) {
+ res->hdr.reason = SYNC_REASON_MALFORMED_PACKET;
+ res->hdr.flags |= SYNC_FLAG_CHANNEL_SHUTDOWN;
+ return SYNC_COM_ERROR;
+ }
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ vp = VLookupVolume_r(&error, qry->volume, NULL);
+#else /* !AFS_DEMAND_ATTACH_FS */
+ vp = VGetVolume_r(&error, qry->volume);
+#endif /* !AFS_DEMAND_ATTACH_FS */
+
+ if (!vp) {
+ res->hdr.reason = FSYNC_UNKNOWN_VOLID;
+ code = SYNC_FAILED;
+ goto done;
+ }
+
+ vnp = VLookupVnode(vp, qry->vnode);
+ if (!vnp) {
+ res->hdr.reason = FSYNC_UNKNOWN_VNID;
+ code = SYNC_FAILED;
+ goto cleanup;
+ }
+
+ if (Vn_class(vnp)->residentSize > res->payload.len) {
+ res->hdr.reason = SYNC_REASON_ENCODING_ERROR;
+ code = SYNC_FAILED;
+ goto cleanup;
+ }
+
+ memcpy(res->payload.buf, vnp, Vn_class(vnp)->residentSize);
+ res->hdr.response_len += Vn_class(vnp)->residentSize;
+
+ cleanup:
+#ifndef AFS_DEMAND_ATTACH_FS
+ VPutVolume_r(vp);
+#endif
+
+ done:
+ return code;
+}
+
+static afs_int32
FSYNC_com_StatsOp(int fd, SYNC_command * com, SYNC_response * res)
{
- int i;
afs_int32 code = SYNC_OK;
FSSYNC_StatsOp_command scom;
AcceptOn()
{
if (AcceptHandler == -1) {
- assert(AddHandler(AcceptSd, FSYNC_newconnection));
- AcceptHandler = FindHandler(AcceptSd);
+ assert(AddHandler(fssync_server_state.fd, FSYNC_newconnection));
+ AcceptHandler = FindHandler(fssync_server_state.fd);
}
}
AcceptOff()
{
if (AcceptHandler != -1) {
- assert(RemoveHandler(AcceptSd));
+ assert(RemoveHandler(fssync_server_state.fd));
AcceptHandler = -1;
}
}
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
*
- * Portions Copyright (c) 2006 Sine Nomine Associates
+ * Portions Copyright (c) 2006-2008 Sine Nomine Associates
*/
/*
#define FSYNC_PROTO_VERSION 2
-/* FSYNC command codes */
-#define FSYNC_VOL_ON SYNC_COM_CODE_DECL(0) /* Volume online */
-#define FSYNC_VOL_OFF SYNC_COM_CODE_DECL(1) /* Volume offline */
-#define FSYNC_VOL_LISTVOLUMES SYNC_COM_CODE_DECL(2) /* Update local volume list */
-#define FSYNC_VOL_NEEDVOLUME SYNC_COM_CODE_DECL(3) /* Put volume in whatever mode (offline, or whatever)
- * best fits the attachment mode provided in reason */
-#define FSYNC_VOL_MOVE SYNC_COM_CODE_DECL(4) /* Generate temporary relocation information
- * for this volume to another site, to be used
- * if this volume disappears */
-#define FSYNC_VOL_BREAKCBKS SYNC_COM_CODE_DECL(5) /* Break all the callbacks on this volume */
-#define FSYNC_VOL_DONE SYNC_COM_CODE_DECL(6) /* Done with this volume (used after a delete).
- * Don't put online, but remove from list */
-#define FSYNC_VOL_QUERY SYNC_COM_CODE_DECL(7) /* query the volume state */
-#define FSYNC_VOL_QUERY_HDR SYNC_COM_CODE_DECL(8) /* query the volume disk data structure */
-#define FSYNC_VOL_QUERY_VOP SYNC_COM_CODE_DECL(9) /* query the volume for pending vol op info */
-#define FSYNC_VOL_STATS_GENERAL SYNC_COM_CODE_DECL(10) /* query the general volume package statistics */
-#define FSYNC_VOL_STATS_VICEP SYNC_COM_CODE_DECL(11) /* query the per-partition volume package stats */
-#define FSYNC_VOL_STATS_HASH SYNC_COM_CODE_DECL(12) /* query the per hash-chain volume package stats */
-#define FSYNC_VOL_STATS_HDR SYNC_COM_CODE_DECL(13) /* query the volume header cache statistics */
-#define FSYNC_VOL_STATS_VLRU SYNC_COM_CODE_DECL(14) /* query the VLRU statistics */
-
-/* FSYNC reason codes */
-#define FSYNC_WHATEVER SYNC_REASON_CODE_DECL(0) /* XXXX */
-#define FSYNC_SALVAGE SYNC_REASON_CODE_DECL(1) /* volume is being salvaged */
-#define FSYNC_MOVE SYNC_REASON_CODE_DECL(2) /* volume is being moved */
-#define FSYNC_OPERATOR SYNC_REASON_CODE_DECL(3) /* operator forced volume offline */
-#define FSYNC_EXCLUSIVE SYNC_REASON_CODE_DECL(4) /* somebody else has the volume offline */
-#define FSYNC_UNKNOWN_VOLID SYNC_REASON_CODE_DECL(5) /* volume id not known by fileserver */
-#define FSYNC_HDR_NOT_ATTACHED SYNC_REASON_CODE_DECL(6) /* volume header not currently attached */
-#define FSYNC_NO_PENDING_VOL_OP SYNC_REASON_CODE_DECL(7) /* no volume operation pending */
-#define FSYNC_VOL_PKG_ERROR SYNC_REASON_CODE_DECL(8) /* error in the volume package */
+/**
+ * FSYNC command codes.
+ */
+enum FSYNCOpCode {
+ FSYNC_VOL_ON = SYNC_COM_CODE_DECL(0), /**< bring Volume online */
+ FSYNC_VOL_OFF = SYNC_COM_CODE_DECL(1), /**< take Volume offline */
+ FSYNC_VOL_LISTVOLUMES = SYNC_COM_CODE_DECL(2), /**< Update local volume list */
+ FSYNC_VOL_NEEDVOLUME = SYNC_COM_CODE_DECL(3), /**< Put volume in whatever mode (offline, or whatever)
+ * best fits the attachment mode provided in reason */
+ FSYNC_VOL_MOVE = SYNC_COM_CODE_DECL(4), /**< Generate temporary relocation information
+ * for this volume to another site, to be used
+ * if this volume disappears */
+ FSYNC_VOL_BREAKCBKS = SYNC_COM_CODE_DECL(5), /**< Break all the callbacks on this volume */
+ FSYNC_VOL_DONE = SYNC_COM_CODE_DECL(6), /**< Done with this volume (used after a delete).
+ * Don't put online, but remove from list */
+ FSYNC_VOL_QUERY = SYNC_COM_CODE_DECL(7), /**< query the volume state */
+ FSYNC_VOL_QUERY_HDR = SYNC_COM_CODE_DECL(8), /**< query the volume disk data structure */
+ FSYNC_VOL_QUERY_VOP = SYNC_COM_CODE_DECL(9), /**< query the volume for pending vol op info */
+ FSYNC_VOL_STATS_GENERAL = SYNC_COM_CODE_DECL(10), /**< query the general volume package statistics */
+ FSYNC_VOL_STATS_VICEP = SYNC_COM_CODE_DECL(11), /**< query the per-partition volume package stats */
+ FSYNC_VOL_STATS_HASH = SYNC_COM_CODE_DECL(12), /**< query the per hash-chain volume package stats */
+ FSYNC_VOL_STATS_HDR = SYNC_COM_CODE_DECL(13), /**< query the volume header cache statistics */
+ FSYNC_VOL_STATS_VLRU = SYNC_COM_CODE_DECL(14), /**< query the VLRU statistics */
+ FSYNC_VOL_ATTACH = SYNC_COM_CODE_DECL(15), /**< Force volume online */
+ FSYNC_VOL_FORCE_ERROR = SYNC_COM_CODE_DECL(16), /**< force volume into error state */
+ FSYNC_VOL_LEAVE_OFF = SYNC_COM_CODE_DECL(17), /**< end vol op, but leave volume offline */
+ FSYNC_VOL_QUERY_VNODE = SYNC_COM_CODE_DECL(18), /**< query vnode state */
+};
+
+/**
+ * FSYNC reason codes.
+ */
+enum FSYNCReasonCode {
+ FSYNC_WHATEVER = SYNC_REASON_CODE_DECL(0), /**< XXXX */
+ FSYNC_SALVAGE = SYNC_REASON_CODE_DECL(1), /**< volume is being salvaged */
+ FSYNC_MOVE = SYNC_REASON_CODE_DECL(2), /**< volume is being moved */
+ FSYNC_OPERATOR = SYNC_REASON_CODE_DECL(3), /**< operator forced volume offline */
+ FSYNC_EXCLUSIVE = SYNC_REASON_CODE_DECL(4), /**< somebody else has the volume offline */
+ FSYNC_UNKNOWN_VOLID = SYNC_REASON_CODE_DECL(5), /**< volume id not known by fileserver */
+ FSYNC_HDR_NOT_ATTACHED = SYNC_REASON_CODE_DECL(6), /**< volume header not currently attached */
+ FSYNC_NO_PENDING_VOL_OP = SYNC_REASON_CODE_DECL(7), /**< no volume operation pending */
+ FSYNC_VOL_PKG_ERROR = SYNC_REASON_CODE_DECL(8), /**< error in the volume package */
+ FSYNC_UNKNOWN_VNID = SYNC_REASON_CODE_DECL(9), /**< vnode id not known by fileserver */
+};
/* FSYNC response codes */
char partName[16];
};
+/**
+ * fssync protocol volume operation request message.
+ */
typedef struct FSSYNC_VolOp_hdr {
- afs_uint32 volume; /* volume id associated with request */
- char partName[16]; /* partition name, e.g. /vicepa */
+ afs_uint32 volume; /**< volume id associated with request */
+ char partName[16]; /**< partition name, e.g. /vicepa */
} FSSYNC_VolOp_hdr;
typedef struct FSSYNC_VolOp_command {
struct offlineInfo * volumes;
} FSSYNC_VolOp_command;
+/**
+ * volume operation information node.
+ *
+ * @note this structure is attached to a struct Volume to signify that
+ * a volume operation is in-progress.
+ *
+ * @see Volume
+ * @see VRegisterVolOp_r
+ * @see VDeregisterVolOp_r
+ */
typedef struct FSSYNC_VolOp_info {
SYNC_command_hdr com;
FSSYNC_VolOp_hdr vop;
} FSSYNC_VolOp_info;
+/**
+ * fssync protocol volume package statistics request node.
+ */
typedef struct FSSYNC_StatsOp_hdr {
union {
- afs_uint32 vlru_generation;
- afs_uint32 hash_bucket;
- char partName[16];
+ afs_uint32 vlru_generation; /**< vlru generation id */
+ afs_uint32 hash_bucket; /**< volume hash bucket */
+ char partName[16]; /**< partition name */
} args;
} FSSYNC_StatsOp_hdr;
SYNC_command * com;
} FSSYNC_StatsOp_command;
-
+/**
+ * fssync protocol vnode query request message.
+ */
+typedef struct FSSYNC_VnQry_hdr {
+ afs_uint32 volume; /**< volume id */
+ afs_uint32 vnode; /**< vnode id */
+    afs_uint32 unique;     /**< uniquifier */
+ afs_uint32 spare; /**< reserved for future use */
+ char partName[16]; /**< partition name */
+} FSSYNC_VnQry_hdr;
+
+
+#define FSSYNC_IN_PORT 2040
+#define FSSYNC_UN_PATH "fssync.sock"
+#define FSSYNC_ENDPOINT_DECL SYNC_ENDPOINT_DECL(FSSYNC_IN_PORT, FSSYNC_UN_PATH)
/*
* common interfaces
static void SalvageServer(void);
static void SalvageClient(VolumeId vid, char * pname);
+static int ChildFailed(int status);
+
static int Reap_Child(char * prog, int * pid, int * status);
static void * SalvageLogCleanupThread(void *);
}
assert (slot < Parallel);
+ do_fork:
pid = Fork();
if (pid == 0) {
VOL_UNLOCK;
ret = DoSalvageVolume(node, slot);
Exit(ret);
} else if (pid < 0) {
- VOL_UNLOCK;
- SALVSYNC_doneWork(node, 1);
+ Log("failed to fork child worker process\n");
+ sleep(1);
+ goto do_fork;
} else {
child_slot[slot] = pid;
node->pid = pid;
int slot, pid, status, code, found;
struct SalvageQueueNode *qp, *nqp;
struct log_cleanup_node * cleanup;
+ SALVSYNC_command_info info;
assert(pthread_mutex_lock(&worker_lock) == 0);
child_slot[slot] = 0;
VOL_UNLOCK;
+ SALVSYNC_doneWorkByPid(pid, status);
+
assert(pthread_mutex_lock(&worker_lock) == 0);
if (cleanup) {
/* ok, we've reaped a child */
current_workers--;
- SALVSYNC_doneWorkByPid(pid, WEXITSTATUS(status));
assert(pthread_cond_broadcast(&worker_cv) == 0);
}
/*
- * Copyright 2006-2007, Sine Nomine Associates and others.
+ * Copyright 2006-2008, Sine Nomine Associates and others.
* All Rights Reserved.
*
* This software has been released under the terms of the IBM Public
static SYNC_client_state salvsync_client_state =
{ -1, /* file descriptor */
- 2041, /* port */
+ SALVSYNC_ENDPOINT_DECL, /* server endpoint */
SALVSYNC_PROTO_VERSION, /* protocol version */
5, /* connect retry limit */
120, /* hard timeout */
/*
- * Copyright 2006-2007, Sine Nomine Associates and others.
+ * Copyright 2006-2008, Sine Nomine Associates and others.
* All Rights Reserved.
*
* This software has been released under the terms of the IBM Public
#include "volume.h"
#include "partition.h"
#include <rx/rx_queue.h>
+#include <afs/procmgmt.h>
#if !defined(offsetof)
#include <stddef.h>
#define MAXHANDLERS 4 /* Up to 4 clients; must be at least 2, so that
* move = dump+restore can run on single server */
-#define MAX_BIND_TRIES 5 /* Number of times to retry socket bind */
-
-
-
/* Forward declarations */
static void * SALVSYNC_syncThread(void *);
static void SALVSYNC_newconnection(int fd);
extern int VInit;
extern pthread_mutex_t vol_salvsync_mutex;
-static int AcceptSd = -1; /* Socket used by server for accepting connections */
+/**
+ * salvsync server socket handle.
+ */
+static SYNC_server_state_t salvsync_server_state =
+ { -1, /* file descriptor */
+ SALVSYNC_ENDPOINT_DECL, /* server endpoint */
+ SALVSYNC_PROTO_VERSION, /* protocol version */
+ 5, /* bind() retry limit */
+ 100, /* listen() queue depth */
+ "SALVSYNC", /* protocol name string */
+ };
/**
assert(pthread_create(&tid, &tattr, SALVSYNC_syncThread, NULL) == 0);
}
-#ifdef USE_UNIX_SOCKETS
-static int
-getport(struct sockaddr_un *addr)
-{
- int sd;
- char tbuffer[AFSDIR_PATH_MAX];
-
- strcompose(tbuffer, AFSDIR_PATH_MAX, AFSDIR_SERVER_LOCAL_DIRPATH, "/",
- "fssync.sock", NULL);
-
- memset(addr, 0, sizeof(*addr));
- addr->sun_family = AF_UNIX;
- strncpy(addr->sun_path, tbuffer, (sizeof(struct sockaddr_un) - sizeof(short)));
- assert((sd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0);
- return sd;
-}
-#else
-static int
-getport(struct sockaddr_in *addr)
-{
- int sd;
-
- memset(addr, 0, sizeof(*addr));
- assert((sd = socket(AF_INET, SOCK_STREAM, 0)) >= 0);
-#ifdef STRUCT_SOCKADDR_HAS_SA_LEN
- addr->sin_len = sizeof(struct sockaddr_in);
-#endif
- addr->sin_addr.s_addr = htonl(0x7f000001);
- addr->sin_family = AF_INET; /* was localhost->h_addrtype */
- addr->sin_port = htons(2041); /* XXXX htons not _really_ neccessary */
-
- return sd;
-}
-#endif
static fd_set SALVSYNC_readfds;
static void *
SALVSYNC_syncThread(void * args)
{
- struct sockaddr_in addr;
int on = 1;
int code;
int numTries;
int tid;
-#ifdef USE_UNIX_SOCKETS
- char tbuffer[AFSDIR_PATH_MAX];
-#endif
+ SYNC_server_state_t * state = &salvsync_server_state;
+
+ SYNC_getAddr(&state->endpoint, &state->addr);
+ SYNC_cleanupSock(state);
#ifndef AFS_NT40_ENV
(void)signal(SIGPIPE, SIG_IGN);
pthread_setspecific(rx_thread_id_key, (void *)tid);
Log("Set thread id %d for SALVSYNC_syncThread\n", tid);
-#ifdef USE_UNIX_SOCKETS
- strcompose(tbuffer, AFSDIR_PATH_MAX, AFSDIR_SERVER_LOCAL_DIRPATH, "/",
- "fssync.sock", NULL);
- /* ignore errors */
- remove(tbuffer);
-#endif /* USE_UNIX_SOCKETS */
-
- AcceptSd = getport(&addr);
- /* Reuseaddr needed because system inexplicably leaves crud lying around */
- code =
- setsockopt(AcceptSd, SOL_SOCKET, SO_REUSEADDR, (char *)&on,
- sizeof(on));
- if (code)
- Log("SALVSYNC_sync: setsockopt failed with (%d)\n", errno);
-
- for (numTries = 0; numTries < MAX_BIND_TRIES; numTries++) {
- if ((code =
- bind(AcceptSd, (struct sockaddr *)&addr, sizeof(addr))) == 0)
- break;
- Log("SALVSYNC_sync: bind failed with (%d), will sleep and retry\n",
- errno);
- sleep(5);
- }
+ state->fd = SYNC_getSock(&state->endpoint);
+ code = SYNC_bindSock(state);
assert(!code);
- listen(AcceptSd, 100);
+
InitHandler();
AcceptOn();
}
}
+/**
+ * request that a volume be salvaged.
+ *
+ * @param[in] com inbound command object
+ * @param[out] res outbound response object
+ *
+ * @return operation status
+ * @retval SYNC_OK success
+ * @retval SYNC_DENIED failed to enqueue request
+ * @retval SYNC_FAILED malformed command packet
+ *
+ * @note this is a SALVSYNC protocol rpc handler
+ *
+ * @internal
+ *
+ * @post the volume is enqueued in the to-be-salvaged queue.
+ * if the volume was already in the salvage queue, its
+ * priority (and thus its location in the queue) are
+ * updated.
+ */
static afs_int32
SALVSYNC_com_Salvage(SALVSYNC_command * com, SALVSYNC_response * res)
{
case SALVSYNC_STATE_UNKNOWN:
memcpy(&clone->command.com, com->hdr, sizeof(SYNC_command_hdr));
memcpy(&clone->command.sop, com->sop, sizeof(SALVSYNC_command_hdr));
+
+ /*
+ * make sure volgroup parent partition path is kept coherent
+ *
+ * If we ever want to support non-COW clones on a machine holding
+ * the RW site, please note that this code does not work under the
+ * conditions where someone zaps a COW clone on partition X, and
+ * subsequently creates a full clone on partition Y -- we'd need
+ * an inverse to SALVSYNC_com_Link.
+ * -- tkeiser 11/28/2007
+ */
+ strcpy(node->command.sop.partName, com->sop->partName);
+
if (AddToSalvageQueue(node)) {
code = SYNC_DENIED;
}
return code;
}
+/**
+ * cancel a pending salvage request.
+ *
+ * @param[in] com inbound command object
+ * @param[out] res outbound response object
+ *
+ * @return operation status
+ * @retval SYNC_OK success
+ * @retval SYNC_FAILED malformed command packet
+ *
+ * @note this is a SALVSYNC protocol rpc handler
+ *
+ * @internal
+ */
static afs_int32
SALVSYNC_com_Cancel(SALVSYNC_command * com, SALVSYNC_response * res)
{
return code;
}
+/**
+ * cancel all pending salvage requests.
+ *
+ * @param[in] com incoming command object
+ * @param[out] res outbound response object
+ *
+ * @return operation status
+ * @retval SYNC_OK success
+ *
+ * @note this is a SALVSYNC protocol rpc handler
+ *
+ * @internal
+ */
static afs_int32
SALVSYNC_com_CancelAll(SALVSYNC_command * com, SALVSYNC_response * res)
{
/**
* link a queue node for a clone to its parent volume.
+ *
+ * @param[in] com inbound command object
+ * @param[out] res outbound response object
+ *
+ * @return operation status
+ * @retval SYNC_OK success
+ * @retval SYNC_FAILED malformed command packet
+ * @retval SYNC_DENIED the request could not be completed
+ *
+ * @note this is a SALVSYNC protocol rpc handler
+ *
+ * @post the requested volume is marked as a child of another volume.
+ * thus, future salvage requests for this volume will result in the
+ * parent of the volume group being scheduled for salvage instead
+ * of this clone.
+ *
+ * @internal
*/
static afs_int32
SALVSYNC_com_Link(SALVSYNC_command * com, SALVSYNC_response * res)
return code;
}
+/**
+ * query the status of a volume salvage request.
+ *
+ * @param[in] com inbound command object
+ * @param[out] res outbound response object
+ *
+ * @return operation status
+ * @retval SYNC_OK success
+ * @retval SYNC_FAILED malformed command packet
+ *
+ * @note this is a SALVSYNC protocol rpc handler
+ *
+ * @internal
+ */
static afs_int32
SALVSYNC_com_Query(SALVSYNC_command * com, SALVSYNC_response * res)
{
AcceptOn(void)
{
if (AcceptHandler == -1) {
- assert(AddHandler(AcceptSd, SALVSYNC_newconnection));
- AcceptHandler = FindHandler(AcceptSd);
+ assert(AddHandler(salvsync_server_state.fd, SALVSYNC_newconnection));
+ AcceptHandler = FindHandler(salvsync_server_state.fd);
}
}
AcceptOff(void)
{
if (AcceptHandler != -1) {
- assert(RemoveHandler(AcceptSd));
+ assert(RemoveHandler(salvsync_server_state.fd));
AcceptHandler = -1;
}
}
ReleaseReadLock(&SALVSYNC_handler_lock); /* just in case */
}
+/**
+ * allocate a salvage queue node.
+ *
+ * @param[out] node_out address in which to store new node pointer
+ *
+ * @return operation status
+ * @retval 0 success
+ * @retval 1 failed to allocate node
+ *
+ * @internal
+ */
static int
AllocNode(struct SalvageQueueNode ** node_out)
{
return code;
}
+/**
+ * link a salvage queue node to its parent.
+ *
+ * @param[in] parent pointer to queue node for parent of volume group
+ * @param[in] clone pointer to queue node for a clone
+ *
+ * @return operation status
+ * @retval 0 success
+ * @retval 1 failure
+ *
+ * @internal
+ */
static int
LinkNode(struct SalvageQueueNode * parent,
struct SalvageQueueNode * clone)
* if there are no disk partitions, just sit in this wait loop forever
*/
while (!salvageQueue.total_len || !DiskPartitionList) {
- assert(pthread_cond_wait(&salvageQueue.cv, &vol_glock_mutex) == 0);
+ VOL_CV_WAIT(&salvageQueue.cv);
}
/*
return node;
}
+/**
+ * update internal scheduler state to reflect completion of a work unit.
+ *
+ * @param[in] node salvage queue node object pointer
+ * @param[in] result worker process result code
+ *
+ * @post scheduler state is updated.
+ *
+ * @internal
+ */
static void
SALVSYNC_doneWork_r(struct SalvageQueueNode * node, int result)
{
}
}
-void
-SALVSYNC_doneWork(struct SalvageQueueNode * node, int result)
+/**
+ * check whether worker child failed.
+ *
+ * @param[in] status status bitfield return by wait()
+ *
+ * @return boolean failure code
+ * @retval 0 child succeeded
+ * @retval 1 child failed
+ *
+ * @internal
+ */
+static int
+ChildFailed(int status)
{
- VOL_LOCK;
- SALVSYNC_doneWork_r(node, result);
- VOL_UNLOCK;
+ return (WCOREDUMP(status) ||
+ WIFSIGNALED(status) ||
+ ((WEXITSTATUS(status) != 0) &&
+ (WEXITSTATUS(status) != SALSRV_EXIT_VOLGROUP_LINK)));
}
+
+/**
+ * notify salvsync scheduler of node completion, by child pid.
+ *
+ * @param[in] pid pid of worker child
+ * @param[in] status worker status bitfield from wait()
+ *
+ * @post scheduler state is updated.
+ * if status code is a failure, fileserver notification was attempted
+ *
+ * @see SALVSYNC_doneWork_r
+ */
void
-SALVSYNC_doneWorkByPid(int pid, int result)
+SALVSYNC_doneWorkByPid(int pid, int status)
{
struct SalvageQueueNode * node;
+ char partName[16];
+ afs_uint32 volids[VOLMAXTYPES+1];
+ unsigned int idx;
+
+ memset(volids, 0, sizeof(volids));
VOL_LOCK;
node = LookupPendingCommandByPid(pid);
if (node != NULL) {
- SALVSYNC_doneWork_r(node, result);
+ SALVSYNC_doneWork_r(node, status);
+
+ if (ChildFailed(status)) {
+ /* populate volume id list for later processing outside the glock */
+ volids[0] = node->command.sop.volume;
+ strcpy(partName, node->command.sop.partName);
+ if (node->type == SALVSYNC_VOLGROUP_PARENT) {
+ for (idx = 0; idx < VOLMAXTYPES; idx++) {
+ if (node->volgroup.children[idx]) {
+ volids[idx+1] = node->volgroup.children[idx]->command.sop.volume;
+ }
+ }
+ }
+ }
}
VOL_UNLOCK;
+
+ /*
+ * if necessary, notify fileserver of
+ * failure to salvage volume group
+ * [we cannot guarantee that the child made the
+ * appropriate notifications (e.g. SIGSEGV)]
+ * -- tkeiser 11/28/2007
+ */
+ if (ChildFailed(status)) {
+ for (idx = 0; idx <= VOLMAXTYPES; idx++) {
+ if (volids[idx]) {
+ FSYNC_VolOp(volids[idx],
+ partName,
+ FSYNC_VOL_FORCE_ERROR,
+ FSYNC_WHATEVER,
+ NULL);
+ }
+ }
+ }
}
#endif /* AFS_DEMAND_ATTACH_FS */
/*
- * Copyright 2006-2007, Sine Nomine Associates and others.
+ * Copyright 2006-2008, Sine Nomine Associates and others.
* All Rights Reserved.
*
* This software has been released under the terms of the IBM Public
int pid;
};
+#define SALVSYNC_IN_PORT 2041
+#define SALVSYNC_UN_PATH "salvsync.sock"
+#define SALVSYNC_ENDPOINT_DECL SYNC_ENDPOINT_DECL(SALVSYNC_IN_PORT, SALVSYNC_UN_PATH)
/* Prototypes from salvsync.c */
/* salvage server interfaces */
extern void SALVSYNC_salvInit(void);
extern struct SalvageQueueNode * SALVSYNC_getWork(void);
-extern void SALVSYNC_doneWork(struct SalvageQueueNode *, int result);
extern void SALVSYNC_doneWorkByPid(int pid, int result);
#endif /* AFS_DEMAND_ATTACH_FS */
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
*
- * Portions Copyright (c) 2006 Sine Nomine Associates
+ * Portions Copyright (c) 2005-2008 Sine Nomine Associates
*/
/*
#endif /* AFS_PTHREAD_ENV */
#include <rx/xdr.h>
+#include "rx/rx_queue.h"
#include <afs/afsint.h>
#include "nfs.h"
#include <afs/errors.h>
#include "ihandle.h"
#include "vnode.h"
#include "volume.h"
+#include "volume_inline.h"
+#include "vnode_inline.h"
#include "partition.h"
#include "salvsync.h"
#if defined(AFS_SGI_ENV)
struct VnodeClassInfo VnodeClassInfo[nVNODECLASSES];
-private int moveHash(register Vnode * vnp, bit32 newHash);
private void StickOnLruChain_r(register Vnode * vnp,
register struct VnodeClassInfo *vcp);
extern int LogLevel;
+
+
+
#define BAD_IGET -1000
/* There are two separate vnode queue types defined here:
#define VNODE_HASH(volumeptr,vnodenumber)\
((volumeptr->vnodeHashOffset + vnodenumber)&(VNODE_HASH_TABLE_SIZE-1))
-/*
- * new support to secondarily hash vnodes by volume id
- */
-#define VNVOLUME_HASH(volumeId) (volumeId&(VolumeHashTable.Mask))
-
-#include "rx/rx_queue.h"
-typedef struct VnodeHashByVolumeChainHead {
- struct rx_queue queue;
- int len;
- /* someday we could put a per-chain lock here... */
-#ifdef AFS_DEMAND_ATTACH_FS
- int busy;
- pthread_cond_t chain_busy_cv;
-#endif /* AFS_DEMAND_ATTACH_FS */
-} VnodeHashByVolumeChainHead;
-private VnodeHashByVolumeChainHead *VnodeHashByVolumeTable = NULL;
+/**
+ * add a vnode to the volume's vnode list.
+ *
+ * @param[in] vp volume object pointer
+ * @param[in] vnp vnode object pointer
+ *
+ * @note for DAFS, it may seem like we should be acquiring a lightweight ref
+ * on vp, but this would actually break things. Right now, this is ok
+ * because we destroy all vnode cache contents during volume
+ * detach.
+ *
+ * @pre VOL_LOCK held
+ *
+ * @internal volume package internal use only
+ */
void
-VInitVnHashByVolume(void)
+AddToVVnList(Volume * vp, Vnode * vnp)
{
- register int i;
+ if (queue_IsOnQueue(vnp))
+ return;
- VnodeHashByVolumeTable = (VnodeHashByVolumeChainHead *) calloc(VolumeHashTable.Size,
- sizeof(VnodeHashByVolumeChainHead));
- assert(VnodeHashByVolumeTable != NULL);
-
- for (i=0; i < VolumeHashTable.Size; i++) {
- queue_Init(&VnodeHashByVolumeTable[i]);
-#ifdef AFS_DEMAND_ATTACH_FS
- assert(pthread_cond_init(&VnodeHashByVolumeTable[i].chain_busy_cv, NULL) == 0);
-#endif /* AFS_DEMAND_ATTACH_FS */
- }
+ Vn_volume(vnp) = vp;
+ Vn_cacheCheck(vnp) = vp->cacheCheck;
+ queue_Append(&vp->vnode_list, vnp);
+ Vn_stateFlags(vnp) |= VN_ON_VVN;
}
-static void
-AddToVnHashByVolumeTable(register Vnode * vnp)
+/**
+ * delete a vnode from the volume's vnode list.
+ *
+ * @pre VOL_LOCK held
+ *
+ * @internal volume package internal use only
+ */
+void
+DeleteFromVVnList(register Vnode * vnp)
{
- VnodeHashByVolumeChainHead * head;
+ Vn_volume(vnp) = NULL;
- if (queue_IsOnQueue(vnp))
+ if (!queue_IsOnQueue(vnp))
return;
- head = &VnodeHashByVolumeTable[VNVOLUME_HASH(vnp->volumePtr->hashid)];
+ queue_Remove(vnp);
+ Vn_stateFlags(vnp) &= ~(VN_ON_VVN);
+}
-#ifdef AFS_DEMAND_ATTACH_FS
- while (head->busy) {
- /* if the hash table is busy, wait */
- assert(pthread_cond_wait(&head->chain_busy_cv, &vol_glock_mutex) == 0);
+/**
+ * add a vnode to the end of the lru.
+ *
+ * @param[in] vcp vnode class info object pointer
+ * @param[in] vnp vnode object pointer
+ *
+ * @internal vnode package internal use only
+ */
+void
+AddToVnLRU(struct VnodeClassInfo * vcp, Vnode * vnp)
+{
+ if (Vn_stateFlags(vnp) & VN_ON_LRU) {
+ return;
}
-#endif /* AFS_DEMAND_ATTACH_FS */
- head->len++;
- queue_Append(head, vnp);
+ /* Add it to the circular LRU list */
+ if (vcp->lruHead == NULL)
+ Abort("VPutVnode: vcp->lruHead==NULL");
+ else {
+ vnp->lruNext = vcp->lruHead;
+ vnp->lruPrev = vcp->lruHead->lruPrev;
+ vcp->lruHead->lruPrev = vnp;
+ vnp->lruPrev->lruNext = vnp;
+ vcp->lruHead = vnp;
+ }
+
+ /* If the vnode was just deleted, put it at the end of the chain so it
+ * will be reused immediately */
+ if (vnp->delete)
+ vcp->lruHead = vnp->lruNext;
+
+ Vn_stateFlags(vnp) |= VN_ON_LRU;
}
-/* for demand-attach, caller MUST hold a ref count on vp */
-static void
-DeleteFromVnHashByVolumeTable(register Vnode * vnp)
+/**
+ * delete a vnode from the lru.
+ *
+ * @param[in] vcp vnode class info object pointer
+ * @param[in] vnp vnode object pointer
+ *
+ * @internal vnode package internal use only
+ */
+void
+DeleteFromVnLRU(struct VnodeClassInfo * vcp, Vnode * vnp)
{
- VnodeHashByVolumeChainHead * head;
-
- if (!queue_IsOnQueue(vnp))
+ if (!(Vn_stateFlags(vnp) & VN_ON_LRU)) {
return;
+ }
- head = &VnodeHashByVolumeTable[VNVOLUME_HASH(vnp->volumePtr->hashid)];
+ if (vnp == vcp->lruHead)
+ vcp->lruHead = vcp->lruHead->lruNext;
-#ifdef AFS_DEMAND_ATTACH_FS
- while (head->busy) {
- /* if the hash table is busy, wait */
- assert(pthread_cond_wait(&head->chain_busy_cv, &vol_glock_mutex) == 0);
+ if ((vnp == vcp->lruHead) ||
+ (vcp->lruHead == NULL))
+ Abort("DeleteFromVnLRU: lru chain addled!\n");
+
+ vnp->lruPrev->lruNext = vnp->lruNext;
+ vnp->lruNext->lruPrev = vnp->lruPrev;
+
+ Vn_stateFlags(vnp) &= ~(VN_ON_LRU);
+}
+
+/**
+ * add a vnode to the vnode hash table.
+ *
+ * @param[in] vnp vnode object pointer
+ *
+ * @pre VOL_LOCK held
+ *
+ * @post vnode on hash
+ *
+ * @internal vnode package internal use only
+ */
+void
+AddToVnHash(Vnode * vnp)
+{
+ unsigned int newHash;
+
+ if (!(Vn_stateFlags(vnp) & VN_ON_HASH)) {
+ newHash = VNODE_HASH(Vn_volume(vnp), Vn_id(vnp));
+ vnp->hashNext = VnodeHashTable[newHash];
+ VnodeHashTable[newHash] = vnp;
+ vnp->hashIndex = newHash;
+
+ Vn_stateFlags(vnp) |= VN_ON_HASH;
}
-#endif /* AFS_DEMAND_ATTACH_FS */
+}
- head->len--;
- queue_Remove(vnp);
+/**
+ * delete a vnode from the vnode hash table.
+ *
+ * @param[in] vnp
+ * @param[in] hash
+ *
+ * @pre VOL_LOCK held
+ *
+ * @post vnode removed from hash
+ *
+ * @internal vnode package internal use only
+ */
+void
+DeleteFromVnHash(Vnode * vnp)
+{
+ Vnode * tvnp;
+
+ if (Vn_stateFlags(vnp) & VN_ON_HASH) {
+ tvnp = VnodeHashTable[vnp->hashIndex];
+ if (tvnp == vnp)
+ VnodeHashTable[vnp->hashIndex] = vnp->hashNext;
+ else {
+ while (tvnp && tvnp->hashNext != vnp)
+ tvnp = tvnp->hashNext;
+ if (tvnp)
+ tvnp->hashNext = vnp->hashNext;
+ }
+
+ vnp->hashNext = NULL;
+ vnp->hashIndex = 0;
+ Vn_stateFlags(vnp) &= ~(VN_ON_HASH);
+ }
}
-/* Code to invalidate a vnode entry. Called when we've damaged a vnode, and want
- to prevent future VGetVnode's from applying to it. Leaves it in the same hash bucket
- but that shouldn't be important. */
+
+/**
+ * invalidate a vnode cache entry.
+ *
+ * @param[in] avnode vnode object pointer
+ *
+ * @pre VOL_LOCK held
+ *
+ * @post vnode metadata invalidated.
+ * vnode removed from hash table.
+ * DAFS: vnode state set to VN_STATE_INVALID.
+ *
+ * @internal vnode package internal use only
+ */
void
VInvalidateVnode_r(register struct Vnode *avnode)
{
avnode->changed_newTime = 0; /* don't let it get flushed out again */
avnode->changed_oldTime = 0;
- avnode->delete = 0; /* it isn't deleted, erally */
+ avnode->delete = 0; /* it isn't deleted, really */
avnode->cacheCheck = 0; /* invalid: prevents future vnode searches from working */
+ DeleteFromVnHash(avnode);
+#ifdef AFS_DEMAND_ATTACH_FS
+ VnChangeState_r(avnode, VN_STATE_INVALID);
+#endif
}
-/* Not normally called by general client; called by volume.c */
+
+/**
+ * initialize vnode cache for a given vnode class.
+ *
+ * @param[in] class vnode class
+ * @param[in] nVnodes size of cache
+ *
+ * @post vnode cache allocated and initialized
+ *
+ * @internal volume package internal use only
+ *
+ * @note generally called by VInitVolumePackage_r
+ *
+ * @see VInitVolumePackage_r
+ */
int
VInitVnodes(VnodeClass class, int nVnodes)
{
assert(va != NULL);
while (nVnodes--) {
Vnode *vnp = (Vnode *) va;
- vnp->nUsers = 0; /* no context switches */
+ Vn_refcount(vnp) = 0; /* no context switches */
+ Vn_stateFlags(vnp) |= VN_ON_LRU;
+#ifdef AFS_DEMAND_ATTACH_FS
+ assert(pthread_cond_init(&Vn_stateCV(vnp), NULL) == 0);
+ Vn_state(vnp) = VN_STATE_INVALID;
+ Vn_readers(vnp) = 0;
+#else /* !AFS_DEMAND_ATTACH_FS */
Lock_Init(&vnp->lock);
+#endif /* !AFS_DEMAND_ATTACH_FS */
vnp->changed_oldTime = 0;
vnp->changed_newTime = 0;
- vnp->volumePtr = NULL;
- vnp->cacheCheck = 0;
- vnp->delete = vnp->vnodeNumber = 0;
+ Vn_volume(vnp) = NULL;
+ Vn_cacheCheck(vnp) = 0;
+ vnp->delete = Vn_id(vnp) = 0;
#ifdef AFS_PTHREAD_ENV
vnp->writer = (pthread_t) 0;
#else /* AFS_PTHREAD_ENV */
#endif /* AFS_PTHREAD_ENV */
vnp->hashIndex = 0;
vnp->handle = NULL;
+ Vn_class(vnp) = vcp;
if (vcp->lruHead == NULL)
vcp->lruHead = vnp->lruNext = vnp->lruPrev = vnp;
else {
}
-/* allocate an *unused* vnode from the LRU chain, going backwards of course. It shouldn't
- be necessary to specify that nUsers == 0 since if it is in the list, nUsers
- should be 0. Things shouldn't be in lruq unless no one is using them. */
+/**
+ * allocate an unused vnode from the lru chain.
+ *
+ * @param[in] vcp vnode class info object pointer
+ *
+ * @pre VOL_LOCK is held
+ *
+ * @post vnode object is removed from lru, and vnode hash table.
+ * vnode is disassociated from volume object.
+ * state is set to VN_STATE_INVALID.
+ * inode handle is released.
+ *
+ * @note we traverse backwards along the circular lru list. It shouldn't
+ * be necessary to specify that nUsers == 0 since if it is in the list,
+ * nUsers should be 0. Things shouldn't be in lruq unless no one is
+ * using them.
+ *
+ * @warning DAFS: VOL_LOCK is dropped while doing inode handle release
+ *
+ * @return vnode object pointer
+ */
Vnode *
VGetFreeVnode_r(struct VnodeClassInfo * vcp)
{
register Vnode *vnp;
vnp = vcp->lruHead->lruPrev;
- if (vnp->nUsers != 0 || CheckLock(&vnp->lock))
- Abort("locked vnode in lruq");
- VNLog(1, 2, vnp->vnodeNumber, (afs_int32) vnp);
- IH_RELEASE(vnp->handle);
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (Vn_refcount(vnp) != 0 || VnIsExclusiveState(Vn_state(vnp)) ||
+ Vn_readers(vnp) != 0)
+ Abort("VGetFreeVnode_r: in-use vnode in lruq");
+#else
+ if (Vn_refcount(vnp) != 0 || CheckLock(&vnp->lock))
+ Abort("VGetFreeVnode_r: locked vnode in lruq");
+#endif
+ VNLog(1, 2, Vn_id(vnp), (afs_int32) vnp);
+
+ /*
+ * it's going to be overwritten soon enough.
+ * remove from LRU, delete hash entry, and
+ * disassociate from old parent volume before
+ * we have a chance to drop the vol glock
+ */
+ DeleteFromVnLRU(vcp, vnp);
+ DeleteFromVnHash(vnp);
+ if (Vn_volume(vnp)) {
+ DeleteFromVVnList(vnp);
+ }
+
+ /* drop the file descriptor */
+ if (vnp->handle) {
+#ifdef AFS_DEMAND_ATTACH_FS
+ VnChangeState_r(vnp, VN_STATE_RELEASING);
+ VOL_UNLOCK;
+#endif
+ IH_RELEASE(vnp->handle);
+#ifdef AFS_DEMAND_ATTACH_FS
+ VOL_LOCK;
+#endif
+ }
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ VnChangeState_r(vnp, VN_STATE_INVALID);
+#endif
+
+ return vnp;
+}
+
+
+/**
+ * lookup a vnode in the vnode cache hash table.
+ *
+ * @param[in] vp pointer to volume object
+ * @param[in] vnodeId vnode id
+ *
+ * @pre VOL_LOCK held
+ *
+ * @post matching vnode object or NULL is returned
+ *
+ * @return vnode object pointer
+ * @retval NULL no matching vnode object was found in the cache
+ *
+ * @internal vnode package internal use only
+ *
+ * @note this symbol is exported strictly for fssync debug protocol use
+ */
+Vnode *
+VLookupVnode(Volume * vp, VnodeId vnodeId)
+{
+ Vnode * vnp;
+ unsigned int newHash;
+
+ newHash = VNODE_HASH(vp, vnodeId);
+ for (vnp = VnodeHashTable[newHash];
+ (vnp &&
+ ((Vn_id(vnp) != vnodeId) ||
+ (Vn_volume(vnp) != vp) ||
+ (vp->cacheCheck != Vn_cacheCheck(vnp))));
+ vnp = vnp->hashNext);
+
return vnp;
}
-static mlkReason = 0;
-static mlkLastAlloc = 0;
-static mlkLastOver = 0;
-static mlkLastDelete = 0;
Vnode *
VAllocVnode(Error * ec, Volume * vp, VnodeType type)
return retVal;
}
+/**
+ * allocate a new vnode.
+ *
+ * @param[out] ec error code return
+ * @param[in] vp volume object pointer
+ * @param[in] type desired vnode type
+ *
+ * @return vnode object pointer
+ *
+ * @pre VOL_LOCK held;
+ * heavyweight ref held on vp
+ *
+ * @post vnode allocated and returned
+ */
Vnode *
VAllocVnode_r(Error * ec, Volume * vp, VnodeType type)
{
register Vnode *vnp;
VnodeId vnodeNumber;
- int newHash, bitNumber;
+ int bitNumber, code;
register struct VnodeClassInfo *vcp;
VnodeClass class;
Unique unique;
+#ifdef AFS_DEMAND_ATTACH_FS
+ VolState vol_state_save;
+#endif
*ec = 0;
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ /*
+ * once a volume has entered an error state, don't permit
+ * further operations to proceed
+ * -- tkeiser 11/21/2007
+ */
+ VWaitExclusiveState_r(vp);
+ if (VIsErrorState(V_attachState(vp))) {
+ /* XXX is VSALVAGING acceptable here? */
+ *ec = DAFS_VSALVAGE;
+ return NULL;
+ }
+#endif
+
if (programType == fileServer && !V_inUse(vp)) {
if (vp->specialStatus) {
*ec = vp->specialStatus;
unique = vp->nextVnodeUnique++;
if (vp->nextVnodeUnique > V_uniquifier(vp)) {
- VUpdateVolume_r(ec, vp, VOL_UPDATE_WAIT);
+ VUpdateVolume_r(ec, vp, 0);
if (*ec)
return NULL;
}
return NULL;
vnodeNumber = bitNumberToVnodeNumber(bitNumber, class);
+ /*
+ * DAFS:
+ * at this point we should be assured that V_attachState(vp) is non-exclusive
+ */
+
vnrehash:
VNLog(2, 1, vnodeNumber);
/* Prepare to move it to the new hash chain */
- newHash = VNODE_HASH(vp, vnodeNumber);
- for (vnp = VnodeHashTable[newHash];
- vnp && (vnp->vnodeNumber != vnodeNumber || vnp->volumePtr != vp
- || vnp->volumePtr->cacheCheck != vnp->cacheCheck);
- vnp = vnp->hashNext);
+ vnp = VLookupVnode(vp, vnodeNumber);
if (vnp) {
/* slot already exists. May even not be in lruq (consider store file locking a file being deleted)
* so we may have to wait for it below */
VNLog(3, 2, vnodeNumber, (afs_int32) vnp);
- /* If first user, remove it from the LRU chain. We can assume that
- * there is at least one item in the queue */
- if (++vnp->nUsers == 1) {
- if (vnp == vcp->lruHead)
- vcp->lruHead = vcp->lruHead->lruNext;
- vnp->lruPrev->lruNext = vnp->lruNext;
- vnp->lruNext->lruPrev = vnp->lruPrev;
- if (vnp == vcp->lruHead || vcp->lruHead == NULL)
- Abort("VGetVnode: lru chain addled!\n");
+ VnCreateReservation_r(vnp);
+ if (Vn_refcount(vnp) == 1) {
+ /* we're the only user */
/* This won't block */
- ObtainWriteLock(&vnp->lock);
+ VnLock(vnp, WRITE_LOCK, VOL_LOCK_HELD, WILL_NOT_DEADLOCK);
} else {
- /* follow locking hierarchy */
- VOL_UNLOCK;
- ObtainWriteLock(&vnp->lock);
- VOL_LOCK;
- if (vnp->volumePtr->cacheCheck != vnp->cacheCheck) {
- ReleaseWriteLock(&vnp->lock);
+ /* other users present; follow locking hierarchy */
+ VnLock(vnp, WRITE_LOCK, VOL_LOCK_HELD, MIGHT_DEADLOCK);
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ /*
+ * DAFS:
+ * vnode was cached, wait for any existing exclusive ops to finish.
+ * once we have reacquired the lock, re-verify volume state.
+ *
+ * note: any vnode error state is related to the old vnode; disregard.
+ */
+ VnWaitQuiescent_r(vnp);
+ if (VIsErrorState(V_attachState(vp))) {
+ VnUnlock(vnp, WRITE_LOCK);
+ VnCancelReservation_r(vnp);
+ *ec = DAFS_VSALVAGE;
+ return NULL;
+ }
+#endif
+
+ /*
+ * verify state of the world hasn't changed
+ *
+ * (technically, this should never happen because cachecheck
+ * is only updated during a volume attach, which should not
+ * happen when refs are held)
+ */
+ if (Vn_volume(vnp)->cacheCheck != Vn_cacheCheck(vnp)) {
+ VnUnlock(vnp, WRITE_LOCK);
+ VnCancelReservation_r(vnp);
goto vnrehash;
}
}
-#ifdef AFS_PTHREAD_ENV
- vnp->writer = pthread_self();
-#else /* AFS_PTHREAD_ENV */
- LWP_CurrentProcess(&vnp->writer);
-#endif /* AFS_PTHREAD_ENV */
+
} else {
+ /* no such vnode in the cache */
+
vnp = VGetFreeVnode_r(vcp);
- /* Remove vnode from LRU chain and grab a write lock */
- if (vnp == vcp->lruHead)
- vcp->lruHead = vcp->lruHead->lruNext;
- vnp->lruPrev->lruNext = vnp->lruNext;
- vnp->lruNext->lruPrev = vnp->lruPrev;
- if (vnp == vcp->lruHead || vcp->lruHead == NULL)
- Abort("VGetVnode: lru chain addled!\n");
+
/* Initialize the header fields so noone allocates another
* vnode with the same number */
- vnp->vnodeNumber = vnodeNumber;
- vnp->volumePtr = vp;
- vnp->cacheCheck = vp->cacheCheck;
- vnp->nUsers = 1;
- /* This will never block */
- ObtainWriteLock(&vnp->lock);
-#ifdef AFS_PTHREAD_ENV
- vnp->writer = pthread_self();
-#else /* AFS_PTHREAD_ENV */
- LWP_CurrentProcess(&vnp->writer);
-#endif /* AFS_PTHREAD_ENV */
+ Vn_id(vnp) = vnodeNumber;
+ VnCreateReservation_r(vnp);
+ AddToVVnList(vp, vnp);
+#ifdef AFS_DEMAND_ATTACH_FS
+ AddToVnHash(vnp);
+#endif
+
+ /* This will never block (guaranteed by check in VGetFreeVnode_r()) */
+ VnLock(vnp, WRITE_LOCK, VOL_LOCK_HELD, WILL_NOT_DEADLOCK);
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ VnChangeState_r(vnp, VN_STATE_ALLOC);
+#endif
+
/* Sanity check: is this vnode really not in use? */
{
int size;
/* XXX we have a potential race here if two threads
* allocate new vnodes at the same time, and they
* both decide it's time to extend the index
- * file size... */
+ * file size...
+ */
+#ifdef AFS_DEMAND_ATTACH_FS
+ /*
+ * this race has been eliminated for the DAFS case
+ * using exclusive state VOL_STATE_VNODE_ALLOC
+ *
+ * if this becomes a bottleneck, there are ways to
+ * improve parallelism for this code path
+ * -- tkeiser 11/28/2007
+ */
+ VCreateReservation_r(vp);
+ VWaitExclusiveState_r(vp);
+ vol_state_save = VChangeState_r(vp, VOL_STATE_VNODE_ALLOC);
+#endif
VOL_UNLOCK;
fdP = IH_OPEN(ihP);
free(buf);
}
FDH_CLOSE(fdP);
- fdP = NULL;
VOL_LOCK;
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ VChangeState_r(vp, vol_state_save);
+ VCancelReservation_r(vp);
+#endif
goto sane;
+
error_encountered:
#ifdef AFS_DEMAND_ATTACH_FS
- VOL_LOCK;
- VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
+ /*
+ * close the file handle
+ * acquire VOL_LOCK
+ * invalidate the vnode
+ * free up the bitmap entry (although salvager should take care of it)
+ * salvage the volume
+ * drop vnode lock and refs
+ */
if (fdP)
FDH_CLOSE(fdP);
+ VOL_LOCK;
+ VFreeBitMapEntry_r(ec, &vp->vnodeIndex[class], bitNumber);
VInvalidateVnode_r(vnp);
- StickOnLruChain_r(vnp, vcp);
+ VnUnlock(vnp, WRITE_LOCK);
+ VnCancelReservation_r(vnp);
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, 0);
+ VCancelReservation_r(vp);
return NULL;
#else
assert(1 == 2);
}
sane:
VNLog(4, 2, vnodeNumber, (afs_int32) vnp);
- AddToVnHashByVolumeTable(vnp);
- moveHash(vnp, newHash);
+#ifndef AFS_DEMAND_ATTACH_FS
+ AddToVnHash(vnp);
+#endif
}
VNLog(5, 1, (afs_int32) vnp);
-#ifdef AFS_PTHREAD_ENV
- vnp->writer = pthread_self();
-#else /* AFS_PTHREAD_ENV */
- LWP_CurrentProcess(&vnp->writer);
-#endif /* AFS_PTHREAD_ENV */
memset(&vnp->disk, 0, sizeof(vnp->disk));
vnp->changed_newTime = 0; /* set this bit when vnode is updated */
vnp->changed_oldTime = 0; /* set this on CopyOnWrite. */
vnp->handle = NULL;
vcp->allocs++;
vp->header->diskstuff.filecount++;
+#ifdef AFS_DEMAND_ATTACH_FS
+ VnChangeState_r(vnp, VN_STATE_EXCLUSIVE);
+#endif
return vnp;
}
+/**
+ * load a vnode from disk.
+ *
+ * @param[out] ec client error code return
+ * @param[in] vp volume object pointer
+ * @param[in] vnp vnode object pointer
+ * @param[in] vcp vnode class info object pointer
+ * @param[in] class vnode class enumeration
+ *
+ * @pre vnode is registered in appropriate data structures;
+ * caller holds a ref on vnode; VOL_LOCK is held
+ *
+ * @post vnode data is loaded from disk.
+ * vnode state is set to VN_STATE_ONLINE.
+ * on failure, vnode is invalidated.
+ *
+ * @internal vnode package internal use only
+ */
+static void
+VnLoad(Error * ec, Volume * vp, Vnode * vnp,
+ struct VnodeClassInfo * vcp, VnodeClass class)
+{
+ /* vnode not cached */
+ Error error;
+ int n, dosalv = 1;
+ IHandle_t *ihP = vp->vnodeIndex[class].handle;
+ FdHandle_t *fdP;
+
+ *ec = 0;
+ vcp->reads++;
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ VnChangeState_r(vnp, VN_STATE_LOAD);
+#endif
+
+ /* This will never block */
+ VnLock(vnp, WRITE_LOCK, VOL_LOCK_HELD, WILL_NOT_DEADLOCK);
+
+ VOL_UNLOCK;
+ fdP = IH_OPEN(ihP);
+ if (fdP == NULL) {
+ Log("VnLoad: can't open index dev=%u, i=%s\n", vp->device,
+ PrintInode(NULL, vp->vnodeIndex[class].handle->ih_ino));
+ *ec = VIO;
+ goto error_encountered_nolock;
+ } else if (FDH_SEEK(fdP, vnodeIndexOffset(vcp, Vn_id(vnp)), SEEK_SET)
+ < 0) {
+ Log("VnLoad: can't seek on index file vn=%u\n", Vn_id(vnp));
+ *ec = VIO;
+ goto error_encountered_nolock;
+ } else if ((n = FDH_READ(fdP, (char *)&vnp->disk, vcp->diskSize))
+ != vcp->diskSize) {
+ /* Don't take volume off line if the inumber is out of range
+ * or the inode table is full. */
+ if (n == BAD_IGET) {
+ Log("VnLoad: bad inumber %s\n",
+ PrintInode(NULL, vp->vnodeIndex[class].handle->ih_ino));
+ *ec = VIO;
+ dosalv = 0;
+ } else if (n == -1 && errno == EIO) {
+ /* disk error; salvage */
+ Log("VnLoad: Couldn't read vnode %u, volume %u (%s); volume needs salvage\n", Vn_id(vnp), V_id(vp), V_name(vp));
+ } else {
+ /* vnode is not allocated */
+ if (LogLevel >= 5)
+ Log("VnLoad: Couldn't read vnode %u, volume %u (%s); read %d bytes, errno %d\n",
+ Vn_id(vnp), V_id(vp), V_name(vp), n, errno);
+ *ec = VIO;
+ dosalv = 0;
+ }
+ goto error_encountered_nolock;
+ }
+ FDH_CLOSE(fdP);
+ VOL_LOCK;
+
+ /* Quick check to see that the data is reasonable */
+ if (vnp->disk.vnodeMagic != vcp->magic || vnp->disk.type == vNull) {
+ if (vnp->disk.type == vNull) {
+ *ec = VNOVNODE;
+ dosalv = 0;
+ } else {
+ struct vnodeIndex *index = &vp->vnodeIndex[class];
+ unsigned int bitNumber = vnodeIdToBitNumber(Vn_id(vnp));
+ unsigned int offset = bitNumber >> 3;
+
+ /* Test to see if vnode number is valid. */
+ if ((offset >= index->bitmapSize)
+ || ((*(index->bitmap + offset) & (1 << (bitNumber & 0x7)))
+ == 0)) {
+ Log("VnLoad: Request for unallocated vnode %u, volume %u (%s) denied.\n", Vn_id(vnp), V_id(vp), V_name(vp));
+ *ec = VNOVNODE;
+ dosalv = 0;
+ } else {
+ Log("VnLoad: Bad magic number, vnode %u, volume %u (%s); volume needs salvage\n", Vn_id(vnp), V_id(vp), V_name(vp));
+ }
+ }
+ goto error_encountered;
+ }
+
+ IH_INIT(vnp->handle, V_device(vp), V_parentId(vp), VN_GET_INO(vnp));
+ VnUnlock(vnp, WRITE_LOCK);
+#ifdef AFS_DEMAND_ATTACH_FS
+ VnChangeState_r(vnp, VN_STATE_ONLINE);
+#endif
+ return;
+
+
+ error_encountered_nolock:
+ if (fdP) {
+ FDH_REALLYCLOSE(fdP);
+ }
+ VOL_LOCK;
+
+ error_encountered:
+ if (dosalv) {
+#ifdef AFS_DEMAND_ATTACH_FS
+ VRequestSalvage_r(&error, vp, SALVSYNC_ERROR, 0);
+#else
+ VForceOffline_r(vp, 0);
+ error = VSALVAGE;
+#endif
+ if (!*ec)
+ *ec = error;
+ }
+
+ VInvalidateVnode_r(vnp);
+ VnUnlock(vnp, WRITE_LOCK);
+}
+
+/**
+ * store a vnode to disk.
+ *
+ * @param[out] ec error code output
+ * @param[in] vp volume object pointer
+ * @param[in] vnp vnode object pointer
+ * @param[in] vcp vnode class info object pointer
+ * @param[in] class vnode class enumeration
+ *
+ * @pre VOL_LOCK held.
+ * caller holds refs to volume and vnode.
+ * DAFS: caller is responsible for performing state sanity checks.
+ *
+ * @post vnode state is stored to disk.
+ *
+ * @internal vnode package internal use only
+ */
+static void
+VnStore(Error * ec, Volume * vp, Vnode * vnp,
+ struct VnodeClassInfo * vcp, VnodeClass class)
+{
+ int offset, code;
+ IHandle_t *ihP = vp->vnodeIndex[class].handle;
+ FdHandle_t *fdP;
+#ifdef AFS_DEMAND_ATTACH_FS
+ VnState vn_state_save;
+#endif
+
+ *ec = 0;
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ vn_state_save = VnChangeState_r(vnp, VN_STATE_STORE);
+#endif
+
+ offset = vnodeIndexOffset(vcp, Vn_id(vnp));
+ VOL_UNLOCK;
+ fdP = IH_OPEN(ihP);
+ if (fdP == NULL) {
+ Log("VnStore: can't open index file!\n");
+ goto error_encountered;
+ }
+ if (FDH_SEEK(fdP, offset, SEEK_SET) < 0) {
+ Log("VnStore: can't seek on index file! fdp=0x%x offset=%d, errno=%d\n",
+ fdP, offset, errno);
+ goto error_encountered;
+ }
+
+ code = FDH_WRITE(fdP, &vnp->disk, vcp->diskSize);
+ if (code != vcp->diskSize) {
+ /* Don't force volume offline if the inumber is out of
+ * range or the inode table is full.
+ */
+ FDH_REALLYCLOSE(fdP);
+ if (code == BAD_IGET) {
+ Log("VnStore: bad inumber %s\n",
+ PrintInode(NULL,
+ vp->vnodeIndex[class].handle->ih_ino));
+ *ec = VIO;
+ VOL_LOCK;
+#ifdef AFS_DEMAND_ATTACH_FS
+ VnChangeState_r(vnp, VN_STATE_ERROR);
+#endif
+ } else {
+ Log("VnStore: Couldn't write vnode %u, volume %u (%s) (error %d)\n", Vn_id(vnp), V_id(Vn_volume(vnp)), V_name(Vn_volume(vnp)), code);
+#ifdef AFS_DEMAND_ATTACH_FS
+ goto error_encountered;
+#else
+ VOL_LOCK;
+ VForceOffline_r(vp, 0);
+ *ec = VSALVAGE;
+#endif
+ }
+ return;
+ } else {
+ FDH_CLOSE(fdP);
+ }
+
+ VOL_LOCK;
+#ifdef AFS_DEMAND_ATTACH_FS
+ VnChangeState_r(vnp, vn_state_save);
+#endif
+ return;
+
+ error_encountered:
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* XXX instead of dumping core, let's try to request a salvage
+ * and just fail the putvnode */
+ if (fdP)
+ FDH_CLOSE(fdP);
+ VOL_LOCK;
+ VnChangeState_r(vnp, VN_STATE_ERROR);
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, 0);
+#else
+ assert(1 == 2);
+#endif
+}
+
+/**
+ * get a handle to a vnode object.
+ *
+ * @param[out] ec error code
+ * @param[in] vp volume object
+ * @param[in] vnodeNumber vnode id
+ * @param[in] locktype type of lock to acquire
+ *
+ * @return vnode object pointer
+ *
+ * @see VGetVnode_r
+ */
Vnode *
VGetVnode(Error * ec, Volume * vp, VnodeId vnodeNumber, int locktype)
{ /* READ_LOCK or WRITE_LOCK, as defined in lock.h */
return retVal;
}
+/**
+ * get a handle to a vnode object.
+ *
+ * @param[out] ec error code
+ * @param[in] vp volume object
+ * @param[in] vnodeNumber vnode id
+ * @param[in] locktype type of lock to acquire
+ *
+ * @return vnode object pointer
+ *
+ * @internal vnode package internal use only
+ *
+ * @pre VOL_LOCK held.
+ * heavyweight ref held on volume object.
+ */
Vnode *
VGetVnode_r(Error * ec, Volume * vp, VnodeId vnodeNumber, int locktype)
{ /* READ_LOCK or WRITE_LOCK, as defined in lock.h */
register Vnode *vnp;
- int newHash;
+ int code;
VnodeClass class;
struct VnodeClassInfo *vcp;
+ Volume * oldvp = NULL;
*ec = 0;
- mlkReason = 0; /* last call didn't fail */
if (vnodeNumber == 0) {
*ec = VNOVNODE;
- mlkReason = 1;
return NULL;
}
VNLog(100, 1, vnodeNumber);
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ /*
+ * once a volume has entered an error state, don't permit
+ * further operations to proceed
+ * -- tkeiser 11/21/2007
+ */
+ VWaitExclusiveState_r(vp);
+ if (VIsErrorState(V_attachState(vp))) {
+ /* XXX is VSALVAGING acceptable here? */
+ *ec = VSALVAGING;
+ return NULL;
+ }
+#endif
+
if (programType == fileServer && !V_inUse(vp)) {
*ec = (vp->specialStatus ? vp->specialStatus : VOFFLINE);
* a READ operation, then don't fail.
*/
if ((*ec != VBUSY) || (locktype != READ_LOCK)) {
- mlkReason = 2;
return NULL;
}
*ec = 0;
vcp = &VnodeClassInfo[class];
if (locktype == WRITE_LOCK && !VolumeWriteable(vp)) {
*ec = (bit32) VREADONLY;
- mlkReason = 3;
return NULL;
}
if (locktype == WRITE_LOCK && programType == fileServer) {
VAddToVolumeUpdateList_r(ec, vp);
if (*ec) {
- mlkReason = 1000 + *ec;
return NULL;
}
}
- /* See whether the vnode is in the cache. */
- newHash = VNODE_HASH(vp, vnodeNumber);
- for (vnp = VnodeHashTable[newHash];
- vnp && (vnp->vnodeNumber != vnodeNumber || vnp->volumePtr != vp
- || vnp->volumePtr->cacheCheck != vnp->cacheCheck);
- vnp = vnp->hashNext);
vcp->gets++;
- if (vnp == NULL) {
- int n;
- IHandle_t *ihP = vp->vnodeIndex[class].handle;
- FdHandle_t *fdP;
+
+ /* See whether the vnode is in the cache. */
+ vnp = VLookupVnode(vp, vnodeNumber);
+ if (vnp) {
+ /* vnode is in cache */
+
+ VNLog(101, 2, vnodeNumber, (afs_int32) vnp);
+ VnCreateReservation_r(vnp);
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ /*
+ * this is the one DAFS case where we may run into contention.
+ * here's the basic control flow:
+ *
+ * if locktype is READ_LOCK:
+ * wait until vnode is not exclusive
+ * set to VN_STATE_READ
+ * increment read count
+ * done
+ * else
+ * wait until vnode is quiescent
+ * set to VN_STATE_EXCLUSIVE
+ * done
+ */
+ if (locktype == READ_LOCK) {
+ VnWaitExclusiveState_r(vnp);
+ } else {
+ VnWaitQuiescent_r(vnp);
+ }
+
+ if (VnIsErrorState(Vn_state(vnp))) {
+ VnCancelReservation_r(vnp);
+ *ec = VSALVAGE;
+ return NULL;
+ }
+#endif /* AFS_DEMAND_ATTACH_FS */
+ } else {
+ /* vnode not cached */
+
/* Not in cache; tentatively grab most distantly used one from the LRU
* chain */
vcp->reads++;
vnp = VGetFreeVnode_r(vcp);
- /* Remove it from the old hash chain */
- if (vnp->volumePtr)
- DeleteFromVnHashByVolumeTable(vnp);
- moveHash(vnp, newHash);
- /* Remove it from the LRU chain */
- if (vnp == vcp->lruHead)
- vcp->lruHead = vcp->lruHead->lruNext;
- if (vnp == vcp->lruHead || vcp->lruHead == NULL)
- Abort("VGetVnode: lru chain addled!\n");
- vnp->lruPrev->lruNext = vnp->lruNext;
- vnp->lruNext->lruPrev = vnp->lruPrev;
+
/* Initialize */
vnp->changed_newTime = vnp->changed_oldTime = 0;
vnp->delete = 0;
- vnp->vnodeNumber = vnodeNumber;
- vnp->volumePtr = vp;
- vnp->cacheCheck = vp->cacheCheck;
- vnp->nUsers = 1;
- AddToVnHashByVolumeTable(vnp);
-
- /* This will never block */
- ObtainWriteLock(&vnp->lock);
-#ifdef AFS_PTHREAD_ENV
- vnp->writer = pthread_self();
-#else /* AFS_PTHREAD_ENV */
- LWP_CurrentProcess(&vnp->writer);
-#endif /* AFS_PTHREAD_ENV */
-
- /* Read vnode from volume index */
- VOL_UNLOCK;
- fdP = IH_OPEN(ihP);
- if (fdP == NULL) {
- Log("VGetVnode: can't open index dev=%u, i=%s\n", vp->device,
- PrintInode(NULL, vp->vnodeIndex[class].handle->ih_ino));
-#ifdef AFS_DEMAND_ATTACH_FS
- VOL_LOCK;
- VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
- VOL_UNLOCK;
-#endif
- *ec = VIO;
- mlkReason = 9;
- } else if (FDH_SEEK(fdP, vnodeIndexOffset(vcp, vnodeNumber), SEEK_SET)
- < 0) {
- Log("VGetVnode: can't seek on index file vn=%u\n", vnodeNumber);
+ Vn_id(vnp) = vnodeNumber;
+ VnCreateReservation_r(vnp);
+ AddToVVnList(vp, vnp);
#ifdef AFS_DEMAND_ATTACH_FS
- VOL_LOCK;
- VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
- VOL_UNLOCK;
+ AddToVnHash(vnp);
#endif
- *ec = VIO;
- mlkReason = 10;
- FDH_REALLYCLOSE(fdP);
- } else if ((n = FDH_READ(fdP, (char *)&vnp->disk, vcp->diskSize))
- != vcp->diskSize) {
- /* Don't take volume off line if the inumber is out of range
- * or the inode table is full. */
- FDH_REALLYCLOSE(fdP);
- VOL_LOCK;
- if (n == BAD_IGET) {
- Log("VGetVnode: bad inumber %s\n",
- PrintInode(NULL, vp->vnodeIndex[class].handle->ih_ino));
- *ec = VIO;
- mlkReason = 4;
- }
- /* Check for disk errors. Anything else just means that the vnode
- * is not allocated */
- if (n == -1 && errno == EIO) {
- Log("VGetVnode: Couldn't read vnode %u, volume %u (%s); volume needs salvage\n", vnodeNumber, V_id(vp), V_name(vp));
-#ifdef AFS_DEMAND_ATTACH_FS
- if (programType == fileServer) {
- VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
- *ec = VSALVAGING;
- } else {
- VForceOffline_r(vp, 0);
- *ec = VSALVAGE;
- }
-#else
- VForceOffline_r(vp, 0);
- *ec = VSALVAGE;
-#endif
- mlkReason = 4;
- } else {
- /* Probably legit; Don't knock the volume offline */
- if (LogLevel >= 5)
- Log("VGetVnode: Couldn't read vnode %u, volume %u (%s); errno %d\n", vnodeNumber, V_id(vp), V_name(vp), errno);
- mlkReason = 5;
- *ec = VIO;
- }
- VInvalidateVnode_r(vnp);
- if (vnp->nUsers-- == 1)
- StickOnLruChain_r(vnp, vcp);
- ReleaseWriteLock(&vnp->lock);
+
+ /*
+ * XXX for non-DAFS, there is a serious
+ * race condition here:
+ *
+ * two threads can race to load a vnode. the net
+ * result is two struct Vnodes can be allocated
+ * and hashed, which point to the same underlying
+ * disk data store. conflicting vnode locks can
+ * thus be held concurrently.
+ *
+ * for non-DAFS to be safe, VOL_LOCK really shouldn't
+ * be dropped in VnLoad. Of course, this would likely
+ * lead to an unacceptable slow-down.
+ */
+
+ VnLoad(ec, vp, vnp, vcp, class);
+ if (*ec) {
+ VnCancelReservation_r(vnp);
return NULL;
}
- FDH_CLOSE(fdP);
- VOL_LOCK;
- /* Quick check to see that the data is reasonable */
- if (vnp->disk.vnodeMagic != vcp->magic || vnp->disk.type == vNull) {
- if (vnp->disk.type == vNull) {
- *ec = VNOVNODE;
- mlkReason = 6;
- VInvalidateVnode_r(vnp);
- if (vnp->nUsers-- == 1)
- StickOnLruChain_r(vnp, vcp);
- ReleaseWriteLock(&vnp->lock);
- return NULL; /* The vnode is not allocated */
- } else {
- struct vnodeIndex *index = &vp->vnodeIndex[class];
- unsigned int bitNumber = vnodeIdToBitNumber(vnodeNumber);
- unsigned int offset = bitNumber >> 3;
-
- /* Test to see if vnode number is valid. */
- if ((offset >= index->bitmapSize)
- || ((*(index->bitmap + offset) & (1 << (bitNumber & 0x7)))
- == 0)) {
- Log("VGetVnode: Request for unallocated vnode %u, volume %u (%s) denied.\n", vnodeNumber, V_id(vp), V_name(vp));
- mlkReason = 11;
- *ec = VNOVNODE;
- } else {
- Log("VGetVnode: Bad magic number, vnode %u, volume %u (%s); volume needs salvage\n", vnodeNumber, V_id(vp), V_name(vp));
-#ifdef AFS_DEMAND_ATTACH_FS
- if (programType == fileServer) {
- VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
- *ec = VSALVAGING;
- } else {
- vp->goingOffline = 1;
- *ec = VSALVAGE;
- }
-#else
- vp->goingOffline = 1; /* used to call VOffline, but that would mess
- * up the volume ref count if called here */
- *ec = VSALVAGE;
+#ifndef AFS_DEMAND_ATTACH_FS
+ AddToVnHash(vnp);
#endif
- mlkReason = 7;
- }
- VInvalidateVnode_r(vnp);
- if (vnp->nUsers-- == 1)
- StickOnLruChain_r(vnp, vcp);
- ReleaseWriteLock(&vnp->lock);
- return NULL;
- }
- }
- IH_INIT(vnp->handle, V_device(vp), V_parentId(vp), VN_GET_INO(vnp));
- ReleaseWriteLock(&vnp->lock);
- } else {
- VNLog(101, 2, vnodeNumber, (afs_int32) vnp);
- if (++vnp->nUsers == 1) {
- /* First user. Remove it from the LRU chain. We can assume that
- * there is at least one item in the queue */
- if (vnp == vcp->lruHead)
- vcp->lruHead = vcp->lruHead->lruNext;
- if (vnp == vcp->lruHead || vcp->lruHead == NULL)
- Abort("VGetVnode: lru chain addled!\n");
- vnp->lruPrev->lruNext = vnp->lruNext;
- vnp->lruNext->lruPrev = vnp->lruPrev;
- }
- }
- VOL_UNLOCK;
- if (locktype == READ_LOCK)
- ObtainReadLock(&vnp->lock);
- else {
- ObtainWriteLock(&vnp->lock);
-#ifdef AFS_PTHREAD_ENV
- vnp->writer = pthread_self();
-#else /* AFS_PTHREAD_ENV */
- LWP_CurrentProcess(&vnp->writer);
-#endif /* AFS_PTHREAD_ENV */
+ /*
+ * DAFS:
+ * there is no possibility for contention. we "own" this vnode.
+ */
}
- VOL_LOCK;
+
+ /*
+ * DAFS:
+ * it is imperative that nothing drop vol lock between here
+ * and the VnBeginRead/VnChangeState stanza below
+ */
+
+ VnLock(vnp, locktype, VOL_LOCK_HELD, MIGHT_DEADLOCK);
+
/* Check that the vnode hasn't been removed while we were obtaining
* the lock */
VNLog(102, 2, vnodeNumber, (afs_int32) vnp);
- if ((vnp->disk.type == vNull) || (vnp->cacheCheck == 0)) {
- if (vnp->nUsers-- == 1)
- StickOnLruChain_r(vnp, vcp);
- if (locktype == READ_LOCK)
- ReleaseReadLock(&vnp->lock);
- else
- ReleaseWriteLock(&vnp->lock);
+ if ((vnp->disk.type == vNull) || (Vn_cacheCheck(vnp) == 0)) {
+ VnUnlock(vnp, locktype);
+ VnCancelReservation_r(vnp);
*ec = VNOVNODE;
- mlkReason = 8;
/* vnode is labelled correctly by now, so we don't have to invalidate it */
return NULL;
}
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (locktype == READ_LOCK) {
+ VnBeginRead_r(vnp);
+ } else {
+ VnChangeState_r(vnp, VN_STATE_EXCLUSIVE);
+ }
+#endif
+
if (programType == fileServer)
- VBumpVolumeUsage_r(vnp->volumePtr); /* Hack; don't know where it should be
+ VBumpVolumeUsage_r(Vn_volume(vnp)); /* Hack; don't know where it should be
* called from. Maybe VGetVolume */
return vnp;
}
VOL_UNLOCK;
}
+/**
+ * put back a handle to a vnode object.
+ *
+ * @param[out] ec client error code
+ * @param[in] vnp vnode object pointer
+ *
+ * @pre VOL_LOCK held.
+ * ref held on vnode.
+ *
+ * @post ref dropped on vnode.
+ * if vnode was modified or deleted, it is written out to disk
+ * (assuming a write lock was held).
+ *
+ * @internal volume package internal use only
+ */
void
VPutVnode_r(Error * ec, register Vnode * vnp)
{
- int writeLocked, offset;
+ int writeLocked;
VnodeClass class;
struct VnodeClassInfo *vcp;
int code;
*ec = 0;
- assert(vnp->nUsers != 0);
- class = vnodeIdToClass(vnp->vnodeNumber);
+ assert(Vn_refcount(vnp) != 0);
+ class = vnodeIdToClass(Vn_id(vnp));
vcp = &VnodeClassInfo[class];
assert(vnp->disk.vnodeMagic == vcp->magic);
- VNLog(200, 2, vnp->vnodeNumber, (afs_int32) vnp);
+ VNLog(200, 2, Vn_id(vnp), (afs_int32) vnp);
+#ifdef AFS_DEMAND_ATTACH_FS
+ writeLocked = (Vn_state(vnp) == VN_STATE_EXCLUSIVE);
+#else
writeLocked = WriteLocked(&vnp->lock);
+#endif
+
if (writeLocked) {
+ /* sanity checks */
#ifdef AFS_PTHREAD_ENV
pthread_t thisProcess = pthread_self();
#else /* AFS_PTHREAD_ENV */
if (thisProcess != vnp->writer)
Abort("VPutVnode: Vnode at 0x%x locked by another process!\n",
vnp);
+
+
if (vnp->changed_oldTime || vnp->changed_newTime || vnp->delete) {
- Volume *vp = vnp->volumePtr;
+ Volume *vp = Vn_volume(vnp);
afs_uint32 now = FT_ApproxTime();
- assert(vnp->cacheCheck == vp->cacheCheck);
+ assert(Vn_cacheCheck(vnp) == vp->cacheCheck);
if (vnp->delete) {
/* No longer any directory entries for this vnode. Free the Vnode */
memset(&vnp->disk, 0, sizeof(vnp->disk));
- mlkLastDelete = vnp->vnodeNumber;
/* delete flag turned off further down */
- VNLog(202, 2, vnp->vnodeNumber, (afs_int32) vnp);
+ VNLog(202, 2, Vn_id(vnp), (afs_int32) vnp);
} else if (vnp->changed_newTime) {
vnp->disk.serverModifyTime = now;
}
/* The vnode has been changed. Write it out to disk */
if (!V_inUse(vp)) {
#ifdef AFS_DEMAND_ATTACH_FS
- VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
- *ec = VSALVAGING;
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, 0);
#else
assert(V_needsSalvaged(vp));
*ec = VSALVAGE;
#endif
} else {
- IHandle_t *ihP = vp->vnodeIndex[class].handle;
- FdHandle_t *fdP;
- VOL_UNLOCK;
- fdP = IH_OPEN(ihP);
- if (fdP == NULL) {
- Log("VPutVnode: can't open index file!\n");
- goto error_encountered;
- }
- offset = vnodeIndexOffset(vcp, vnp->vnodeNumber);
- if (FDH_SEEK(fdP, offset, SEEK_SET) < 0) {
- Log("VPutVnode: can't seek on index file! fdp=0x%x offset=%d, errno=%d\n",
- fdP, offset, errno);
- goto error_encountered;
- }
- code = FDH_WRITE(fdP, &vnp->disk, vcp->diskSize);
- if (code != vcp->diskSize) {
- /* Don't force volume offline if the inumber is out of
- * range or the inode table is full.
- */
- VOL_LOCK;
- if (code == BAD_IGET) {
- Log("VPutVnode: bad inumber %s\n",
- PrintInode(NULL,
- vp->vnodeIndex[class].handle->ih_ino));
- *ec = VIO;
- } else {
- Log("VPutVnode: Couldn't write vnode %u, volume %u (%s) (error %d)\n", vnp->vnodeNumber, V_id(vnp->volumePtr), V_name(vnp->volumePtr), code);
-#ifdef AFS_DEMAND_ATTACH_FS
- VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
- *ec = VSALVAGING;
-#else
- VForceOffline_r(vp, 0);
- *ec = VSALVAGE;
-#endif
- }
- VOL_UNLOCK;
- FDH_REALLYCLOSE(fdP);
- } else {
- FDH_CLOSE(fdP);
- }
- VOL_LOCK;
- goto sane;
-
- error_encountered:
-#ifdef AFS_DEMAND_ATTACH_FS
- /* XXX instead of dumping core, let's try to request a salvage
- * and just fail the putvnode */
- if (fdP)
- FDH_CLOSE(fdP);
- VOL_LOCK;
- VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
- *ec = VSALVAGING;
- goto done;
-#else
- assert(1 == 2);
-#endif
+ VnStore(ec, vp, vnp, vcp, class);
- sane:
/* If the vnode is to be deleted, and we wrote the vnode out,
* free its bitmap entry. Do after the vnode is written so we
* don't allocate from bitmap before the vnode is written
* (doing so could cause a "addled bitmap" message).
*/
if (vnp->delete && !*ec) {
- if (vnp->volumePtr->header->diskstuff.filecount-- < 1)
- vnp->volumePtr->header->diskstuff.filecount = 0;
+ if (Vn_volume(vnp)->header->diskstuff.filecount-- < 1)
+ Vn_volume(vnp)->header->diskstuff.filecount = 0;
VFreeBitMapEntry_r(ec, &vp->vnodeIndex[class],
- vnodeIdToBitNumber(vnp->vnodeNumber));
+ vnodeIdToBitNumber(Vn_id(vnp)));
}
}
vcp->writes++;
vnp->changed_newTime = vnp->changed_oldTime = 0;
}
+#ifdef AFS_DEMAND_ATTACH_FS
+ VnChangeState_r(vnp, VN_STATE_ONLINE);
+#endif
} else { /* Not write locked */
if (vnp->changed_newTime || vnp->changed_oldTime || vnp->delete)
Abort
("VPutVnode: Change or delete flag for vnode 0x%x is set but vnode is not write locked!\n",
vnp);
+#ifdef AFS_DEMAND_ATTACH_FS
+ VnEndRead_r(vnp);
+#endif
}
- done:
/* Do not look at disk portion of vnode after this point; it may
* have been deleted above */
- if (vnp->nUsers-- == 1)
- StickOnLruChain_r(vnp, vcp);
vnp->delete = 0;
-
- if (writeLocked)
- ReleaseWriteLock(&vnp->lock);
- else
- ReleaseReadLock(&vnp->lock);
+ VnUnlock(vnp, ((writeLocked) ? WRITE_LOCK : READ_LOCK));
+ VnCancelReservation_r(vnp);
}
/*
return retVal;
}
+/**
+ * convert vnode handle from mutually exclusive to shared access.
+ *
+ * @param[out] ec client error code
+ * @param[in] vnp vnode object pointer
+ *
+ * @return unspecified use (see out argument 'ec' for error code return)
+ *
+ * @pre VOL_LOCK held.
+ * ref held on vnode.
+ * write lock held on vnode.
+ *
+ * @post read lock held on vnode.
+ * if vnode was modified, it has been written to disk.
+ *
+ * @internal volume package internal use only
+ */
int
VVnodeWriteToRead_r(Error * ec, register Vnode * vnp)
{
#endif /* AFS_PTHREAD_ENV */
*ec = 0;
- assert(vnp->nUsers != 0);
- class = vnodeIdToClass(vnp->vnodeNumber);
+ assert(Vn_refcount(vnp) != 0);
+ class = vnodeIdToClass(Vn_id(vnp));
vcp = &VnodeClassInfo[class];
assert(vnp->disk.vnodeMagic == vcp->magic);
- writeLocked = WriteLocked(&vnp->lock);
- VNLog(300, 2, vnp->vnodeNumber, (afs_int32) vnp);
+ VNLog(300, 2, Vn_id(vnp), (afs_int32) vnp);
+#ifdef AFS_DEMAND_ATTACH_FS
+ writeLocked = (Vn_state(vnp) == VN_STATE_EXCLUSIVE);
+#else
+ writeLocked = WriteLocked(&vnp->lock);
+#endif
if (!writeLocked) {
return 0;
}
-#ifdef AFS_PTHREAD_ENV
- thisProcess = pthread_self();
-#else /* AFS_PTHREAD_ENV */
- LWP_CurrentProcess(&thisProcess);
-#endif /* AFS_PTHREAD_ENV */
+
VNLog(301, 2, (afs_int32) vnp,
((vnp->changed_newTime) << 1) | ((vnp->
changed_oldTime) << 1) | vnp->
delete);
+
+ /* sanity checks */
+#ifdef AFS_PTHREAD_ENV
+ thisProcess = pthread_self();
+#else /* AFS_PTHREAD_ENV */
+ LWP_CurrentProcess(&thisProcess);
+#endif /* AFS_PTHREAD_ENV */
if (thisProcess != vnp->writer)
Abort("VPutVnode: Vnode at 0x%x locked by another process!\n",
(int)vnp);
+
if (vnp->delete) {
return 0;
}
if (vnp->changed_oldTime || vnp->changed_newTime) {
- Volume *vp = vnp->volumePtr;
+ Volume *vp = Vn_volume(vnp);
afs_uint32 now = FT_ApproxTime();
- assert(vnp->cacheCheck == vp->cacheCheck);
+ assert(Vn_cacheCheck(vnp) == vp->cacheCheck);
if (vnp->changed_newTime)
vnp->disk.serverModifyTime = now;
if (vnp->changed_newTime)
/* The inode has been changed. Write it out to disk */
if (!V_inUse(vp)) {
#ifdef AFS_DEMAND_ATTACH_FS
- VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
- *ec = VSALVAGING;
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, 0);
#else
assert(V_needsSalvaged(vp));
*ec = VSALVAGE;
#endif
} else {
- IHandle_t *ihP = vp->vnodeIndex[class].handle;
- FdHandle_t *fdP;
- off_t off = vnodeIndexOffset(vcp, vnp->vnodeNumber);
- VOL_UNLOCK;
- fdP = IH_OPEN(ihP);
- if (fdP == NULL) {
- Log("VPutVnode: can't open index file!\n");
- goto error_encountered;
- }
- code = FDH_SEEK(fdP, off, SEEK_SET);
- if (code < 0) {
- Log("VPutVnode: can't seek on index file!\n");
- goto error_encountered;
- }
- code = FDH_WRITE(fdP, &vnp->disk, vcp->diskSize);
- if (code != vcp->diskSize) {
- /*
- * Don't force volume offline if the inumber is out of
- * range or the inode table is full.
- */
- VOL_LOCK;
- if (code == BAD_IGET) {
- Log("VPutVnode: bad inumber %s\n",
- PrintInode(NULL,
- vp->vnodeIndex[class].handle->ih_ino));
- *ec = VIO;
- } else {
- Log("VPutVnode: Couldn't write vnode %u, volume %u (%s)\n", vnp->vnodeNumber, V_id(vnp->volumePtr), V_name(vnp->volumePtr));
-#ifdef AFS_DEMAND_ATTACH_FS
- VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
- *ec = VSALVAGING;
-#else
- VForceOffline_r(vp, 0);
- *ec = VSALVAGE;
-#endif
- }
- VOL_UNLOCK;
- }
- FDH_CLOSE(fdP);
- VOL_LOCK;
- goto sane;
-
- error_encountered:
-#ifdef AFS_DEMAND_ATTACH_FS
- if (fdP)
- FDH_CLOSE(fdP);
- VOL_LOCK;
- VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
- *ec = VSALVAGING;
-#else
- assert(1 == 2);
-#endif
-
+ VnStore(ec, vp, vnp, vcp, class);
}
sane:
vcp->writes++;
vnp->changed_newTime = vnp->changed_oldTime = 0;
}
+ vnp->writer = 0;
+#ifdef AFS_DEMAND_ATTACH_FS
+ VnChangeState_r(vnp, VN_STATE_ONLINE);
+ VnBeginRead_r(vnp);
+#else
ConvertWriteToReadLock(&vnp->lock);
+#endif
return 0;
}
-/* Move the vnode, vnp, to the new hash table given by the
- hash table index, newHash */
-static int
-moveHash(register Vnode * vnp, bit32 newHash)
-{
- Vnode *tvnp;
- /* Remove it from the old hash chain */
- tvnp = VnodeHashTable[vnp->hashIndex];
- if (tvnp == vnp)
- VnodeHashTable[vnp->hashIndex] = vnp->hashNext;
- else {
- while (tvnp && tvnp->hashNext != vnp)
- tvnp = tvnp->hashNext;
- if (tvnp)
- tvnp->hashNext = vnp->hashNext;
- }
- /* Add it to the new hash chain */
- vnp->hashNext = VnodeHashTable[newHash];
- VnodeHashTable[newHash] = vnp;
- vnp->hashIndex = newHash;
- return 0;
-}
-
-private void
-StickOnLruChain_r(register Vnode * vnp, register struct VnodeClassInfo *vcp)
-{
- /* Add it to the circular LRU list */
- if (vcp->lruHead == NULL)
- Abort("VPutVnode: vcp->lruHead==NULL");
- else {
- vnp->lruNext = vcp->lruHead;
- vnp->lruPrev = vcp->lruHead->lruPrev;
- vcp->lruHead->lruPrev = vnp;
- vnp->lruPrev->lruNext = vnp;
- vcp->lruHead = vnp;
- }
- /* If the vnode was just deleted, put it at the end of the chain so it
- * will be reused immediately */
- if (vnp->delete)
- vcp->lruHead = vnp->lruNext;
- /* If caching is turned off, set volumeptr to NULL to invalidate the
- * entry */
- if (!TrustVnodeCacheEntry) {
- DeleteFromVnHashByVolumeTable(vnp);
- vnp->volumePtr = NULL;
- }
-}
-
/* VCloseVnodeFiles - called when a volume is going off line. All open
* files for vnodes in that volume are closed. This might be excessive,
* since we may only be taking one volume of a volume group offline.
{
int i;
Vnode *vnp, *nvnp;
- VnodeHashByVolumeChainHead * head;
-
- head = &VnodeHashByVolumeTable[VNVOLUME_HASH(vp->hashid)];
#ifdef AFS_DEMAND_ATTACH_FS
- while (head->busy) {
- assert(pthread_cond_wait(&head->chain_busy_cv, &vol_glock_mutex) == 0);
- }
+ VolState vol_state_save;
- head->busy = 1;
+ vol_state_save = VChangeState_r(vp, VOL_STATE_VNODE_CLOSE);
VOL_UNLOCK;
#endif /* AFS_DEMAND_ATTACH_FS */
- for (queue_Scan(head, vnp, nvnp, Vnode)) {
- if (vnp->volumePtr == vp) {
- IH_REALLYCLOSE(vnp->handle);
- }
+ for (queue_Scan(&vp->vnode_list, vnp, nvnp, Vnode)) {
+ IH_REALLYCLOSE(vnp->handle);
+ DeleteFromVVnList(vnp);
}
#ifdef AFS_DEMAND_ATTACH_FS
VOL_LOCK;
- head->busy = 0;
- assert(pthread_cond_broadcast(&head->chain_busy_cv) == 0);
+ VChangeState_r(vp, vol_state_save);
#endif /* AFS_DEMAND_ATTACH_FS */
}
-/* VReleaseVnodeFiles - called when a volume is going detached. All open
- * files for vnodes in that volume are closed and all inode handles
- * for vnodes in that volume are released.
+/**
+ * shut down all vnode cache state for a given volume.
+ *
+ * @param[in] vp volume object pointer
+ *
+ * @pre VOL_LOCK is held
+ *
+ * @post all file descriptors closed.
+ * all inode handles released.
+ * all vnode cache objects disassociated from volume.
+ *
+ * @note for DAFS, these operations are performed outside the vol glock under
+ * volume exclusive state VOL_STATE_VNODE_RELEASE. Please further note
+ * that it would be a bug to acquire and release a volume reservation
+ * during this exclusive operation. This is due to the fact that we are
+ * generally called during the refcount 1->0 transition.
+ *
+ * @internal this routine is internal to the volume package
*/
void
VReleaseVnodeFiles_r(Volume * vp)
{
int i;
Vnode *vnp, *nvnp;
- VnodeHashByVolumeChainHead * head;
-
- head = &VnodeHashByVolumeTable[VNVOLUME_HASH(vp->hashid)];
-
#ifdef AFS_DEMAND_ATTACH_FS
- while (head->busy) {
- assert(pthread_cond_wait(&head->chain_busy_cv, &vol_glock_mutex) == 0);
- }
+ VolState vol_state_save;
- head->busy = 1;
+ vol_state_save = VChangeState_r(vp, VOL_STATE_VNODE_RELEASE);
VOL_UNLOCK;
#endif /* AFS_DEMAND_ATTACH_FS */
- for (queue_Scan(head, vnp, nvnp, Vnode)) {
- if (vnp->volumePtr == vp) {
- IH_RELEASE(vnp->handle);
- }
+ for (queue_Scan(&vp->vnode_list, vnp, nvnp, Vnode)) {
+ IH_RELEASE(vnp->handle);
+ DeleteFromVVnList(vnp);
}
#ifdef AFS_DEMAND_ATTACH_FS
VOL_LOCK;
- head->busy = 0;
- assert(pthread_cond_broadcast(&head->chain_busy_cv) == 0);
+ VChangeState_r(vp, vol_state_save);
#endif /* AFS_DEMAND_ATTACH_FS */
}
* This software has been released under the terms of the IBM Public
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
+ *
+ * Portions Copyright (c) 2007-2008 Sine Nomine Associates
*/
/*
*/
+#ifndef _AFS_VOL_VNODE_H
+#define _AFS_VOL_VNODE_H 1
+
#define Date afs_uint32
struct Volume; /* Potentially forward definition. */
(sizeof(VnodeDiskObject) == SIZEOF_SMALLDISKVNODE)
#define SIZEOF_LARGEDISKVNODE 256
+
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/**
+ * demand attach vnode state enumeration.
+ *
+ * @note values must be contiguous for VnIsValidState() to work
+ */
+typedef enum {
+ VN_STATE_INVALID = 0, /**< vnode does not contain valid cache data */
+ VN_STATE_RELEASING = 1, /**< vnode is busy releasing its ihandle ref */
+ VN_STATE_CLOSING = 2, /**< vnode is busy closing its ihandle ref */
+ VN_STATE_ALLOC = 3, /**< vnode is busy allocating disk entry */
+ VN_STATE_ONLINE = 4, /**< vnode is ready for use */
+ VN_STATE_LOAD = 5, /**< vnode is busy being loaded from disk */
+ VN_STATE_EXCLUSIVE = 6, /**< something external to the vnode package
+ * is operating exclusively on this vnode */
+ VN_STATE_STORE = 7, /**< vnode is busy being stored to disk */
+ VN_STATE_READ = 8, /**< a non-zero number of threads are executing
+ * code external to the vnode package which
+ * requires shared access */
+ VN_STATE_ERROR = 9, /**< vnode hard error state */
+ /* was "= 10": the gap at 9 violated the contiguity requirement
+ * stated above and made VnIsValidState(9) accept an undefined state */
+ VN_STATE_COUNT
+} VnState;
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+/**
+ * DAFS vnode state flags.
+ */
+enum VnFlags {
+ VN_ON_HASH = 0x1, /**< vnode is on hash table */
+ VN_ON_LRU = 0x2, /**< vnode is on lru list */
+ VN_ON_VVN = 0x4, /**< vnode is on volume vnode list */
+};
+
+
typedef struct Vnode {
struct rx_queue vid_hash; /* for vnode by volume id hash */
struct Vnode *hashNext; /* Next vnode on hash conflict chain */
bit32 nUsers; /* Number of lwp's who have done a VGetVnode */
bit32 cacheCheck; /* Must equal the value in the volume Header
* for the cache entry to be valid */
+ bit32 vn_state_flags; /**< vnode state flags */
+#ifdef AFS_DEMAND_ATTACH_FS
+ bit32 nReaders; /**< number of read locks held */
+ VnState vn_state; /**< vnode state */
+ pthread_cond_t vn_state_cv; /**< state change notification cv */
+#else /* !AFS_DEMAND_ATTACH_FS */
struct Lock lock; /* Internal lock */
+#endif /* !AFS_DEMAND_ATTACH_FS */
#ifdef AFS_PTHREAD_ENV
pthread_t writer; /* thread holding write lock */
#else /* AFS_PTHREAD_ENV */
PROCESS writer; /* Process id having write lock */
#endif /* AFS_PTHREAD_ENV */
+ struct VnodeClassInfo * vcp; /**< our vnode class */
IHandle_t *handle;
VnodeDiskObject disk; /* The actual disk data for the vnode */
} Vnode;
(sizeof(struct Vnode) - sizeof(VnodeDiskObject) + SIZEOF_LARGEDISKVNODE)
#define SIZEOF_SMALLVNODE (sizeof (struct Vnode))
+
+/*
+ * struct Vnode accessor abstraction
+ */
+#define Vn_refcount(vnp) ((vnp)->nUsers)
+#define Vn_state(vnp) ((vnp)->vn_state)
+#define Vn_stateFlags(vnp) ((vnp)->vn_state_flags)
+#define Vn_stateCV(vnp) ((vnp)->vn_state_cv)
+#define Vn_volume(vnp) ((vnp)->volumePtr)
+#define Vn_cacheCheck(vnp) ((vnp)->cacheCheck)
+#define Vn_class(vnp) ((vnp)->vcp)
+#define Vn_readers(vnp) ((vnp)->nReaders)
+#define Vn_id(vnp) ((vnp)->vnodeNumber)
+
+
#ifdef AFS_LARGEFILE_ENV
#define VN_GET_LEN(N, V) FillInt64(N, (V)->disk.reserved6, (V)->disk.length)
#define VNDISK_GET_LEN(N, V) FillInt64(N, (V)->reserved6, (V)->length)
extern Vnode *VAllocVnode_r(Error * ec, struct Volume *vp, VnodeType type);
/*extern VFreeVnode();*/
extern Vnode *VGetFreeVnode_r(struct VnodeClassInfo *vcp);
-extern void VInitVnHashByVolume(void);
+extern Vnode *VLookupVnode(struct Volume * vp, VnodeId vnodeId);
+
+extern void AddToVVnList(struct Volume * vp, Vnode * vnp);
+extern void DeleteFromVVnList(register Vnode * vnp);
+extern void AddToVnLRU(struct VnodeClassInfo * vcp, Vnode * vnp);
+extern void DeleteFromVnLRU(struct VnodeClassInfo * vcp, Vnode * vnp);
+extern void AddToVnHash(Vnode * vnp);
+extern void DeleteFromVnHash(Vnode * vnp);
+
+#endif /* _AFS_VOL_VNODE_H */
--- /dev/null
+/*
+ * Copyright 2007-2008, Sine Nomine Associates and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+#ifndef _AFS_VOL_VNODE_INLINE_H
+#define _AFS_VOL_VNODE_INLINE_H 1
+
+#include "vnode.h"
+
+#ifdef AFS_AIX_ENV
+#define static_inline inline
+#else
+#define static_inline static inline
+#endif
+
+/***************************************************/
+/* demand attach vnode state machine routines */
+/***************************************************/
+
+/**
+ * get a reference to a vnode object.
+ *
+ * @param[in] vnp vnode object pointer
+ *
+ * @internal vnode package internal use only
+ *
+ * @pre VOL_LOCK must be held
+ *
+ * @post vnode refcount incremented
+ *
+ * @see VnCancelReservation_r
+ */
+static_inline void
+VnCreateReservation_r(Vnode * vnp)
+{
+ Vn_refcount(vnp)++;
+ if (Vn_refcount(vnp) == 1) {
+ /* 0 -> 1 transition: pull the vnode off the LRU so it cannot
+ * be recycled while a reference is outstanding */
+ DeleteFromVnLRU(Vn_class(vnp), vnp);
+ }
+}
+
+extern int TrustVnodeCacheEntry;
+
+/**
+ * release a reference to a vnode object.
+ *
+ * @param[in] vnp vnode object pointer
+ *
+ * @pre VOL_LOCK held
+ *
+ * @post refcount decremented; possibly re-added to vn lru
+ *
+ * @internal vnode package internal use only
+ *
+ * @see VnCreateReservation_r
+ */
+static_inline void
+VnCancelReservation_r(Vnode * vnp)
+{
+ if (--Vn_refcount(vnp) == 0) {
+ /* 1 -> 0 transition: put the vnode back on the LRU so it
+ * becomes eligible for reclamation again */
+ AddToVnLRU(Vn_class(vnp), vnp);
+
+ /* If caching is turned off,
+ * disassociate vnode cache entry from volume object */
+ if (!TrustVnodeCacheEntry) {
+ DeleteFromVVnList(vnp);
+ }
+ }
+}
+
+#ifdef AFS_PTHREAD_ENV
+#define VN_SET_WRITER_THREAD_ID(v) (((v)->writer) = pthread_self())
+#else
+#define VN_SET_WRITER_THREAD_ID(v) (LWP_CurrentProcess(&((v)->writer)))
+#endif
+
+#define VOL_LOCK_NOT_HELD 0
+#define VOL_LOCK_HELD 1
+#define MIGHT_DEADLOCK 0
+#define WILL_NOT_DEADLOCK 1
+
+/**
+ * acquire a lock on a vnode object.
+ *
+ * @param[in] vnp vnode object pointer
+ * @param[in] type lock type (READ_LOCK or WRITE_LOCK)
+ * @param[in] held whether or not vol glock is held
+ * @param[in] safe whether it is safe to acquire without dropping vol glock
+ *
+ * @note caller must guarantee deadlock will not occur
+ *
+ * @post lock acquired.
+ * for write case, thread owner field set.
+ *
+ * @note for DAFS, no lock is actually acquired (the vnode state
+ * machine serializes access instead); only the writer thread
+ * id is recorded for WRITE_LOCK
+ *
+ * @internal vnode package internal use only
+ */
+static_inline void
+VnLock(Vnode * vnp, int type, int held, int safe)
+{
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (type == WRITE_LOCK) {
+ VN_SET_WRITER_THREAD_ID(vnp);
+ }
+#else /* !AFS_DEMAND_ATTACH_FS */
+ /* drop the glock around the blocking lock acquisition unless the
+ * caller asserted it cannot deadlock (safe) or never held it */
+ if (held && !safe) {
+ VOL_UNLOCK;
+ }
+ if (type == READ_LOCK) {
+ ObtainReadLock(&vnp->lock);
+ } else {
+ ObtainWriteLock(&vnp->lock);
+ VN_SET_WRITER_THREAD_ID(vnp);
+ }
+ if (held && !safe) {
+ VOL_LOCK;
+ }
+#endif /* !AFS_DEMAND_ATTACH_FS */
+}
+
+/**
+ * release a lock on a vnode object.
+ *
+ * @param[in] vnp vnode object pointer
+ * @param[in] type lock type (READ_LOCK or WRITE_LOCK)
+ *
+ * @note for DAFS, no lock is actually released; for WRITE_LOCK the
+ * writer thread id field is still cleared
+ *
+ * @internal vnode package internal use only
+ */
+static_inline void
+VnUnlock(Vnode * vnp, int type)
+{
+ if (type == READ_LOCK) {
+#ifndef AFS_DEMAND_ATTACH_FS
+ ReleaseReadLock(&vnp->lock);
+#endif
+ } else {
+ /* writer field is cleared regardless of DAFS, mirroring
+ * VN_SET_WRITER_THREAD_ID in VnLock */
+ vnp->writer = 0;
+#ifndef AFS_DEMAND_ATTACH_FS
+ ReleaseWriteLock(&vnp->lock);
+#endif
+ }
+}
+
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/**
+ * change state, and notify other threads,
+ * return previous state to caller.
+ *
+ * @param[in] vnp pointer to vnode object
+ * @param[in] new_state new vnode state value
+ *
+ * @pre VOL_LOCK held
+ *
+ * @post vnode state changed
+ *
+ * @return previous vnode state
+ *
+ * @note DEMAND_ATTACH_FS only
+ *
+ * @internal vnode package internal use only
+ */
+static_inline VnState
+VnChangeState_r(Vnode * vnp, VnState new_state)
+{
+ VnState old_state = Vn_state(vnp);
+
+ Vn_state(vnp) = new_state;
+ /* wake threads blocked in VnWaitStateChange_r,
+ * VnWaitExclusiveState_r, or VnWaitQuiescent_r */
+ assert(pthread_cond_broadcast(&Vn_stateCV(vnp)) == 0);
+ return old_state;
+}
+
+/**
+ * tells caller whether or not the current state requires
+ * exclusive access without holding glock.
+ *
+ * @param[in] state vnode state enumeration
+ *
+ * @return whether vnode state is a mutually exclusive state
+ * @retval 0 no, state is re-entrant
+ * @retval 1 yes, state is mutually exclusive
+ *
+ * @note DEMAND_ATTACH_FS only
+ */
+static_inline int
+VnIsExclusiveState(VnState state)
+{
+ switch (state) {
+ case VN_STATE_RELEASING:
+ case VN_STATE_CLOSING:
+ case VN_STATE_ALLOC:
+ case VN_STATE_LOAD:
+ case VN_STATE_EXCLUSIVE:
+ case VN_STATE_STORE:
+ return 1;
+ default:
+ /* all other states permit shared access; explicit default
+ * documents intent and silences -Wswitch for unhandled
+ * enumerators */
+ break;
+ }
+ return 0;
+}
+
+/**
+ * tell caller whether vnode state is an error condition.
+ *
+ * @param[in] state vnode state enumeration
+ *
+ * @return whether vnode state is in error state
+ * @retval 0 state is not an error state
+ * @retval 1 state is an error state
+ *
+ * @note DEMAND_ATTACH_FS only
+ */
+static_inline int
+VnIsErrorState(VnState state)
+{
+ switch (state) {
+ case VN_STATE_ERROR:
+ return 1;
+ default:
+ /* explicit default documents intent and silences -Wswitch
+ * for unhandled enumerators */
+ break;
+ }
+ return 0;
+}
+
+/**
+ * tell caller whether vnode state is valid.
+ *
+ * @param[in] state vnode state enumeration
+ *
+ * @return whether state is a valid member of the VnState enumeration
+ * @retval 0 no, state is not valid
+ * @retval 1 yes, state is a valid enumeration member
+ *
+ * @note DEMAND_ATTACH_FS only
+ *
+ * @note NOTE(review): this range check assumes VnState values are
+ * contiguous, but VN_STATE_ERROR as declared (== 10) leaves a
+ * gap at 9, so VnIsValidState(9) wrongly reports valid —
+ * confirm the enum values are made contiguous.
+ */
+static_inline int
+VnIsValidState(VnState state)
+{
+ if ((state >= 0) &&
+ (state < VN_STATE_COUNT)) {
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * wait for the vnode to change states.
+ *
+ * @param[in] vnp vnode object pointer
+ *
+ * @pre VOL_LOCK held; ref held on vnode
+ *
+ * @post VOL_LOCK held; vnode state has changed from previous value
+ *
+ * @note DEMAND_ATTACH_FS only
+ */
+static_inline void
+VnWaitStateChange_r(Vnode * vnp)
+{
+ VnState state_save = Vn_state(vnp);
+
+ assert(Vn_refcount(vnp));
+ do {
+ /* VOL_CV_WAIT drops and re-acquires the vol glock while blocked */
+ VOL_CV_WAIT(&Vn_stateCV(vnp));
+ } while (Vn_state(vnp) == state_save);
+ /* our ref must keep the vnode off the LRU (see VnCreateReservation_r) */
+ assert(!(Vn_stateFlags(vnp) & VN_ON_LRU));
+}
+
+/**
+ * wait for blocking ops to end.
+ *
+ * @pre VOL_LOCK held; ref held on vnode
+ *
+ * @post VOL_LOCK held; vnode not in exclusive state
+ *
+ * @param[in] vnp vnode object pointer
+ *
+ * @note DEMAND_ATTACH_FS only
+ */
+static_inline void
+VnWaitExclusiveState_r(Vnode * vnp)
+{
+ assert(Vn_refcount(vnp));
+ while (VnIsExclusiveState(Vn_state(vnp))) {
+ /* VOL_CV_WAIT drops and re-acquires the vol glock while blocked */
+ VOL_CV_WAIT(&Vn_stateCV(vnp));
+ }
+ /* our ref must keep the vnode off the LRU (see VnCreateReservation_r) */
+ assert(!(Vn_stateFlags(vnp) & VN_ON_LRU));
+}
+
+/**
+ * wait until vnode is in non-exclusive state, and there are no active readers.
+ *
+ * @param[in] vnp vnode object pointer
+ *
+ * @pre VOL_LOCK held; ref held on vnode
+ *
+ * @post VOL_LOCK held; vnode is in non-exclusive state and has no active readers
+ *
+ * @note DEMAND_ATTACH_FS only
+ */
+static_inline void
+VnWaitQuiescent_r(Vnode * vnp)
+{
+ assert(Vn_refcount(vnp));
+ while (VnIsExclusiveState(Vn_state(vnp)) ||
+ Vn_readers(vnp)) {
+ /* VOL_CV_WAIT drops and re-acquires the vol glock while blocked */
+ VOL_CV_WAIT(&Vn_stateCV(vnp));
+ }
+ /* our ref must keep the vnode off the LRU (see VnCreateReservation_r) */
+ assert(!(Vn_stateFlags(vnp) & VN_ON_LRU));
+}
+
+/**
+ * register a new reader on a vnode.
+ *
+ * @param[in] vnp vnode object pointer
+ *
+ * @pre VOL_LOCK held.
+ * ref held on vnode.
+ * vnode in VN_STATE_READ or VN_STATE_ONLINE
+ *
+ * @post reader count incremented.
+ * state set to VN_STATE_READ.
+ *
+ * @note DEMAND_ATTACH_FS only
+ *
+ * @internal vnode package internal use only
+ */
+static_inline void
+VnBeginRead_r(Vnode * vnp)
+{
+ if (!Vn_readers(vnp)) {
+ /* first reader transitions the vnode ONLINE -> READ;
+ * subsequent readers just bump the count */
+ assert(Vn_state(vnp) == VN_STATE_ONLINE);
+ VnChangeState_r(vnp, VN_STATE_READ);
+ }
+ Vn_readers(vnp)++;
+ assert(Vn_state(vnp) == VN_STATE_READ);
+}
+
+/**
+ * deregister a reader on a vnode.
+ *
+ * @param[in] vnp vnode object pointer
+ *
+ * @pre VOL_LOCK held.
+ * ref held on vnode.
+ * read ref held on vnode.
+ * vnode in VN_STATE_READ.
+ *
+ * @post reader count decremented.
+ * when count reaches zero, state set to VN_STATE_ONLINE.
+ *
+ * @note DEMAND_ATTACH_FS only
+ *
+ * @internal vnode package internal use only
+ */
+static_inline void
+VnEndRead_r(Vnode * vnp)
+{
+ assert(Vn_readers(vnp) > 0);
+ Vn_readers(vnp)--;
+ if (!Vn_readers(vnp)) {
+ /* NOTE(review): this broadcast appears redundant, since
+ * VnChangeState_r below broadcasts on the same cv —
+ * harmless, but a candidate for removal */
+ assert(pthread_cond_broadcast(&Vn_stateCV(vnp)) == 0);
+ VnChangeState_r(vnp, VN_STATE_ONLINE);
+ }
+}
+
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+#endif /* _AFS_VOL_VNODE_INLINE_H */
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
*
- * Portions Copyright (c) 2006 Sine Nomine Associates
+ * Portions Copyright (c) 2005-2008 Sine Nomine Associates
*/
/* 1/1/89: NB: this stuff is all going to be replaced. Don't take it too seriously */
#include "vnode.h"
#include "volume.h"
#include "partition.h"
+#include "volume_inline.h"
#ifdef AFS_PTHREAD_ENV
#include <assert.h>
#else /* AFS_PTHREAD_ENV */
/* extended volume package statistics */
VolPkgStats VStats;
+#ifdef VOL_LOCK_DEBUG
+pthread_t vol_glock_holder = 0;
+#endif
+
#define VOLUME_BITMAP_GROWSIZE 16 /* bytes, => 128vnodes */
/* Must be a multiple of 4 (1 word) !! */
static void VHashEndExclusive_r(VolumeHashChainHead * head);
static void VHashWait_r(VolumeHashChainHead * head);
-/* Volume state machine */
-static void VCreateReservation_r(Volume * vp);
-static void VCancelReservation_r(Volume * vp);
-static void VWaitStateChange_r(Volume * vp);
-static void VWaitExclusiveState_r(Volume * vp);
-static int IsExclusiveState(VolState state);
-static int IsErrorState(VolState state);
-static int IsValidState(VolState state);
-
/* shutdown */
static int ShutdownVByPForPass_r(struct DiskPartition * dp, int pass);
static int ShutdownVolumeWalk_r(struct DiskPartition * dp, int pass,
programType = pt;
-#ifdef AFS_DEMAND_ATTACH_FS
memset(&VStats, 0, sizeof(VStats));
VStats.hdr_cache_size = 200;
-#endif
VInitPartitionPackage();
VInitVolumeHash();
- VInitVnHashByVolume();
#ifdef AFS_DEMAND_ATTACH_FS
if (programType == fileServer) {
VInitVLRU();
}
while(params.n_threads_complete < threads) {
- pthread_cond_wait(¶ms.thread_done_cv,&vol_glock_mutex);
+ VOL_CV_WAIT(¶ms.thread_done_cv);
}
VOL_UNLOCK;
Error error;
Volume *vp;
#ifdef AFS_DEMAND_ATTACH_FS
- vp = VPreAttachVolumeByName(&error, diskP->name, dp->d_name,
- V_VOLUPD);
+ vp = VPreAttachVolumeByName(&error, diskP->name, dp->d_name);
#else /* AFS_DEMAND_ATTACH_FS */
vp = VAttachVolumeByName(&error, diskP->name, dp->d_name,
V_VOLUPD);
/* wait for all the workers to finish pass 3 and terminate */
while (params.pass < 4) {
- assert(pthread_cond_wait(¶ms.cv, &vol_glock_mutex) == 0);
+ VOL_CV_WAIT(¶ms.cv);
}
assert(pthread_attr_destroy(&attrs) == 0);
pass, params->n_threads, params->n_parts);
VOL_LOCK;
} else {
- assert(pthread_cond_wait(¶ms->cv, &vol_glock_mutex) == 0);
+ VOL_CV_WAIT(¶ms->cv);
}
}
pass = params->pass;
break;
}
case 2:
- if (IsExclusiveState(V_attachState(vp))) {
+ if (VIsExclusiveState(V_attachState(vp))) {
break;
}
case 3:
/* wait for other blocking ops to finish */
VWaitExclusiveState_r(vp);
- assert(IsValidState(V_attachState(vp)));
+ assert(VIsValidState(V_attachState(vp)));
switch(V_attachState(vp)) {
case VOL_STATE_SALVAGING:
/***************************************************/
#ifdef AFS_DEMAND_ATTACH_FS
-/* pre-attach a volume given its path
+/**
+ * pre-attach a volume given its path.
+ *
+ * @param[out] ec outbound error code
+ * @param[in] partition partition path string
+ * @param[in] name volume id string
*
- * a pre-attached volume will only have its partition
- * and hashid fields initialized
+ * @return volume object pointer
+ *
+ * @note A pre-attached volume will only have its partition
+ * and hashid fields initialized. At first call to
+ * VGetVolume, the volume will be fully attached.
*
- * at first call to VGetVolume, the volume will be
- * fully attached
*/
Volume *
-VPreAttachVolumeByName(Error * ec, char *partition, char *name, int mode)
+VPreAttachVolumeByName(Error * ec, char *partition, char *name)
{
Volume * vp;
VOL_LOCK;
- vp = VPreAttachVolumeByName_r(ec, partition, name, mode);
+ vp = VPreAttachVolumeByName_r(ec, partition, name);
VOL_UNLOCK;
return vp;
}
+/**
+ * pre-attach a volume given its path.
+ *
+ * @param[out] ec outbound error code
+ * @param[in] partition path to vice partition
+ * @param[in] name volume id string
+ *
+ * @return volume object pointer
+ *
+ * @pre VOL_LOCK held
+ *
+ * @internal volume package internal use only.
+ */
Volume *
-VPreAttachVolumeByName_r(Error * ec, char *partition, char *name, int mode)
+VPreAttachVolumeByName_r(Error * ec, char *partition, char *name)
{
- register Volume *vp = NULL;
- int fd, n;
- struct afs_stat status;
+ return VPreAttachVolumeById_r(ec,
+ partition,
+ VolumeNumber(name));
+}
+
+/**
+ * pre-attach a volume given its path and numeric volume id.
+ *
+ * @param[out] ec error code return
+ * @param[in] partition path to vice partition
+ * @param[in] volumeId numeric volume id
+ *
+ * @return volume object pointer
+ *
+ * @pre VOL_LOCK held
+ *
+ * @internal volume package internal use only.
+ */
+Volume *
+VPreAttachVolumeById_r(Error * ec,
+ char * partition,
+ VolId volumeId)
+{
+ Volume *vp;
struct DiskPartition *partp;
- char path[64];
- int isbusy = 0;
- VolId volumeId;
+
*ec = 0;
assert(programType == fileServer);
if (!(partp = VGetPartition_r(partition, 0))) {
*ec = VNOVOL;
- Log("VPreAttachVolume: Error getting partition (%s)\n", partition);
+ Log("VPreAttachVolumeById_r: Error getting partition (%s)\n", partition);
return NULL;
}
- volumeId = VolumeNumber(name);
-
vp = VLookupVolume_r(ec, volumeId, NULL);
if (*ec) {
return NULL;
}
- return VPreAttachVolumeById_r(ec, partp, vp, volumeId);
+ return VPreAttachVolumeByVp_r(ec, partp, vp, volumeId);
}
-/* pre-attach a volume given its partition and volume id
+/**
+ * preattach a volume.
+ *
+ * @param[out] ec outbound error code
+ * @param[in] partp pointer to partition object
+ * @param[in] vp pointer to volume object
+ * @param[in] vid volume id
+ *
+ * @return volume object pointer
+ *
+ * @pre VOL_LOCK is held.
*
- * if vp == NULL, then a new vp is created
- * if vp != NULL, then we assumed it is already on the hash chain
+ * @warning Returned volume object pointer does not have to
+ * equal the pointer passed in as argument vp. There
+ * are potential race conditions which can result in
+ * the pointers having different values. It is up to
+ * the caller to make sure that references are handled
+ * properly in this case.
+ *
+ * @note If there is already a volume object registered with
+ * the same volume id, its pointer MUST be passed as
+ * argument vp. Failure to do so will result in a silent
+ * failure to preattach.
+ *
+ * @internal volume package internal use only.
*/
Volume *
-VPreAttachVolumeById_r(Error * ec, struct DiskPartition * partp,
- Volume * vp, int vid)
+VPreAttachVolumeByVp_r(Error * ec,
+ struct DiskPartition * partp,
+ Volume * vp,
+ VolId vid)
{
Volume *nvp = NULL;
/* check to see if pre-attach already happened */
if (vp &&
(V_attachState(vp) != VOL_STATE_UNATTACHED) &&
- !IsErrorState(V_attachState(vp))) {
+ !VIsErrorState(V_attachState(vp)) &&
+ ((V_attachState(vp) != VOL_STATE_PREATTACHED) ||
+ vp->pending_vol_op == NULL)) {
+ /*
+ * pre-attach is a no-op in all but the following cases:
+ *
+ * - volume is unattached
+ * - volume is in an error state
+ * - volume is pre-attached with a pending volume operation
+ * (e.g. vos move between two partitions on same server)
+ */
goto done;
} else if (vp) {
/* we're re-attaching a volume; clear out some old state */
memset(&vp->salvage, 0, sizeof(struct VolumeOnlineSalvage));
+
+ if (V_partition(vp) != partp) {
+ /* XXX potential race */
+ DeleteVolumeFromVByPList_r(vp);
+ }
} else {
/* if we need to allocate a new Volume struct,
* go ahead and drop the vol glock, otherwise
vp = nvp = (Volume *) malloc(sizeof(Volume));
assert(vp != NULL);
memset(vp, 0, sizeof(Volume));
+ queue_Init(&vp->vnode_list);
assert(pthread_cond_init(&V_attachCV(vp), NULL) == 0);
}
VChangeState_r(vp, VOL_STATE_PREATTACHED);
if (LogLevel >= 5)
- Log("VPreAttachVolumeById_r: volume %u pre-attached\n", vp->hashid);
+ Log("VPreAttachVolumeByVp_r: volume %u pre-attached\n", vp->hashid);
done:
if (*ec)
(V_attachState(vp) == VOL_STATE_UNATTACHED) ||
(V_attachState(vp) == VOL_STATE_ERROR)) {
svp = vp;
- vp = VPreAttachVolumeById_r(ec, partp, vp, volumeId);
+ vp = VPreAttachVolumeByVp_r(ec, partp, vp, volumeId);
if (*ec) {
return NULL;
}
assert(vp != NULL);
vp->device = partp->device;
vp->partition = partp;
+ queue_Init(&vp->vnode_list);
#ifdef AFS_DEMAND_ATTACH_FS
assert(pthread_cond_init(&V_attachCV(vp), NULL) == 0);
#endif /* AFS_DEMAND_ATTACH_FS */
} else
#endif
if (programType == fileServer && vp) {
+#ifdef AFS_DEMAND_ATTACH_FS
+ /*
+ * we can get here in cases where we don't "own"
+ * the volume (e.g. volume owned by a utility).
+ * short circuit around potential disk header races.
+ */
+ if (V_attachState(vp) != VOL_STATE_ATTACHED) {
+ goto done;
+ }
+#endif
V_needsCallback(vp) = 0;
#ifdef notdef
if (VInit >= 2 && V_BreakVolumeCallbacks) {
Log("VOnline: volume %u (%s) attached and online\n", V_id(vp),
V_name(vp));
}
+
done:
if (programType == volumeUtility) {
VUnlockPartition_r(partition);
}
if (*ec) {
#ifdef AFS_DEMAND_ATTACH_FS
- if (vp) {
- V_attachState(vp) = VOL_STATE_ERROR;
- assert(pthread_cond_broadcast(&V_attachCV(vp)) == 0);
+ /* attach failed; make sure we're in error state */
+ if (vp && !VIsErrorState(V_attachState(vp))) {
+ VChangeState_r(vp, VOL_STATE_ERROR);
}
#endif /* AFS_DEMAND_ATTACH_FS */
return NULL;
if (!vp ||
(V_attachState(vp) == VOL_STATE_UNATTACHED) ||
(V_attachState(vp) == VOL_STATE_ERROR)) {
- nvp = VPreAttachVolumeById_r(ec, partp, vp, volumeId);
+ nvp = VPreAttachVolumeByVp_r(ec, partp, vp, volumeId);
if (*ec) {
return NULL;
}
* with vol_glock_mutex held */
vp = attach2(ec, volumeId, path, &iheader, partp, vp, isbusy, mode);
- if (*ec || vp == NULL) {
+ /*
+ * in the event that an error was encountered, or
+ * the volume was not brought to an attached state
+ * for any reason, skip to the end. We cannot
+ * safely call VUpdateVolume unless we "own" it.
+ */
+ if (*ec ||
+ (vp == NULL) ||
+ (V_attachState(vp) != VOL_STATE_ATTACHED)) {
goto done;
}
reserve = 0;
}
if (*ec && (*ec != VOFFLINE) && (*ec != VSALVAGE)) {
- if (vp && !IsErrorState(V_attachState(vp))) {
+ if (vp && !VIsErrorState(V_attachState(vp))) {
VChangeState_r(vp, VOL_STATE_ERROR);
}
return NULL;
#endif
VOL_LOCK;
-#ifdef AFS_DEMAND_ATTACH_FS
IncUInt64(&VStats.attaches);
-#endif
vp->cacheCheck = ++VolumeCacheCheck;
/* just in case this ever rolls over */
if (!vp->cacheCheck)
}
V_attachFlags(vp) |= VOL_HDR_LOADED;
+ vp->stats.last_hdr_load = vp->stats.last_attach;
}
#endif /* AFS_DEMAND_ATTACH_FS */
if (*ec && ((*ec != VOFFLINE) || (V_attachState(vp) != VOL_STATE_UNATTACHED))) {
VOL_LOCK;
if (programType == fileServer) {
- VRequestSalvage_r(vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
vp->nUsers = 0;
- *ec = VSALVAGING;
} else {
Log("VAttachVolume: Error attaching volume %s; volume needs salvage; error=%u\n", path, *ec);
FreeVolume(vp);
VOL_LOCK;
#if defined(AFS_DEMAND_ATTACH_FS)
if (programType == fileServer) {
- VRequestSalvage_r(vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER);
+ VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER);
vp->nUsers = 0;
- *ec = VSALVAGING;
} else {
Log("VAttachVolume: volume salvage flag is ON for %s; volume needs salvage\n", path);
FreeVolume(vp);
VUpdateVolume_r(ec, vp, 0);
}
#if defined(AFS_DEMAND_ATTACH_FS)
- VRequestSalvage_r(vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER);
+ VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER);
vp->nUsers = 0;
- *ec = VSALVAGING;
#else /* AFS_DEMAND_ATTACH_FS */
Log("VAttachVolume: volume %s needs to be salvaged; not attached.\n", path);
FreeVolume(vp);
if (V_destroyMe(vp) == DESTROY_ME) {
#if defined(AFS_DEMAND_ATTACH_FS)
/* schedule a salvage so the volume goes away on disk */
- VRequestSalvage_r(vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
VChangeState_r(vp, VOL_STATE_ERROR);
vp->nUsers = 0;
#endif /* AFS_DEMAND_ATTACH_FS */
VGetBitmap_r(ec, vp, i);
if (*ec) {
#ifdef AFS_DEMAND_ATTACH_FS
- VRequestSalvage_r(vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
vp->nUsers = 0;
- *ec = VSALVAGING;
#else /* AFS_DEMAND_ATTACH_FS */
FreeVolume(vp);
#endif /* AFS_DEMAND_ATTACH_FS */
#ifdef AFS_DEMAND_ATTACH_FS
AddVolumeToVByPList_r(vp);
VLRU_Add_r(vp);
- VChangeState_r(vp, VOL_STATE_ATTACHED);
+ if ((programType != fileServer) ||
+ V_inUse(vp)) {
+ VChangeState_r(vp, VOL_STATE_ATTACHED);
+ } else {
+ VChangeState_r(vp, VOL_STATE_UNATTACHED);
+ }
#endif
return vp;
}
/* get and put volume routines */
/***************************************************/
+/**
+ * put back a heavyweight reference to a volume object.
+ *
+ * @param[in] vp volume object pointer
+ *
+ * @pre VOL_LOCK held
+ *
+ * @post heavyweight volume reference put back.
+ * depending on state, volume may have been taken offline,
+ * detached, salvaged, freed, etc.
+ *
+ * @internal volume package internal use only
+ */
void
VPutVolume_r(register Volume * vp)
{
#else
#define VGET_CTR_INC(x)
#endif
-
#ifdef AFS_DEMAND_ATTACH_FS
Volume *avp, * rvp = hint;
+#endif
+#ifdef AFS_DEMAND_ATTACH_FS
if (rvp) {
VCreateReservation_r(rvp);
}
break;
}
+ /*
+ * short circuit with VOFFLINE in the following circumstances:
+ *
+ * VOL_STATE_UNATTACHED
+ */
+ if (V_attachState(vp) == VOL_STATE_UNATTACHED) {
+ *ec = VOFFLINE;
+ vp = NULL;
+ break;
+ }
+
/* allowable states:
* UNATTACHED
* PREATTACHED
vp->hashid);
#ifdef AFS_DEMAND_ATTACH_FS
if (programType == fileServer) {
- VRequestSalvage_r(vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
- *ec = VSALVAGING;
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
} else {
FreeVolume(vp);
vp = NULL;
}
#ifdef AFS_DEMAND_ATTACH_FS
+ /*
+ * this test MUST happen after the volume header is loaded
+ */
if (vp->pending_vol_op && !VVolOpLeaveOnline_r(vp, vp->pending_vol_op)) {
if (client_ec) {
/* see CheckVnode() in afsfileprocs.c for an explanation
vp = NULL;
break;
}
-
- if (V_attachState(vp) == VOL_STATE_UNATTACHED) {
- *ec = VOFFLINE;
- ReleaseVolumeHeader(vp->header);
- vp = NULL;
- break;
- }
#endif /* AFS_DEMAND_ATTACH_FS */
VGET_CTR_INC(V7);
VWaitStateChange_r(vp);
}
#elif defined(AFS_PTHREAD_ENV)
- assert(pthread_cond_wait(&vol_put_volume_cond, &vol_glock_mutex) == 0);
+ VOL_CV_WAIT(&vol_put_volume_cond);
#else /* AFS_PTHREAD_ENV */
LWP_WaitProcess(VPutVolume);
#endif /* AFS_PTHREAD_ENV */
void
VTakeOffline_r(register Volume * vp)
{
+ Error error;
+
assert(vp->nUsers > 0);
assert(programType == fileServer);
vp->goingOffline = 1;
V_needsSalvaged(vp) = 1;
- VRequestSalvage_r(vp, SALVSYNC_ERROR, 0);
+ VRequestSalvage_r(&error, vp, SALVSYNC_ERROR, 0);
VCancelReservation_r(vp);
}
#else /* AFS_DEMAND_ATTACH_FS */
VOL_UNLOCK;
}
-/* Force the volume offline, set the salvage flag. No further references to
- * the volume through the volume package will be honored. */
-/* for demand attach, caller MUST hold ref count on vp */
+/**
+ * force a volume offline.
+ *
+ * @param[in] vp volume object pointer
+ * @param[in] flags flags (see note below)
+ *
+ * @note the flag VOL_FORCEOFF_NOUPDATE is a recursion control flag
+ * used when VUpdateVolume_r needs to call VForceOffline_r
+ * (which in turn would normally call VUpdateVolume_r)
+ *
+ * @see VUpdateVolume_r
+ *
+ * @pre VOL_LOCK must be held.
+ * for DAFS, caller must hold ref.
+ *
+ * @note for DAFS, it _is safe_ to call this function from an
+ * exclusive state
+ *
+ * @post needsSalvaged flag is set.
+ * for DAFS, salvage is requested.
+ * no further references to the volume through the volume
+ * package will be honored.
+ * all file descriptor and vnode caches are invalidated.
+ *
+ * @warning this is a heavy-handed interface. it results in
+ * a volume going offline regardless of the current
+ * reference count state.
+ *
+ * @internal volume package internal use only
+ */
void
VForceOffline_r(Volume * vp, int flags)
{
Error error;
- if (!V_inUse(vp))
+ if (!V_inUse(vp)) {
+#ifdef AFS_DEMAND_ATTACH_FS
+ VChangeState_r(vp, VOL_STATE_ERROR);
+#endif
return;
+ }
+
strcpy(V_offlineMessage(vp),
"Forced offline due to internal error: volume needs to be salvaged");
Log("Volume %u forced offline: it needs salvaging!\n", V_id(vp));
+
V_inUse(vp) = 0;
vp->goingOffline = 0;
V_needsSalvaged(vp) = 1;
if (!(flags & VOL_FORCEOFF_NOUPDATE)) {
- VUpdateVolume_r(&error, vp, VOL_UPDATE_WAIT | VOL_UPDATE_NOFORCEOFF);
+ VUpdateVolume_r(&error, vp, VOL_UPDATE_NOFORCEOFF);
}
+
#ifdef AFS_DEMAND_ATTACH_FS
-#ifdef SALVSYNC_BUILD_CLIENT
- if (programType == fileServer) {
- VRequestSalvage_r(vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
- }
-#endif
- VChangeState_r(vp, VOL_STATE_ERROR);
+ VRequestSalvage_r(&error, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
#endif /* AFS_DEMAND_ATTACH_FS */
+
#ifdef AFS_PTHREAD_ENV
assert(pthread_cond_broadcast(&vol_put_volume_cond) == 0);
#else /* AFS_PTHREAD_ENV */
VReleaseVolumeHandles_r(vp);
}
+/**
+ * force a volume offline.
+ *
+ * @param[in] vp volume object pointer
+ *
+ * @see VForceOffline_r
+ */
void
VForceOffline(Volume * vp)
{
{
VolumeId volume;
struct DiskPartition *tpartp;
- int notifyServer, useDone;
+ int notifyServer, useDone = FSYNC_VOL_ON;
*ec = 0; /* always "succeeds" */
if (programType == volumeUtility) {
notifyServer = vp->needsPutBack;
- useDone = (V_destroyMe(vp) == DESTROY_ME);
+ if (V_destroyMe(vp) == DESTROY_ME)
+ useDone = FSYNC_VOL_DONE;
+#ifdef AFS_DEMAND_ATTACH_FS
+ else if (!V_blessed(vp) || !V_inService(vp))
+ useDone = FSYNC_VOL_LEAVE_OFF;
+#endif
}
tpartp = vp->partition;
volume = V_id(vp);
* would be two instances of the same volume, one of them bogus,
* which the file server would attempt to put on line
*/
- if (useDone) {
- /* don't put online */
- FSYNC_VolOp(volume, tpartp->name, FSYNC_VOL_DONE, 0, NULL);
- } else {
- /* fs can use it again */
- FSYNC_VolOp(volume, tpartp->name, FSYNC_VOL_ON, 0, NULL);
-
- /* XXX this code path is only hit by volume utilities, thus
- * V_BreakVolumeCallbacks will always be NULL. if we really
- * want to break callbacks in this path we need to use FSYNC_VolOp() */
+ FSYNC_VolOp(volume, tpartp->name, useDone, 0, NULL);
+ /* XXX this code path is only hit by volume utilities, thus
+ * V_BreakVolumeCallbacks will always be NULL. if we really
+ * want to break callbacks in this path we need to use FSYNC_VolOp() */
#ifdef notdef
- /* Dettaching it so break all callbacks on it */
- if (V_BreakVolumeCallbacks) {
- Log("volume %u detached; breaking all call backs\n", volume);
- (*V_BreakVolumeCallbacks) (volume);
- }
-#endif
+ /* Detaching it, so break all callbacks on it */
+ if (V_BreakVolumeCallbacks) {
+ Log("volume %u detached; breaking all call backs\n", volume);
+ (*V_BreakVolumeCallbacks) (volume);
}
+#endif
}
#endif /* FSSYNC_BUILD_CLIENT */
}
*
* VOL_STATE_GOING_OFFLINE
* VOL_STATE_SHUTTING_DOWN
- * IsErrorState(V_attachState(vp))
- * IsExclusiveState(V_attachState(vp))
+ * VIsErrorState(V_attachState(vp))
+ * VIsExclusiveState(V_attachState(vp))
*/
VCreateReservation_r(vp);
/* if nothing changed state to error or salvaging,
* drop state to unattached */
- if (!IsErrorState(V_attachState(vp))) {
+ if (!VIsErrorState(V_attachState(vp))) {
VChangeState_r(vp, VOL_STATE_UNATTACHED);
}
VCancelReservation_r(vp);
* from free()ing the Volume struct during an async i/o op */
/* register with the async volume op ref counter */
-static void
-VCreateReservation_r(Volume * vp)
-{
- vp->nWaiters++;
-}
+/* VCreateReservation_r moved into inline code header because it
+ * is now needed in vnode.c -- tkeiser 11/20/2007
+ */
-/* unregister with the async volume op ref counter */
-static void
+/**
+ * decrement volume-package internal refcount.
+ *
+ * @param vp volume object pointer
+ *
+ * @internal volume package internal use only
+ *
+ * @pre
+ * @arg VOL_LOCK is held
+ * @arg lightweight refcount held
+ *
+ * @post volume waiters refcount is decremented; volume may
+ * have been deallocated/shutdown/offlined/salvaged/
+ * whatever during the process
+ *
+ * @warning once you have tossed your last reference (you can acquire
+ * lightweight refs recursively) it is NOT SAFE to reference
+ * a volume object pointer ever again
+ *
+ * @see VCreateReservation_r
+ *
+ * @note DEMAND_ATTACH_FS only
+ */
+void
VCancelReservation_r(Volume * vp)
{
assert(--vp->nWaiters >= 0);
/***************************************************/
#ifdef AFS_DEMAND_ATTACH_FS
+/**
+ * register a volume operation on a given volume.
+ *
+ * @param[in] vp volume object
+ * @param[in] vopinfo volume operation info object
+ *
+ * @pre VOL_LOCK is held
+ *
+ * @post volume operation info object attached to volume object.
+ * volume operation statistics updated.
+ *
+ * @note by "attached" we mean a copy of the passed in object is made
+ *
+ * @internal volume package internal use only
+ */
int
VRegisterVolOp_r(Volume * vp, FSSYNC_VolOp_info * vopinfo)
{
return 0;
}
+/**
+ * deregister the volume operation attached to this volume.
+ *
+ * @param[in] vp volume object pointer
+ *
+ * @pre VOL_LOCK is held
+ *
+ * @post the volume operation info object is detached from the volume object
+ *
+ * @internal volume package internal use only
+ */
int
-VDeregisterVolOp_r(Volume * vp, FSSYNC_VolOp_info * vopinfo)
+VDeregisterVolOp_r(Volume * vp)
{
if (vp->pending_vol_op) {
free(vp->pending_vol_op);
}
#endif /* AFS_DEMAND_ATTACH_FS */
+/**
+ * determine whether it is safe to leave a volume online during
+ * the volume operation described by the vopinfo object.
+ *
+ * @param[in] vp volume object
+ * @param[in] vopinfo volume operation info object
+ *
+ * @return whether it is safe to leave volume online
+ * @retval 0 it is NOT SAFE to leave the volume online
+ * @retval 1 it is safe to leave the volume online during the operation
+ *
+ * @pre
+ * @arg VOL_LOCK is held
+ * @arg disk header attached to vp (heavyweight ref on vp will guarantee
+ * this condition is met)
+ *
+ * @internal volume package internal use only
+ */
int
VVolOpLeaveOnline_r(Volume * vp, FSSYNC_VolOp_info * vopinfo)
{
vopinfo->com.reason == V_DUMP))));
}
+/**
+ * determine whether VBUSY should be set during this volume operation.
+ *
+ * @param[in] vp volume object
+ * @param[in] vopinfo volume operation info object
+ *
+ * @return whether VBUSY should be set
+ * @retval 0 VBUSY does NOT need to be set
+ * @retval 1 VBUSY SHOULD be set
+ *
+ * @pre VOL_LOCK is held
+ *
+ * @internal volume package internal use only
+ */
int
VVolOpSetVBusy_r(Volume * vp, FSSYNC_VolOp_info * vopinfo)
{
/* online salvager routines */
/***************************************************/
#if defined(AFS_DEMAND_ATTACH_FS)
-#define SALVAGE_PRIO_UPDATE_INTERVAL 3 /* number of seconds between prio updates */
-#define SALVAGE_COUNT_MAX 16 /* number of online salvages we
- * allow before moving the volume
- * into a permanent error state
+#define SALVAGE_PRIO_UPDATE_INTERVAL 3 /**< number of seconds between prio updates */
+#define SALVAGE_COUNT_MAX 16 /**< number of online salvages we
+ * allow before moving the volume
+ * into a permanent error state
*
- * once this threshold is reached,
- * the operator will have to manually
- * issue a 'bos salvage' to bring
- * the volume back online
+ * once this threshold is reached,
+ * the operator will have to manually
+ * issue a 'bos salvage' to bring
+ * the volume back online
*/
-/* check to see if we should salvage this volume
- * returns 1 if salvage scheduled, 0 otherwise */
+/**
+ * check whether a salvage needs to be performed on this volume.
+ *
+ * @param[in] vp pointer to volume object
+ *
+ * @return status code
+ * @retval 0 no salvage scheduled
+ * @retval 1 a salvage has been scheduled with the salvageserver
+ *
+ * @pre VOL_LOCK is held
+ *
+ * @post if salvage request flag is set and nUsers and nWaiters are zero,
+ * then a salvage will be requested
+ *
+ * @note this is one of the event handlers called by VCancelReservation_r
+ *
+ * @see VCancelReservation_r
+ *
+ * @internal volume package internal use only.
+ */
static int
VCheckSalvage(register Volume * vp)
{
return ret;
}
-/*
- * request that a salvage be performed once
- * ref counts reach zero
+/**
+ * request volume salvage.
+ *
+ * @param[out] ec computed client error code
+ * @param[in] vp volume object pointer
+ * @param[in] reason reason code (passed to salvageserver via SALVSYNC)
+ * @param[in] flags see flags note below
+ *
+ * @note flags:
+ * VOL_SALVAGE_INVALIDATE_HEADER causes volume header cache entry
+ * to be invalidated.
+ *
+ * @pre VOL_LOCK is held.
+ *
+ * @post volume state is changed.
+ * for fileserver, salvage will be requested once refcount reaches zero.
+ *
+ * @return operation status code
+ * @retval 0 volume salvage will occur
+ * @retval 1 volume salvage could not be scheduled
+ *
+ * @note DAFS fileserver only
+ *
+ * @note this call does not synchronously schedule a volume salvage. rather,
+ * it sets volume state so that when volume refcounts reach zero, a
+ * volume salvage will occur. by "refcounts", we mean both nUsers and
+ * nWaiters must be zero.
+ *
+ * @internal volume package internal use only.
*/
int
-VRequestSalvage_r(Volume * vp, int reason, int flags)
+VRequestSalvage_r(Error * ec, Volume * vp, int reason, int flags)
{
-#ifdef SALVSYNC_BUILD_CLIENT
- if (programType != fileServer)
+ int code = 0;
+ /*
+ * for DAFS volume utilities, transition to error state
+ * (at some point in the future, we should consider
+ * making volser talk to salsrv)
+ */
+ if (programType != fileServer) {
+ VChangeState_r(vp, VOL_STATE_ERROR);
+ *ec = VSALVAGE;
return 1;
+ }
if (!vp->salvage.requested) {
vp->salvage.requested = 1;
vp->salvage.reason = reason;
vp->stats.last_salvage = FT_ApproxTime();
if (flags & VOL_SALVAGE_INVALIDATE_HEADER) {
+ /* XXX this should likely be changed to FreeVolumeHeader() */
ReleaseVolumeHeader(vp->header);
}
if (vp->stats.salvages < SALVAGE_COUNT_MAX) {
VChangeState_r(vp, VOL_STATE_SALVAGING);
+ *ec = VSALVAGING;
} else {
Log("VRequestSalvage: volume %u online salvaged too many times; forced offline.\n", vp->hashid);
VChangeState_r(vp, VOL_STATE_ERROR);
+ *ec = VSALVAGE;
+ code = 1;
}
}
-#endif /* SALVSYNC_BUILD_CLIENT */
- return 0;
+ return code;
}
-/*
- * update salvage priority
+/**
+ * update salvageserver scheduling priority for a volume.
+ *
+ * @param[in] vp pointer to volume object
+ *
+ * @return operation status
+ * @retval 0 success
+ * @retval 1 request denied, or SALVSYNC communications failure
+ *
+ * @pre VOL_LOCK is held.
+ *
+ * @post in-core salvage priority counter is incremented. if at least
+ * SALVAGE_PRIO_UPDATE_INTERVAL seconds have elapsed since the
+ * last SALVSYNC_RAISEPRIO request, we contact the salvageserver
+ * to update its priority queue. if no salvage is scheduled,
+ * this function is a no-op.
+ *
+ * @note DAFS fileserver only
+ *
+ * @note this should be called whenever a VGetVolume fails due to a
+ * pending salvage request
+ *
+ * @todo should set exclusive state and drop glock around salvsync call
+ *
+ * @internal volume package internal use only.
*/
static int
VUpdateSalvagePriority_r(Volume * vp)
}
-/*
- * schedule a salvage with the salvage server
+/**
+ * schedule a salvage with the salvage server.
+ *
+ * @param[in] vp pointer to volume object
+ *
+ * @return operation status
+ * @retval 0 salvage scheduled successfully
+ * @retval 1 salvage not scheduled, or SALVSYNC com error
+ *
+ * @pre
+ * @arg VOL_LOCK is held.
+ * @arg nUsers and nWaiters should be zero.
+ *
+ * @post salvageserver is sent a salvage request
+ *
+ * @note DAFS fileserver only
+ *
+ * @internal volume package internal use only.
*/
static int
VScheduleSalvage_r(Volume * vp)
*
* set the volume to an exclusive state and drop the lock
* around the SALVSYNC call
+ *
+ * note that we do NOT acquire a reservation here -- doing so
+ * could result in unbounded recursion
*/
strlcpy(partName, VPartitionPath(vp->partition), sizeof(partName));
state_save = VChangeState_r(vp, VOL_STATE_SALVSYNC_REQ);
return ret;
}
-/*
- * cancel a scheduled salvage operation
+/**
+ * ask salvageserver to cancel a scheduled salvage operation.
+ *
+ * @param[in] vp pointer to volume object
+ * @param[in] reason SALVSYNC protocol reason code
+ *
+ * @return operation status
+ * @retval 0 success
+ * @retval 1 request failed
+ *
+ * @pre VOL_LOCK is held.
+ *
+ * @post salvageserver is sent a request to cancel the volume salvage
+ *
+ * @todo should set exclusive state and drop glock around salvsync call
+ *
+ * @internal volume package internal use only.
*/
static int
VCancelSalvage_r(Volume * vp, int reason)
return ret;
}
-/* This must be called by any volume utility which needs to run while the
- file server is also running. This is separated from VInitVolumePackage so
- that a utility can fork--and each of the children can independently
- initialize communication with the file server */
+
#ifdef SALVSYNC_BUILD_CLIENT
+/**
+ * connect to the salvageserver SYNC service.
+ *
+ * @return operation status
+ * @retval 0 failure
+ * @retval 1 success
+ *
+ * @post connection to salvageserver SYNC service established
+ *
+ * @see VConnectSALV_r
+ * @see VDisconnectSALV
+ * @see VReconnectSALV
+ */
int
VConnectSALV(void)
{
return retVal;
}
+/**
+ * connect to the salvageserver SYNC service.
+ *
+ * @return operation status
+ * @retval 0 failure
+ * @retval 1 success
+ *
+ * @pre VOL_LOCK is held.
+ *
+ * @post connection to salvageserver SYNC service established
+ *
+ * @see VConnectSALV
+ * @see VDisconnectSALV_r
+ * @see VReconnectSALV_r
+ * @see SALVSYNC_clientInit
+ *
+ * @internal volume package internal use only.
+ */
int
VConnectSALV_r(void)
{
return SALVSYNC_clientInit();
}
+/**
+ * disconnect from the salvageserver SYNC service.
+ *
+ * @return operation status
+ * @retval 0 success
+ *
+ * @pre client should have a live connection to the salvageserver
+ *
+ * @post connection to salvageserver SYNC service destroyed
+ *
+ * @see VDisconnectSALV_r
+ * @see VConnectSALV
+ * @see VReconnectSALV
+ */
int
VDisconnectSALV(void)
{
return retVal;
}
+/**
+ * disconnect from the salvageserver SYNC service.
+ *
+ * @return operation status
+ * @retval 0 success
+ *
+ * @pre
+ * @arg VOL_LOCK is held.
+ * @arg client should have a live connection to the salvageserver.
+ *
+ * @post connection to salvageserver SYNC service destroyed
+ *
+ * @see VDisconnectSALV
+ * @see VConnectSALV_r
+ * @see VReconnectSALV_r
+ * @see SALVSYNC_clientFinis
+ *
+ * @internal volume package internal use only.
+ */
int
VDisconnectSALV_r(void)
{
return SALVSYNC_clientFinis();
}
+/**
+ * disconnect and then re-connect to the salvageserver SYNC service.
+ *
+ * @return operation status
+ * @retval 0 failure
+ * @retval 1 success
+ *
+ * @pre client should have a live connection to the salvageserver
+ *
+ * @post old connection is dropped, and a new one is established
+ *
+ * @see VConnectSALV
+ * @see VDisconnectSALV
+ * @see VReconnectSALV_r
+ */
int
VReconnectSALV(void)
{
return retVal;
}
+/**
+ * disconnect and then re-connect to the salvageserver SYNC service.
+ *
+ * @return operation status
+ * @retval 0 failure
+ * @retval 1 success
+ *
+ * @pre
+ * @arg VOL_LOCK is held.
+ * @arg client should have a live connection to the salvageserver.
+ *
+ * @post old connection is dropped, and a new one is established
+ *
+ * @see VConnectSALV_r
+ * @see VDisconnectSALV
+ * @see VReconnectSALV
+ * @see SALVSYNC_clientReconnect
+ *
+ * @internal volume package internal use only.
+ */
int
VReconnectSALV_r(void)
{
that a utility can fork--and each of the children can independently
initialize communication with the file server */
#ifdef FSSYNC_BUILD_CLIENT
+/**
+ * connect to the fileserver SYNC service.
+ *
+ * @return operation status
+ * @retval 0 failure
+ * @retval 1 success
+ *
+ * @pre
+ * @arg VInit must equal 2.
+ * @arg Program Type must not be fileserver or salvager.
+ *
+ * @post connection to fileserver SYNC service established
+ *
+ * @see VConnectFS_r
+ * @see VDisconnectFS
+ * @see VChildProcReconnectFS
+ */
int
VConnectFS(void)
{
return retVal;
}
+/**
+ * connect to the fileserver SYNC service.
+ *
+ * @return operation status
+ * @retval 0 failure
+ * @retval 1 success
+ *
+ * @pre
+ * @arg VInit must equal 2.
+ * @arg Program Type must not be fileserver or salvager.
+ * @arg VOL_LOCK is held.
+ *
+ * @post connection to fileserver SYNC service established
+ *
+ * @see VConnectFS
+ * @see VDisconnectFS_r
+ * @see VChildProcReconnectFS_r
+ *
+ * @internal volume package internal use only.
+ */
int
VConnectFS_r(void)
{
return rc;
}
+/**
+ * disconnect from the fileserver SYNC service.
+ *
+ * @pre
+ * @arg client should have a live connection to the fileserver.
+ * @arg VOL_LOCK is held.
+ * @arg Program Type must not be fileserver or salvager.
+ *
+ * @post connection to fileserver SYNC service destroyed
+ *
+ * @see VDisconnectFS
+ * @see VConnectFS_r
+ * @see VChildProcReconnectFS_r
+ *
+ * @internal volume package internal use only.
+ */
void
VDisconnectFS_r(void)
{
VInit = 2;
}
-void
-VDisconnectFS(void)
-{
+/**
+ * disconnect from the fileserver SYNC service.
+ *
+ * @pre
+ * @arg client should have a live connection to the fileserver.
+ * @arg Program Type must not be fileserver or salvager.
+ *
+ * @post connection to fileserver SYNC service destroyed
+ *
+ * @see VDisconnectFS_r
+ * @see VConnectFS
+ * @see VChildProcReconnectFS
+ */
+void
+VDisconnectFS(void)
+{
VOL_LOCK;
VDisconnectFS_r();
VOL_UNLOCK;
}
+/**
+ * connect to the fileserver SYNC service from a child process following a fork.
+ *
+ * @return operation status
+ * @retval 0 failure
+ * @retval 1 success
+ *
+ * @pre
+ * @arg VOL_LOCK is held.
+ * @arg current FSYNC handle is shared with a parent process
+ *
+ * @post current FSYNC handle is discarded and a new connection to the
+ * fileserver SYNC service is established
+ *
+ * @see VChildProcReconnectFS
+ * @see VConnectFS_r
+ * @see VDisconnectFS_r
+ *
+ * @internal volume package internal use only.
+ */
int
VChildProcReconnectFS_r(void)
{
return FSYNC_clientChildProcReconnect();
}
+/**
+ * connect to the fileserver SYNC service from a child process following a fork.
+ *
+ * @return operation status
+ * @retval 0 failure
+ * @retval 1 success
+ *
+ * @pre current FSYNC handle is shared with a parent process
+ *
+ * @post current FSYNC handle is discarded and a new connection to the
+ * fileserver SYNC service is established
+ *
+ * @see VChildProcReconnectFS_r
+ * @see VConnectFS
+ * @see VDisconnectFS
+ */
int
VChildProcReconnectFS(void)
{
VOL_UNLOCK;
sleep(2);
VOL_LOCK;
-#else /* AFS_PTHREAD_ENV */
+#else /* !AFS_PTHREAD_ENV */
IOMGR_Sleep(2);
-#endif /* AFS_DEMAND_ATTACH_FS */
+#endif /* !AFS_PTHREAD_ENV */
}
}
}
VGetBitmap_r(ec, vp, i);
if (*ec) {
#ifdef AFS_DEMAND_ATTACH_FS
- VRequestSalvage_r(vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
- *ec = VSALVAGING;
+ VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
#else /* AFS_DEMAND_ATTACH_FS */
DeleteVolumeFromHashTable(vp);
vp->shuttingDown = 1; /* Let who has it free it. */
/***************************************************/
-/* demand attach fs state machine routines */
-/***************************************************/
-
-#ifdef AFS_DEMAND_ATTACH_FS
-/* wait for the volume to change states */
-static void
-VWaitStateChange_r(Volume * vp)
-{
- VolState state_save = V_attachState(vp);
-
- assert(vp->nWaiters || vp->nUsers);
- do {
- assert(pthread_cond_wait(&V_attachCV(vp), &vol_glock_mutex) == 0);
- } while (V_attachState(vp) == state_save);
- assert(V_attachState(vp) != VOL_STATE_FREED);
-}
-
-/* wait for blocking ops to end */
-static void
-VWaitExclusiveState_r(Volume * vp)
-{
- assert(vp->nWaiters || vp->nUsers);
- while (IsExclusiveState(V_attachState(vp))) {
- assert(pthread_cond_wait(&V_attachCV(vp), &vol_glock_mutex) == 0);
- }
- assert(V_attachState(vp) != VOL_STATE_FREED);
-}
-
-/* change state, and notify other threads,
- * return previous state to caller */
-VolState
-VChangeState_r(Volume * vp, VolState new_state)
-{
- VolState old_state = V_attachState(vp);
-
- /* XXX profiling need to make sure these counters
- * don't kill performance... */
- VStats.state_levels[old_state]--;
- VStats.state_levels[new_state]++;
-
- V_attachState(vp) = new_state;
- assert(pthread_cond_broadcast(&V_attachCV(vp)) == 0);
- return old_state;
-}
-
-/* tells caller whether or not the current state requires
- * exclusive access without holding glock */
-static int
-IsExclusiveState(VolState state)
-{
- switch (state) {
- case VOL_STATE_UPDATING:
- case VOL_STATE_ATTACHING:
- case VOL_STATE_GET_BITMAP:
- case VOL_STATE_HDR_LOADING:
- case VOL_STATE_HDR_ATTACHING:
- case VOL_STATE_OFFLINING:
- case VOL_STATE_DETACHING:
- return 1;
- }
- return 0;
-}
-
-/* tell caller whether V_attachState is an error condition */
-static int
-IsErrorState(VolState state)
-{
- switch (state) {
- case VOL_STATE_ERROR:
- case VOL_STATE_SALVAGING:
- return 1;
- }
- return 0;
-}
-
-/* tell caller whether V_attachState is valid */
-static int
-IsValidState(VolState state)
-{
- if ((state >= 0) &&
- (state < VOL_STATE_COUNT) &&
- (state != VOL_STATE_FREED)) {
- return 1;
- }
- return 0;
-}
-#endif /* AFS_DEMAND_ATTACH_FS */
-
-
-/***************************************************/
/* Volume Path and Volume Number utility routines */
/***************************************************/
+/**
+ * find the first occurrence of a volume header file and return the path.
+ *
+ * @param[out] ec outbound error code
+ * @param[in] volumeId volume id to find
+ * @param[out] partitionp pointer to disk partition path string
+ * @param[out] namep pointer to volume header file name string
+ *
+ * @post path to first occurrence of volume header is returned in partitionp
+ * and namep, or ec is set accordingly.
+ *
+ * @warning this function is NOT re-entrant -- partitionp and namep point to
+ * static data segments
+ *
+ * @note if a volume utility inadvertently leaves behind a stale volume header
+ * on a vice partition, it is possible for callers to get the wrong one,
+ * depending on the order of the disk partition linked list.
+ *
+ * @internal volume package internal use only.
+ */
static void
GetVolumePath(Error * ec, VolId volumeId, char **partitionp, char **namep)
{
}
}
+/**
+ * extract a volume number from a volume header filename string.
+ *
+ * @param[in] name volume header filename string
+ *
+ * @return volume number
+ *
+ * @note the string must be of the form VFORMAT. the only permissible
+ * deviation is a leading '/' character.
+ *
+ * @see VFORMAT
+ */
int
VolumeNumber(char *name)
{
return atoi(name + 1);
}
+/**
+ * compute the volume header filename.
+ *
+ * @param[in] volumeId
+ *
+ * @return volume header filename
+ *
+ * @post volume header filename string is constructed
+ *
+ * @warning this function is NOT re-entrant -- the returned string is
+ * stored in a static char array. see VolumeExternalName_r
+ * for a re-entrant equivalent.
+ *
+ * @see VolumeExternalName_r
+ *
+ * @deprecated due to the above re-entrancy warning, this interface should
+ * be considered deprecated. Please use VolumeExternalName_r
+ * in its stead.
+ */
char *
VolumeExternalName(VolumeId volumeId)
{
return name;
}
+/**
+ * compute the volume header filename.
+ *
+ * @param[in] volumeId
+ * @param[inout] name array in which to store filename
+ * @param[in] len length of name array
+ *
+ * @return result code from afs_snprintf
+ *
+ * @see VolumeExternalName
+ * @see afs_snprintf
+ *
+ * @note re-entrant equivalent of VolumeExternalName
+ *
+ * @internal volume package internal use only.
+ */
static int
VolumeExternalName_r(VolumeId volumeId, char * name, size_t len)
{
* candidates for soft detachment. this queue is
* unsorted
*/
-#define VLRU_GENERATIONS 3 /* number of generations in VLRU */
-#define VLRU_QUEUES 5 /* total number of VLRU queues */
+#define VLRU_GENERATIONS 3 /**< number of generations in VLRU */
+#define VLRU_QUEUES 5 /**< total number of VLRU queues */
+
+/**
+ * definition of a VLRU queue.
+ */
struct VLRU_q {
volatile struct rx_queue q;
volatile int len;
volatile int busy;
pthread_cond_t cv;
};
+
+/**
+ * main VLRU data structure.
+ */
struct VLRU {
- struct VLRU_q q[VLRU_QUEUES];
+ struct VLRU_q q[VLRU_QUEUES]; /**< VLRU queues */
/* VLRU config */
- afs_uint32 promotion_interval[VLRU_GENERATIONS-1]; /* interval between promotions */
- afs_uint32 scan_interval[VLRU_GENERATIONS+1]; /* interval between scans for candidates */
+ /** time interval (in seconds) between promotion passes for
+ * each young generation queue. */
+ afs_uint32 promotion_interval[VLRU_GENERATIONS-1];
- /* state */
- int next_idx;
- afs_uint32 last_promotion[VLRU_GENERATIONS-1]; /* timestamp of last promotion scan */
- afs_uint32 last_scan[VLRU_GENERATIONS+1]; /* timestamp of last detach scan */
+ /** time interval (in seconds) between soft detach candidate
+ * scans for each generation queue.
+ *
+ * scan_interval[VLRU_QUEUE_CANDIDATE] defines how frequently
+ * we perform a soft detach pass. */
+ afs_uint32 scan_interval[VLRU_GENERATIONS+1];
- int scanner_state; /* state of scanner thread */
- pthread_cond_t cv; /* state transition CV */
+ /* scheduler state */
+ int next_idx; /**< next queue to receive attention */
+ afs_uint32 last_promotion[VLRU_GENERATIONS-1]; /**< timestamp of last promotion scan */
+ afs_uint32 last_scan[VLRU_GENERATIONS+1]; /**< timestamp of last detach scan */
+
+ int scanner_state; /**< state of scanner thread */
+ pthread_cond_t cv; /**< state transition CV */
};
+/** global VLRU state */
static struct VLRU volume_LRU;
-/* valid scanner states */
-#define VLRU_SCANNER_STATE_OFFLINE 0
-#define VLRU_SCANNER_STATE_ONLINE 1
-#define VLRU_SCANNER_STATE_SHUTTING_DOWN 2
-#define VLRU_SCANNER_STATE_PAUSING 3
-#define VLRU_SCANNER_STATE_PAUSED 4
+/**
+ * defined states for VLRU scanner thread.
+ */
+typedef enum {
+ VLRU_SCANNER_STATE_OFFLINE = 0, /**< vlru scanner thread is offline */
+ VLRU_SCANNER_STATE_ONLINE = 1, /**< vlru scanner thread is online */
+ VLRU_SCANNER_STATE_SHUTTING_DOWN = 2, /**< vlru scanner thread is shutting down */
+ VLRU_SCANNER_STATE_PAUSING = 3, /**< vlru scanner thread is getting ready to pause */
+ VLRU_SCANNER_STATE_PAUSED = 4 /**< vlru scanner thread is paused */
+} vlru_thread_state_t;
/* vlru disk data header stuff */
-#define VLRU_DISK_MAGIC 0x7a8b9cad
-#define VLRU_DISK_VERSION 1
+#define VLRU_DISK_MAGIC 0x7a8b9cad /**< vlru disk entry magic number */
+#define VLRU_DISK_VERSION 1 /**< vlru disk entry version number */
-/* vlru default expiration time (for eventual fs state serialization of vlru data) */
+/** vlru default expiration time (for eventual fs state serialization of vlru data) */
#define VLRU_DUMP_EXPIRATION_TIME (60*60*24*7) /* expire vlru data after 1 week */
+/** minimum volume inactivity (in seconds) before a volume becomes eligible for
+ * soft detachment. */
static afs_uint32 VLRU_offline_thresh = VLRU_DEFAULT_OFFLINE_THRESH;
+
+/** time interval (in seconds) between VLRU scanner thread soft detach passes. */
static afs_uint32 VLRU_offline_interval = VLRU_DEFAULT_OFFLINE_INTERVAL;
+
+/** maximum number of volumes to soft detach in a VLRU soft detach pass. */
static afs_uint32 VLRU_offline_max = VLRU_DEFAULT_OFFLINE_MAX;
+
+/** VLRU control flag. non-zero value implies VLRU subsystem is activated. */
static afs_uint32 VLRU_enabled = 1;
/* queue synchronization routines */
static void VLRU_EndExclusive_r(struct VLRU_q * q);
static void VLRU_Wait_r(struct VLRU_q * q);
-/* set the VLRU parameters
+/**
+ * set VLRU subsystem tunable parameters.
+ *
+ * @param[in] option tunable option to modify
+ * @param[in] val new value for tunable parameter
*
- * valid options are:
- * VLRU_SET_THRESH -- set the period of inactivity after
- * which volumes are eligible for being detached
- * VLRU_SET_INTERVAL -- the time interval between calls
- * to the volume LRU "garbage collector"
- * VLRU_SET_MAX -- the max number of volumes to deallocate
- * in one GC pass
+ * @pre @c VInitVolumePackage has not yet been called.
+ *
+ * @post tunable parameter is modified
+ *
+ * @note DAFS only
+ *
+ * @note valid option parameters are:
+ * @arg @c VLRU_SET_THRESH
+ * set the period of inactivity after which
+ * volumes are eligible for soft detachment
+ * @arg @c VLRU_SET_INTERVAL
+ * set the time interval between calls
+ * to the volume LRU "garbage collector"
+ * @arg @c VLRU_SET_MAX
+ * set the max number of volumes to deallocate
+ * in one GC pass
*/
void
VLRU_SetOptions(int option, afs_uint32 val)
VLRU_ComputeConstants();
}
-/* compute the VLRU internal timing parameters based upon the user's inputs */
+/**
+ * compute VLRU internal timing parameters.
+ *
+ * @post VLRU scanner thread internal timing parameters are computed
+ *
+ * @note computes internal timing parameters based upon user-modifiable
+ * tunable parameters.
+ *
+ * @note DAFS only
+ *
+ * @internal volume package internal use only.
+ */
static void
VLRU_ComputeConstants(void)
{
}
}
-/* initialize VLRU */
+/**
+ * initialize VLRU subsystem.
+ *
+ * @pre this function has not yet been called
+ *
+ * @post VLRU subsystem is initialized and VLRU scanner thread is starting
+ *
+ * @note DAFS only
+ *
+ * @internal volume package internal use only.
+ */
static void
VInitVLRU(void)
{
}
}
-/* initialize LRU support for a volume */
+/**
+ * initialize the VLRU-related fields of a newly allocated volume object.
+ *
+ * @param[in] vp pointer to volume object
+ *
+ * @pre
+ * @arg @c VOL_LOCK is held.
+ * @arg volume object is not on a VLRU queue.
+ *
+ * @post VLRU fields are initialized to indicate that volume object is not
+ * currently registered with the VLRU subsystem
+ *
+ * @note DAFS only
+ *
+ * @internal volume package internal use only.
+ */
static void
VLRU_Init_Node_r(volatile Volume * vp)
{
vp->vlru.idx = VLRU_QUEUE_INVALID;
}
-/* add volume to VLRU
- * now supports adding to queues other
- * than new for vlru state restore
- * caller MUST hold a ref count on vp */
+/**
+ * add a volume object to a VLRU queue.
+ *
+ * @param[in] vp pointer to volume object
+ *
+ * @pre
+ * @arg @c VOL_LOCK is held.
+ * @arg caller MUST hold a lightweight ref on @p vp.
+ * @arg caller MUST NOT hold exclusive ownership of the VLRU queue.
+ *
+ * @post the volume object is added to the appropriate VLRU queue
+ *
+ * @note if @c vp->vlru.idx contains the index of a valid VLRU queue,
+ * then the volume is added to that queue. Otherwise, the value
+ * @c VLRU_QUEUE_NEW is stored into @c vp->vlru.idx and the
+ * volume is added to the NEW generation queue.
+ *
+ * @note @c VOL_LOCK may be dropped internally
+ *
+ * @note Volume state is temporarily set to @c VOL_STATE_VLRU_ADD
+ * during the add operation, and is restored to the previous
+ * state prior to return.
+ *
+ * @note DAFS only
+ *
+ * @internal volume package internal use only.
+ */
static void
VLRU_Add_r(volatile Volume * vp)
{
int idx;
+ VolState state_save;
if (!VLRU_enabled)
return;
if (queue_IsOnQueue(&vp->vlru))
return;
- VLRU_Wait_r(&volume_LRU.q[VLRU_QUEUE_NEW]);
+ state_save = VChangeState_r(vp, VOL_STATE_VLRU_ADD);
+
+ idx = vp->vlru.idx;
+ if ((idx < 0) || (idx >= VLRU_QUEUE_INVALID)) {
+ idx = VLRU_QUEUE_NEW;
+ }
+
+ VLRU_Wait_r(&volume_LRU.q[idx]);
/* repeat check since VLRU_Wait_r may have dropped
* the glock */
if (queue_IsNotOnQueue(&vp->vlru)) {
- idx = vp->vlru.idx;
- if ((idx < 0) || (idx >= VLRU_QUEUE_INVALID)) {
- idx = vp->vlru.idx = VLRU_QUEUE_NEW;
- }
+ vp->vlru.idx = idx;
queue_Prepend(&volume_LRU.q[idx], &vp->vlru);
volume_LRU.q[idx].len++;
V_attachFlags(vp) |= VOL_ON_VLRU;
vp->stats.last_promote = FT_ApproxTime();
}
+
+ VChangeState_r(vp, state_save);
}
-/* delete volume from VLRU
- * caller MUST hold a ref count on vp */
+/**
+ * delete a volume object from a VLRU queue.
+ *
+ * @param[in] vp pointer to volume object
+ *
+ * @pre
+ * @arg @c VOL_LOCK is held.
+ * @arg caller MUST hold a lightweight ref on @p vp.
+ * @arg caller MUST NOT hold exclusive ownership of the VLRU queue.
+ *
+ * @post volume object is removed from the VLRU queue
+ *
+ * @note @c VOL_LOCK may be dropped internally
+ *
+ * @note DAFS only
+ *
+ * @todo We should probably set volume state to something exclusive
+ *       (as @c VLRU_Add_r does) prior to dropping @c VOL_LOCK.
+ *
+ * @internal volume package internal use only.
+ */
static void
VLRU_Delete_r(volatile Volume * vp)
{
V_attachFlags(vp) &= ~(VOL_ON_VLRU);
}
-/* signal that volume was just accessed.
- * caller MUST hold a ref count on vp */
+/**
+ * tell the VLRU subsystem that a volume was just accessed.
+ *
+ * @param[in] vp pointer to volume object
+ *
+ * @pre
+ * @arg @c VOL_LOCK is held
+ * @arg caller MUST hold a lightweight ref on @p vp
+ * @arg caller MUST NOT hold exclusive ownership of any VLRU queue
+ *
+ * @post volume VLRU access statistics are updated. If the volume was on
+ * the VLRU soft detach candidate queue, it is moved to the NEW
+ * generation queue.
+ *
+ * @note @c VOL_LOCK may be dropped internally
+ *
+ * @note DAFS only
+ *
+ * @internal volume package internal use only.
+ */
static void
VLRU_UpdateAccess_r(volatile Volume * vp)
{
}
}
-/* switch a volume between two VLRU queues */
+/**
+ * switch a volume between two VLRU queues.
+ *
+ * @param[in] vp pointer to volume object
+ * @param[in] new_idx index of VLRU queue onto which the volume will be moved
+ * @param[in] append controls whether the volume will be appended or
+ * prepended to the queue. A nonzero value means it will
+ * be appended; zero means it will be prepended.
+ *
+ * @pre The new (and old, if applicable) queue(s) must either be owned
+ * exclusively by the calling thread for asynchronous manipulation,
+ * or the queue(s) must be quiescent and VOL_LOCK must be held.
+ * Please see VLRU_BeginExclusive_r, VLRU_EndExclusive_r and VLRU_Wait_r
+ * for further details of the queue asynchronous processing mechanism.
+ *
+ * @post If the volume object was already on a VLRU queue, it is
+ * removed from the queue. Depending on the value of the append
+ * parameter, the volume object is either appended or prepended
+ * to the VLRU queue referenced by the new_idx parameter.
+ *
+ * @note DAFS only
+ *
+ * @see VLRU_BeginExclusive_r
+ * @see VLRU_EndExclusive_r
+ * @see VLRU_Wait_r
+ *
+ * @internal volume package internal use only.
+ */
static void
VLRU_SwitchQueues(volatile Volume * vp, int new_idx, int append)
{
vp->vlru.idx = new_idx;
}
-/* VLRU GC thread */
+/**
+ * VLRU background thread.
+ *
+ * The VLRU Scanner Thread is responsible for periodically scanning through
+ * each VLRU queue looking for volumes which should be moved to another
+ * queue, or soft detached.
+ *
+ * @param[in] args unused thread arguments parameter
+ *
+ * @return unused thread return value
+ * @retval NULL always
+ *
+ * @internal volume package internal use only.
+ */
static void *
VLRU_ScannerThread(void * args)
{
volume_LRU.scanner_state = VLRU_SCANNER_STATE_PAUSED;
assert(pthread_cond_broadcast(&volume_LRU.cv) == 0);
do {
- assert(pthread_cond_wait(&volume_LRU.cv, &vol_glock_mutex) == 0);
+ VOL_CV_WAIT(&volume_LRU.cv);
} while (volume_LRU.scanner_state == VLRU_SCANNER_STATE_PAUSED);
}
return NULL;
}
-/* run the promotions */
+/**
+ * promote volumes from one VLRU generation to the next.
+ *
+ * This routine scans a VLRU generation looking for volumes which are
+ * eligible to be promoted to the next generation. All volumes which
+ * meet the eligibility requirement are promoted.
+ *
+ * Promotion eligibility is based upon meeting both of the following
+ * requirements:
+ *
+ * @arg The volume has been accessed since the last promotion:
+ * @c (vp->stats.last_get >= vp->stats.last_promote)
+ * @arg The last promotion occurred at least
+ * @c volume_LRU.promotion_interval[idx] seconds ago
+ *
+ * As a performance optimization, promotions are "globbed". In other
+ * words, we promote arbitrarily large contiguous sublists of elements
+ * as one operation.
+ *
+ * @param[in] idx VLRU queue index to scan
+ *
+ * @note DAFS only
+ *
+ * @internal VLRU internal use only.
+ */
static void
VLRU_Promote_r(int idx)
{
VLRU_Wait_r(struct VLRU_q * q)
{
while(q->busy) {
- assert(pthread_cond_wait(&q->cv, &vol_glock_mutex) == 0);
+ VOL_CV_WAIT(&q->cv);
}
}
if (vp->nUsers || vp->nWaiters)
return 0;
- if (IsExclusiveState(V_attachState(vp))) {
+ if (VIsExclusiveState(V_attachState(vp))) {
return 0;
}
/* Volume Header Cache routines */
/***************************************************/
+/**
+ * volume header cache.
+ */
struct volume_hdr_LRU_t volume_hdr_LRU;
-/* Allocate a bunch of headers; string them together */
+/**
+ * initialize the volume header cache.
+ *
+ * @param[in] howMany number of header cache entries to preallocate
+ *
+ * @pre VOL_LOCK held. Function has never been called before.
+ *
+ * @post howMany cache entries are allocated, initialized, and added
+ * to the LRU list. Header cache statistics are initialized.
+ *
+ * @note only applicable to fileServer program type. Should only be
+ * called once during volume package initialization.
+ *
+ * @internal volume package internal use only.
+ */
static void
VInitVolumeHeaderCache(afs_uint32 howMany)
{
if (programType != fileServer)
return;
queue_Init(&volume_hdr_LRU);
-#ifdef AFS_DEMAND_ATTACH_FS
volume_hdr_LRU.stats.free = 0;
volume_hdr_LRU.stats.used = howMany;
volume_hdr_LRU.stats.attached = 0;
-#endif
hp = (struct volHeader *)(calloc(howMany, sizeof(struct volHeader)));
while (howMany--)
ReleaseVolumeHeader(hp++);
}
-#ifdef AFS_DEMAND_ATTACH_FS
-/* Get a volume header from the LRU list; update the old one if necessary */
-/* Returns 1 if there was already a header, which is removed from the LRU list */
-/* caller MUST has a ref count on vp */
+/**
+ * get a volume header and attach it to the volume object.
+ *
+ * @param[in] vp pointer to volume object
+ *
+ * @return cache entry status
+ * @retval 0 volume header was newly attached; cache data is invalid
+ * @retval 1 volume header was previously attached; cache data is valid
+ *
+ * @pre VOL_LOCK held. For DAFS, lightweight ref must be held on volume object.
+ *
+ * @post volume header attached to volume object. if necessary, header cache
+ * entry on LRU is synchronized to disk. Header is removed from LRU list.
+ *
+ * @note VOL_LOCK may be dropped
+ *
+ * @warning this interface does not load header data from disk. it merely
+ * attaches a header object to the volume object, and may sync the old
+ * header cache data out to disk in the process.
+ *
+ * @internal volume package internal use only.
+ */
static int
GetVolumeHeader(register Volume * vp)
{
int old;
static int everLogged = 0;
+#ifdef AFS_DEMAND_ATTACH_FS
+ VolState vp_save, back_save;
+
/* XXX debug 9/19/05 we've apparently got
* a ref counting bug somewhere that's
* breaking the nUsers == 0 => header on LRU
Log("nUsers == 0, but header not on LRU\n");
return 1;
}
+#endif
old = (vp->header != NULL); /* old == volume already has a header */
assert(hd != NULL);
vp->header = hd;
hd->back = vp;
+#ifdef AFS_DEMAND_ATTACH_FS
V_attachFlags(vp) |= VOL_HDR_ATTACHED;
+#endif
}
} else {
+ /* for the fileserver, we keep a volume header cache */
if (old) {
/* the header we previously dropped in the lru is
* still available. pull it off the lru and return */
volume_hdr_LRU.stats.free++;
}
if (hd->back) {
- VolState vp_save, back_save;
/* this header used to belong to someone else.
* we'll need to check if the header needs to
* be sync'd out to disk */
+#ifdef AFS_DEMAND_ATTACH_FS
/* if hd->back were in an exclusive state, then
* its volHeader would not be on the LRU... */
- assert(!IsExclusiveState(V_attachState(hd->back)));
+ assert(!VIsExclusiveState(V_attachState(hd->back)));
+#endif
if (hd->diskstuff.inUse) {
/* volume was in use, so we'll need to sync
* its header to disk */
+
+#ifdef AFS_DEMAND_ATTACH_FS
back_save = VChangeState_r(hd->back, VOL_STATE_UPDATING);
vp_save = VChangeState_r(vp, VOL_STATE_HDR_ATTACHING);
VCreateReservation_r(hd->back);
VOL_UNLOCK;
+#endif
WriteVolumeHeader_r(&error, hd->back);
/* Ignore errors; catch them later */
+#ifdef AFS_DEMAND_ATTACH_FS
VOL_LOCK;
+#endif
}
- V_attachFlags(hd->back) &= ~(VOL_HDR_ATTACHED | VOL_HDR_LOADED | VOL_HDR_IN_LRU);
hd->back->header = NULL;
+#ifdef AFS_DEMAND_ATTACH_FS
+ V_attachFlags(hd->back) &= ~(VOL_HDR_ATTACHED | VOL_HDR_LOADED | VOL_HDR_IN_LRU);
if (hd->diskstuff.inUse) {
VChangeState_r(hd->back, back_save);
VCancelReservation_r(hd->back);
VChangeState_r(vp, vp_save);
}
+#endif
} else {
volume_hdr_LRU.stats.attached++;
}
hd->back = vp;
vp->header = hd;
+#ifdef AFS_DEMAND_ATTACH_FS
V_attachFlags(vp) |= VOL_HDR_ATTACHED;
+#endif
}
volume_hdr_LRU.stats.free--;
volume_hdr_LRU.stats.used++;
}
IncUInt64(&VStats.hdr_gets);
+#ifdef AFS_DEMAND_ATTACH_FS
IncUInt64(&vp->stats.hdr_gets);
vp->stats.last_hdr_get = FT_ApproxTime();
+#endif
return old;
}
-#else /* AFS_DEMAND_ATTACH_FS */
-/* Get a volume header from the LRU list; update the old one if necessary */
-/* Returns 1 if there was already a header, which is removed from the LRU list */
-static int
-GetVolumeHeader(register Volume * vp)
-{
- Error error;
- register struct volHeader *hd;
- int old;
- static int everLogged = 0;
-
- old = (vp->header != NULL); /* old == volume already has a header */
-
- if (programType != fileServer) {
- /* for volume utilities, we allocate volHeaders as needed */
- if (!vp->header) {
- hd = (struct volHeader *)calloc(1, sizeof(*vp->header));
- assert(hd != NULL);
- vp->header = hd;
- hd->back = vp;
- }
- } else {
- /* for the fileserver, we keep a volume header cache */
- if (old) {
- /* the header we previously dropped in the lru is
- * still available. pull it off the lru and return */
- hd = vp->header;
- queue_Remove(hd);
- assert(hd->back == vp);
- } else {
- /* we need to grab a new element off the LRU */
- if (queue_IsNotEmpty(&volume_hdr_LRU)) {
- /* grab an element */
- hd = queue_First(&volume_hdr_LRU, volHeader);
- queue_Remove(hd);
- } else {
- /* LRU is empty, so allocate a new volHeader
- * this is probably indicative of a leak, so let the user know */
- hd = (struct volHeader *)calloc(1, sizeof(struct volHeader));
- assert(hd != NULL);
- if (!everLogged) {
- Log("****Allocated more volume headers, probably leak****\n");
- everLogged = 1;
- }
- }
- if (hd->back) {
- /* this header used to belong to someone else.
- * we'll need to check if the header needs to
- * be sync'd out to disk */
-
- if (hd->diskstuff.inUse) {
- WriteVolumeHeader_r(&error, hd->back);
- /* Ignore errors; catch them later */
- }
- hd->back->header = NULL;
- }
- hd->back = vp;
- vp->header = hd;
- }
- }
- return old;
-}
-#endif /* AFS_DEMAND_ATTACH_FS */
-/* make sure a volume header is attached to
- * vp, and has the correct data loaded from
- * disk. */
-#ifdef AFS_DEMAND_ATTACH_FS
-/* caller MUST hold a ref count on vp */
+/**
+ * make sure volume header is attached and contains valid cache data.
+ *
+ * @param[out] ec outbound error code
+ * @param[in] vp pointer to volume object
+ *
+ * @pre VOL_LOCK held. For DAFS, lightweight ref held on vp.
+ *
+ * @post header cache entry attached, and loaded with valid data, or
+ * *ec is nonzero, and the header is released back into the LRU.
+ *
+ * @internal volume package internal use only.
+ */
static void
LoadVolumeHeader(Error * ec, Volume * vp)
{
+#ifdef AFS_DEMAND_ATTACH_FS
VolState state_save;
+ afs_uint32 now;
*ec = 0;
if (vp->nUsers == 0 && !GetVolumeHeader(vp)) {
sizeof(V_disk(vp)), VOLUMEINFOMAGIC,
VOLUMEINFOVERSION);
IncUInt64(&vp->stats.hdr_loads);
+ now = FT_ApproxTime();
VOL_LOCK;
- if (!*ec)
+ if (!*ec) {
V_attachFlags(vp) |= VOL_HDR_LOADED;
+ vp->stats.last_hdr_load = now;
+ }
VChangeState_r(vp, state_save);
}
- if (*ec) {
- /* maintain (nUsers==0) => header in LRU invariant */
- ReleaseVolumeHeader(vp->header);
- }
-}
#else /* AFS_DEMAND_ATTACH_FS */
-static void
-LoadVolumeHeader(Error * ec, Volume * vp)
-{
*ec = 0;
if (vp->nUsers == 0 && !GetVolumeHeader(vp)) {
IncUInt64(&VStats.hdr_loads);
sizeof(V_disk(vp)), VOLUMEINFOMAGIC,
VOLUMEINFOVERSION);
}
+#endif /* AFS_DEMAND_ATTACH_FS */
if (*ec) {
/* maintain (nUsers==0) => header in LRU invariant */
ReleaseVolumeHeader(vp->header);
}
}
-#endif /* AFS_DEMAND_ATTACH_FS */
-/* Put it at the top of the LRU chain */
+/**
+ * release a header cache entry back into the LRU list.
+ *
+ * @param[in] hd pointer to volume header cache object
+ *
+ * @pre VOL_LOCK held.
+ *
+ * @post header cache object appended onto end of LRU list.
+ *
+ * @note only applicable to fileServer program type.
+ *
+ * @note used to place a header cache entry back into the
+ * LRU pool without invalidating it as a cache entry.
+ *
+ * @internal volume package internal use only.
+ */
static void
ReleaseVolumeHeader(register struct volHeader *hd)
{
if (hd->back) {
V_attachFlags(hd->back) |= VOL_HDR_IN_LRU;
}
+#endif
volume_hdr_LRU.stats.free++;
volume_hdr_LRU.stats.used--;
-#endif
}
-/* for fileserver, return header to LRU, and
- * invalidate it as a cache entry.
+/**
+ * free/invalidate a volume header cache entry.
+ *
+ * @param[in] vp pointer to volume object
+ *
+ * @pre VOL_LOCK is held.
+ *
+ * @post For fileserver, header cache entry is returned to LRU, and it is
+ * invalidated as a cache entry. For volume utilities, the header
+ * cache entry is freed.
*
- * for volume utilities, free the heap space */
+ * @note For fileserver, this should be utilized instead of ReleaseVolumeHeader
+ * whenever it is necessary to invalidate the header cache entry.
+ *
+ * @see ReleaseVolumeHeader
+ *
+ * @internal volume package internal use only.
+ */
static void
FreeVolumeHeader(register Volume * vp)
{
}
#ifdef AFS_DEMAND_ATTACH_FS
V_attachFlags(vp) &= ~(VOL_HDR_ATTACHED | VOL_HDR_IN_LRU | VOL_HDR_LOADED);
- volume_hdr_LRU.stats.attached--;
#endif
+ volume_hdr_LRU.stats.attached--;
vp->header = NULL;
}
/* Volume Hash Table routines */
/***************************************************/
+/**
+ * set size of volume object hash table.
+ *
+ * @param[in] logsize log(2) of desired hash table size
+ *
+ * @return operation status
+ * @retval 0 success
+ * @retval -1 failure
+ *
+ * @pre MUST be called prior to VInitVolumePackage
+ *
+ * @post Volume Hash Table will have 2^logsize buckets
+ */
int
VSetVolHashSize(int logsize)
{
return 0;
}
+/**
+ * initialize dynamic data structures for volume hash table.
+ *
+ * @post hash table is allocated, and fields are initialized.
+ *
+ * @internal volume package internal use only.
+ */
static void
VInitVolumeHash(void)
{
}
}
-/* for demand-attach, caller MUST hold a ref count on vp */
+/**
+ * add a volume object to the hash table.
+ *
+ * @param[in] vp pointer to volume object
+ * @param[in] hashid hash of volume id
+ *
+ * @pre VOL_LOCK is held. For DAFS, caller must hold a lightweight
+ * reference on vp.
+ *
+ * @post volume is added to hash chain.
+ *
+ * @internal volume package internal use only.
+ *
+ * @note For DAFS, VOL_LOCK may be dropped in order to wait for an
+ * asynchronous hash chain reordering to finish.
+ */
static void
AddVolumeToHashTable(register Volume * vp, int hashid)
{
vp->vnodeHashOffset = VolumeHashOffset_r();
}
-/* for demand-attach, caller MUST hold a ref count on vp */
+/**
+ * delete a volume object from the hash table.
+ *
+ * @param[in] vp pointer to volume object
+ *
+ * @pre VOL_LOCK is held. For DAFS, caller must hold a lightweight
+ * reference on vp.
+ *
+ * @post volume is removed from hash chain.
+ *
+ * @internal volume package internal use only.
+ *
+ * @note For DAFS, VOL_LOCK may be dropped in order to wait for an
+ * asynchronous hash chain reordering to finish.
+ */
static void
DeleteVolumeFromHashTable(register Volume * vp)
{
* after the volume is removed from the hash */
}
-/* - look up a volume id in the hash table
- * - occasionally rebalance hash chains
- * - update lookup statistics accordingly
+/**
+ * lookup a volume object in the hash table given a volume id.
+ *
+ * @param[out] ec error code return
+ * @param[in] volumeId volume id
+ * @param[in] hint volume object which we believe could be the correct
+ *            mapping
+ *
+ * @return volume object pointer
+ * @retval NULL no such volume id is registered with the hash table.
+ *
+ * @pre VOL_LOCK is held. For DAFS, caller must hold a lightweight
+ *      ref on hint.
+ *
+ * @post volume object with the given id is returned. volume object and
+ * hash chain access statistics are updated. hash chain may have
+ * been reordered.
+ *
+ * @note For DAFS, VOL_LOCK may be dropped in order to wait for an
+ * asynchronous hash chain reordering operation to finish, or
+ * in order for us to perform an asynchronous chain reordering.
+ *
+ * @note Hash chain reorderings occur when the access count for the
+ * volume object being looked up exceeds the sum of the previous
+ * node's (the node ahead of it in the hash chain linked list)
+ * access count plus the constant VOLUME_HASH_REORDER_THRESHOLD.
+ *
+ * @note For DAFS, the hint parameter allows us to short-circuit if the
+ * cacheCheck fields match between the hash chain head and the
+ * hint volume object.
*/
-/* the hint parameter allows us to short-circuit on
- * DEMAND_ATTACH_FS if the cacheChecks match between
- * the hash chain head and hint
- * caller MUST hold a refcount on hint */
Volume *
VLookupVolume_r(Error * ec, VolId volumeId, Volume * hint)
{
/* demand-attach fs volume hash
* asynchronous exclusive operations */
-/* take exclusive control over the hash chain */
+/**
+ * begin an asynchronous exclusive operation on a volume hash chain.
+ *
+ * @param[in] head pointer to volume hash chain head object
+ *
+ * @pre VOL_LOCK held. hash chain is quiescent.
+ *
+ * @post hash chain marked busy.
+ *
+ * @note this interface is used in conjunction with VHashEndExclusive_r and
+ * VHashWait_r to perform asynchronous (wrt VOL_LOCK) operations on a
+ * volume hash chain. Its main use case is hash chain reordering, which
+ * has the potential to be a highly latent operation.
+ *
+ * @see VHashEndExclusive_r
+ * @see VHashWait_r
+ *
+ * @note DAFS only
+ *
+ * @internal volume package internal use only.
+ */
static void
VHashBeginExclusive_r(VolumeHashChainHead * head)
{
head->busy = 1;
}
-/* relinquish exclusive control over the hash chain */
+/**
+ * relinquish exclusive ownership of a volume hash chain.
+ *
+ * @param[in] head pointer to volume hash chain head object
+ *
+ * @pre VOL_LOCK held. thread owns the hash chain exclusively.
+ *
+ * @post hash chain is marked quiescent. threads awaiting use of
+ * chain are awakened.
+ *
+ * @see VHashBeginExclusive_r
+ * @see VHashWait_r
+ *
+ * @note DAFS only
+ *
+ * @internal volume package internal use only.
+ */
static void
VHashEndExclusive_r(VolumeHashChainHead * head)
{
assert(pthread_cond_broadcast(&head->chain_busy_cv) == 0);
}
-/* wait for another thread to finish its exclusive ops */
+/**
+ * wait for all asynchronous operations on a hash chain to complete.
+ *
+ * @param[in] head pointer to volume hash chain head object
+ *
+ * @pre VOL_LOCK held.
+ *
+ * @post hash chain object is quiescent.
+ *
+ * @see VHashBeginExclusive_r
+ * @see VHashEndExclusive_r
+ *
+ * @note DAFS only
+ *
+ * @note This interface should be called before any attempt to
+ * traverse the hash chain. It is permissible for a thread
+ * to gain exclusive access to the chain, and then perform
+ * latent operations on the chain asynchronously wrt the
+ * VOL_LOCK.
+ *
+ * @warning if waiting is necessary, VOL_LOCK is dropped
+ *
+ * @internal volume package internal use only.
+ */
static void
VHashWait_r(VolumeHashChainHead * head)
{
while (head->busy) {
- assert(pthread_cond_wait(&head->chain_busy_cv, &vol_glock_mutex) == 0);
+ VOL_CV_WAIT(&head->chain_busy_cv);
}
}
#endif /* AFS_DEMAND_ATTACH_FS */
*/
#ifdef AFS_DEMAND_ATTACH_FS
+/**
+ * add a volume to its disk partition VByPList.
+ *
+ * @param[in] vp pointer to volume object
+ *
+ * @pre either the disk partition VByPList is owned exclusively
+ * by the calling thread, or the list is quiescent and
+ * VOL_LOCK is held.
+ *
+ * @post volume is added to disk partition VByPList
+ *
+ * @note DAFS only
+ *
+ * @warning it is the caller's responsibility to ensure list
+ * quiescence.
+ *
+ * @see VVByPListWait_r
+ * @see VVByPListBeginExclusive_r
+ * @see VVByPListEndExclusive_r
+ *
+ * @internal volume package internal use only.
+ */
static void
AddVolumeToVByPList_r(Volume * vp)
{
}
}
+/**
+ * delete a volume from its disk partition VByPList.
+ *
+ * @param[in] vp pointer to volume object
+ *
+ * @pre either the disk partition VByPList is owned exclusively
+ * by the calling thread, or the list is quiescent and
+ * VOL_LOCK is held.
+ *
+ * @post volume is removed from the disk partition VByPList
+ *
+ * @note DAFS only
+ *
+ * @warning it is the caller's responsibility to ensure list
+ * quiescence.
+ *
+ * @see VVByPListWait_r
+ * @see VVByPListBeginExclusive_r
+ * @see VVByPListEndExclusive_r
+ *
+ * @internal volume package internal use only.
+ */
static void
DeleteVolumeFromVByPList_r(Volume * vp)
{
}
}
+/**
+ * begin an asynchronous exclusive operation on a VByPList.
+ *
+ * @param[in] dp pointer to disk partition object
+ *
+ * @pre VOL_LOCK held. VByPList is quiescent.
+ *
+ * @post VByPList marked busy.
+ *
+ * @note this interface is used in conjunction with VVByPListEndExclusive_r and
+ * VVByPListWait_r to perform asynchronous (wrt VOL_LOCK) operations on a
+ * VByPList.
+ *
+ * @see VVByPListEndExclusive_r
+ * @see VVByPListWait_r
+ *
+ * @note DAFS only
+ *
+ * @internal volume package internal use only.
+ */
/* take exclusive control over the list */
static void
VVByPListBeginExclusive_r(struct DiskPartition * dp)
dp->vol_list.busy = 1;
}
-/* relinquish exclusive control over the list */
+/**
+ * relinquish exclusive ownership of a VByPList.
+ *
+ * @param[in] dp pointer to disk partition object
+ *
+ * @pre VOL_LOCK held. thread owns the VByPList exclusively.
+ *
+ * @post VByPList is marked quiescent. threads awaiting use of
+ * the list are awakened.
+ *
+ * @see VVByPListBeginExclusive_r
+ * @see VVByPListWait_r
+ *
+ * @note DAFS only
+ *
+ * @internal volume package internal use only.
+ */
static void
VVByPListEndExclusive_r(struct DiskPartition * dp)
{
assert(pthread_cond_broadcast(&dp->vol_list.cv) == 0);
}
-/* wait for another thread to finish its exclusive ops */
+/**
+ * wait for all asynchronous operations on a VByPList to complete.
+ *
+ * @param[in] dp pointer to disk partition object
+ *
+ * @pre VOL_LOCK is held.
+ *
+ * @post disk partition's VByP list is quiescent
+ *
+ * @note DAFS only
+ *
+ * @note This interface should be called before any attempt to
+ * traverse the VByPList. It is permissible for a thread
+ * to gain exclusive access to the list, and then perform
+ * latent operations on the list asynchronously wrt the
+ * VOL_LOCK.
+ *
+ * @warning if waiting is necessary, VOL_LOCK is dropped
+ *
+ * @see VVByPListEndExclusive_r
+ * @see VVByPListBeginExclusive_r
+ *
+ * @internal volume package internal use only.
+ */
static void
VVByPListWait_r(struct DiskPartition * dp)
{
while (dp->vol_list.busy) {
- assert(pthread_cond_wait(&dp->vol_list.cv, &vol_glock_mutex) == 0);
+ /* VOL_CV_WAIT drops VOL_LOCK while blocked (see @warning in the doc
+ * block above); busy is re-checked on every wakeup */
+ VOL_CV_WAIT(&dp->vol_list.cv);
}
}
#endif /* AFS_DEMAND_ATTACH_FS */
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
*
- * Portions Copyright (c) 2006 Sine Nomine Associates
+ * Portions Copyright (c) 2006-2008 Sine Nomine Associates
*/
/*
#include "daemon_com.h"
#include "fssync.h"
+#if 0
+/** turn this on if you suspect a volume package locking bug */
+#define VOL_LOCK_DEBUG 1
+#endif
+
+#ifdef VOL_LOCK_DEBUG
+#define VOL_LOCK_ASSERT_HELD \
+ assert(vol_glock_holder == pthread_self())
+#define VOL_LOCK_ASSERT_UNHELD \
+ assert(vol_glock_holder == 0)
+#define _VOL_LOCK_SET_HELD \
+ vol_glock_holder = pthread_self()
+#define _VOL_LOCK_SET_UNHELD \
+ vol_glock_holder = 0
+#define VOL_LOCK_DBG_CV_WAIT_END \
+ do { \
+ VOL_LOCK_ASSERT_UNHELD; \
+ _VOL_LOCK_SET_HELD; \
+ } while(0)
+#define VOL_LOCK_DBG_CV_WAIT_BEGIN \
+ do { \
+ VOL_LOCK_ASSERT_HELD; \
+ _VOL_LOCK_SET_UNHELD; \
+ } while(0)
+#else
+#define VOL_LOCK_ASSERT_HELD
+#define VOL_LOCK_ASSERT_UNHELD
+#define VOL_LOCK_DBG_CV_WAIT_BEGIN
+#define VOL_LOCK_DBG_CV_WAIT_END
+#endif
+
+
#ifdef AFS_PTHREAD_ENV
#include <assert.h>
#include <pthread.h>
extern pthread_cond_t vol_put_volume_cond;
extern pthread_cond_t vol_sleep_cond;
extern int vol_attach_threads;
+#ifdef VOL_LOCK_DEBUG
+extern pthread_t vol_glock_holder;
+#define VOL_LOCK \
+ do { \
+ assert(pthread_mutex_lock(&vol_glock_mutex) == 0); \
+ assert(vol_glock_holder == 0); \
+ vol_glock_holder = pthread_self(); \
+ } while (0)
+#define VOL_UNLOCK \
+ do { \
+ VOL_LOCK_ASSERT_HELD; \
+ vol_glock_holder = 0; \
+ assert(pthread_mutex_unlock(&vol_glock_mutex) == 0); \
+ } while (0)
+#define VOL_CV_WAIT(cv) \
+ do { \
+ VOL_LOCK_DBG_CV_WAIT_BEGIN; \
+ assert(pthread_cond_wait((cv), &vol_glock_mutex) == 0); \
+ VOL_LOCK_DBG_CV_WAIT_END; \
+ } while (0)
+#else /* !VOL_LOCK_DEBUG */
#define VOL_LOCK \
assert(pthread_mutex_lock(&vol_glock_mutex) == 0)
#define VOL_UNLOCK \
assert(pthread_mutex_unlock(&vol_glock_mutex) == 0)
+#define VOL_CV_WAIT(cv) assert(pthread_cond_wait((cv), &vol_glock_mutex) == 0)
+#endif /* !VOL_LOCK_DEBUG */
#define VSALVSYNC_LOCK \
assert(pthread_mutex_lock(&vol_salvsync_mutex) == 0)
#define VSALVSYNC_UNLOCK \
extern int (*vol_PollProc) ();
#define DOPOLL ((vol_PollProc)? (*vol_PollProc)() : 0)
+#ifdef AFS_DEMAND_ATTACH_FS
+/**
+ * variable error return code based upon programType and DAFS presence
+ */
+#define DAFS_VSALVAGE ((programType == fileServer) ? VSALVAGING : VSALVAGE)
+#else
+#define DAFS_VSALVAGE (VSALVAGE)
+#endif
+
struct versionStamp { /* Version stamp for critical volume files */
bit32 magic; /* Magic number */
bit32 version; /* Version number of this file, or software
};
#ifdef AFS_DEMAND_ATTACH_FS
-/*
- * demand attach fs
- * volume state machine
+/**
+ * demand attach volume state enumeration.
*
- * these must be contiguous in order for IsValidState() to work correctly
+ * @note values must be contiguous in order for VIsValidState() to work correctly
+ */
+typedef enum {
+ VOL_STATE_UNATTACHED = 0, /**< volume is unattached */
+ VOL_STATE_PREATTACHED = 1, /**< volume has been pre-attached */
+ VOL_STATE_ATTACHING = 2, /**< volume is transitioning to fully attached */
+ VOL_STATE_ATTACHED = 3, /**< volume has been fully attached */
+ VOL_STATE_UPDATING = 4, /**< volume is updating on-disk structures */
+ VOL_STATE_GET_BITMAP = 5, /**< volume is getting bitmap entries */
+ VOL_STATE_HDR_LOADING = 6, /**< volume is loading disk header */
+ VOL_STATE_HDR_ATTACHING = 7, /**< volume is getting a header from the LRU */
+ VOL_STATE_SHUTTING_DOWN = 8, /**< volume is shutting down */
+ VOL_STATE_GOING_OFFLINE = 9, /**< volume is going offline */
+ VOL_STATE_OFFLINING = 10, /**< volume is transitioning to offline */
+ VOL_STATE_DETACHING = 11, /**< volume is transitioning to detached */
+ VOL_STATE_SALVSYNC_REQ = 12, /**< volume is blocked on a salvsync request */
+ VOL_STATE_SALVAGING = 13, /**< volume is being salvaged */
+ VOL_STATE_ERROR = 14, /**< volume is in an error state */
+ VOL_STATE_VNODE_ALLOC = 15, /**< volume is busy allocating a new vnode */
+ VOL_STATE_VNODE_GET = 16, /**< volume is busy getting vnode disk data */
+ VOL_STATE_VNODE_CLOSE = 17, /**< volume is busy closing vnodes */
+ VOL_STATE_VNODE_RELEASE = 18, /**< volume is busy releasing vnodes */
+ VOL_STATE_VLRU_ADD = 19, /**< volume is busy being added to a VLRU queue */
+ /* please add new states directly above this line */
+ VOL_STATE_FREED = 20, /**< debugging aid */
+ VOL_STATE_COUNT = 21, /**< total number of valid states */
+} VolState;
+
+/**
+ * V_attachFlags bits.
*/
-#define VOL_STATE_UNATTACHED 0 /* volume is unattached */
-#define VOL_STATE_PREATTACHED 1 /* volume has been pre-attached */
-#define VOL_STATE_ATTACHING 2 /* volume is transitioning to fully attached */
-#define VOL_STATE_ATTACHED 3 /* volume has been fully attached */
-#define VOL_STATE_UPDATING 4 /* volume is updating on-disk structures */
-#define VOL_STATE_GET_BITMAP 5 /* volume is getting bitmap entries */
-#define VOL_STATE_HDR_LOADING 6 /* volume is loading disk header */
-#define VOL_STATE_HDR_ATTACHING 7 /* volume is getting a header from the LRU */
-#define VOL_STATE_SHUTTING_DOWN 8 /* volume is shutting down */
-#define VOL_STATE_GOING_OFFLINE 9 /* volume is going offline */
-#define VOL_STATE_OFFLINING 10 /* volume is transitioning to offline */
-#define VOL_STATE_DETACHING 11 /* volume is transitioning to detached */
-#define VOL_STATE_SALVSYNC_REQ 12 /* volume is blocked on a salvsync request */
-#define VOL_STATE_SALVAGING 13 /* volume is being salvaged */
-#define VOL_STATE_ERROR 14 /* volume is in an error state */
-#define VOL_STATE_FREED 15 /* debugging aid */
-
-#define VOL_STATE_COUNT 16 /* total number of valid states */
-
-/* V_attachFlags bits */
-#define VOL_HDR_ATTACHED 0x1 /* volume header is attached to Volume struct */
-#define VOL_HDR_LOADED 0x2 /* volume header contents are valid */
-#define VOL_HDR_IN_LRU 0x4 /* volume header is in LRU */
-#define VOL_IN_HASH 0x8 /* volume is in hash table */
-#define VOL_ON_VBYP_LIST 0x10 /* volume is on VByP list */
-#define VOL_IS_BUSY 0x20 /* volume is not to be free()d */
-#define VOL_ON_VLRU 0x40 /* volume is on the VLRU */
-#define VOL_HDR_DONTSALV 0x80 /* volume header DONTSALVAGE flag is set */
+enum VolFlags {
+ VOL_HDR_ATTACHED = 0x1, /**< volume header is attached to Volume struct */
+ VOL_HDR_LOADED = 0x2, /**< volume header contents are valid */
+ VOL_HDR_IN_LRU = 0x4, /**< volume header is in LRU */
+ VOL_IN_HASH = 0x8, /**< volume is in hash table */
+ VOL_ON_VBYP_LIST = 0x10, /**< volume is on VByP list */
+ VOL_IS_BUSY = 0x20, /**< volume is not to be free()d */
+ VOL_ON_VLRU = 0x40, /**< volume is on the VLRU */
+ VOL_HDR_DONTSALV = 0x80, /**< volume header DONTSALVAGE flag is set */
+};
/* VPrintExtendedCacheStats flags */
-#define VOL_STATS_PER_CHAIN 0x1 /* compute simple per-chain stats */
-#define VOL_STATS_PER_CHAIN2 0x2 /* compute per-chain stats that require scanning
- * every element of the chain */
+#define VOL_STATS_PER_CHAIN 0x1 /**< compute simple per-chain stats */
+#define VOL_STATS_PER_CHAIN2 0x2 /**< compute per-chain stats that require scanning
+ * every element of the chain */
/* VLRU_SetOptions options */
#define VLRU_SET_THRESH 1
#define VLRU_SET_MAX 3
#define VLRU_SET_ENABLED 4
-/* valid VLRU queue names */
-#define VLRU_QUEUE_NEW 0 /* LRU queue for new volumes */
-#define VLRU_QUEUE_MID 1 /* survivor generation */
-#define VLRU_QUEUE_OLD 2 /* old generation */
-#define VLRU_QUEUE_CANDIDATE 3 /* soft detach candidate pool */
-#define VLRU_QUEUE_HELD 4 /* volumes which are not allowed
- * to be soft detached */
-#define VLRU_QUEUE_INVALID 5 /* invalid queue id */
+/**
+ * VLRU queue names.
+ */
+typedef enum {
+ VLRU_QUEUE_NEW = 0, /**< LRU queue for new volumes */
+ VLRU_QUEUE_MID = 1, /**< survivor generation */
+ VLRU_QUEUE_OLD = 2, /**< old generation */
+ VLRU_QUEUE_CANDIDATE = 3, /**< soft detach candidate pool */
+ VLRU_QUEUE_HELD = 4, /**< volumes which are not allowed
+ * to be soft detached */
+ VLRU_QUEUE_INVALID = 5, /**< invalid queue id */
+} VLRUQueueName;
/* default scanner timing parameters */
#define VLRU_DEFAULT_OFFLINE_THRESH (60*60*2) /* 2 hours */
/* Memory resident volume information */
/**************************************/
-/* global volume package stats */
+/**
+ * global volume package stats.
+ */
typedef struct VolPkgStats {
#ifdef AFS_DEMAND_ATTACH_FS
/*
*/
/* levels */
- afs_uint32 state_levels[VOL_STATE_COUNT];
+ afs_uint32 state_levels[VOL_STATE_COUNT]; /**< volume state transition counters */
/* counters */
- afs_uint64 hash_looks; /* number of hash chain element traversals */
- afs_uint64 hash_reorders; /* number of hash chain reorders */
- afs_uint64 salvages; /* online salvages since fileserver start */
- afs_uint64 vol_ops; /* volume operations since fileserver start */
+ afs_uint64 hash_looks; /**< number of hash chain element traversals */
+ afs_uint64 hash_reorders; /**< number of hash chain reorders */
+ afs_uint64 salvages; /**< online salvages since fileserver start */
+ afs_uint64 vol_ops; /**< volume operations since fileserver start */
#endif /* AFS_DEMAND_ATTACH_FS */
- afs_uint64 hdr_loads; /* header loads from disk */
- afs_uint64 hdr_gets; /* header pulls out of LRU */
- afs_uint64 attaches; /* volume attaches since fileserver start */
- afs_uint64 soft_detaches; /* soft detach ops since fileserver start */
+ afs_uint64 hdr_loads; /**< header loads from disk */
+ afs_uint64 hdr_gets; /**< header pulls out of LRU */
+ afs_uint64 attaches; /**< volume attaches since fileserver start */
+ afs_uint64 soft_detaches; /**< soft detach ops since fileserver start */
/* configuration parameters */
- afs_uint32 hdr_cache_size; /* size of volume header cache */
+ afs_uint32 hdr_cache_size; /**< size of volume header cache */
} VolPkgStats;
extern VolPkgStats VStats;
/*
* volume header cache supporting structures
*/
-#ifdef AFS_DEMAND_ATTACH_FS
struct volume_hdr_LRU_stats {
afs_uint32 free;
afs_uint32 used;
afs_uint32 attached;
};
-#endif
struct volume_hdr_LRU_t {
struct rx_queue lru;
-#ifdef AFS_DEMAND_ATTACH_FS
struct volume_hdr_LRU_stats stats;
-#endif
};
extern struct volume_hdr_LRU_t volume_hdr_LRU;
#ifdef AFS_DEMAND_ATTACH_FS
-/* demand attach fs
- * extended per-volume statistics
+/**
+ * DAFS extended per-volume statistics.
*
- * please note that this structure lives across the entire
- * lifetime of the fileserver process
+ * @note this data lives across the entire
+ * lifetime of the fileserver process
*/
typedef struct VolumeStats {
/* counters */
- afs_uint64 hash_lookups; /* hash table lookups */
- afs_uint64 hash_short_circuits; /* short circuited hash lookups (due to cacheCheck) */
- afs_uint64 hdr_loads; /* header loads from disk */
- afs_uint64 hdr_gets; /* header pulls out of LRU */
- afs_uint16 attaches; /* attaches of this volume since fileserver start */
- afs_uint16 soft_detaches; /* soft detaches of this volume */
- afs_uint16 salvages; /* online salvages since fileserver start */
- afs_uint16 vol_ops; /* volume operations since fileserver start */
+ afs_uint64 hash_lookups; /**< hash table lookups */
+ afs_uint64 hash_short_circuits; /**< short circuited hash lookups (due to cacheCheck) */
+ afs_uint64 hdr_loads; /**< header loads from disk */
+ afs_uint64 hdr_gets; /**< header pulls out of LRU */
+ afs_uint16 attaches; /**< attaches of this volume since fileserver start */
+ afs_uint16 soft_detaches; /**< soft detaches of this volume */
+ afs_uint16 salvages; /**< online salvages since fileserver start */
+ afs_uint16 vol_ops; /**< volume operations since fileserver start */
/* timestamps */
- afs_uint32 last_attach; /* unix timestamp of last VAttach */
- afs_uint32 last_get; /* unix timestamp of last VGet/VHold */
- afs_uint32 last_promote; /* unix timestamp of last VLRU promote/demote */
- afs_uint32 last_hdr_get; /* unix timestamp of last GetVolumeHeader() */
- afs_uint32 last_salvage; /* unix timestamp of last initiation of an online salvage */
- afs_uint32 last_salvage_req; /* unix timestamp of last SALVSYNC request */
- afs_uint32 last_vol_op; /* unix timestamp of last volume operation */
+ afs_uint32 last_attach; /**< unix timestamp of last VAttach */
+ afs_uint32 last_get; /**< unix timestamp of last VGet/VHold */
+ afs_uint32 last_promote; /**< unix timestamp of last VLRU promote/demote */
+ afs_uint32 last_hdr_get; /**< unix timestamp of last GetVolumeHeader() */
+ afs_uint32 last_hdr_load; /**< unix timestamp of last LoadVolumeHeader() */
+ afs_uint32 last_salvage; /**< unix timestamp of last initiation of an online salvage */
+ afs_uint32 last_salvage_req; /**< unix timestamp of last SALVSYNC request */
+ afs_uint32 last_vol_op; /**< unix timestamp of last volume operation */
} VolumeStats;
-/* demand attach fs
- * online salvager state */
+/**
+ * DAFS online salvager state.
+ */
typedef struct VolumeOnlineSalvage {
- afs_uint32 prio; /* number of VGetVolume's since salvage requested */
- int reason; /* reason for requesting online salvage */
- byte requested; /* flag specifying that salvage should be scheduled */
- byte scheduled; /* flag specifying whether online salvage scheduled */
- byte reserved[2]; /* padding */
+ afs_uint32 prio; /**< number of VGetVolume's since salvage requested */
+ int reason; /**< reason for requesting online salvage */
+ byte requested; /**< flag specifying that salvage should be scheduled */
+ byte scheduled; /**< flag specifying whether online salvage scheduled */
+ byte reserved[2]; /**< padding */
} VolumeOnlineSalvage;
-/* demand attach fs
- * volume LRU state */
+/**
+ * DAFS Volume LRU state.
+ */
typedef struct VolumeVLRUState {
- struct rx_queue lru; /* VLRU queue pointers */
- int idx; /* VLRU generation index */
+ struct rx_queue lru; /**< VLRU queue for this generation */
+ VLRUQueueName idx; /**< VLRU generation index */
} VolumeVLRUState;
-
-typedef afs_uint16 VolState; /* attachment state type */
#endif /* AFS_DEMAND_ATTACH_FS */
typedef struct Volume {
afs_uint32 updateTime; /* Time that this volume was put on the updated
* volume list--the list of volumes that will be
* salvaged should the file server crash */
+ struct rx_queue vnode_list; /**< linked list of cached vnodes for this volume */
#ifdef AFS_DEMAND_ATTACH_FS
VolState attach_state; /* what stage of attachment has been completed */
- afs_uint16 attach_flags; /* flags related to attachment state */
+ afs_uint32 attach_flags; /* flags related to attachment state */
pthread_cond_t attach_cv; /* state change condition variable */
short nWaiters; /* volume package internal ref count */
int chainCacheCheck; /* Volume hash chain cache check */
extern Volume * VLookupVolume_r(Error * ec, VolId volumeId, Volume * hint);
#ifdef AFS_DEMAND_ATTACH_FS
-extern Volume *VPreAttachVolumeByName(Error * ec, char *partition, char *name,
- int mode);
-extern Volume *VPreAttachVolumeByName_r(Error * ec, char *partition, char *name,
- int mode);
-extern Volume *VPreAttachVolumeById_r(Error * ec, struct DiskPartition * partp,
- Volume * vp, int volume_id);
+extern Volume *VPreAttachVolumeByName(Error * ec, char *partition, char *name);
+extern Volume *VPreAttachVolumeByName_r(Error * ec, char *partition, char *name);
+extern Volume *VPreAttachVolumeById_r(Error * ec, char * partition,
+ VolId volumeId);
+extern Volume *VPreAttachVolumeByVp_r(Error * ec, struct DiskPartition * partp,
+ Volume * vp, VolId volume_id);
extern Volume *VGetVolumeByVp_r(Error * ec, Volume * vp);
extern int VShutdownByPartition_r(struct DiskPartition * dp);
extern int VShutdownVolume_r(Volume * vp);
extern int VDisconnectSALV_r(void);
extern void VPrintExtendedCacheStats(int flags);
extern void VPrintExtendedCacheStats_r(int flags);
-extern VolState VChangeState_r(Volume * vp, VolState new_state);
extern void VLRU_SetOptions(int option, afs_uint32 val);
extern int VSetVolHashSize(int logsize);
-extern int VRequestSalvage_r(Volume * vp, int reason, int flags);
+extern int VRequestSalvage_r(Error * ec, Volume * vp, int reason, int flags);
extern int VRegisterVolOp_r(Volume * vp, FSSYNC_VolOp_info * vopinfo);
-extern int VDeregisterVolOp_r(Volume * vp, FSSYNC_VolOp_info * vopinfo);
+extern int VDeregisterVolOp_r(Volume * vp);
+extern void VCancelReservation_r(Volume * vp);
#endif /* AFS_DEMAND_ATTACH_FS */
extern int VVolOpLeaveOnline_r(Volume * vp, FSSYNC_VolOp_info * vopinfo);
extern int VVolOpSetVBusy_r(Volume * vp, FSSYNC_VolOp_info * vopinfo);
--- /dev/null
+/*
+ * Copyright 2005-2008, Sine Nomine Associates and others.
+ * All Rights Reserved.
+ *
+ * This software has been released under the terms of the IBM Public
+ * License. For details, see the LICENSE file in the top-level source
+ * directory or online at http://www.openafs.org/dl/license10.html
+ */
+
+#ifndef _AFS_VOL_VOLUME_INLINE_H
+#define _AFS_VOL_VOLUME_INLINE_H 1
+
+#include "volume.h"
+
+#ifdef AFS_AIX_ENV
+#define static_inline inline
+#else
+#define static_inline static inline
+#endif
+
+
+/***************************************************/
+/* demand attach fs state machine routines */
+/***************************************************/
+
+#ifdef AFS_DEMAND_ATTACH_FS
+/**
+ * tells caller whether or not the current state requires
+ * exclusive access without holding glock.
+ *
+ * @param state volume state enumeration
+ *
+ * @return whether volume state is a mutually exclusive state
+ * @retval 0 no, state is re-entrant
+ * @retval 1 yes, state is mutually exclusive
+ *
+ * @note DEMAND_ATTACH_FS only
+ *
+ * @note should VOL_STATE_SALVSYNC_REQ be treated as exclusive?
+ */
+static_inline int
+VIsExclusiveState(VolState state)
+{
+ switch (state) {
+ case VOL_STATE_UPDATING:
+ case VOL_STATE_ATTACHING:
+ case VOL_STATE_GET_BITMAP:
+ case VOL_STATE_HDR_LOADING:
+ case VOL_STATE_HDR_ATTACHING:
+ case VOL_STATE_OFFLINING:
+ case VOL_STATE_DETACHING:
+ case VOL_STATE_VNODE_ALLOC:
+ case VOL_STATE_VNODE_GET:
+ case VOL_STATE_VNODE_CLOSE:
+ case VOL_STATE_VNODE_RELEASE:
+ return 1;
+ }
+ /* no default label: any state not listed above (including enumerators
+ * added in the future) is treated as re-entrant */
+ return 0;
+}
+
+/**
+ * tell caller whether V_attachState is an error condition.
+ *
+ * @param state volume state enumeration
+ *
+ * @return whether volume state is in error state
+ * @retval 0 state is not an error state
+ * @retval 1 state is an error state
+ *
+ * @note DEMAND_ATTACH_FS only
+ */
+static_inline int
+VIsErrorState(VolState state)
+{
+ switch (state) {
+ case VOL_STATE_ERROR:
+ /* NOTE(review): SALVAGING is grouped with ERROR here -- presumably a
+ * volume undergoing salvage is to be treated as unusable by callers;
+ * confirm against the fssync/FillVolInfo consumers */
+ case VOL_STATE_SALVAGING:
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * tell caller whether V_attachState is valid.
+ *
+ * @param state volume state enumeration
+ *
+ * @return whether volume state is a valid enumeration member
+ * @retval 0 no, state is not valid
+ * @retval 1 yes, state is a valid enumeration member
+ *
+ * @note DEMAND_ATTACH_FS only
+ *
+ * @note do we really want to treat VOL_STATE_FREED as valid?
+ */
+static_inline int
+VIsValidState(VolState state)
+{
+ /* relies on the VolState enumerators being contiguous starting at 0
+ * (see the note on the VolState enumeration in volume.h) */
+ if ((state >= 0) &&
+ (state < VOL_STATE_COUNT)) {
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * increment volume-package internal refcount.
+ *
+ * @param vp volume object pointer
+ *
+ * @internal volume package internal use only
+ *
+ * @pre VOL_LOCK must be held
+ *
+ * @post volume waiters refcount is incremented
+ *
+ * @see VCancelReservation_r
+ *
+ * @note DEMAND_ATTACH_FS only
+ */
+static_inline void
+VCreateReservation_r(Volume * vp)
+{
+ /* matched by a decrement in VCancelReservation_r -- presumably the
+ * volume object may not be freed while nWaiters is nonzero; confirm */
+ vp->nWaiters++;
+}
+
+/**
+ * wait for the volume to change states.
+ *
+ * @param vp volume object pointer
+ *
+ * @pre VOL_LOCK held; ref held on volume
+ *
+ * @post VOL_LOCK held; volume state has changed from previous value
+ *
+ * @note DEMAND_ATTACH_FS only
+ */
+static_inline void
+VWaitStateChange_r(Volume * vp)
+{
+ VolState state_save = V_attachState(vp);
+
+ /* caller must hold a reference, else vp could be freed while we sleep */
+ assert(vp->nWaiters || vp->nUsers);
+ do {
+ /* VOL_CV_WAIT drops VOL_LOCK while blocked; the loop guards against
+ * spurious wakeups and broadcasts for transitions back to state_save */
+ VOL_CV_WAIT(&V_attachCV(vp));
+ } while (V_attachState(vp) == state_save);
+ /* sanity: the object must not have been freed out from under us */
+ assert(V_attachState(vp) != VOL_STATE_FREED);
+}
+
+/**
+ * wait for blocking ops to end.
+ *
+ * @pre VOL_LOCK held; ref held on volume
+ *
+ * @post VOL_LOCK held; volume not in exclusive state
+ *
+ * @param vp volume object pointer
+ *
+ * @note DEMAND_ATTACH_FS only
+ */
+static_inline void
+VWaitExclusiveState_r(Volume * vp)
+{
+ /* caller must hold a reference, else vp could be freed while we sleep */
+ assert(vp->nWaiters || vp->nUsers);
+ while (VIsExclusiveState(V_attachState(vp))) {
+ /* VOL_CV_WAIT drops VOL_LOCK while blocked */
+ VOL_CV_WAIT(&V_attachCV(vp));
+ }
+ assert(V_attachState(vp) != VOL_STATE_FREED);
+}
+
+/**
+ * change state, and notify other threads,
+ * return previous state to caller.
+ *
+ * @param vp pointer to volume object
+ * @param new_state new volume state value
+ * @pre VOL_LOCK held
+ *
+ * @post volume state changed; stats updated
+ *
+ * @return previous volume state
+ *
+ * @note DEMAND_ATTACH_FS only
+ */
+static_inline VolState
+VChangeState_r(Volume * vp, VolState new_state)
+{
+ VolState old_state = V_attachState(vp);
+
+ /* XXX profiling need to make sure these counters
+ * don't kill performance... */
+ VStats.state_levels[old_state]--;
+ VStats.state_levels[new_state]++;
+
+ V_attachState(vp) = new_state;
+ /* broadcast (not signal): wake every thread blocked in
+ * VWaitStateChange_r / VWaitExclusiveState_r so each can re-check
+ * its own wait predicate */
+ assert(pthread_cond_broadcast(&V_attachCV(vp)) == 0);
+ return old_state;
+}
+
+#endif /* AFS_DEMAND_ATTACH_FS */
+
+#endif /* _AFS_VOL_VOLUME_INLINE_H */
* This software has been released under the terms of the IBM Public
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
+ *
+ * Portions Copyright (c) 2007-2008 Sine Nomine Associates
*/
#include <afsconfig.h>
#endif
#include <afs/vnode.h>
#include <afs/volume.h>
+#include <afs/volume_inline.h>
#include <afs/partition.h>
#include "vol.h"
#include <afs/daemon_com.h>
}
VUpdateVolume(&error, vp);
tt->vflags = aflags;
- tt->rxCallPtr = (struct rx_call *)0;
if (TRELE(tt) && !error)
return VOLSERTRELE_ERROR;
}
+/**
+ * volint vol info structure type.
+ */
+typedef enum {
+ VOLINT_INFO_TYPE_BASE, /**< volintInfo type */
+ VOLINT_INFO_TYPE_EXT /**< volintXInfo type */
+} volint_info_type_t;
+
+/**
+ * handle to various on-wire vol info types.
+ */
+typedef struct {
+ volint_info_type_t volinfo_type;
+ union {
+ void * opaque;
+ volintInfo * base;
+ volintXInfo * ext;
+ } volinfo_ptr;
+} volint_info_handle_t;
+
+/**
+ * store value to a field at the appropriate location in on-wire structure.
+ */
+#define VOLINT_INFO_STORE(handle, name, val) \
+ do { \
+ if ((handle)->volinfo_type == VOLINT_INFO_TYPE_BASE) { \
+ (handle)->volinfo_ptr.base->name = (val); \
+ } else { \
+ (handle)->volinfo_ptr.ext->name = (val); \
+ } \
+ } while(0)
+
+/**
+ * get pointer to appropriate offset of field in on-wire structure.
+ */
+#define VOLINT_INFO_PTR(handle, name) \
+ (((handle)->volinfo_type == VOLINT_INFO_TYPE_BASE) ? \
+ &((handle)->volinfo_ptr.base->name) : \
+ &((handle)->volinfo_ptr.ext->name))
+
+/**
+ * fill in appropriate type of on-wire volume metadata structure.
+ *
+ * @param vp pointer to volume object
+ * @param hdr pointer to volume disk data object
+ * @param handle pointer to wire format handle object
+ *
+ * @pre handle object must have a valid pointer and enumeration value
+ *
+ * @return operation status
+ * @retval 0 success
+ * @retval 1 failure
+ */
+static int
+FillVolInfo(Volume * vp, VolumeDiskData * hdr, volint_info_handle_t * handle)
+{
+ unsigned int numStatBytes, now;
+
+ /* NOTE(review): vp is only dereferenced inside the AFS_DEMAND_ATTACH_FS
+ * branches below; non-DAFS callers may pass an unpopulated object */
+
+ /*read in the relevant info */
+ strcpy(VOLINT_INFO_PTR(handle, name), hdr->name);
+ VOLINT_INFO_STORE(handle, status, VOK); /*its ok */
+ VOLINT_INFO_STORE(handle, volid, hdr->id);
+ VOLINT_INFO_STORE(handle, type, hdr->type); /*if ro volume */
+ VOLINT_INFO_STORE(handle, cloneID, hdr->cloneId); /*if rw volume */
+ VOLINT_INFO_STORE(handle, backupID, hdr->backupId);
+ VOLINT_INFO_STORE(handle, parentID, hdr->parentId);
+ VOLINT_INFO_STORE(handle, copyDate, hdr->copyDate);
+ VOLINT_INFO_STORE(handle, size, hdr->diskused);
+ VOLINT_INFO_STORE(handle, maxquota, hdr->maxquota);
+ VOLINT_INFO_STORE(handle, filecount, hdr->filecount);
+ now = FT_ApproxTime();
+ /* dayUse is only meaningful if it was updated within the last day */
+ if ((now - hdr->dayUseDate) > OneDay) {
+ VOLINT_INFO_STORE(handle, dayUse, 0);
+ } else {
+ VOLINT_INFO_STORE(handle, dayUse, hdr->dayUse);
+ }
+ VOLINT_INFO_STORE(handle, creationDate, hdr->creationDate);
+ VOLINT_INFO_STORE(handle, accessDate, hdr->accessDate);
+ VOLINT_INFO_STORE(handle, updateDate, hdr->updateDate);
+ VOLINT_INFO_STORE(handle, backupDate, hdr->backupDate);
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ /*
+ * for DAFS, we "lie" about volume state --
+ * instead of returning the raw state from the disk header,
+ * we compute state based upon the fileserver's internal
+ * in-core state enumeration value reported to us via fssync,
+ * along with the blessed and inService flags from the header.
+ * -- tkeiser 11/27/2007
+ */
+ if ((V_attachState(vp) == VOL_STATE_UNATTACHED) ||
+ VIsErrorState(V_attachState(vp)) ||
+ !hdr->inService ||
+ !hdr->blessed) {
+ VOLINT_INFO_STORE(handle, inUse, 0);
+ } else {
+ VOLINT_INFO_STORE(handle, inUse, 1);
+ }
+#else
+ VOLINT_INFO_STORE(handle, inUse, hdr->inUse);
+#endif
+
+
+ /* type-specific fields; both enumerators are covered, so no default */
+ switch(handle->volinfo_type) {
+ case VOLINT_INFO_TYPE_BASE:
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ /* see comment above where we set inUse bit */
+ if (hdr->needsSalvaged || VIsErrorState(V_attachState(vp))) {
+ handle->volinfo_ptr.base->needsSalvaged = 1;
+ } else {
+ handle->volinfo_ptr.base->needsSalvaged = 0;
+ }
+#else
+ handle->volinfo_ptr.base->needsSalvaged = hdr->needsSalvaged;
+#endif
+ handle->volinfo_ptr.base->destroyMe = hdr->destroyMe;
+ handle->volinfo_ptr.base->spare0 = hdr->minquota;
+ /* spare1 carries the aggregate weekly use count */
+ handle->volinfo_ptr.base->spare1 =
+ (long)hdr->weekUse[0] +
+ (long)hdr->weekUse[1] +
+ (long)hdr->weekUse[2] +
+ (long)hdr->weekUse[3] +
+ (long)hdr->weekUse[4] +
+ (long)hdr->weekUse[5] +
+ (long)hdr->weekUse[6];
+ handle->volinfo_ptr.base->flags = 0;
+ handle->volinfo_ptr.base->spare2 = hdr->volUpdateCounter;
+ handle->volinfo_ptr.base->spare3 = 0;
+ break;
+
+
+ case VOLINT_INFO_TYPE_EXT:
+ numStatBytes =
+ 4 * ((2 * VOLINT_STATS_NUM_RWINFO_FIELDS) +
+ (4 * VOLINT_STATS_NUM_TIME_FIELDS));
+
+ /*
+ * Copy out the stat fields in a single operation.
+ */
+ if ((now - hdr->dayUseDate) > OneDay) {
+ memset((char *)&(handle->volinfo_ptr.ext->stat_reads[0]),
+ 0, numStatBytes);
+ } else {
+ memcpy((char *)&(handle->volinfo_ptr.ext->stat_reads[0]),
+ (char *)&(hdr->stat_reads[0]),
+ numStatBytes);
+ }
+ break;
+ }
+
+ return 0;
+}
+
+/**
+ * get struct Volume out of the fileserver.
+ *
+ * @param[in] volumeId volumeId for which we want state information
+ * @param[out] vp pointer to Volume object
+ *
+ * @return operation status
+ * @retval 0 success
+ * @retval nonzero failure
+ */
+static int
+GetVolObject(afs_uint32 volumeId, Volume * vp)
+{
+ int code;
+ SYNC_response res;
+
+ /* point the response payload at the caller's Volume object so the
+ * fileserver's in-core volume state is copied straight into it */
+ res.hdr.response_len = sizeof(res.hdr);
+ res.payload.buf = vp;
+ res.payload.len = sizeof(*vp);
+
+ code = FSYNC_VolOp(volumeId,
+ "",
+ FSYNC_VOL_QUERY,
+ 0,
+ &res);
+
+ return code;
+}
+
+/**
+ * mode of volume list operation.
+ */
+typedef enum {
+ VOL_INFO_LIST_SINGLE, /**< performing a single volume list op */
+ VOL_INFO_LIST_MULTIPLE /**< performing a multi-volume list op */
+} vol_info_list_mode_t;
+
+/**
+ * abstract interface to populate wire-format volume metadata structures.
+ *
+ * @param[in] partId partition id
+ * @param[in] volumeId volume id
+ * @param[in] pname partition name
+ * @param[in] volname volume file name
+ * @param[in] handle handle to on-wire volume metadata object
+ * @param[in] mode listing mode
+ *
+ * @return operation status
+ * @retval 0 success
+ * @retval -2 DESTROY_ME flag is set
+ * @retval -1 general failure; some data filled in
+ * @retval -3 couldn't create vtrans; some data filled in
+ */
+static int
+GetVolInfo(afs_uint32 partId,
+ afs_uint32 volumeId,
+ char * pname,
+ char * volname,
+ volint_info_handle_t * handle,
+ vol_info_list_mode_t mode)
+{
+ int code = -1;
+ afs_int32 error;
+ struct volser_trans *ttc = NULL;
+ struct Volume fs_tv, *tv = NULL;
+
+ ttc = NewTrans(volumeId, partId);
+ if (!ttc) {
+ code = -3;
+ VOLINT_INFO_STORE(handle, status, VBUSY);
+ VOLINT_INFO_STORE(handle, volid, volumeId);
+ goto drop;
+ }
+
+ /* NOTE(review): assumes VAttachVolumeByName always sets error; confirm */
+ tv = VAttachVolumeByName(&error, pname, volname, V_PEEK);
+ if (error) {
+ Log("1 Volser: GetVolInfo: Could not attach volume %u (%s:%s) error=%d\n",
+ volumeId, pname, volname, error);
+ goto drop;
+ }
+
+ /*
+ * please note that destroyMe and needsSalvaged checks used to be ordered
+ * in the opposite manner for ListVolumes and XListVolumes. I think it's
+ * more correct to check destroyMe before needsSalvaged.
+ * -- tkeiser 11/28/2007
+ */
+
+ if (tv->header->diskstuff.destroyMe == DESTROY_ME) {
+ switch (mode) {
+ case VOL_INFO_LIST_MULTIPLE:
+ code = -2;
+ goto drop;
+
+ case VOL_INFO_LIST_SINGLE:
+ Log("1 Volser: GetVolInfo: Volume %u (%s:%s) will be destroyed on next salvage\n",
+ volumeId, pname, volname);
+ /* fall through */
+
+ default:
+ goto drop;
+ }
+ }
+
+ if (tv->header->diskstuff.needsSalvaged) {
+ /*this volume will be salvaged */
+ Log("1 Volser: GetVolInfo: Volume %u (%s:%s) needs to be salvaged\n",
+ volumeId, pname, volname);
+ goto drop;
+ }
+
+#ifdef AFS_DEMAND_ATTACH_FS
+ if (GetVolObject(volumeId, &fs_tv)) {
+ goto drop;
+ }
+#endif
+
+ /* ok, we have all the data we need; fill in the on-wire struct */
+ /* NOTE(review): in non-DAFS builds fs_tv is passed unpopulated here;
+ * FillVolInfo only dereferences it under AFS_DEMAND_ATTACH_FS -- confirm */
+ code = FillVolInfo(&fs_tv, &tv->header->diskstuff, handle);
+
+
+ drop:
+ /* code == -1 means a general failure above: report partial data */
+ if (code == -1) {
+ VOLINT_INFO_STORE(handle, status, 0);
+ strcpy(VOLINT_INFO_PTR(handle, name), volname);
+ VOLINT_INFO_STORE(handle, volid, volumeId);
+ }
+ if (tv) {
+ VDetachVolume(&error, tv);
+ tv = NULL;
+ if (error) {
+ VOLINT_INFO_STORE(handle, status, 0);
+ strcpy(VOLINT_INFO_PTR(handle, name), volname);
+ Log("1 Volser: GetVolInfo: Could not detach volume %u (%s:%s)\n",
+ volumeId, pname, volname);
+ }
+ }
+ if (ttc) {
+ DeleteTrans(ttc, 1);
+ ttc = NULL;
+ }
+ return code;
+}
+
+
/*return the header information about the <volid> */
afs_int32
SAFSVolListOneVolume(struct rx_call *acid, afs_int32 partid, afs_int32
volumeId, volEntries *volumeInfo)
{
volintInfo *pntr;
- register struct Volume *tv;
struct DiskPartition *partP;
- struct volser_trans *ttc;
char pname[9], volname[20];
afs_int32 error = 0;
DIR *dirp;
afs_int32 volid;
int found = 0;
unsigned int now;
+ int code;
+ volint_info_handle_t handle;
volumeInfo->volEntries_val = (volintInfo *) malloc(sizeof(volintInfo));
pntr = volumeInfo->volEntries_val;
dirp = opendir(VPartitionPath(partP));
if (dirp == NULL)
return VOLSERILLEGAL_PARTITION;
+
strcpy(volname, "");
- ttc = (struct volser_trans *)0;
- tv = (Volume *) 0; /* volume not attached */
while (strcmp(volname, "EOD") && !found) { /*while there are more volumes in the partition */
if (volid == volumeId) { /*copy other things too */
found = 1;
-#ifndef AFS_PTHREAD_ENV
- IOMGR_Poll(); /*make sure that the client doesnot time out */
-#endif
- ttc = NewTrans(volid, partid);
- if (!ttc) {
- pntr->status = VBUSY;
- pntr->volid = volid;
- goto drop;
- }
- tv = VAttachVolumeByName(&error, pname, volname, V_PEEK);
- if (error) {
- pntr->status = 0; /*things are messed up */
- strcpy(pntr->name, volname);
- pntr->volid = volid;
- Log("1 Volser: ListVolumes: Could not attach volume %u (%s:%s), error=%d\n", volid, pname, volname, error);
- goto drop;
- }
- if (tv->header->diskstuff.destroyMe == DESTROY_ME) {
- /*this volume will be salvaged */
- pntr->status = 0;
- strcpy(pntr->name, volname);
- pntr->volid = volid;
- Log("1 Volser: ListVolumes: Volume %u (%s) will be destroyed on next salvage\n", volid, volname);
- goto drop;
- }
-
- if (tv->header->diskstuff.needsSalvaged) {
- /*this volume will be salvaged */
- pntr->status = 0;
- strcpy(pntr->name, volname);
- pntr->volid = volid;
- Log("1 Volser: ListVolumes: Volume %u (%s) needs to be salvaged\n", volid, volname);
- goto drop;
- }
-
- /*read in the relevant info */
- pntr->status = VOK; /*its ok */
- pntr->volid = tv->header->diskstuff.id;
- strcpy(pntr->name, tv->header->diskstuff.name);
- pntr->type = tv->header->diskstuff.type; /*if ro volume */
- pntr->cloneID = tv->header->diskstuff.cloneId; /*if rw volume */
- pntr->backupID = tv->header->diskstuff.backupId;
- pntr->parentID = tv->header->diskstuff.parentId;
- pntr->copyDate = tv->header->diskstuff.copyDate;
- pntr->inUse = tv->header->diskstuff.inUse;
- pntr->size = tv->header->diskstuff.diskused;
- pntr->needsSalvaged = tv->header->diskstuff.needsSalvaged;
- pntr->destroyMe = tv->header->diskstuff.destroyMe;
- pntr->maxquota = tv->header->diskstuff.maxquota;
- pntr->filecount = tv->header->diskstuff.filecount;
- now = FT_ApproxTime();
- if (now - tv->header->diskstuff.dayUseDate > OneDay)
- pntr->dayUse = 0;
- else
- pntr->dayUse = tv->header->diskstuff.dayUse;
- pntr->creationDate = tv->header->diskstuff.creationDate;
- pntr->accessDate = tv->header->diskstuff.accessDate;
- pntr->updateDate = tv->header->diskstuff.updateDate;
- pntr->backupDate = tv->header->diskstuff.backupDate;
- pntr->spare0 = tv->header->diskstuff.minquota;
- pntr->spare1 =
- (long)tv->header->diskstuff.weekUse[0] +
- (long)tv->header->diskstuff.weekUse[1] +
- (long)tv->header->diskstuff.weekUse[2] +
- (long)tv->header->diskstuff.weekUse[3] +
- (long)tv->header->diskstuff.weekUse[4] +
- (long)tv->header->diskstuff.weekUse[5] +
- (long)tv->header->diskstuff.weekUse[6];
- pntr->spare2 = V_volUpCounter(tv);
- pntr->flags = pntr->spare3 = (long)0;
- VDetachVolume(&error, tv); /*free the volume */
- tv = (Volume *) 0;
- if (error) {
- pntr->status = 0; /*things are messed up */
- strcpy(pntr->name, volname);
- Log("1 Volser: ListVolumes: Could not detach volume %s\n",
- volname);
- goto drop;
- }
+ break;
}
+
GetNextVol(dirp, volname, &volid);
}
- drop:
- if (tv) {
- VDetachVolume(&error, tv);
- tv = (Volume *) 0;
- }
- if (ttc) {
- DeleteTrans(ttc, 1);
- ttc = (struct volser_trans *)0;
+
+ if (found) {
+#ifndef AFS_PTHREAD_ENV
+ IOMGR_Poll(); /*make sure that the client does not time out */
+#endif
+
+ handle.volinfo_type = VOLINT_INFO_TYPE_BASE;
+ handle.volinfo_ptr.base = volumeInfo->volEntries_val;
+
+ code = GetVolInfo(partid,
+ volid,
+ pname,
+ volname,
+ &handle,
+ VOL_INFO_LIST_SINGLE);
}
closedir(dirp);
- if (found)
- return 0;
- else
- return ENODEV;
+ return (found) ? 0 : ENODEV;
}
/*------------------------------------------------------------------------
{ /*SAFSVolXListOneVolume */
volintXInfo *xInfoP; /*Ptr to the extended vol info */
- register struct Volume *tv; /*Volume ptr */
- struct volser_trans *ttc; /*Volume transaction ptr */
struct DiskPartition *partP; /*Ptr to partition */
char pname[9], volname[20]; /*Partition, volume names */
afs_int32 error; /*Error code */
- afs_int32 code; /*Return code */
DIR *dirp; /*Partition directory ptr */
afs_int32 currVolID; /*Current volume ID */
int found = 0; /*Did we find the volume we need? */
- struct VolumeDiskData *volDiskDataP; /*Ptr to on-disk volume data */
- int numStatBytes; /*Num stat bytes to copy per volume */
unsigned int now;
+ int code;
+ volint_info_handle_t handle;
/*
* Set up our pointers for action, marking our structure to hold exactly
if (dirp == NULL)
return (VOLSERILLEGAL_PARTITION);
+ strcpy(volname, "");
+
/*
* Sweep through the partition directory, looking for the desired entry.
- * First, of course, figure out how many stat bytes to copy out of each
- * volume.
*/
- numStatBytes =
- 4 * ((2 * VOLINT_STATS_NUM_RWINFO_FIELDS) +
- (4 * VOLINT_STATS_NUM_TIME_FIELDS));
- strcpy(volname, "");
- ttc = (struct volser_trans *)0; /*No transaction yet */
- tv = (Volume *) 0; /*Volume not yet attached */
-
while (strcmp(volname, "EOD") && !found) {
/*
* If this is not a volume, move on to the next entry in the
* doesn't time out) and to set up a transaction on the volume.
*/
found = 1;
-#ifndef AFS_PTHREAD_ENV
- IOMGR_Poll();
-#endif
- ttc = NewTrans(currVolID, a_partID);
- if (!ttc) {
- /*
- * Couldn't get a transaction on this volume; let our caller
- * know it's busy.
- */
- xInfoP->status = VBUSY;
- xInfoP->volid = currVolID;
- goto drop;
- }
-
- /*
- * Attach the volume, give up on the volume if we can't.
- */
- tv = VAttachVolumeByName(&error, pname, volname, V_PEEK);
- if (error) {
- xInfoP->status = 0; /*things are messed up */
- strcpy(xInfoP->name, volname);
- xInfoP->volid = currVolID;
- Log("1 Volser: XListOneVolume: Could not attach volume %u\n",
- currVolID);
- goto drop;
- }
-
- /*
- * Also bag out on this volume if it's been marked as needing a
- * salvage or to-be-destroyed.
- */
- volDiskDataP = &(tv->header->diskstuff);
- if (volDiskDataP->destroyMe == DESTROY_ME) {
- xInfoP->status = 0;
- strcpy(xInfoP->name, volname);
- xInfoP->volid = currVolID;
- Log("1 Volser: XListOneVolume: Volume %u will be destroyed on next salvage\n", currVolID);
- goto drop;
- }
-
- if (volDiskDataP->needsSalvaged) {
- xInfoP->status = 0;
- strcpy(xInfoP->name, volname);
- xInfoP->volid = currVolID;
- Log("1 Volser: XListOneVolume: Volume %u needs to be salvaged\n", currVolID);
- goto drop;
- }
+ break;
+ } /*Found desired volume */
- /*
- * Pull out the desired info and stuff it into the area we'll be
- * returning to our caller.
- */
- strcpy(xInfoP->name, volDiskDataP->name);
- xInfoP->volid = volDiskDataP->id;
- xInfoP->type = volDiskDataP->type;
- xInfoP->backupID = volDiskDataP->backupId;
- xInfoP->parentID = volDiskDataP->parentId;
- xInfoP->cloneID = volDiskDataP->cloneId;
- xInfoP->status = VOK;
- xInfoP->copyDate = volDiskDataP->copyDate;
- xInfoP->inUse = volDiskDataP->inUse;
- xInfoP->creationDate = volDiskDataP->creationDate;
- xInfoP->accessDate = volDiskDataP->accessDate;
- xInfoP->updateDate = volDiskDataP->updateDate;
- xInfoP->backupDate = volDiskDataP->backupDate;
- xInfoP->filecount = volDiskDataP->filecount;
- xInfoP->maxquota = volDiskDataP->maxquota;
- xInfoP->size = volDiskDataP->diskused;
+ GetNextVol(dirp, volname, &currVolID);
+ }
- /*
- * Copy out the stat fields in a single operation.
- */
- now = FT_ApproxTime();
- if (now - volDiskDataP->dayUseDate > OneDay) {
- xInfoP->dayUse = 0;
- memset((char *)&(xInfoP->stat_reads[0]), 0, numStatBytes);
- } else {
- xInfoP->dayUse = volDiskDataP->dayUse;
- memcpy((char *)&(xInfoP->stat_reads[0]),
- (char *)&(volDiskDataP->stat_reads[0]), numStatBytes);
- }
+ if (found) {
+#ifndef AFS_PTHREAD_ENV
+ IOMGR_Poll();
+#endif
- /*
- * We're done copying. Detach the volume and iterate (at this
- * point, since we found our volume, we'll then drop out of the
- * loop).
- */
- VDetachVolume(&error, tv);
- tv = (Volume *) 0;
- if (error) {
- xInfoP->status = 0;
- strcpy(xInfoP->name, volname);
- Log("1 Volser: XListOneVolumes Couldn't detach volume %s\n",
- volname);
- goto drop;
- }
+ handle.volinfo_type = VOLINT_INFO_TYPE_EXT;
+ handle.volinfo_ptr.ext = a_volumeXInfoP->volXEntries_val;
- /*
- * At this point, we're golden.
- */
- code = 0;
- } /*Found desired volume */
- GetNextVol(dirp, volname, &currVolID);
- }
+ code = GetVolInfo(a_partID,
+ a_volID,
+ pname,
+ volname,
+ &handle,
+ VOL_INFO_LIST_SINGLE);
- /*
- * Drop the transaction we have for this volume.
- */
- drop:
- if (tv) {
- VDetachVolume(&error, tv);
- tv = (Volume *) 0;
- }
- if (ttc) {
- DeleteTrans(ttc, 1);
- ttc = (struct volser_trans *)0;
}
/*
* return the proper value.
*/
closedir(dirp);
- return (code);
-
+ return (found) ? 0 : ENODEV;
} /*SAFSVolXListOneVolume */
/*returns all the volumes on partition partid. If flags = 1 then all the
volEntries *volumeInfo)
{
volintInfo *pntr;
- register struct Volume *tv;
struct DiskPartition *partP;
- struct volser_trans *ttc;
afs_int32 allocSize = 1000; /*to be changed to a larger figure */
char pname[9], volname[20];
afs_int32 error = 0;
DIR *dirp;
afs_int32 volid;
unsigned int now;
+ int code;
+ volint_info_handle_t handle;
volumeInfo->volEntries_val =
(volintInfo *) malloc(allocSize * sizeof(volintInfo));
if (dirp == NULL)
return VOLSERILLEGAL_PARTITION;
strcpy(volname, "");
+
while (strcmp(volname, "EOD")) { /*while there are more partitions in the partition */
- ttc = (struct volser_trans *)0; /* new one for each pass */
- tv = (Volume *) 0; /* volume not attached */
if (!strcmp(volname, "")) { /* its not a volume, fetch next file */
GetNextVol(dirp, volname, &volid);
if (flags) { /*copy other things too */
#ifndef AFS_PTHREAD_ENV
- IOMGR_Poll(); /*make sure that the client doesnot time out */
+ IOMGR_Poll(); /*make sure that the client does not time out */
#endif
- ttc = NewTrans(volid, partid);
- if (!ttc) {
- pntr->status = VBUSY;
- pntr->volid = volid;
- goto drop;
- }
- tv = VAttachVolumeByName(&error, pname, volname, V_PEEK);
- if (error) {
- pntr->status = 0; /*things are messed up */
- strcpy(pntr->name, volname);
- pntr->volid = volid;
- Log("1 Volser: ListVolumes: Could not attach volume %u (%s) error=%d\n", volid, volname, error);
- goto drop;
- }
- if (tv->header->diskstuff.needsSalvaged) {
- /*this volume will be salvaged */
- pntr->status = 0;
- strcpy(pntr->name, volname);
- pntr->volid = volid;
- Log("1 Volser: ListVolumes: Volume %u (%s) needs to be salvaged\n", volid, volname);
- goto drop;
- }
- if (tv->header->diskstuff.destroyMe == DESTROY_ME) {
- /*this volume will be salvaged */
+ handle.volinfo_type = VOLINT_INFO_TYPE_BASE;
+ handle.volinfo_ptr.base = pntr;
+
+
+ code = GetVolInfo(partid,
+ volid,
+ pname,
+ volname,
+ &handle,
+ VOL_INFO_LIST_MULTIPLE);
+ if (code == -2) { /* DESTROY_ME flag set */
goto drop2;
}
- /*read in the relevant info */
- pntr->status = VOK; /*its ok */
- pntr->volid = tv->header->diskstuff.id;
- strcpy(pntr->name, tv->header->diskstuff.name);
- pntr->type = tv->header->diskstuff.type; /*if ro volume */
- pntr->cloneID = tv->header->diskstuff.cloneId; /*if rw volume */
- pntr->backupID = tv->header->diskstuff.backupId;
- pntr->parentID = tv->header->diskstuff.parentId;
- pntr->copyDate = tv->header->diskstuff.copyDate;
- pntr->inUse = tv->header->diskstuff.inUse;
- pntr->size = tv->header->diskstuff.diskused;
- pntr->needsSalvaged = tv->header->diskstuff.needsSalvaged;
- pntr->maxquota = tv->header->diskstuff.maxquota;
- pntr->filecount = tv->header->diskstuff.filecount;
- now = FT_ApproxTime();
- if (now - tv->header->diskstuff.dayUseDate > OneDay)
- pntr->dayUse = 0;
- else
- pntr->dayUse = tv->header->diskstuff.dayUse;
- pntr->creationDate = tv->header->diskstuff.creationDate;
- pntr->accessDate = tv->header->diskstuff.accessDate;
- pntr->updateDate = tv->header->diskstuff.updateDate;
- pntr->backupDate = tv->header->diskstuff.backupDate;
- pntr->spare0 = tv->header->diskstuff.minquota;
- pntr->spare1 =
- (long)tv->header->diskstuff.weekUse[0] +
- (long)tv->header->diskstuff.weekUse[1] +
- (long)tv->header->diskstuff.weekUse[2] +
- (long)tv->header->diskstuff.weekUse[3] +
- (long)tv->header->diskstuff.weekUse[4] +
- (long)tv->header->diskstuff.weekUse[5] +
- (long)tv->header->diskstuff.weekUse[6];
- pntr->spare2 = V_volUpCounter(tv);
- pntr->flags = pntr->spare3 = (long)0;
- VDetachVolume(&error, tv); /*free the volume */
- tv = (Volume *) 0;
- if (error) {
- pntr->status = 0; /*things are messed up */
- strcpy(pntr->name, volname);
- Log("1 Volser: ListVolumes: Could not detach volume %s\n",
- volname);
- goto drop;
- }
} else {
pntr->volid = volid;
/*just volids are needed */
}
drop:
- if (ttc) {
- DeleteTrans(ttc, 1);
- ttc = (struct volser_trans *)0;
- }
pntr++;
volumeInfo->volEntries_len += 1;
if ((allocSize - volumeInfo->volEntries_len) < 5) {
(volintInfo *) realloc((char *)volumeInfo->volEntries_val,
allocSize * sizeof(volintInfo));
if (pntr == NULL) {
- if (tv) {
- VDetachVolume(&error, tv);
- tv = (Volume *) 0;
- }
- if (ttc) {
- DeleteTrans(ttc, 1);
- ttc = (struct volser_trans *)0;
- }
- closedir(dirp);
+ closedir(dirp);
return VOLSERNO_MEMORY;
}
volumeInfo->volEntries_val = pntr; /* point to new block */
}
drop2:
- if (tv) {
- VDetachVolume(&error, tv);
- tv = (Volume *) 0;
- }
- if (ttc) {
- DeleteTrans(ttc, 1);
- ttc = (struct volser_trans *)0;
- }
GetNextVol(dirp, volname, &volid);
}
- closedir(dirp);
- if (ttc)
- DeleteTrans(ttc, 1);
+ closedir(dirp);
return 0;
}
{ /*SAFSVolXListVolumes */
volintXInfo *xInfoP; /*Ptr to the extended vol info */
- register struct Volume *tv; /*Volume ptr */
struct DiskPartition *partP; /*Ptr to partition */
- struct volser_trans *ttc; /*Volume transaction ptr */
afs_int32 allocSize = 1000; /*To be changed to a larger figure */
char pname[9], volname[20]; /*Partition, volume names */
afs_int32 error = 0; /*Return code */
DIR *dirp; /*Partition directory ptr */
afs_int32 volid; /*Current volume ID */
- struct VolumeDiskData *volDiskDataP; /*Ptr to on-disk volume data */
- int numStatBytes; /*Num stat bytes to copy per volume */
unsigned int now;
+ int code;
+ volint_info_handle_t handle;
/*
* Allocate a large array of extended volume info structures, then
dirp = opendir(VPartitionPath(partP));
if (dirp == NULL)
return (VOLSERILLEGAL_PARTITION);
+ strcpy(volname, "");
/*
- * Sweep through the partition directory, acting on each entry. First,
- * of course, figure out how many stat bytes to copy out of each volume.
+ * Sweep through the partition directory, acting on each entry.
*/
- numStatBytes =
- 4 * ((2 * VOLINT_STATS_NUM_RWINFO_FIELDS) +
- (4 * VOLINT_STATS_NUM_TIME_FIELDS));
- strcpy(volname, "");
while (strcmp(volname, "EOD")) {
- ttc = (struct volser_trans *)0; /*New one for each pass */
- tv = (Volume *) 0; /*Volume not yet attached */
/*
* If this is not a volume, move on to the next entry in the
#ifndef AFS_PTHREAD_ENV
IOMGR_Poll();
#endif
- ttc = NewTrans(volid, a_partID);
- if (!ttc) {
- /*
- * Couldn't get a transaction on this volume; let our caller
- * know it's busy.
- */
- xInfoP->status = VBUSY;
- xInfoP->volid = volid;
- goto drop;
- }
-
- /*
- * Attach the volume, give up on this volume if we can't.
- */
- tv = VAttachVolumeByName(&error, pname, volname, V_PEEK);
- if (error) {
- xInfoP->status = 0; /*things are messed up */
- strcpy(xInfoP->name, volname);
- xInfoP->volid = volid;
- Log("1 Volser: XListVolumes: Could not attach volume %u\n",
- volid);
- goto drop;
- }
- /*
- * Also bag out on this volume if it's been marked as needing a
- * salvage or to-be-destroyed.
- */
- volDiskDataP = &(tv->header->diskstuff);
- if (volDiskDataP->needsSalvaged) {
- xInfoP->status = 0;
- strcpy(xInfoP->name, volname);
- xInfoP->volid = volid;
- Log("1 Volser: XListVolumes: Volume %u needs to be salvaged\n", volid);
- goto drop;
- }
+ handle.volinfo_type = VOLINT_INFO_TYPE_EXT;
+ handle.volinfo_ptr.ext = xInfoP;
- if (volDiskDataP->destroyMe == DESTROY_ME)
+ code = GetVolInfo(a_partID,
+ volid,
+ pname,
+ volname,
+ &handle,
+ VOL_INFO_LIST_MULTIPLE);
+ if (code == -2) { /* DESTROY_ME flag set */
goto drop2;
-
- /*
- * Pull out the desired info and stuff it into the area we'll be
- * returning to our caller.
- */
- strcpy(xInfoP->name, volDiskDataP->name);
- xInfoP->volid = volDiskDataP->id;
- xInfoP->type = volDiskDataP->type;
- xInfoP->backupID = volDiskDataP->backupId;
- xInfoP->parentID = volDiskDataP->parentId;
- xInfoP->cloneID = volDiskDataP->cloneId;
- xInfoP->status = VOK;
- xInfoP->copyDate = volDiskDataP->copyDate;
- xInfoP->inUse = volDiskDataP->inUse;
- xInfoP->creationDate = volDiskDataP->creationDate;
- xInfoP->accessDate = volDiskDataP->accessDate;
- xInfoP->updateDate = volDiskDataP->updateDate;
- xInfoP->backupDate = volDiskDataP->backupDate;
- now = FT_ApproxTime();
- if (now - volDiskDataP->dayUseDate > OneDay)
- xInfoP->dayUse = 0;
- else
- xInfoP->dayUse = volDiskDataP->dayUse;
- xInfoP->filecount = volDiskDataP->filecount;
- xInfoP->maxquota = volDiskDataP->maxquota;
- xInfoP->size = volDiskDataP->diskused;
-
- /*
- * Copy out the stat fields in a single operation.
- */
- memcpy((char *)&(xInfoP->stat_reads[0]),
- (char *)&(volDiskDataP->stat_reads[0]), numStatBytes);
-
- /*
- * We're done copying. Detach the volume and iterate.
- */
- VDetachVolume(&error, tv);
- tv = (Volume *) 0;
- if (error) {
- xInfoP->status = 0;
- strcpy(xInfoP->name, volname);
- Log("1 Volser: XListVolumes: Could not detach volume %s\n",
- volname);
- goto drop;
}
- } /*Full contents desired */
- else
+ } else {
/*
* Just volume IDs are needed.
*/
xInfoP->volid = volid;
-
- drop:
- /*
- * Drop the transaction we have for this volume.
- */
- if (ttc) {
- DeleteTrans(ttc, 1);
- ttc = (struct volser_trans *)0;
}
+ drop:
/*
* Bump the pointer in the data area we're building, along with
* the count of the number of entries it contains.
/*
* Bummer, no memory. Bag it, tell our caller what went wrong.
*/
- if (tv) {
- VDetachVolume(&error, tv);
- tv = (Volume *) 0;
- }
- if (ttc) {
- DeleteTrans(ttc, 1);
- ttc = (struct volser_trans *)0;
- }
closedir(dirp);
return (VOLSERNO_MEMORY);
}
a_volumeXInfoP->volXEntries_val +
a_volumeXInfoP->volXEntries_len;
}
- /*Need more space */
+
drop2:
- /*
- * Detach our current volume and the transaction on it, then move on
- * to the next volume in the partition directory.
- */
- if (tv) {
- VDetachVolume(&error, tv);
- tv = (Volume *) 0;
- }
- if (ttc) {
- DeleteTrans(ttc, 1);
- ttc = (struct volser_trans *)0;
- }
GetNextVol(dirp, volname, &volid);
} /*Sweep through the partition directory */
* delete our transaction (if any), and go home happy.
*/
closedir(dirp);
- if (ttc)
- DeleteTrans(ttc, 1);
return (0);
} /*SAFSVolXListVolumes */