From 5ced6025b9f11fadbdf2e092bf40cc87499ed277 Mon Sep 17 00:00:00 2001 From: Andrew Deason Date: Thu, 2 Nov 2017 16:41:52 -0500 Subject: [PATCH] rx: Convert rxinit_status to rx_IsRunning() Currently, all rx code examines the atomic rxinit_status to determine if rx is running (that is, if rx_InitHost has been called, and rx_Finalize/shutdown_rx hasn't been called). This is used in rx.c to see if we're redundantly calling our setup/teardown functions, and outside of rx.c in a couple of places to see if rx-related resources have been initialized. The usage of rxinit_status is a little confusing, since setting bit 0 indicates that rx is not running, and clearing bit 0 indicates rx is running. Since using rxinit_status requires atomic functions, this makes code checking or setting rxinit_status a little verbose, and it can be hard to see what it is checking for. (For example, does 'if (!rx_atomic_test_and_clear_bit(&rxinit_status, 0))' succeed when rx running, or when rx is not running?) The current usage of rxinit_status in rx_InitHost also does not handle initialization errors correctly. rx_InitHost clears rxinit_status near the beginning of the function, but does not set rxinit_status if an error is encountered. This means that any code that checks rxinit_status (such as another rx_InitHost call) will think that rx was initialized successfully, but various resources aren't actually setup. This can cause segfaults and other errors as the code tries to actually use rx. This can easily be seen in bosserver, if bosserver is started up while the local host/port is in use by someone else. bosserver will try to rx_InitHost, which will fail, and then we'll try to rx_InitHost again, which will immediately succeed without doing any init. We then segfault quickly afterwards as we try to use unitialized rx resources. To fix all of this, refactor code using rxinit_status to use a new function, called rx_IsRunning(), to make it a little clearer what we're checking for. We also re-introduce the LOCK_RX_INIT locks to prevent functions like rx_InitHost and rx_Finalize from running in parallel. Note that non-init/shutdown code (such as rx_upcall or rx_GetIFInfo) does not need to wait for LOCK_RX_INIT to check if rx is running or not. These functions only care if rx is currently setup enough to be used, so we can immediately return a 'yes' or 'no' answer. That is, if rx_InitHost is in the middle of running, rx_IsRunning returns 0, since some resouces may not be fully initialized. Change-Id: Ia14a6a725c9662b9db0adef48c33b48a93ffe051 Reviewed-on: https://gerrit.openafs.org/12761 Reviewed-by: Michael Meffie Reviewed-by: Benjamin Kaduk Tested-by: BuildBot --- src/rx/DARWIN/rx_knet.c | 4 +-- src/rx/rx.c | 87 +++++++++++++++++++++++++++++++++++++------------ src/rx/rx_internal.h | 1 + src/rx/rx_user.c | 7 ++-- 4 files changed, 72 insertions(+), 27 deletions(-) diff --git a/src/rx/DARWIN/rx_knet.c b/src/rx/DARWIN/rx_knet.c index 8cc24c8..ca7f2c6 100644 --- a/src/rx/DARWIN/rx_knet.c +++ b/src/rx/DARWIN/rx_knet.c @@ -22,8 +22,6 @@ #endif #ifdef RXK_UPCALL_ENV -extern rx_atomic_t rxinit_status; - void rx_upcall(socket_t so, void *arg, __unused int waitflag) { @@ -41,7 +39,7 @@ rx_upcall(socket_t so, void *arg, __unused int waitflag) size_t nbytes, resid, noffset; /* we stopped rx but the socket isn't closed yet */ - if (rx_atomic_test_bit(&rxinit_status, 0)) + if (!rxi_IsRunning()) return; /* See if a check for additional packets was issued */ diff --git a/src/rx/rx.c b/src/rx/rx.c index 04d066e..7615825 100644 --- a/src/rx/rx.c +++ b/src/rx/rx.c @@ -160,6 +160,12 @@ static void rxi_CancelDelayedAbortEvent(struct rx_call *call); static void rxi_CancelGrowMTUEvent(struct rx_call *call); static void update_nextCid(void); +#ifndef KERNEL +static void rxi_Finalize_locked(void); +#elif defined(UKERNEL) +# define rxi_Finalize_locked() do { } while (0) +#endif + #ifdef RX_ENABLE_LOCKS struct rx_tq_debug { rx_atomic_t rxi_start_aborted; /* rxi_start awoke after rxi_Send in error.*/ @@ -442,19 +448,35 @@ static int rxdb_fileID = RXDB_FILE_RX; #endif /* RX_ENABLE_LOCKS */ struct rx_serverQueueEntry *rx_waitForPacket = 0; +/* + * This mutex serializes calls to our initialization and shutdown routines + * (rx_InitHost, rx_Finalize and shutdown_rx). Only one thread can be running + * these at any time; all other threads must wait for it to finish running, and + * then examine the value of rxi_running afterwards. + */ +#ifdef AFS_PTHREAD_ENV +# define LOCK_RX_INIT MUTEX_ENTER(&rx_init_mutex) +# define UNLOCK_RX_INIT MUTEX_EXIT(&rx_init_mutex) +#else +# define LOCK_RX_INIT +# define UNLOCK_RX_INIT +#endif + /* ------------Exported Interfaces------------- */ +static rx_atomic_t rxi_running = RX_ATOMIC_INIT(0); +int +rxi_IsRunning(void) +{ + return rx_atomic_read(&rxi_running); +} + /* Initialize rx. A port number may be mentioned, in which case this * becomes the default port number for any service installed later. * If 0 is provided for the port number, a random port will be chosen * by the kernel. Whether this will ever overlap anything in * /etc/services is anybody's guess... Returns 0 on success, -1 on * error. */ -#if !(defined(AFS_NT40_ENV) || defined(RXK_UPCALL_ENV)) -static -#endif -rx_atomic_t rxinit_status = RX_ATOMIC_INIT(1); - int rx_InitHost(u_int host, u_int port) { @@ -468,15 +490,17 @@ rx_InitHost(u_int host, u_int port) SPLVAR; INIT_PTHREAD_LOCKS; - if (!rx_atomic_test_and_clear_bit(&rxinit_status, 0)) + LOCK_RX_INIT; + if (rxi_IsRunning()) { + UNLOCK_RX_INIT; return 0; /* already started */ - + } #ifdef RXDEBUG rxi_DebugInit(); #endif #ifdef AFS_NT40_ENV if (afs_winsockInit() < 0) - return -1; + goto error; #endif #ifndef KERNEL @@ -492,7 +516,7 @@ rx_InitHost(u_int host, u_int port) rx_socket = rxi_GetHostUDPSocket(host, (u_short) port); if (rx_socket == OSI_NULLSOCKET) { - return RX_ADDRINUSE; + goto addrinuse; } #if defined(RX_ENABLE_LOCKS) && defined(KERNEL) #ifdef RX_LOCKS_DB @@ -580,19 +604,19 @@ rx_InitHost(u_int host, u_int port) socklen_t addrlen = sizeof(addr); #endif if (getsockname((intptr_t)rx_socket, (struct sockaddr *)&addr, &addrlen)) { - rx_Finalize(); + rxi_Finalize_locked(); osi_Free(htable, rx_hashTableSize * sizeof(struct rx_connection *)); - return -1; + goto error; } rx_port = addr.sin_port; #endif } rx_stats.minRtt.sec = 9999999; if (RAND_bytes(&rx_epoch, sizeof(rx_epoch)) != 1) - return -1; + goto error; rx_epoch = (rx_epoch & ~0x40000000) | 0x80000000; if (RAND_bytes(&rx_nextCid, sizeof(rx_nextCid)) != 1) - return -1; + goto error; rx_nextCid &= RX_CIDMASK; MUTEX_ENTER(&rx_quota_mutex); rxi_dataQuota += rx_extraQuota; /* + extra pkts caller asked to rsrv */ @@ -623,8 +647,19 @@ rx_InitHost(u_int host, u_int port) rxi_StartListener(); USERPRI; - rx_atomic_clear_bit(&rxinit_status, 0); + + rx_atomic_set(&rxi_running, 1); + UNLOCK_RX_INIT; + return 0; + + addrinuse: + UNLOCK_RX_INIT; + return RX_ADDRINUSE; + + error: + UNLOCK_RX_INIT; + return -1; } int @@ -2492,12 +2527,21 @@ rx_EndCall(struct rx_call *call, afs_int32 rc) void rx_Finalize(void) { - struct rx_connection **conn_ptr, **conn_end; - INIT_PTHREAD_LOCKS; - if (rx_atomic_test_and_set_bit(&rxinit_status, 0)) + LOCK_RX_INIT; + if (!rxi_IsRunning()) { + UNLOCK_RX_INIT; return; /* Already shutdown. */ + } + rxi_Finalize_locked(); + UNLOCK_RX_INIT; +} +static void +rxi_Finalize_locked(void) +{ + struct rx_connection **conn_ptr, **conn_end; + rx_atomic_set(&rxi_running, 0); rxi_DeleteCachedConnections(); if (rx_connHashTable) { MUTEX_ENTER(&rx_connHashTable_lock); @@ -2534,7 +2578,6 @@ rx_Finalize(void) #ifdef AFS_NT40_ENV afs_winsockCleanup(); #endif - } #endif @@ -7843,9 +7886,12 @@ shutdown_rx(void) struct rx_serverQueueEntry *sq; #endif /* KERNEL */ - if (rx_atomic_test_and_set_bit(&rxinit_status, 0)) + LOCK_RX_INIT; + if (!rxi_IsRunning()) { + UNLOCK_RX_INIT; return; /* Already shutdown. */ - + } + rx_atomic_set(&rxi_running, 0); #ifndef KERNEL rx_port = 0; #ifndef AFS_PTHREAD_ENV @@ -7967,6 +8013,7 @@ shutdown_rx(void) rxi_dataQuota = RX_MAX_QUOTA; rxi_availProcs = rxi_totalMin = rxi_minDeficit = 0; MUTEX_EXIT(&rx_quota_mutex); + UNLOCK_RX_INIT; } #ifndef KERNEL diff --git a/src/rx/rx_internal.h b/src/rx/rx_internal.h index 800732e..8d5fd79 100644 --- a/src/rx/rx_internal.h +++ b/src/rx/rx_internal.h @@ -20,6 +20,7 @@ extern rx_atomic_t rx_nWaited; /* Prototypes for internal functions */ /* rx.c */ +extern int rxi_IsRunning(void); extern void rxi_CancelDelayedAckEvent(struct rx_call *); extern void rxi_PacketsUnWait(void); extern void rxi_SetPeerMtu(struct rx_peer *peer, afs_uint32 host, diff --git a/src/rx/rx_user.c b/src/rx/rx_user.c index 4793494..0ba068b 100644 --- a/src/rx/rx_user.c +++ b/src/rx/rx_user.c @@ -352,7 +352,6 @@ rx_getAllAddrMaskMtu(afs_uint32 addrBuffer[], afs_uint32 maskBuffer[], #endif #ifdef AFS_NT40_ENV -extern rx_atomic_t rxinit_status; void rxi_InitMorePackets(void) { int npackets, ncbufs; @@ -373,7 +372,7 @@ rx_GetIFInfo(void) LOCK_IF_INIT; if (Inited) { - if (Inited < 2 && !rx_atomic_test_bit(&rxinit_status, 0)) { + if (Inited < 2 && rxi_IsRunning()) { /* We couldn't initialize more packets earlier. * Do it now. */ rxi_InitMorePackets(); @@ -407,11 +406,11 @@ rx_GetIFInfo(void) UNLOCK_IF; /* - * If rxinit_status is still set, rx_InitHost() has yet to be called + * If rxi_IsRunning is false, rx_InitHost() has yet to be called * and we therefore do not have any mutex locks initialized. As a * result we cannot call rxi_MorePackets() without crashing. */ - if (rx_atomic_test_bit(&rxinit_status, 0)) + if (!rxi_IsRunning()) return; rxi_InitMorePackets(); -- 1.9.4