From: Derrick Brashear Date: Mon, 22 Aug 2011 18:56:03 +0000 (-0400) Subject: xvcb lock order violation X-Git-Tag: openafs-devel-1_7_1~98 X-Git-Url: https://git.openafs.org/?p=openafs.git;a=commitdiff_plain;h=12fa5b859b857aaf0ab6975ebac0d4867d0ae0ff xvcb lock order violation afs_FlushVCBs(1) = xvcb, xserver (in that order) afs_GetServer = xserver, xsrvAddr, (call afs_RemoveSrvAddr which calls afs_FlushServer, which gets xvcb) "nope". do a little dance to get xvcb, searching for a struct server to reuse again if we had to block. if you're curious: Lock afs_xserver status: (reader_waitingwriter_waiting, write_locked(pid:1589 at:36), 3 waiters) Lock afs_xvcb status: (none_waiting, write_locked(pid:0 at:273)) Lock afs_xsrvAddr status: (none_waiting, write_locked(pid:1589 at:116)) Change-Id: If295d0b9ce347c1cc24df12cd9934a30dce2a3c6 Reviewed-on: http://gerrit.openafs.org/5294 Tested-by: BuildBot Tested-by: Derrick Brashear Reviewed-by: Derrick Brashear --- diff --git a/src/afs/afs_prototypes.h b/src/afs/afs_prototypes.h index 535ffc2..f32c87c 100644 --- a/src/afs/afs_prototypes.h +++ b/src/afs/afs_prototypes.h @@ -882,7 +882,6 @@ extern int afs_randomMod15(void); extern int afs_randomMod127(void); extern void afs_SortOneServer(struct server *asp); extern void afs_SortServers(struct server *aservers[], int count); -extern void afs_FlushServer(struct server *srvp); extern void afs_RemoveSrvAddr(struct srvAddr *sap); extern void afs_ActivateServer(struct srvAddr *sap); #ifdef AFS_USERSPACE_IP_ADDR diff --git a/src/afs/afs_server.c b/src/afs/afs_server.c index 1baa80b..bcff775 100644 --- a/src/afs/afs_server.c +++ b/src/afs/afs_server.c @@ -1580,9 +1580,9 @@ afs_SetServerPrefs(struct srvAddr *sa) /* afs_FlushServer() * The addresses on this server struct has changed in some way and will * clean up all other structures that may reference it. - * The afs_xserver and afs_xsrvAddr locks are assumed taken. + * The afs_xserver, afs_xvcb and afs_xsrvAddr locks are assumed taken. */ -void +static void afs_FlushServer(struct server *srvp) { afs_int32 i; @@ -1598,12 +1598,10 @@ afs_FlushServer(struct server *srvp) if (srvp->cbrs) { struct afs_cbr *cb, *cbnext; - ObtainWriteLock(&afs_xvcb, 300); for (cb = srvp->cbrs; cb; cb = cbnext) { cbnext = cb->next; afs_FreeCBR(cb); } srvp->cbrs = (struct afs_cbr *)0; - ReleaseWriteLock(&afs_xvcb); } /* If no more srvAddr structs hanging off of this server struct, @@ -1719,6 +1717,23 @@ afs_GetCapabilities(struct server *ts) } +static struct server * +afs_SearchServer(u_short aport, afsUUID * uuidp, afs_int32 locktype, + struct server **oldts, afs_int32 addr_uniquifier) +{ + struct server *ts = afs_FindServer(0, aport, uuidp, locktype); + if (ts && (ts->sr_addr_uniquifier == addr_uniquifier) && ts->addr) { + /* Found a server struct that is multihomed and same + * uniqufier (same IP addrs). The above if statement is the + * same as in InstallUVolumeEntry(). + */ + return ts; + } + if (ts) + *oldts = ts; /* Will reuse if same uuid */ + return NULL; +} + /* afs_GetServer() * Return an updated and properly initialized server structure * corresponding to the server ID, cell, and port specified. @@ -1756,22 +1771,37 @@ afs_GetServer(afs_uint32 * aserverp, afs_int32 nservers, afs_int32 acell, } else { if (nservers <= 0) panic("afs_GetServer: incorrect count of servers"); - ts = afs_FindServer(0, aport, uuidp, locktype); - if (ts && (ts->sr_addr_uniquifier == addr_uniquifier) && ts->addr) { - /* Found a server struct that is multihomed and same - * uniqufier (same IP addrs). The above if statement is the - * same as in InstallUVolumeEntry(). - */ + + ts = afs_SearchServer(aport, uuidp, locktype, &oldts, addr_uniquifier); + if (ts) { ReleaseSharedLock(&afs_xserver); return ts; } - if (ts) - oldts = ts; /* Will reuse if same uuid */ } - UpgradeSToWLock(&afs_xserver, 36); + /* + * Lock hierarchy requires xvcb, then xserver. We *have* xserver. + * Do a little dance and see if we can grab xvcb. If not, we + * need to recheck that oldts is still right after a drop and reobtain. + */ + if (EWOULDBLOCK == NBObtainWriteLock(&afs_xvcb, 300)) { + ReleaseSharedLock(&afs_xserver); + ObtainWriteLock(&afs_xvcb, 299); + ObtainWriteLock(&afs_xserver, 35); + + /* we don't know what changed while we didn't hold the lock */ + oldts = 0; + ts = afs_SearchServer(aport, uuidp, locktype, &oldts, + addr_uniquifier); + if (ts) { + ReleaseWriteLock(&afs_xserver); + ReleaseWriteLock(&afs_xvcb); + return ts; + } + } else { + UpgradeSToWLock(&afs_xserver, 36); + } ObtainWriteLock(&afs_xsrvAddr, 116); - srvcount = afs_totalServers; /* Reuse/allocate a new server structure */ @@ -1901,6 +1931,8 @@ afs_GetServer(afs_uint32 * aserverp, afs_int32 nservers, afs_int32 acell, orphsa->sa_flags &= ~SRVADDR_MH; /* Not multihomed */ } } + /* We can't need this below, and won't reacquire */ + ReleaseWriteLock(&afs_xvcb); srvcount = afs_totalServers - srvcount; /* # servers added and removed */ if (srvcount) {