From: Jeffrey Altman Date: Sat, 5 May 2012 23:11:07 +0000 (-0400) Subject: Windows: cm_GetNewSCache drop lock to permit change X-Git-Tag: openafs-stable-1_8_0pre1~2443 X-Git-Url: https://git.openafs.org/?p=openafs.git;a=commitdiff_plain;h=46c1f1391b6e1b95798e183b8f34cba5c074e0ad Windows: cm_GetNewSCache drop lock to permit change In cm_GetNewSCache the entire LRU queue is searched for a cm_scache_t object that is safe to recycle. If none was found, the LRU queue was immediately searched again without dropping the cm_scacheLock or taking a pause. As a result, it is quite possible that a thread about to release a cm_scache_t was blocked from doing so. This patchset factors some of the logic a bit differently to improve readability and adds new log messages to help diagnose the cause of a problem if no cm_scache_t ever becomes available. Change-Id: Ica6ebee0ce0456e879ae7188d9c8cdc935a92e5b Reviewed-on: http://gerrit.openafs.org/7352 Tested-by: BuildBot Reviewed-by: Jeffrey Altman Tested-by: Jeffrey Altman --- diff --git a/src/WINNT/afsd/cm_scache.c b/src/WINNT/afsd/cm_scache.c index 9b65dae..31afbb0 100644 --- a/src/WINNT/afsd/cm_scache.c +++ b/src/WINNT/afsd/cm_scache.c @@ -317,22 +317,21 @@ cm_GetNewSCache(afs_uint32 locked) if (!buf_dirty && !buf_rdr) { cm_fid_t fid; afs_uint32 fileType; + int success; - if (!lock_TryWrite(&scp->rw)) { - lock_ObtainWrite(&cm_scacheLock); - if (scp_prev != (cm_scache_t *) osi_QPrev(&scp->q) && - scp_next != (cm_scache_t *) osi_QNext(&scp->q)) - break; - else - continue; - } + success = lock_TryWrite(&scp->rw); lock_ObtainWrite(&cm_scacheLock); if (scp_prev != (cm_scache_t *) osi_QPrev(&scp->q) && scp_next != (cm_scache_t *) osi_QNext(&scp->q)) { - lock_ReleaseWrite(&scp->rw); + osi_Log1(afsd_logp, "GetNewSCache scp 0x%p; LRU order changed", scp); + if (success) + lock_ReleaseWrite(&scp->rw); break; + } else if (!success) { + osi_Log1(afsd_logp, "GetNewSCache failed to obtain lock scp 0x%p", scp); + continue; } /* Found a likely candidate. 
Save type and fid in case we succeed */ @@ -351,23 +350,34 @@ cm_GetNewSCache(afs_uint32 locked) goto done; } lock_ReleaseWrite(&scp->rw); - } else if (!buf_rdr) { - osi_Log1(afsd_logp, "GetNewSCache dirty buffers scp 0x%p", scp); - lock_ObtainWrite(&cm_scacheLock); - if (scp_prev != (cm_scache_t *) osi_QPrev(&scp->q) && - scp_next != (cm_scache_t *) osi_QNext(&scp->q)) - break; } else { - osi_Log1(afsd_logp,"GetNewSCache redirector is holding extents scp 0x%p", scp); + if (buf_rdr) + osi_Log1(afsd_logp,"GetNewSCache redirector is holding extents scp 0x%p", scp); + else + osi_Log1(afsd_logp, "GetNewSCache dirty buffers scp 0x%p", scp); + lock_ObtainWrite(&cm_scacheLock); if (scp_prev != (cm_scache_t *) osi_QPrev(&scp->q) && scp_next != (cm_scache_t *) osi_QNext(&scp->q)) + { + osi_Log1(afsd_logp, "GetNewSCache scp 0x%p; LRU order changed", scp); break; + } } } } /* for */ osi_Log2(afsd_logp, "GetNewSCache all scache entries in use (attempt = %d, count = %u)", attempt, count); + if (scp == NULL) { + /* + * The entire LRU queue was walked and no available cm_scache_t was + * found. Drop the cm_scacheLock and sleep for a moment to give a + * chance for cm_scache_t objects to be released. + */ + lock_ReleaseWrite(&cm_scacheLock); + Sleep(50); + lock_ObtainWrite(&cm_scacheLock); + } } /* FAILURE */ scp = NULL;