Windows: use waiters counter instead of osi_TEmpty
[openafs.git] / src / WINNT / client_osi / osibasel.c
index 1779f97..20444f8 100644 (file)
@@ -1,7 +1,7 @@
-/* 
+/*
  * Copyright 2000, International Business Machines Corporation and others.
  * All Rights Reserved.
- * 
+ *
  * This software has been released under the terms of the IBM Public
  * License.  For details, see the LICENSE file in the top-level source
  * directory or online at http://www.openafs.org/dl/license10.html
@@ -26,6 +26,9 @@ static long     atomicIndexCounter = 0;
 static DWORD tls_LockRefH = 0;
 static DWORD tls_LockRefT = 0;
 static BOOLEAN lockOrderValidation = 0;
+static osi_lock_ref_t * lock_ref_FreeListp = NULL;
+static osi_lock_ref_t * lock_ref_FreeListEndp = NULL;
+CRITICAL_SECTION lock_ref_CS;
 
 void osi_BaseInit(void)
 {
@@ -34,21 +37,37 @@ void osi_BaseInit(void)
     for(i=0; i<OSI_MUTEXHASHSIZE; i++)
         InitializeCriticalSection(&osi_baseAtomicCS[i]);
 
-    if ((tls_LockRefH = TlsAlloc()) == TLS_OUT_OF_INDEXES) 
-        osi_panic("TlsAlloc(tls_LockRefH) failure", __FILE__, __LINE__); 
+    if ((tls_LockRefH = TlsAlloc()) == TLS_OUT_OF_INDEXES)
+        osi_panic("TlsAlloc(tls_LockRefH) failure", __FILE__, __LINE__);
 
-    if ((tls_LockRefT = TlsAlloc()) == TLS_OUT_OF_INDEXES) 
-        osi_panic("TlsAlloc(tls_LockRefT) failure", __FILE__, __LINE__); 
-}       
+    if ((tls_LockRefT = TlsAlloc()) == TLS_OUT_OF_INDEXES)
+        osi_panic("TlsAlloc(tls_LockRefT) failure", __FILE__, __LINE__);
 
-void osi_SetLockOrderValidation(int on)
+    InitializeCriticalSection(&lock_ref_CS);
+}
+
+void
+osi_SetLockOrderValidation(int on)
 {
     lockOrderValidation = (BOOLEAN)on;
 }
 
-osi_lock_ref_t *lock_GetLockRef(void * lockp, char type)
+static osi_lock_ref_t *
+lock_GetLockRef(void * lockp, char type)
 {
-    osi_lock_ref_t * lockRefp = (osi_lock_ref_t *)malloc(sizeof(osi_lock_ref_t));
+    osi_lock_ref_t * lockRefp = NULL;
+
+    EnterCriticalSection(&lock_ref_CS);
+    if (lock_ref_FreeListp) {
+        lockRefp = lock_ref_FreeListp;
+        osi_QRemoveHT( (osi_queue_t **) &lock_ref_FreeListp,
+                       (osi_queue_t **) &lock_ref_FreeListEndp,
+                       &lockRefp->q);
+    }
+    LeaveCriticalSection(&lock_ref_CS);
+
+    if (lockRefp == NULL)
+        lockRefp = (osi_lock_ref_t *)malloc(sizeof(osi_lock_ref_t));
 
     memset(lockRefp, 0, sizeof(osi_lock_ref_t));
     lockRefp->type = type;
@@ -66,6 +85,16 @@ osi_lock_ref_t *lock_GetLockRef(void * lockp, char type)
     return lockRefp;
 }
 
+static void
+lock_FreeLockRef(osi_lock_ref_t * lockRefp)
+{
+    EnterCriticalSection(&lock_ref_CS);
+    osi_QAddH( (osi_queue_t **) &lock_ref_FreeListp,
+               (osi_queue_t **) &lock_ref_FreeListEndp,
+               &lockRefp->q);
+    LeaveCriticalSection(&lock_ref_CS);
+}
+
 void lock_VerifyOrderRW(osi_queue_t *lockRefH, osi_queue_t *lockRefT, osi_rwlock_t *lockp)
 {
     char msg[512];
@@ -125,7 +154,8 @@ void lock_ObtainWrite(osi_rwlock_t *lockp)
     CRITICAL_SECTION *csp;
     osi_queue_t * lockRefH, *lockRefT;
     osi_lock_ref_t *lockRefp;
-        
+    DWORD tid = thrd_Current();
+
     if ((i=lockp->type) != 0) {
         if (i >= 0 && i < OSI_NLOCKTYPES)
             (osi_lockOps[i]->ObtainWriteProc)(lockp);
@@ -136,7 +166,7 @@ void lock_ObtainWrite(osi_rwlock_t *lockp)
         lockRefH = (osi_queue_t *)TlsGetValue(tls_LockRefH);
         lockRefT = (osi_queue_t *)TlsGetValue(tls_LockRefT);
 
-        if (lockp->level != 0) 
+        if (lockp->level != 0)
             lock_VerifyOrderRW(lockRefH, lockRefT, lockp);
     }
 
@@ -144,20 +174,28 @@ void lock_ObtainWrite(osi_rwlock_t *lockp)
     csp = &osi_baseAtomicCS[lockp->atomicIndex];
     EnterCriticalSection(csp);
 
+    if (lockp->flags & OSI_LOCKFLAG_EXCL) {
+        osi_assertx(lockp->tid[0] != tid, "OSI_RWLOCK_WRITEHELD");
+    } else {
+        for ( i=0; i < lockp->readers && i < OSI_RWLOCK_THREADS; i++ ) {
+            osi_assertx(lockp->tid[i] != tid, "OSI_RWLOCK_READHELD");
+        }
+    }
+
     /* here we have the fast lock, so see if we can obtain the real lock */
-    if (lockp->waiters > 0 || (lockp->flags & OSI_LOCKFLAG_EXCL) || 
+    if (lockp->waiters > 0 || (lockp->flags & OSI_LOCKFLAG_EXCL) ||
         (lockp->readers > 0)) {
         lockp->waiters++;
-        osi_TWait(&lockp->d.turn, OSI_SLEEPINFO_W4WRITE, &lockp->flags, csp);
+        osi_TWait(&lockp->d.turn, OSI_SLEEPINFO_W4WRITE, &lockp->flags, lockp->tid, csp);
         lockp->waiters--;
+        osi_assertx(lockp->waiters >= 0, "waiters underflow");
         osi_assert(lockp->readers == 0 && (lockp->flags & OSI_LOCKFLAG_EXCL));
-    }
-    else {
+    } else {
         /* if we're here, all clear to set the lock */
         lockp->flags |= OSI_LOCKFLAG_EXCL;
+        lockp->tid[0] = tid;
     }
-
-    lockp->tid = thrd_Current();
+    osi_assertx(lockp->readers == 0, "write lock readers present");
 
     LeaveCriticalSection(csp);
 
@@ -167,7 +205,7 @@ void lock_ObtainWrite(osi_rwlock_t *lockp)
         TlsSetValue(tls_LockRefH, lockRefH);
         TlsSetValue(tls_LockRefT, lockRefT);
     }
-}       
+}
 
 void lock_ObtainRead(osi_rwlock_t *lockp)
 {
@@ -175,7 +213,8 @@ void lock_ObtainRead(osi_rwlock_t *lockp)
     CRITICAL_SECTION *csp;
     osi_queue_t * lockRefH, *lockRefT;
     osi_lock_ref_t *lockRefp;
-        
+    DWORD tid = thrd_Current();
+
     if ((i=lockp->type) != 0) {
         if (i >= 0 && i < OSI_NLOCKTYPES)
             (osi_lockOps[i]->ObtainReadProc)(lockp);
@@ -186,7 +225,7 @@ void lock_ObtainRead(osi_rwlock_t *lockp)
         lockRefH = (osi_queue_t *)TlsGetValue(tls_LockRefH);
         lockRefT = (osi_queue_t *)TlsGetValue(tls_LockRefT);
 
-        if (lockp->level != 0) 
+        if (lockp->level != 0)
             lock_VerifyOrderRW(lockRefH, lockRefT, lockp);
     }
 
@@ -194,18 +233,26 @@ void lock_ObtainRead(osi_rwlock_t *lockp)
     csp = &osi_baseAtomicCS[lockp->atomicIndex];
     EnterCriticalSection(csp);
 
+    if (lockp->flags & OSI_LOCKFLAG_EXCL) {
+        osi_assertx(lockp->tid[0] != tid, "OSI_RWLOCK_WRITEHELD");
+    } else {
+        for ( i=0; i < lockp->readers && i < OSI_RWLOCK_THREADS; i++ ) {
+            osi_assertx(lockp->tid[i] != tid, "OSI_RWLOCK_READHELD");
+        }
+    }
+
     /* here we have the fast lock, so see if we can obtain the real lock */
     if (lockp->waiters > 0 || (lockp->flags & OSI_LOCKFLAG_EXCL)) {
         lockp->waiters++;
-        osi_TWait(&lockp->d.turn, OSI_SLEEPINFO_W4READ, &lockp->readers, csp);
+        osi_TWait(&lockp->d.turn, OSI_SLEEPINFO_W4READ, &lockp->readers, lockp->tid, csp);
         lockp->waiters--;
+        osi_assertx(lockp->waiters >= 0, "waiters underflow");
         osi_assert(!(lockp->flags & OSI_LOCKFLAG_EXCL) && lockp->readers > 0);
-    }
-    else {
+    } else {
         /* if we're here, all clear to set the lock */
-        lockp->readers++;
+        if (++lockp->readers <= OSI_RWLOCK_THREADS)
+            lockp->tid[lockp->readers-1] = tid;
     }
-
     LeaveCriticalSection(csp);
 
     if (lockOrderValidation) {
@@ -222,13 +269,18 @@ void lock_ReleaseRead(osi_rwlock_t *lockp)
     CRITICAL_SECTION *csp;
     osi_queue_t * lockRefH, *lockRefT;
     osi_lock_ref_t *lockRefp;
-        
+    DWORD tid = thrd_Current();
+
     if ((i = lockp->type) != 0) {
         if (i >= 0 && i < OSI_NLOCKTYPES)
             (osi_lockOps[i]->ReleaseReadProc)(lockp);
         return;
     }
 
+    /* otherwise we're the fast base type */
+    csp = &osi_baseAtomicCS[lockp->atomicIndex];
+    EnterCriticalSection(csp);
+
     if (lockOrderValidation && lockp->level != 0) {
         int found = 0;
         lockRefH = (osi_queue_t *)TlsGetValue(tls_LockRefH);
@@ -237,7 +289,7 @@ void lock_ReleaseRead(osi_rwlock_t *lockp)
         for (lockRefp = (osi_lock_ref_t *)lockRefH ; lockRefp; lockRefp = (osi_lock_ref_t *)osi_QNext(&lockRefp->q)) {
             if (lockRefp->type == OSI_LOCK_RW && lockRefp->rw == lockp) {
                 osi_QRemoveHT(&lockRefH, &lockRefT, &lockRefp->q);
-                free(lockRefp);
+                lock_FreeLockRef(lockRefp);
                 found = 1;
                 break;
             }
@@ -248,17 +300,26 @@ void lock_ReleaseRead(osi_rwlock_t *lockp)
         TlsSetValue(tls_LockRefT, lockRefT);
     }
 
-    /* otherwise we're the fast base type */
-    csp = &osi_baseAtomicCS[lockp->atomicIndex];
-    EnterCriticalSection(csp);
-
     osi_assertx(lockp->readers > 0, "read lock not held");
 
-    /* releasing a read lock can allow readers or writers */
-    if (--lockp->readers == 0 && !osi_TEmpty(&lockp->d.turn)) {
+    for ( i=0; i < lockp->readers && i < OSI_RWLOCK_THREADS; i++) {
+        if ( lockp->tid[i] == tid ) {
+            for ( ; i < lockp->readers - 1 && i < OSI_RWLOCK_THREADS - 1; i++)
+                lockp->tid[i] = lockp->tid[i+1];
+            lockp->tid[i] = 0;
+            break;
+        }
+    }
+
+    lockp->readers--;
+
+    /* releasing a read lock can allow writers */
+    if (lockp->readers == 0 && lockp->waiters) {
         osi_TSignalForMLs(&lockp->d.turn, 0, csp);
     }
     else {
+        osi_assertx(lockp->readers >= 0, "read lock underflow");
+
         /* and finally release the big lock */
         LeaveCriticalSection(csp);
     }
@@ -277,6 +338,10 @@ void lock_ReleaseWrite(osi_rwlock_t *lockp)
         return;
     }
 
+    /* otherwise we're the fast base type */
+    csp = &osi_baseAtomicCS[lockp->atomicIndex];
+    EnterCriticalSection(csp);
+
     if (lockOrderValidation && lockp->level != 0) {
         int found = 0;
         lockRefH = (osi_queue_t *)TlsGetValue(tls_LockRefH);
@@ -285,7 +350,7 @@ void lock_ReleaseWrite(osi_rwlock_t *lockp)
         for (lockRefp = (osi_lock_ref_t *)lockRefH ; lockRefp; lockRefp = (osi_lock_ref_t *)osi_QNext(&lockRefp->q)) {
             if (lockRefp->type == OSI_LOCK_RW && lockRefp->rw == lockp) {
                 osi_QRemoveHT(&lockRefH, &lockRefT, &lockRefp->q);
-                free(lockRefp);
+                lock_FreeLockRef(lockRefp);
                 found = 1;
                 break;
             }
@@ -296,24 +361,20 @@ void lock_ReleaseWrite(osi_rwlock_t *lockp)
         TlsSetValue(tls_LockRefT, lockRefT);
     }
 
-    /* otherwise we're the fast base type */
-    csp = &osi_baseAtomicCS[lockp->atomicIndex];
-    EnterCriticalSection(csp);
-
     osi_assertx(lockp->flags & OSI_LOCKFLAG_EXCL, "write lock not held");
-    osi_assertx(lockp->tid == thrd_Current(), "write lock not held by current thread");
+    osi_assertx(lockp->tid[0] == thrd_Current(), "write lock not held by current thread");
 
-    lockp->tid = 0;
+    lockp->tid[0] = 0;
 
     lockp->flags &= ~OSI_LOCKFLAG_EXCL;
-    if (!osi_TEmpty(&lockp->d.turn)) {
+    if (lockp->waiters) {
         osi_TSignalForMLs(&lockp->d.turn, 0, csp);
     }
     else {
         /* and finally release the big lock */
         LeaveCriticalSection(csp);
     }
-}       
+}
 
 void lock_ConvertWToR(osi_rwlock_t *lockp)
 {
@@ -331,15 +392,15 @@ void lock_ConvertWToR(osi_rwlock_t *lockp)
     EnterCriticalSection(csp);
 
     osi_assertx(lockp->flags & OSI_LOCKFLAG_EXCL, "write lock not held");
-    osi_assertx(lockp->tid == thrd_Current(), "write lock not held by current thread");
+    osi_assertx(lockp->tid[0] == thrd_Current(), "write lock not held by current thread");
 
     /* convert write lock to read lock */
     lockp->flags &= ~OSI_LOCKFLAG_EXCL;
     lockp->readers++;
 
-    lockp->tid = 0;
+    osi_assertx(lockp->readers == 1, "read lock not one");
 
-    if (!osi_TEmpty(&lockp->d.turn)) {
+    if (lockp->waiters) {
         osi_TSignalForMLs(&lockp->d.turn, /* still have readers */ 1, csp);
     }
     else {
@@ -352,6 +413,7 @@ void lock_ConvertRToW(osi_rwlock_t *lockp)
 {
     long i;
     CRITICAL_SECTION *csp;
+    DWORD tid = thrd_Current();
 
     if ((i = lockp->type) != 0) {
         if (i >= 0 && i < OSI_NLOCKTYPES)
@@ -366,19 +428,31 @@ void lock_ConvertRToW(osi_rwlock_t *lockp)
     osi_assertx(!(lockp->flags & OSI_LOCKFLAG_EXCL), "write lock held");
     osi_assertx(lockp->readers > 0, "read lock not held");
 
-    if (--lockp->readers == 0) {
+    for ( i=0; i < lockp->readers && i < OSI_RWLOCK_THREADS; i++) {
+        if ( lockp->tid[i] == tid ) {
+            for ( ; i < lockp->readers - 1 && i < OSI_RWLOCK_THREADS - 1; i++)
+                lockp->tid[i] = lockp->tid[i+1];
+            lockp->tid[i] = 0;
+            break;
+        }
+    }
+
+    if (--(lockp->readers) == 0) {
         /* convert read lock to write lock */
         lockp->flags |= OSI_LOCKFLAG_EXCL;
+        lockp->tid[0] = tid;
     } else {
+        osi_assertx(lockp->readers > 0, "read lock underflow");
+
         lockp->waiters++;
-        osi_TWait(&lockp->d.turn, OSI_SLEEPINFO_W4WRITE, &lockp->flags, csp);
+        osi_TWait(&lockp->d.turn, OSI_SLEEPINFO_W4WRITE, &lockp->flags, lockp->tid, csp);
         lockp->waiters--;
+        osi_assertx(lockp->waiters >= 0, "waiters underflow");
         osi_assert(lockp->readers == 0 && (lockp->flags & OSI_LOCKFLAG_EXCL));
     }
 
-    lockp->tid = thrd_Current();
     LeaveCriticalSection(csp);
-}       
+}
 
 void lock_ObtainMutex(struct osi_mutex *lockp)
 {
@@ -386,37 +460,38 @@ void lock_ObtainMutex(struct osi_mutex *lockp)
     CRITICAL_SECTION *csp;
     osi_queue_t * lockRefH, *lockRefT;
     osi_lock_ref_t *lockRefp;
-        
+
     if ((i=lockp->type) != 0) {
         if (i >= 0 && i < OSI_NLOCKTYPES)
             (osi_lockOps[i]->ObtainMutexProc)(lockp);
         return;
     }
 
+    /* otherwise we're the fast base type */
+    csp = &osi_baseAtomicCS[lockp->atomicIndex];
+    EnterCriticalSection(csp);
+
     if (lockOrderValidation) {
         lockRefH = (osi_queue_t *)TlsGetValue(tls_LockRefH);
         lockRefT = (osi_queue_t *)TlsGetValue(tls_LockRefT);
 
-        if (lockp->level != 0) 
+        if (lockp->level != 0)
             lock_VerifyOrderMX(lockRefH, lockRefT, lockp);
     }
 
-    /* otherwise we're the fast base type */
-    csp = &osi_baseAtomicCS[lockp->atomicIndex];
-    EnterCriticalSection(csp);
-
     /* here we have the fast lock, so see if we can obtain the real lock */
     if (lockp->waiters > 0 || (lockp->flags & OSI_LOCKFLAG_EXCL)) {
         lockp->waiters++;
-        osi_TWait(&lockp->d.turn, OSI_SLEEPINFO_W4WRITE, &lockp->flags, csp);
+        osi_TWait(&lockp->d.turn, OSI_SLEEPINFO_W4WRITE, &lockp->flags, &lockp->tid, csp);
         lockp->waiters--;
+        osi_assertx(lockp->waiters >= 0, "waiters underflow");
         osi_assert(lockp->flags & OSI_LOCKFLAG_EXCL);
-    }
-    else {
+    } else {
         /* if we're here, all clear to set the lock */
         lockp->flags |= OSI_LOCKFLAG_EXCL;
+        lockp->tid = thrd_Current();
     }
-    lockp->tid = thrd_Current();
+
     LeaveCriticalSection(csp);
 
     if (lockOrderValidation) {
@@ -440,6 +515,10 @@ void lock_ReleaseMutex(struct osi_mutex *lockp)
         return;
     }
 
+    /* otherwise we're the fast base type */
+    csp = &osi_baseAtomicCS[lockp->atomicIndex];
+    EnterCriticalSection(csp);
+
     if (lockOrderValidation && lockp->level != 0) {
         int found = 0;
         lockRefH = (osi_queue_t *)TlsGetValue(tls_LockRefH);
@@ -448,34 +527,30 @@ void lock_ReleaseMutex(struct osi_mutex *lockp)
         for (lockRefp = (osi_lock_ref_t *)lockRefH ; lockRefp; lockRefp = (osi_lock_ref_t *)osi_QNext(&lockRefp->q)) {
             if (lockRefp->type == OSI_LOCK_MUTEX && lockRefp->mx == lockp) {
                 osi_QRemoveHT(&lockRefH, &lockRefT, &lockRefp->q);
-                free(lockRefp);
+                lock_FreeLockRef(lockRefp);
                 found = 1;
                 break;
             }
         }
-    
+
         osi_assertx(found, "mutex lock not found in TLS queue");
         TlsSetValue(tls_LockRefH, lockRefH);
         TlsSetValue(tls_LockRefT, lockRefT);
     }
 
-    /* otherwise we're the fast base type */
-    csp = &osi_baseAtomicCS[lockp->atomicIndex];
-    EnterCriticalSection(csp);
-
     osi_assertx(lockp->flags & OSI_LOCKFLAG_EXCL, "mutex not held");
     osi_assertx(lockp->tid == thrd_Current(), "mutex not held by current thread");
 
     lockp->flags &= ~OSI_LOCKFLAG_EXCL;
     lockp->tid = 0;
-    if (!osi_TEmpty(&lockp->d.turn)) {
+    if (lockp->waiters) {
         osi_TSignalForMLs(&lockp->d.turn, 0, csp);
     }
     else {
         /* and finally release the big lock */
         LeaveCriticalSection(csp);
     }
-}       
+}
 
 int lock_TryRead(struct osi_rwlock *lockp)
 {
@@ -488,6 +563,10 @@ int lock_TryRead(struct osi_rwlock *lockp)
         if (i >= 0 && i < OSI_NLOCKTYPES)
             return (osi_lockOps[i]->TryReadProc)(lockp);
 
+    /* otherwise we're the fast base type */
+    csp = &osi_baseAtomicCS[lockp->atomicIndex];
+    EnterCriticalSection(csp);
+
     if (lockOrderValidation) {
         lockRefH = (osi_queue_t *)TlsGetValue(tls_LockRefH);
         lockRefT = (osi_queue_t *)TlsGetValue(tls_LockRefT);
@@ -501,17 +580,14 @@ int lock_TryRead(struct osi_rwlock *lockp)
         }
     }
 
-    /* otherwise we're the fast base type */
-    csp = &osi_baseAtomicCS[lockp->atomicIndex];
-    EnterCriticalSection(csp);
-
     /* here we have the fast lock, so see if we can obtain the real lock */
     if (lockp->waiters > 0 || (lockp->flags & OSI_LOCKFLAG_EXCL)) {
         i = 0;
     }
     else {
         /* if we're here, all clear to set the lock */
-        lockp->readers++;
+        if (++(lockp->readers) <= OSI_RWLOCK_THREADS)
+            lockp->tid[lockp->readers-1] = thrd_Current();
         i = 1;
     }
 
@@ -525,7 +601,7 @@ int lock_TryRead(struct osi_rwlock *lockp)
     }
 
     return i;
-}       
+}
 
 
 int lock_TryWrite(struct osi_rwlock *lockp)
@@ -539,6 +615,10 @@ int lock_TryWrite(struct osi_rwlock *lockp)
         if (i >= 0 && i < OSI_NLOCKTYPES)
             return (osi_lockOps[i]->TryWriteProc)(lockp);
 
+    /* otherwise we're the fast base type */
+    csp = &osi_baseAtomicCS[lockp->atomicIndex];
+    EnterCriticalSection(csp);
+
     if (lockOrderValidation) {
         lockRefH = (osi_queue_t *)TlsGetValue(tls_LockRefH);
         lockRefT = (osi_queue_t *)TlsGetValue(tls_LockRefT);
@@ -552,10 +632,6 @@ int lock_TryWrite(struct osi_rwlock *lockp)
         }
     }
 
-    /* otherwise we're the fast base type */
-    csp = &osi_baseAtomicCS[lockp->atomicIndex];
-    EnterCriticalSection(csp);
-
     /* here we have the fast lock, so see if we can obtain the real lock */
     if (lockp->waiters > 0 || (lockp->flags & OSI_LOCKFLAG_EXCL)
          || (lockp->readers > 0)) {
@@ -564,12 +640,10 @@ int lock_TryWrite(struct osi_rwlock *lockp)
     else {
         /* if we're here, all clear to set the lock */
         lockp->flags |= OSI_LOCKFLAG_EXCL;
+        lockp->tid[0] = thrd_Current();
         i = 1;
     }
 
-    if (i)
-        lockp->tid = thrd_Current();
-
     LeaveCriticalSection(csp);
 
     if (lockOrderValidation && i) {
@@ -593,6 +667,10 @@ int lock_TryMutex(struct osi_mutex *lockp) {
         if (i >= 0 && i < OSI_NLOCKTYPES)
             return (osi_lockOps[i]->TryMutexProc)(lockp);
 
+    /* otherwise we're the fast base type */
+    csp = &osi_baseAtomicCS[lockp->atomicIndex];
+    EnterCriticalSection(csp);
+
     if (lockOrderValidation) {
         lockRefH = (osi_queue_t *)TlsGetValue(tls_LockRefH);
         lockRefT = (osi_queue_t *)TlsGetValue(tls_LockRefT);
@@ -606,10 +684,6 @@ int lock_TryMutex(struct osi_mutex *lockp) {
         }
     }
 
-    /* otherwise we're the fast base type */
-    csp = &osi_baseAtomicCS[lockp->atomicIndex];
-    EnterCriticalSection(csp);
-
     /* here we have the fast lock, so see if we can obtain the real lock */
     if (lockp->waiters > 0 || (lockp->flags & OSI_LOCKFLAG_EXCL)) {
         i = 0;
@@ -617,12 +691,10 @@ int lock_TryMutex(struct osi_mutex *lockp) {
     else {
         /* if we're here, all clear to set the lock */
         lockp->flags |= OSI_LOCKFLAG_EXCL;
+        lockp->tid = thrd_Current();
         i = 1;
     }
 
-    if (i)
-        lockp->tid = thrd_Current();
-
     LeaveCriticalSection(csp);
 
     if (lockOrderValidation && i) {
@@ -631,6 +703,7 @@ int lock_TryMutex(struct osi_mutex *lockp) {
         TlsSetValue(tls_LockRefH, lockRefH);
         TlsSetValue(tls_LockRefT, lockRefT);
     }
+
     return i;
 }
 
@@ -640,6 +713,7 @@ void osi_SleepR(LONG_PTR sleepVal, struct osi_rwlock *lockp)
     CRITICAL_SECTION *csp;
     osi_queue_t * lockRefH, *lockRefT;
     osi_lock_ref_t *lockRefp;
+    DWORD tid = thrd_Current();
 
     if ((i = lockp->type) != 0) {
         if (i >= 0 && i < OSI_NLOCKTYPES)
@@ -647,6 +721,10 @@ void osi_SleepR(LONG_PTR sleepVal, struct osi_rwlock *lockp)
         return;
     }
 
+    /* otherwise we're the fast base type */
+    csp = &osi_baseAtomicCS[lockp->atomicIndex];
+    EnterCriticalSection(csp);
+
     if (lockOrderValidation && lockp->level != 0) {
         lockRefH = (osi_queue_t *)TlsGetValue(tls_LockRefH);
         lockRefT = (osi_queue_t *)TlsGetValue(tls_LockRefT);
@@ -654,7 +732,7 @@ void osi_SleepR(LONG_PTR sleepVal, struct osi_rwlock *lockp)
         for (lockRefp = (osi_lock_ref_t *)lockRefH ; lockRefp; lockRefp = (osi_lock_ref_t *)osi_QNext(&lockRefp->q)) {
             if (lockRefp->type == OSI_LOCK_RW && lockRefp->rw == lockp) {
                 osi_QRemoveHT(&lockRefH, &lockRefT, &lockRefp->q);
-                free(lockRefp);
+                lock_FreeLockRef(lockRefp);
                 break;
             }
         }
@@ -663,22 +741,27 @@ void osi_SleepR(LONG_PTR sleepVal, struct osi_rwlock *lockp)
         TlsSetValue(tls_LockRefT, lockRefT);
     }
 
-    /* otherwise we're the fast base type */
-    csp = &osi_baseAtomicCS[lockp->atomicIndex];
-    EnterCriticalSection(csp);
-
     osi_assertx(lockp->readers > 0, "osi_SleepR: not held");
 
+    for ( i=0; i < lockp->readers && i < OSI_RWLOCK_THREADS; i++) {
+        if ( lockp->tid[i] == tid ) {
+            for ( ; i < lockp->readers - 1 && i < OSI_RWLOCK_THREADS - 1; i++)
+                lockp->tid[i] = lockp->tid[i+1];
+            lockp->tid[i] = 0;
+            break;
+        }
+    }
+
     /* XXX better to get the list of things to wakeup from TSignalForMLs, and
      * then do the wakeup after SleepSpin releases the low-level mutex.
      */
-    if (--lockp->readers == 0 && !osi_TEmpty(&lockp->d.turn)) {
+    if (--(lockp->readers) == 0 && lockp->waiters) {
         osi_TSignalForMLs(&lockp->d.turn, 0, NULL);
     }
 
     /* now call into scheduler to sleep atomically with releasing spin lock */
     osi_SleepSpin(sleepVal, csp);
-}       
+}
 
 void osi_SleepW(LONG_PTR sleepVal, struct osi_rwlock *lockp)
 {
@@ -686,6 +769,7 @@ void osi_SleepW(LONG_PTR sleepVal, struct osi_rwlock *lockp)
     CRITICAL_SECTION *csp;
     osi_queue_t * lockRefH, *lockRefT;
     osi_lock_ref_t *lockRefp;
+    DWORD tid = thrd_Current();
 
     if ((i = lockp->type) != 0) {
         if (i >= 0 && i < OSI_NLOCKTYPES)
@@ -693,6 +777,10 @@ void osi_SleepW(LONG_PTR sleepVal, struct osi_rwlock *lockp)
         return;
     }
 
+    /* otherwise we're the fast base type */
+    csp = &osi_baseAtomicCS[lockp->atomicIndex];
+    EnterCriticalSection(csp);
+
     if (lockOrderValidation && lockp->level != 0) {
         lockRefH = (osi_queue_t *)TlsGetValue(tls_LockRefH);
         lockRefT = (osi_queue_t *)TlsGetValue(tls_LockRefT);
@@ -700,7 +788,7 @@ void osi_SleepW(LONG_PTR sleepVal, struct osi_rwlock *lockp)
         for (lockRefp = (osi_lock_ref_t *)lockRefH ; lockRefp; lockRefp = (osi_lock_ref_t *)osi_QNext(&lockRefp->q)) {
             if (lockRefp->type == OSI_LOCK_RW && lockRefp->rw == lockp) {
                 osi_QRemoveHT(&lockRefH, &lockRefT, &lockRefp->q);
-                free(lockRefp);
+                lock_FreeLockRef(lockRefp);
                 break;
             }
         }
@@ -709,14 +797,11 @@ void osi_SleepW(LONG_PTR sleepVal, struct osi_rwlock *lockp)
         TlsSetValue(tls_LockRefT, lockRefT);
     }
 
-    /* otherwise we're the fast base type */
-    csp = &osi_baseAtomicCS[lockp->atomicIndex];
-    EnterCriticalSection(csp);
-
     osi_assertx(lockp->flags & OSI_LOCKFLAG_EXCL, "osi_SleepW: not held");
 
     lockp->flags &= ~OSI_LOCKFLAG_EXCL;
-    if (!osi_TEmpty(&lockp->d.turn)) {
+    lockp->tid[0] = 0;
+    if (lockp->waiters) {
         osi_TSignalForMLs(&lockp->d.turn, 0, NULL);
     }
 
@@ -737,6 +822,10 @@ void osi_SleepM(LONG_PTR sleepVal, struct osi_mutex *lockp)
         return;
     }
 
+    /* otherwise we're the fast base type */
+    csp = &osi_baseAtomicCS[lockp->atomicIndex];
+    EnterCriticalSection(csp);
+
     if (lockOrderValidation && lockp->level != 0) {
         lockRefH = (osi_queue_t *)TlsGetValue(tls_LockRefH);
         lockRefT = (osi_queue_t *)TlsGetValue(tls_LockRefT);
@@ -744,23 +833,20 @@ void osi_SleepM(LONG_PTR sleepVal, struct osi_mutex *lockp)
         for (lockRefp = (osi_lock_ref_t *)lockRefH ; lockRefp; lockRefp = (osi_lock_ref_t *)osi_QNext(&lockRefp->q)) {
             if (lockRefp->type == OSI_LOCK_MUTEX && lockRefp->mx == lockp) {
                 osi_QRemoveHT(&lockRefH, &lockRefT, &lockRefp->q);
-                free(lockRefp);
+                lock_FreeLockRef(lockRefp);
                 break;
             }
         }
-    
+
         TlsSetValue(tls_LockRefH, lockRefH);
         TlsSetValue(tls_LockRefT, lockRefT);
     }
 
-    /* otherwise we're the fast base type */
-    csp = &osi_baseAtomicCS[lockp->atomicIndex];
-    EnterCriticalSection(csp);
-
     osi_assertx(lockp->flags & OSI_LOCKFLAG_EXCL, "osi_SleepM not held");
-       
+
     lockp->flags &= ~OSI_LOCKFLAG_EXCL;
-    if (!osi_TEmpty(&lockp->d.turn)) {
+    lockp->tid = 0;
+    if (lockp->waiters) {
         osi_TSignalForMLs(&lockp->d.turn, 0, NULL);
     }
 
@@ -775,16 +861,16 @@ void lock_FinalizeRWLock(osi_rwlock_t *lockp)
     if ((i=lockp->type) != 0)
         if (i >= 0 && i < OSI_NLOCKTYPES)
             (osi_lockOps[i]->FinalizeRWLockProc)(lockp);
-}       
+}
 
 void lock_FinalizeMutex(osi_mutex_t *lockp)
-{       
+{
     long i;
 
     if ((i=lockp->type) != 0)
         if (i >= 0 && i < OSI_NLOCKTYPES)
             (osi_lockOps[i]->FinalizeMutexProc)(lockp);
-}       
+}
 
 void lock_InitializeMutex(osi_mutex_t *mp, char *namep, unsigned short level)
 {
@@ -796,12 +882,11 @@ void lock_InitializeMutex(osi_mutex_t *mp, char *namep, unsigned short level)
         return;
     }
 
-    /* otherwise we have the base case, which requires no special
+    /*
+     * otherwise we have the base case, which requires no special
      * initialization.
      */
-    mp->type = 0;
-    mp->flags = 0;
-    mp->tid = 0;
+    memset(mp, 0, sizeof(osi_mutex_t));
     mp->atomicIndex = (unsigned short)(InterlockedIncrement(&atomicIndexCounter) % OSI_MUTEXHASHSIZE);
     mp->level = level;
     osi_TInit(&mp->d.turn);
@@ -817,15 +902,12 @@ void lock_InitializeRWLock(osi_rwlock_t *mp, char *namep, unsigned short level)
             (osi_lockOps[i]->InitializeRWLockProc)(mp, namep, level);
         return;
     }
-       
+
     /* otherwise we have the base case, which requires no special
      * initialization.
      */
-    mp->type = 0;
-    mp->flags = 0;
+    memset(mp, 0, sizeof(osi_rwlock_t));
     mp->atomicIndex = (unsigned short)(InterlockedIncrement(&atomicIndexCounter) % OSI_MUTEXHASHSIZE);
-    mp->readers = 0;
-    mp->tid = 0;
     mp->level = level;
     osi_TInit(&mp->d.turn);
     return;
@@ -845,11 +927,11 @@ int lock_GetRWLockState(osi_rwlock_t *lp)
     EnterCriticalSection(csp);
 
     /* here we have the fast lock, so see if we can obtain the real lock */
-    if (lp->flags & OSI_LOCKFLAG_EXCL) 
+    if (lp->flags & OSI_LOCKFLAG_EXCL)
         i = OSI_RWLOCK_WRITEHELD;
-    else 
+    else
         i = 0;
-    if (lp->readers > 0) 
+    if (lp->readers > 0)
         i |= OSI_RWLOCK_READHELD;
 
     LeaveCriticalSection(csp);
@@ -857,7 +939,7 @@ int lock_GetRWLockState(osi_rwlock_t *lp)
     return i;
 }
 
-int lock_GetMutexState(struct osi_mutex *mp) 
+int lock_GetMutexState(struct osi_mutex *mp)
 {
     long i;
     CRITICAL_SECTION *csp;