Windows: mark volume status online during cm_MergeStatus
[openafs.git] / src / WINNT / afsd / cm_scache.c
index 8d32b4e..bee91c4 100644 (file)
@@ -19,6 +19,7 @@
 #include <osi.h>
 
 #include "afsd.h"
+#include "cm_btree.h"
 
 /*extern void afsi_log(char *pattern, ...);*/
 
@@ -35,36 +36,38 @@ osi_rwlock_t cm_scacheLock;
 /* Dummy scache entry for use with pioctl fids */
 cm_scache_t cm_fakeSCache;
 
+osi_queue_t * cm_allFreeWaiters;        /* protected by cm_scacheLock */
+
 #ifdef AFS_FREELANCE_CLIENT
 extern osi_mutex_t cm_Freelance_Lock;
 #endif
 
 /* must be called with cm_scacheLock write-locked! */
-void cm_AdjustLRU(cm_scache_t *scp)
+void cm_AdjustScacheLRU(cm_scache_t *scp)
 {
-    if (scp == cm_data.scacheLRULastp)
-        cm_data.scacheLRULastp = (cm_scache_t *) osi_QPrev(&scp->q);
+    lock_AssertWrite(&cm_scacheLock);
     osi_QRemoveHT((osi_queue_t **) &cm_data.scacheLRUFirstp, (osi_queue_t **) &cm_data.scacheLRULastp, &scp->q);
-    osi_QAdd((osi_queue_t **) &cm_data.scacheLRUFirstp, &scp->q);
-    if (!cm_data.scacheLRULastp) 
-        cm_data.scacheLRULastp = scp;
+    osi_QAddH((osi_queue_t **) &cm_data.scacheLRUFirstp, (osi_queue_t **) &cm_data.scacheLRULastp, &scp->q);
 }
 
-/* call with scache write-locked and mutex held */
+/* call with both cm_scacheLock and scp->rw write-locked */
 void cm_RemoveSCacheFromHashTable(cm_scache_t *scp)
 {
     cm_scache_t **lscpp;
     cm_scache_t *tscp;
     int i;
        
+    lock_AssertWrite(&cm_scacheLock);
+    lock_AssertWrite(&scp->rw);
     if (scp->flags & CM_SCACHEFLAG_INHASH) {
        /* hash it out first */
        i = CM_SCACHE_HASH(&scp->fid);
-       for (lscpp = &cm_data.hashTablep[i], tscp = cm_data.hashTablep[i];
+       for (lscpp = &cm_data.scacheHashTablep[i], tscp = cm_data.scacheHashTablep[i];
             tscp;
             lscpp = &tscp->nextp, tscp = tscp->nextp) {
            if (tscp == scp) {
                *lscpp = scp->nextp;
+                scp->nextp = NULL;
                scp->flags &= ~CM_SCACHEFLAG_INHASH;
                break;
            }
@@ -72,10 +75,44 @@ void cm_RemoveSCacheFromHashTable(cm_scache_t *scp)
     }
 }
 
-/* called with cm_scacheLock write-locked; recycles an existing scp. 
- *
- * this function ignores all of the locking hierarchy.  
- */
+/* Discards scp's directory B+ tree (USE_BPLUS builds only; otherwise a no-op).  A nonzero dirlock makes this routine neither take nor release scp->dirlock (presumably the caller already holds it -- confirm at call sites).  Called with cm_scacheLock and scp write-locked. */
+void cm_ResetSCacheDirectory(cm_scache_t *scp, afs_int32 dirlock)
+{
+#ifdef USE_BPLUS
+    /* destroy directory Bplus Tree */
+    if (scp->dirBplus) {
+        LARGE_INTEGER start, end;
+
+        if (!dirlock && !lock_TryWrite(&scp->dirlock)) {
+            /* 
+             * We are not holding the dirlock and obtaining it
+             * requires that we drop the scp->rw.  As a result
+             * we will leave the dirBplus tree intact but 
+             * invalidate the version number so that whatever
+             * operation is currently active can safely complete
+             * but the contents will be ignored on the next 
+             * directory operation.
+             */
+            scp->dirDataVersion = CM_SCACHE_VERSION_BAD;
+            return;
+        }
+
+        QueryPerformanceCounter(&start);    /* bracket the teardown with counter samples */
+        bplus_free_tree++;    /* one more tree freed */
+        freeBtree(scp->dirBplus);
+        scp->dirBplus = NULL;
+        scp->dirDataVersion = CM_SCACHE_VERSION_BAD;
+        QueryPerformanceCounter(&end);
+        
+        if (!dirlock)     /* release only the lock obtained by lock_TryWrite above */
+            lock_ReleaseWrite(&scp->dirlock);
+
+        bplus_free_time += (end.QuadPart - start.QuadPart);    /* accumulate elapsed performance-counter ticks */
+    }
+#endif
+}
+
+/* called with cm_scacheLock and scp write-locked; recycles an existing scp. */
 long cm_RecycleSCache(cm_scache_t *scp, afs_int32 flags)
 {
     if (scp->refCount != 0) {
@@ -105,9 +142,11 @@ long cm_RecycleSCache(cm_scache_t *scp, afs_int32 flags)
                lock_ObtainMutex(&bufp->mx);
                bufp->cmFlags &= ~CM_BUF_CMSTORING;
                bufp->flags &= ~CM_BUF_DIRTY;
+                bufp->dirty_offset = 0;
+                bufp->dirty_length = 0;
                bufp->flags |= CM_BUF_ERROR;
                bufp->error = VNOVNODE;
-               bufp->dataVersion = -1; /* bad */
+               bufp->dataVersion = CM_BUF_VERSION_BAD; /* bad */
                bufp->dirtyCounter++;
                if (bufp->flags & CM_BUF_WAITING) {
                    osi_Log2(afsd_logp, "CM RecycleSCache Waking [scp 0x%x] bufp 0x%x", scp, bufp);
@@ -125,9 +164,11 @@ long cm_RecycleSCache(cm_scache_t *scp, afs_int32 flags)
                lock_ObtainMutex(&bufp->mx);
                bufp->cmFlags &= ~CM_BUF_CMFETCHING;
                bufp->flags &= ~CM_BUF_DIRTY;
+                bufp->dirty_offset = 0;
+                bufp->dirty_length = 0;
                bufp->flags |= CM_BUF_ERROR;
                bufp->error = VNOVNODE;
-               bufp->dataVersion = -1; /* bad */
+               bufp->dataVersion = CM_BUF_VERSION_BAD; /* bad */
                bufp->dirtyCounter++;
                if (bufp->flags & CM_BUF_WAITING) {
                    osi_Log2(afsd_logp, "CM RecycleSCache Waking [scp 0x%x] bufp 0x%x", scp, bufp);
@@ -140,13 +181,14 @@ long cm_RecycleSCache(cm_scache_t *scp, afs_int32 flags)
        buf_CleanDirtyBuffers(scp); 
     } else {
        /* look for things that shouldn't still be set */
-       osi_assert(scp->bufWritesp == NULL);
-       osi_assert(scp->bufReadsp == NULL);
+       osi_assertx(scp->bufWritesp == NULL, "non-null cm_scache_t bufWritesp");
+       osi_assertx(scp->bufReadsp == NULL, "non-null cm_scache_t bufReadsp");
     }
 #endif
 
     /* invalidate so next merge works fine;
      * also initialize some flags */
+    scp->fileType = 0;
     scp->flags &= ~(CM_SCACHEFLAG_STATD
                     | CM_SCACHEFLAG_DELETED
                     | CM_SCACHEFLAG_RO
@@ -155,21 +197,24 @@ long cm_RecycleSCache(cm_scache_t *scp, afs_int32 flags)
                     | CM_SCACHEFLAG_OUTOFSPACE
                     | CM_SCACHEFLAG_EACCESS);
     scp->serverModTime = 0;
-    scp->dataVersion = 0;
+    scp->dataVersion = CM_SCACHE_VERSION_BAD;
+    scp->bufDataVersionLow = CM_SCACHE_VERSION_BAD;
     scp->bulkStatProgress = hzero;
     scp->waitCount = 0;
+    scp->waitQueueT = NULL;
+
+    if (scp->cbServerp) {
+        cm_PutServer(scp->cbServerp);
+        scp->cbServerp = NULL;
+    }
+    scp->cbExpires = 0;
+    scp->volumeCreationDate = 0;
 
     scp->fid.vnode = 0;
     scp->fid.volume = 0;
     scp->fid.unique = 0;
     scp->fid.cell = 0;
-
-    /* discard callback */
-    if (scp->cbServerp) {
-       cm_PutServer(scp->cbServerp);
-       scp->cbServerp = NULL;
-    }
-    scp->cbExpires = 0;
+    scp->fid.hash = 0;
 
     /* remove from dnlc */
     cm_dnlcPurgedp(scp);
@@ -179,14 +224,8 @@ long cm_RecycleSCache(cm_scache_t *scp, afs_int32 flags)
      * tried to store this to server but failed */
     scp->mask = 0;
 
-    /* drop held volume ref */
-    if (scp->volp) {
-       cm_PutVolume(scp->volp);
-       scp->volp = NULL;
-    }
-
     /* discard symlink info */
-    scp->mountPointStringp[0] = 0;
+    scp->mountPointStringp[0] = '\0';
     memset(&scp->mountRootFid, 0, sizeof(cm_fid_t));
     memset(&scp->dotdotFid, 0, sizeof(cm_fid_t));
 
@@ -196,33 +235,44 @@ long cm_RecycleSCache(cm_scache_t *scp, afs_int32 flags)
     scp->serverLock = (-1);
     scp->exclusiveLocks = 0;
     scp->sharedLocks = 0;
+    scp->lockDataVersion = CM_SCACHE_VERSION_BAD;
+    scp->fsLockCount = 0;
 
     /* not locked, but there can be no references to this guy
      * while we hold the global refcount lock.
      */
     cm_FreeAllACLEnts(scp);
+
+    cm_ResetSCacheDirectory(scp, 0);
     return 0;
 }
 
 
-/* called with cm_scacheLock write-locked; find a vnode to recycle.
+/* 
+ * called with cm_scacheLock write-locked; find a vnode to recycle.
  * Can allocate a new one if desperate, or if below quota (cm_data.maxSCaches).
+ * On success, returns with scp->rw write-locked.
  */
 cm_scache_t *cm_GetNewSCache(void)
 {
     cm_scache_t *scp;
     int retry = 0;
 
+    lock_AssertWrite(&cm_scacheLock);
 #if 0
     /* first pass - look for deleted objects */
     for ( scp = cm_data.scacheLRULastp;
          scp;
          scp = (cm_scache_t *) osi_QPrev(&scp->q)) 
     {
-       osi_assert(scp >= cm_data.scacheBaseAddress && scp < (cm_scache_t *)cm_data.hashTablep);
+       osi_assertx(scp >= cm_data.scacheBaseAddress && scp < (cm_scache_t *)cm_data.scacheHashTablep,
+                    "invalid cm_scache_t address");
 
        if (scp->refCount == 0) {
            if (scp->flags & CM_SCACHEFLAG_DELETED) {
+                if (!lock_TryWrite(&scp->rw))
+                    continue;
+
                osi_Log1(afsd_logp, "GetNewSCache attempting to recycle deleted scp 0x%x", scp);
                if (!cm_RecycleSCache(scp, CM_SCACHE_RECYCLEFLAG_DESTROY_BUFFERS)) {
 
@@ -230,18 +280,22 @@ cm_scache_t *cm_GetNewSCache(void)
                    /* now remove from the LRU queue and put it back at the
                     * head of the LRU queue.
                     */
-                   cm_AdjustLRU(scp);
+                   cm_AdjustScacheLRU(scp);
 
                    /* and we're done */
                    return scp;
                } 
+                lock_ReleaseWrite(&scp->rw);
                osi_Log1(afsd_logp, "GetNewSCache recycled failed scp 0x%x", scp);
            } else if (!(scp->flags & CM_SCACHEFLAG_INHASH)) {
+                if (!lock_TryWrite(&scp->rw))
+                    continue;
+
                /* we found an entry, so return it */
                /* now remove from the LRU queue and put it back at the
                * head of the LRU queue.
                */
-               cm_AdjustLRU(scp);
+               cm_AdjustScacheLRU(scp);
 
                /* and we're done */
                return scp;
@@ -255,44 +309,53 @@ cm_scache_t *cm_GetNewSCache(void)
        /* There were no deleted scache objects that we could use.  Try to find
         * one that simply hasn't been used in a while.
         */
-           for ( scp = cm_data.scacheLRULastp;
-                 scp;
-                 scp = (cm_scache_t *) osi_QPrev(&scp->q)) 
-           {
-               /* It is possible for the refCount to be zero and for there still
-                * to be outstanding dirty buffers.  If there are dirty buffers,
-                * we must not recycle the scp. */
-               if (scp->refCount == 0 && scp->bufReadsp == NULL && scp->bufWritesp == NULL) {
-                   if (!buf_DirtyBuffersExist(&scp->fid)) {
-                       if (!cm_RecycleSCache(scp, 0)) {
-                           /* we found an entry, so return it */
-                           /* now remove from the LRU queue and put it back at the
-                            * head of the LRU queue.
-                            */
-                           cm_AdjustLRU(scp);
-
-                           /* and we're done */
-                           return scp;
-                       }
-                   } else {
-                       osi_Log1(afsd_logp,"GetNewSCache dirty buffers exist scp 0x%x", scp);
-                   }
-               }       
-           }
-           osi_Log1(afsd_logp, "GetNewSCache all scache entries in use (retry = %d)", retry);
-           
-               return NULL;
+        for ( scp = cm_data.scacheLRULastp;
+              scp;
+              scp = (cm_scache_t *) osi_QPrev(&scp->q)) 
+        {
+            /* It is possible for the refCount to be zero and for there still
+             * to be outstanding dirty buffers.  If there are dirty buffers,
+             * we must not recycle the scp. */
+            if (scp->refCount == 0 && scp->bufReadsp == NULL && scp->bufWritesp == NULL) {
+                if (!buf_DirtyBuffersExist(&scp->fid)) {
+                    if (!lock_TryWrite(&scp->rw))
+                        continue;
+
+                    if (!cm_RecycleSCache(scp, 0)) {
+                        /* we found an entry, so return it */
+                        /* now remove from the LRU queue and put it back at the
+                         * head of the LRU queue.
+                         */
+                        cm_AdjustScacheLRU(scp);
+
+                        /* and we're done */
+                        return scp;
+                    }
+                    lock_ReleaseWrite(&scp->rw);
+                } else {
+                    osi_Log1(afsd_logp,"GetNewSCache dirty buffers exist scp 0x%x", scp);
+                }
+            }  
+        }
+        osi_Log1(afsd_logp, "GetNewSCache all scache entries in use (retry = %d)", retry);
+
+        return NULL;
     }
         
     /* if we get here, we should allocate a new scache entry.  We either are below
      * quota or we have a leak and need to allocate a new one to avoid panicing.
      */
     scp = cm_data.scacheBaseAddress + cm_data.currentSCaches;
-    osi_assert(scp >= cm_data.scacheBaseAddress && scp < (cm_scache_t *)cm_data.hashTablep);
+    osi_assertx(scp >= cm_data.scacheBaseAddress && scp < (cm_scache_t *)cm_data.scacheHashTablep,
+                "invalid cm_scache_t address");
     memset(scp, 0, sizeof(cm_scache_t));
     scp->magic = CM_SCACHE_MAGIC;
-    lock_InitializeMutex(&scp->mx, "cm_scache_t mutex");
-    lock_InitializeRWLock(&scp->bufCreateLock, "cm_scache_t bufCreateLock");
+    lock_InitializeRWLock(&scp->rw, "cm_scache_t rw", LOCK_HIERARCHY_SCACHE);
+    osi_assertx(lock_TryWrite(&scp->rw), "cm_scache_t rw held after allocation");
+    lock_InitializeRWLock(&scp->bufCreateLock, "cm_scache_t bufCreateLock", LOCK_HIERARCHY_SCACHE_BUFCREATE);
+#ifdef USE_BPLUS
+    lock_InitializeRWLock(&scp->dirlock, "cm_scache_t dirlock", LOCK_HIERARCHY_SCACHE_DIRLOCK);
+#endif
     scp->serverLock = -1;
 
     /* and put it in the LRU queue */
@@ -302,13 +365,26 @@ cm_scache_t *cm_GetNewSCache(void)
     cm_data.currentSCaches++;
     cm_dnlcPurgedp(scp); /* make doubly sure that this is not in dnlc */
     cm_dnlcPurgevp(scp); 
+    scp->allNextp = cm_data.allSCachesp;
+    cm_data.allSCachesp = scp;
     return scp;
 }       
 
+void cm_SetFid(cm_fid_t *fidp, afs_uint32 cell, afs_uint32 volume, afs_uint32 vnode, afs_uint32 unique)    /* stores the four identifiers in *fidp and derives fidp->hash from them */
+{
+    fidp->cell = cell;
+    fidp->volume = volume;
+    fidp->vnode = vnode;
+    fidp->unique = unique;
+    fidp->hash = ((cell & 0xF) << 28) | ((volume & 0x3F) << 22) | ((vnode & 0x7FF) << 11) | (unique & 0x7FF);    /* packs low bits only: cell 4 bits (31-28), volume 6 bits (27-22), vnode 11 bits (21-11), unique 11 bits (10-0); distinct fids can share a hash */
+}
+
 /* like strcmp, only for fids */
-int cm_FidCmp(cm_fid_t *ap, cm_fid_t *bp)
+__inline int cm_FidCmp(cm_fid_t *ap, cm_fid_t *bp)
 {
-    if (ap->vnode != bp->vnode) 
+    if (ap->hash != bp->hash)
+        return 1;
+    if (ap->vnode != bp->vnode)
         return 1;
     if (ap->volume != bp->volume) 
         return 1;
@@ -323,15 +399,21 @@ void cm_fakeSCacheInit(int newFile)
 {
     if ( newFile ) {
         memset(&cm_data.fakeSCache, 0, sizeof(cm_scache_t));
+        cm_data.fakeSCache.magic = CM_SCACHE_MAGIC;
         cm_data.fakeSCache.cbServerp = (struct cm_server *)(-1);
+        cm_data.fakeSCache.cbExpires = (time_t)-1;
         /* can leave clientModTime at 0 */
         cm_data.fakeSCache.fileType = CM_SCACHETYPE_FILE;
         cm_data.fakeSCache.unixModeBits = 0777;
         cm_data.fakeSCache.length.LowPart = 1000;
         cm_data.fakeSCache.linkCount = 1;
         cm_data.fakeSCache.refCount = 1;
+        cm_data.fakeSCache.serverLock = -1;
+        cm_data.fakeSCache.dataVersion = CM_SCACHE_VERSION_BAD;
     }
-    lock_InitializeMutex(&cm_data.fakeSCache.mx, "cm_scache_t mutex");
+    lock_InitializeRWLock(&cm_data.fakeSCache.rw, "cm_scache_t rw", LOCK_HIERARCHY_SCACHE);
+    lock_InitializeRWLock(&cm_data.fakeSCache.bufCreateLock, "cm_scache_t bufCreateLock", LOCK_HIERARCHY_SCACHE_BUFCREATE);
+    lock_InitializeRWLock(&cm_data.fakeSCache.dirlock, "cm_scache_t dirlock", LOCK_HIERARCHY_SCACHE_DIRLOCK);
 }
 
 long
@@ -365,11 +447,6 @@ cm_ValidateSCache(void)
             fprintf(stderr, "cm_ValidateSCache failure: scp->randomACLp->magic != CM_ACLENT_MAGIC\n");
             return -3;
         }
-        if (scp->volp && scp->volp->magic != CM_VOLUME_MAGIC) {
-            afsi_log("cm_ValidateSCache failure: scp->volp->magic != CM_VOLUME_MAGIC");
-            fprintf(stderr, "cm_ValidateSCache failure: scp->volp->magic != CM_VOLUME_MAGIC\n");
-            return -4;
-        }
         if (i > cm_data.currentSCaches ) {
             afsi_log("cm_ValidateSCache failure: LRU First queue loops");
             fprintf(stderr, "cm_ValidateSCache failure: LUR First queue loops\n");
@@ -399,11 +476,6 @@ cm_ValidateSCache(void)
             fprintf(stderr, "cm_ValidateSCache failure: scp->randomACLp->magic != CM_ACLENT_MAGIC\n");
             return -7;
         }
-        if (scp->volp && scp->volp->magic != CM_VOLUME_MAGIC) {
-            afsi_log("cm_ValidateSCache failure: scp->volp->magic != CM_VOLUME_MAGIC");
-            fprintf(stderr, "cm_ValidateSCache failure: scp->volp->magic != CM_VOLUME_MAGIC\n");
-            return -8;
-        }
         if (i > cm_data.currentSCaches ) {
             afsi_log("cm_ValidateSCache failure: LRU Last queue loops");
             fprintf(stderr, "cm_ValidateSCache failure: LUR Last queue loops\n");
@@ -416,8 +488,10 @@ cm_ValidateSCache(void)
         }
     }
 
-    for ( i=0; i < cm_data.hashTableSize; i++ ) {
-        for ( scp = cm_data.hashTablep[i]; scp; scp = scp->nextp ) {
+    for ( i=0; i < cm_data.scacheHashTableSize; i++ ) {
+        for ( scp = cm_data.scacheHashTablep[i]; scp; scp = scp->nextp ) {
+            afs_uint32 hash;
+            hash = CM_SCACHE_HASH(&scp->fid);
             if (scp->magic != CM_SCACHE_MAGIC) {
                 afsi_log("cm_ValidateSCache failure: scp->magic != CM_SCACHE_MAGIC");
                 fprintf(stderr, "cm_ValidateSCache failure: scp->magic != CM_SCACHE_MAGIC\n");
@@ -433,10 +507,10 @@ cm_ValidateSCache(void)
                 fprintf(stderr, "cm_ValidateSCache failure: scp->randomACLp->magic != CM_ACLENT_MAGIC\n");
                 return -11;
             }
-            if (scp->volp && scp->volp->magic != CM_VOLUME_MAGIC) {
-                afsi_log("cm_ValidateSCache failure: scp->volp->magic != CM_VOLUME_MAGIC");
-                fprintf(stderr, "cm_ValidateSCache failure: scp->volp->magic != CM_VOLUME_MAGIC\n");
-                return -12;
+            if (hash != i) {
+                afsi_log("cm_ValidateSCache failure: scp hash != hash index");
+                fprintf(stderr, "cm_ValidateSCache failure: scp hash != hash index\n");
+                return -13;
             }
         }
     }
@@ -444,21 +518,80 @@ cm_ValidateSCache(void)
     return cm_dnlcValidate();
 }
 
+void
+cm_SuspendSCache(void)    /* calls cm_GiveUpAllCallbacksAllServers(TRUE), then resets cbExpires to now+1 on every scache entry that has a callback server */
+{
+    cm_scache_t * scp;
+    time_t now;
+
+    cm_GiveUpAllCallbacksAllServers(TRUE);
+
+    /* 
+     * After this call all servers are marked down.
+     * Do not clear the callbacks, instead change the
+     * expiration time so that the callbacks will be expired
+     * when the servers are marked back up.  However, we
+     * want the callbacks to be preserved as long as the 
+     * servers are down.  That way if the machine resumes
+     * without network, the stat cache item will still be
+     * considered valid.
+     */
+    now = time(NULL);
+
+    lock_ObtainWrite(&cm_scacheLock);
+    for ( scp = cm_data.allSCachesp; scp; scp = scp->allNextp ) {    /* walks the all-entries list, not the LRU queue */
+        if (scp->cbServerp) {
+            if (scp->flags & CM_SCACHEFLAG_PURERO) {
+                cm_volume_t *volp = cm_GetVolumeByFID(&scp->fid);
+                if (volp) {
+                    if (volp->cbExpiresRO == scp->cbExpires)    /* volume-level RO expiry matched this entry's: move it in step */
+                        volp->cbExpiresRO = now+1;
+                    cm_PutVolume(volp);    /* drop the reference taken by cm_GetVolumeByFID -- presumably; confirm */
+                }
+            }
+            scp->cbExpires = now+1;    /* one second after the time sampled above */
+        }
+    }
+    lock_ReleaseWrite(&cm_scacheLock);
+}
+
 long
 cm_ShutdownSCache(void)
 {
     cm_scache_t * scp;
 
-    for ( scp = cm_data.scacheLRULastp; scp;
-          scp = (cm_scache_t *) osi_QPrev(&scp->q) ) {
+    lock_ObtainWrite(&cm_scacheLock);
+
+    for ( scp = cm_data.allSCachesp; scp;
+          scp = scp->allNextp ) {
         if (scp->randomACLp) {
-            lock_ObtainMutex(&scp->mx);
+            lock_ReleaseWrite(&cm_scacheLock);
+            lock_ObtainWrite(&scp->rw);
+            lock_ObtainWrite(&cm_scacheLock);
             cm_FreeAllACLEnts(scp);
-            lock_ReleaseMutex(&scp->mx);
+            lock_ReleaseWrite(&scp->rw);
+        }
+
+        if (scp->cbServerp) {
+            cm_PutServer(scp->cbServerp);
+            scp->cbServerp = NULL;
         }
-        lock_FinalizeMutex(&scp->mx);
+        scp->cbExpires = 0;
+        scp->flags &= ~CM_SCACHEFLAG_CALLBACK;
+
+#ifdef USE_BPLUS
+        if (scp->dirBplus)
+            freeBtree(scp->dirBplus);
+        scp->dirBplus = NULL;
+        scp->dirDataVersion = CM_SCACHE_VERSION_BAD;
+        lock_FinalizeRWLock(&scp->dirlock);
+#endif
+        lock_FinalizeRWLock(&scp->rw);
         lock_FinalizeRWLock(&scp->bufCreateLock);
     }
+    lock_ReleaseWrite(&cm_scacheLock);
+
+    cm_GiveUpAllCallbacksAllServers(FALSE);
 
     return cm_dnlcShutdown();
 }
@@ -468,22 +601,26 @@ void cm_InitSCache(int newFile, long maxSCaches)
     static osi_once_t once;
         
     if (osi_Once(&once)) {
-        lock_InitializeRWLock(&cm_scacheLock, "cm_scacheLock");
+        lock_InitializeRWLock(&cm_scacheLock, "cm_scacheLock", LOCK_HIERARCHY_SCACHE_GLOBAL);
         if ( newFile ) {
-            memset(cm_data.hashTablep, 0, sizeof(cm_scache_t *) * cm_data.hashTableSize);
+            memset(cm_data.scacheHashTablep, 0, sizeof(cm_scache_t *) * cm_data.scacheHashTableSize);
+            cm_data.allSCachesp = NULL;
             cm_data.currentSCaches = 0;
             cm_data.maxSCaches = maxSCaches;
             cm_data.scacheLRUFirstp = cm_data.scacheLRULastp = NULL;
         } else {
             cm_scache_t * scp;
 
-            for ( scp = cm_data.scacheLRULastp; scp;
-                  scp = (cm_scache_t *) osi_QPrev(&scp->q) ) {
-                lock_InitializeMutex(&scp->mx, "cm_scache_t mutex");
-                lock_InitializeRWLock(&scp->bufCreateLock, "cm_scache_t bufCreateLock");
-
+            for ( scp = cm_data.allSCachesp; scp;
+                  scp = scp->allNextp ) {
+                lock_InitializeRWLock(&scp->rw, "cm_scache_t rw", LOCK_HIERARCHY_SCACHE);
+                lock_InitializeRWLock(&scp->bufCreateLock, "cm_scache_t bufCreateLock", LOCK_HIERARCHY_SCACHE_BUFCREATE);
+#ifdef USE_BPLUS
+                lock_InitializeRWLock(&scp->dirlock, "cm_scache_t dirlock", LOCK_HIERARCHY_SCACHE_DIRLOCK);
+#endif
                 scp->cbServerp = NULL;
                 scp->cbExpires = 0;
+                scp->volumeCreationDate = 0;
                 scp->fileLocksH = NULL;
                 scp->fileLocksT = NULL;
                 scp->serverLock = (-1);
@@ -495,6 +632,11 @@ void cm_InitSCache(int newFile, long maxSCaches)
                 scp->openShares = 0;
                 scp->openExcls = 0;
                 scp->waitCount = 0;
+#ifdef USE_BPLUS
+                scp->dirBplus = NULL;
+                scp->dirDataVersion = CM_SCACHE_VERSION_BAD;
+#endif
+                scp->waitQueueT = NULL;
                 scp->flags &= ~CM_SCACHEFLAG_WAITING;
             }
         }
@@ -502,6 +644,7 @@ void cm_InitSCache(int newFile, long maxSCaches)
         cm_freeFileLocks = NULL;
         cm_lockRefreshCycle = 0;
         cm_fakeSCacheInit(newFile);
+        cm_allFreeWaiters = NULL;
         cm_dnlcInit(newFile);
         osi_EndOnce(&once);
     }
@@ -519,16 +662,17 @@ cm_scache_t *cm_FindSCache(cm_fid_t *fidp)
        return NULL;
     }
 
-    lock_ObtainWrite(&cm_scacheLock);
-    for (scp=cm_data.hashTablep[hash]; scp; scp=scp->nextp) {
+    lock_ObtainRead(&cm_scacheLock);
+    for (scp=cm_data.scacheHashTablep[hash]; scp; scp=scp->nextp) {
         if (cm_FidCmp(fidp, &scp->fid) == 0) {
             cm_HoldSCacheNoLock(scp);
-            cm_AdjustLRU(scp);
+            lock_ConvertRToW(&cm_scacheLock);
+            cm_AdjustScacheLRU(scp);
             lock_ReleaseWrite(&cm_scacheLock);
             return scp;
         }
     }
-    lock_ReleaseWrite(&cm_scacheLock);
+    lock_ReleaseRead(&cm_scacheLock);
     return NULL;
 }
 
@@ -541,43 +685,50 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
 #endif
 {
     long hash;
-    cm_scache_t *scp;
+    cm_scache_t *scp = NULL;
     long code;
     cm_volume_t *volp = NULL;
     cm_cell_t *cellp;
-    char* mp = NULL;
-    int special; // yj: boolean variable to test if file is on root.afs
-    int isRoot;
+    int special = 0; // yj: boolean variable to test if file is on root.afs
+    int isRoot = 0;
     extern cm_fid_t cm_rootFid;
         
     hash = CM_SCACHE_HASH(fidp);
         
-    osi_assert(fidp->cell != 0);
+    if (fidp->cell == 0)
+        return CM_ERROR_INVAL;
 
-    if (fidp->cell== cm_data.rootFid.cell && 
-         fidp->volume==cm_data.rootFid.volume &&
-         fidp->vnode==0x0 && fidp->unique==0x0)
-    {
-        osi_Log0(afsd_logp,"cm_GetSCache called with root cell/volume and vnode=0 and unique=0");
-    }
+#ifdef AFS_FREELANCE_CLIENT
+    special = (fidp->cell==AFS_FAKE_ROOT_CELL_ID && 
+               fidp->volume==AFS_FAKE_ROOT_VOL_ID &&
+               !(fidp->vnode==0x1 && fidp->unique==0x1));
+    isRoot = (fidp->cell==AFS_FAKE_ROOT_CELL_ID && 
+              fidp->volume==AFS_FAKE_ROOT_VOL_ID &&
+              fidp->vnode==0x1 && fidp->unique==0x1);
+#endif
 
     // yj: check if we have the scp, if so, we don't need
     // to do anything else
     lock_ObtainWrite(&cm_scacheLock);
-    for (scp=cm_data.hashTablep[hash]; scp; scp=scp->nextp) {
+    for (scp=cm_data.scacheHashTablep[hash]; scp; scp=scp->nextp) {
         if (cm_FidCmp(fidp, &scp->fid) == 0) {
 #ifdef DEBUG_REFCOUNT
-           afsi_log("%s:%d cm_GetSCache (1) outScpp 0x%p ref %d", file, line, scp, scp->refCount);
-           osi_Log1(afsd_logp,"cm_GetSCache (1) outScpp 0x%p", scp);
+           afsi_log("%s:%d cm_GetSCache (1) scp 0x%p ref %d", file, line, scp, scp->refCount);
+           osi_Log1(afsd_logp,"cm_GetSCache (1) scp 0x%p", scp);
+#endif
+#ifdef AFS_FREELANCE_CLIENT
+            if (cm_freelanceEnabled && special && 
+                cm_data.fakeDirVersion != scp->dataVersion)
+                break;
 #endif
             cm_HoldSCacheNoLock(scp);
             *outScpp = scp;
-            cm_AdjustLRU(scp);
+            cm_AdjustScacheLRU(scp);
             lock_ReleaseWrite(&cm_scacheLock);
             return 0;
         }
     }
-        
+
     // yj: when we get here, it means we don't have an scp
     // so we need to either load it or fake it, depending
     // on whether the file is "special", see below.
@@ -587,12 +738,6 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
     // because we have to fill in the status stuff 'coz we
     // don't want trybulkstat to fill it in for us
 #ifdef AFS_FREELANCE_CLIENT
-    special = (fidp->cell==AFS_FAKE_ROOT_CELL_ID && 
-               fidp->volume==AFS_FAKE_ROOT_VOL_ID &&
-               !(fidp->vnode==0x1 && fidp->unique==0x1));
-    isRoot = (fidp->cell==AFS_FAKE_ROOT_CELL_ID && 
-              fidp->volume==AFS_FAKE_ROOT_VOL_ID &&
-              fidp->vnode==0x1 && fidp->unique==0x1);
     if (cm_freelanceEnabled && isRoot) {
         osi_Log0(afsd_logp,"cm_GetSCache Freelance and isRoot");
         /* freelance: if we are trying to get the root scp for the first
@@ -602,56 +747,47 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
     }
          
     if (cm_freelanceEnabled && special) {
+        lock_ReleaseWrite(&cm_scacheLock);
         osi_Log0(afsd_logp,"cm_GetSCache Freelance and special");
-        if (fidp->vnode > 1 && fidp->vnode <= cm_noLocalMountPoints + 2) {
-           lock_ObtainMutex(&cm_Freelance_Lock);
-            mp =(cm_localMountPoints+fidp->vnode-2)->mountPointStringp;
-            lock_ReleaseMutex(&cm_Freelance_Lock);
-        } else {
-            mp = "";
+
+        if (cm_getLocalMountPointChange()) {   // check for changes
+            cm_clearLocalMountPointChange();    // clear the changefile
+            cm_reInitLocalMountPoints();       // start reinit
         }
-        scp = cm_GetNewSCache();
-       if (scp == NULL) {
-           osi_Log0(afsd_logp,"cm_GetSCache unable to obtain *new* scache entry");
-            lock_ReleaseWrite(&cm_scacheLock);
-           return CM_ERROR_WOULDBLOCK;
-       }
 
-#if not_too_dangerous
-       /* dropping the cm_scacheLock allows more than one thread
-        * to obtain the same cm_scache_t from the LRU list.  Since
-        * the refCount is known to be zero at this point we have to
-        * assume that no one else is using the one this is returned.
-        */
-       lock_ReleaseWrite(&cm_scacheLock);
-       lock_ObtainMutex(&scp->mx);
-       lock_ObtainWrite(&cm_scacheLock);
-#endif
+        lock_ObtainWrite(&cm_scacheLock);
+        if (scp == NULL) {
+            scp = cm_GetNewSCache();    /* returns scp->rw held */
+            if (scp == NULL) {
+                osi_Log0(afsd_logp,"cm_GetSCache unable to obtain *new* scache entry");
+                lock_ReleaseWrite(&cm_scacheLock);
+                return CM_ERROR_WOULDBLOCK;
+            }
+        } else {
+            lock_ReleaseWrite(&cm_scacheLock);
+            lock_ObtainWrite(&scp->rw);
+            lock_ObtainWrite(&cm_scacheLock);
+        }
         scp->fid = *fidp;
-        scp->volp = cm_data.rootSCachep->volp;
-       cm_GetVolume(scp->volp);        /* grab an additional reference */
         scp->dotdotFid.cell=AFS_FAKE_ROOT_CELL_ID;
         scp->dotdotFid.volume=AFS_FAKE_ROOT_VOL_ID;
         scp->dotdotFid.unique=1;
         scp->dotdotFid.vnode=1;
         scp->flags |= (CM_SCACHEFLAG_PURERO | CM_SCACHEFLAG_RO);
-        scp->nextp=cm_data.hashTablep[hash];
-        cm_data.hashTablep[hash]=scp;
-        scp->flags |= CM_SCACHEFLAG_INHASH;
+        if (!(scp->flags & CM_SCACHEFLAG_INHASH)) {
+            scp->nextp = cm_data.scacheHashTablep[hash];
+            cm_data.scacheHashTablep[hash] = scp;
+            scp->flags |= CM_SCACHEFLAG_INHASH;
+        }
         scp->refCount = 1;
        osi_Log1(afsd_logp,"cm_GetSCache (freelance) sets refCount to 1 scp 0x%x", scp);
-        if (fidp->vnode > 1 && fidp->vnode <= cm_noLocalMountPoints + 2)
-            scp->fileType = (cm_localMountPoints+fidp->vnode-2)->fileType;
-        else 
-            scp->fileType = CM_SCACHETYPE_INVALID;
 
-        lock_ObtainMutex(&cm_Freelance_Lock);
-        scp->length.LowPart = (DWORD)strlen(mp)+4;
+        /* must be called after the scp->fid is set */
+        cm_FreelanceFetchMountPointString(scp);
+        cm_FreelanceFetchFileType(scp);
+        
+        scp->length.LowPart = (DWORD)strlen(scp->mountPointStringp)+4;
         scp->length.HighPart = 0;
-        strncpy(scp->mountPointStringp,mp,MOUNTPOINTLEN);
-        scp->mountPointStringp[MOUNTPOINTLEN-1] = '\0';
-        lock_ReleaseMutex(&cm_Freelance_Lock);
-
         scp->owner=0x0;
         scp->unixModeBits=0777;
         scp->clientModTime=FakeFreelanceModTime;
@@ -660,15 +796,15 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
         scp->parentVnode=0x1;
         scp->group=0;
         scp->dataVersion=cm_data.fakeDirVersion;
-        scp->lockDataVersion=-1; /* no lock yet */
-#if not_too_dangerous
-       lock_ReleaseMutex(&scp->mx);
-#endif
-       *outScpp = scp;
+        scp->bufDataVersionLow=cm_data.fakeDirVersion;
+        scp->lockDataVersion=CM_SCACHE_VERSION_BAD; /* no lock yet */
+        scp->fsLockCount=0;
+        lock_ReleaseWrite(&scp->rw);
         lock_ReleaseWrite(&cm_scacheLock);
+       *outScpp = scp;
 #ifdef DEBUG_REFCOUNT
-       afsi_log("%s:%d cm_GetSCache (2) outScpp 0x%p ref %d", file, line, scp, scp->refCount);
-       osi_Log1(afsd_logp,"cm_GetSCache (2) outScpp 0x%p", scp);
+       afsi_log("%s:%d cm_GetSCache (2) scp 0x%p ref %d", file, line, scp, scp->refCount);
+       osi_Log1(afsd_logp,"cm_GetSCache (2) scp 0x%p", scp);
 #endif
         return 0;
     }
@@ -678,11 +814,11 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
     /* otherwise, we need to find the volume */
     if (!cm_freelanceEnabled || !isRoot) {
         lock_ReleaseWrite(&cm_scacheLock);     /* for perf. reasons */
-        cellp = cm_FindCellByID(fidp->cell);
+        cellp = cm_FindCellByID(fidp->cell, 0);
         if (!cellp) 
             return CM_ERROR_NOSUCHCELL;
 
-        code = cm_GetVolumeByID(cellp, fidp->volume, userp, reqp, &volp);
+        code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp, CM_GETVOL_FLAG_CREATE, &volp);
         if (code) 
             return code;
         lock_ObtainWrite(&cm_scacheLock);
@@ -691,15 +827,14 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
     /* otherwise, we have the volume, now reverify that the scp doesn't
      * exist, and proceed.
      */
-    for (scp=cm_data.hashTablep[hash]; scp; scp=scp->nextp) {
+    for (scp=cm_data.scacheHashTablep[hash]; scp; scp=scp->nextp) {
         if (cm_FidCmp(fidp, &scp->fid) == 0) {
 #ifdef DEBUG_REFCOUNT
-           afsi_log("%s:%d cm_GetSCache (3) outScpp 0x%p ref %d", file, line, scp, scp->refCount);
-           osi_Log1(afsd_logp,"cm_GetSCache (3) outScpp 0x%p", scp);
+           afsi_log("%s:%d cm_GetSCache (3) scp 0x%p ref %d", file, line, scp, scp->refCount);
+           osi_Log1(afsd_logp,"cm_GetSCache (3) scp 0x%p", scp);
 #endif
             cm_HoldSCacheNoLock(scp);
-            osi_assert(scp->volp == volp);
-            cm_AdjustLRU(scp);
+            cm_AdjustScacheLRU(scp);
             lock_ReleaseWrite(&cm_scacheLock);
             if (volp)
                 cm_PutVolume(volp);
@@ -709,7 +844,7 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
     }
         
     /* now, if we don't have the fid, recycle something */
-    scp = cm_GetNewSCache();
+    scp = cm_GetNewSCache();    /* returns scp->rw held */
     if (scp == NULL) {
        osi_Log0(afsd_logp,"cm_GetNewSCache unable to obtain *new* scache entry");
        lock_ReleaseWrite(&cm_scacheLock);
@@ -717,45 +852,43 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
            cm_PutVolume(volp);
        return CM_ERROR_WOULDBLOCK;
     }
-    osi_Log2(afsd_logp,"cm_GetNewSCache returns scp 0x%x flags 0x%x", scp, scp->flags);
+#ifdef DEBUG_REFCOUNT
+    afsi_log("%s:%d cm_GetNewSCache returns scp 0x%p flags 0x%x", file, line, scp, scp->flags);
+#endif
+    osi_Log2(afsd_logp,"cm_GetNewSCache returns scp 0x%p flags 0x%x", scp, scp->flags);
 
-    osi_assert(!(scp->flags & CM_SCACHEFLAG_INHASH));
+    osi_assertx(!(scp->flags & CM_SCACHEFLAG_INHASH), "CM_SCACHEFLAG_INHASH set");
 
-#if not_too_dangerous
-    /* dropping the cm_scacheLock allows more than one thread
-     * to obtain the same cm_scache_t from the LRU list.  Since
-     * the refCount is known to be zero at this point we have to
-     * assume that no one else is using the one this is returned.
-     */
-    lock_ReleaseWrite(&cm_scacheLock);
-    lock_ObtainMutex(&scp->mx);
-    lock_ObtainWrite(&cm_scacheLock);
-#endif
     scp->fid = *fidp;
-    scp->volp = volp;  /* a held reference */
-
     if (!cm_freelanceEnabled || !isRoot) {
         /* if this scache entry represents a volume root then we need 
          * to copy the dotdotFipd from the volume structure where the 
          * "master" copy is stored (defect 11489)
          */
-        if (scp->fid.vnode == 1 && scp->fid.unique == 1) {
-           scp->dotdotFid = volp->dotdotFid;
-        }
-         
-        if (volp->roID == fidp->volume)
+        if (volp->vol[ROVOL].ID == fidp->volume) {
            scp->flags |= (CM_SCACHEFLAG_PURERO | CM_SCACHEFLAG_RO);
-        else if (volp->bkID == fidp->volume)
+            if (scp->fid.vnode == 1 && scp->fid.unique == 1)
+                scp->dotdotFid = cm_VolumeStateByType(volp, ROVOL)->dotdotFid;
+        } else if (volp->vol[BACKVOL].ID == fidp->volume) {
            scp->flags |= CM_SCACHEFLAG_RO;
+            if (scp->fid.vnode == 1 && scp->fid.unique == 1)
+                scp->dotdotFid = cm_VolumeStateByType(volp, BACKVOL)->dotdotFid;
+        } else {
+            if (scp->fid.vnode == 1 && scp->fid.unique == 1)
+                scp->dotdotFid = cm_VolumeStateByType(volp, RWVOL)->dotdotFid;
+        }
     }
-    scp->nextp = cm_data.hashTablep[hash];
-    cm_data.hashTablep[hash] = scp;
+    if (volp)
+        cm_PutVolume(volp);
+    scp->nextp = cm_data.scacheHashTablep[hash];
+    cm_data.scacheHashTablep[hash] = scp;
     scp->flags |= CM_SCACHEFLAG_INHASH;
+    lock_ReleaseWrite(&scp->rw);
     scp->refCount = 1;
-    osi_Log1(afsd_logp,"cm_GetSCache sets refCount to 1 scp 0x%x", scp);
-#if not_too_dangerous
-    lock_ReleaseMutex(&scp->mx);
+#ifdef DEBUG_REFCOUNT
+    afsi_log("%s:%d cm_GetSCache sets refCount to 1 scp 0x%x", file, line, scp);
 #endif
+    osi_Log1(afsd_logp,"cm_GetSCache sets refCount to 1 scp 0x%x", scp);
 
     /* XXX - The following fields in the cm_scache are 
      * uninitialized:
@@ -763,14 +896,14 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
      *   parentVnode
      *   parentUnique
      */
-    lock_ReleaseWrite(&cm_scacheLock);
         
     /* now we have a held scache entry; just return it */
     *outScpp = scp;
 #ifdef DEBUG_REFCOUNT
-    afsi_log("%s:%d cm_GetSCache (4) outScpp 0x%p ref %d", file, line, scp, scp->refCount);
-    osi_Log1(afsd_logp,"cm_GetSCache (4) outScpp 0x%p", scp);
+    afsi_log("%s:%d cm_GetSCache (4) scp 0x%p ref %d", file, line, scp, scp->refCount);
+    osi_Log1(afsd_logp,"cm_GetSCache (4) scp 0x%p", scp);
 #endif
+    lock_ReleaseWrite(&cm_scacheLock);
     return 0;
 }
 
@@ -783,14 +916,12 @@ cm_scache_t * cm_FindSCacheParent(cm_scache_t * scp)
     cm_fid_t    parent_fid;
     cm_scache_t * pscp = NULL;
 
-    lock_ObtainRead(&cm_scacheLock);
-    parent_fid = scp->fid;
-    parent_fid.vnode = scp->parentVnode;
-    parent_fid.unique = scp->parentUnique;
+    lock_ObtainWrite(&cm_scacheLock);
+    cm_SetFid(&parent_fid, scp->fid.cell, scp->fid.volume, scp->parentVnode, scp->parentUnique);
 
     if (cm_FidCmp(&scp->fid, &parent_fid)) {
        i = CM_SCACHE_HASH(&parent_fid);
-       for (pscp = cm_data.hashTablep[i]; pscp; pscp = pscp->nextp) {
+       for (pscp = cm_data.scacheHashTablep[i]; pscp; pscp = pscp->nextp) {
            if (!cm_FidCmp(&pscp->fid, &parent_fid)) {
                cm_HoldSCacheNoLock(pscp);
                break;
@@ -798,11 +929,74 @@ cm_scache_t * cm_FindSCacheParent(cm_scache_t * scp)
        }
     }
 
-    lock_ReleaseRead(&cm_scacheLock);
+    lock_ReleaseWrite(&cm_scacheLock);
 
     return pscp;
 }
 
+void cm_SyncOpAddToWaitQueue(cm_scache_t * scp, afs_int32 flags, cm_buf_t * bufp)
+{
+    cm_scache_waiter_t * w;
+    /* Record the calling thread as a waiter on scp for the (flags, bufp) request. */
+    lock_ObtainWrite(&cm_scacheLock);
+    if (cm_allFreeWaiters == NULL) {
+        w = calloc(1, sizeof(*w));      /* free list is empty: allocate a zeroed entry */
+        osi_assertx(w != NULL, "cm_scache_waiter_t allocation failed");
+    } else {
+        w = (cm_scache_waiter_t *) cm_allFreeWaiters;   /* reuse an entry from the free list */
+        osi_QRemove(&cm_allFreeWaiters, (osi_queue_t *) w);
+    }
+
+    w->threadId = thrd_Current();
+    w->scp = scp;
+    cm_HoldSCacheNoLock(scp);           /* take a reference on scp for the queued entry */
+    w->flags = flags;
+    w->bufp = bufp;
+    /* append to the tail of scp's wait queue */
+    osi_QAddT(&scp->waitQueueH, &scp->waitQueueT, (osi_queue_t *) w);
+    lock_ReleaseWrite(&cm_scacheLock);
+
+    osi_Log2(afsd_logp, "cm_SyncOpAddToWaitQueue : Adding thread to wait queue scp 0x%p w 0x%p", scp, w);
+}
+
+int cm_SyncOpCheckContinue(cm_scache_t * scp, afs_int32 flags, cm_buf_t * bufp)
+{
+    cm_scache_waiter_t * w;
+    int this_is_me;
+    /* Returns 1 (after dequeuing the entry) when the first waiter matching (flags, bufp) is this thread, else 0. */
+    osi_Log0(afsd_logp, "cm_SyncOpCheckContinue checking for continuation");
+    /* find the first queued waiter with the same flags and buffer */
+    lock_ObtainRead(&cm_scacheLock);
+    for (w = (cm_scache_waiter_t *)scp->waitQueueH;
+         w;
+         w = (cm_scache_waiter_t *)osi_QNext((osi_queue_t *) w)) {
+        if (w->flags == flags && w->bufp == bufp) {
+            break;
+        }
+    }
+
+    osi_assertx(w != NULL, "null cm_scache_waiter_t");
+    this_is_me = (w->threadId == thrd_Current());
+    lock_ReleaseRead(&cm_scacheLock);
+
+    if (!this_is_me) {
+        osi_Log1(afsd_logp, "cm_SyncOpCheckContinue MISS: Waiter 0x%p", w);
+        return 0;
+    }
+
+    osi_Log1(afsd_logp, "cm_SyncOpCheckContinue HIT: Waiter 0x%p", w);
+    /* our entry: unlink it, release the scp reference, zero it and put it on the free list */
+    lock_ObtainWrite(&cm_scacheLock);
+    osi_QRemoveHT(&scp->waitQueueH, &scp->waitQueueT, (osi_queue_t *) w);
+    cm_ReleaseSCacheNoLock(scp);
+    memset(w, 0, sizeof(*w));
+    osi_QAdd(&cm_allFreeWaiters, (osi_queue_t *) w);
+    lock_ReleaseWrite(&cm_scacheLock);
+
+    return 1;
+}
+
+
 /* synchronize a fetch, store, read, write, fetch status or store status.
  * Called with scache mutex held, and returns with it held, but temporarily
  * drops it during the fetch.
@@ -869,12 +1063,15 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req
     afs_uint32 sleep_scp_flags = 0;
     afs_uint32 sleep_buf_cmflags = 0;
     afs_uint32 sleep_scp_bufs = 0;
+    int wakeupCycle;
+
+    lock_AssertWrite(&scp->rw);
 
     /* lookup this first */
     bufLocked = flags & CM_SCACHESYNC_BUFLOCKED;
 
-       if (bufp)
-               osi_assert(bufp->refCount > 0);
+    if (bufp)
+        osi_assertx(bufp->refCount > 0, "cm_buf_t refCount 0");
 
 
     /* Do the access check.  Now we don't really do the access check
@@ -928,8 +1125,8 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req
                 osi_Log1(afsd_logp, "CM SyncOp scp 0x%p is FETCHING|STORING|SIZESTORING|GETCALLBACK want FETCHDATA", scp);
                 goto sleep;
             }
-            if (bufp && (bufp->cmFlags & (CM_BUF_CMFETCHING | CM_BUF_CMSTORING))) {
-                osi_Log2(afsd_logp, "CM SyncOp scp 0x%p bufp 0x%p is BUF_CMFETCHING|BUF_CMSTORING want FETCHDATA", scp, bufp);
+            if (bufp && (bufp->cmFlags & (CM_BUF_CMFETCHING | CM_BUF_CMSTORING | CM_BUF_CMWRITING))) {
+                osi_Log2(afsd_logp, "CM SyncOp scp 0x%p bufp 0x%p is BUF_CMFETCHING|BUF_CMSTORING|BUF_CMWRITING want FETCHDATA", scp, bufp);
                 goto sleep;
             }
         }
@@ -940,8 +1137,8 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req
                 osi_Log1(afsd_logp, "CM SyncOp scp 0x%p is FETCHING|STORING|SIZESTORING|GETCALLBACK want STOREDATA", scp);
                 goto sleep;
             }
-            if (bufp && (bufp->cmFlags & (CM_BUF_CMFETCHING | CM_BUF_CMSTORING))) {
-                osi_Log2(afsd_logp, "CM SyncOp scp 0x%p bufp 0x%p is BUF_CMFETCHING|BUF_CMSTORING want STOREDATA", scp, bufp);
+            if (bufp && (bufp->cmFlags & (CM_BUF_CMFETCHING | CM_BUF_CMSTORING | CM_BUF_CMWRITING))) {
+                osi_Log2(afsd_logp, "CM SyncOp scp 0x%p bufp 0x%p is BUF_CMFETCHING|BUF_CMSTORING|BUF_CMWRITING want STOREDATA", scp, bufp);
                 goto sleep;
             }
         }
@@ -1007,6 +1204,10 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req
                 osi_Log2(afsd_logp, "CM SyncOp scp 0x%p bufp 0x%p is BUF_CMFETCHING want READ", scp, bufp);
                 goto sleep;
             }
+            if (bufp && (bufp->cmFlags & CM_BUF_CMWRITING)) {
+                osi_Log2(afsd_logp, "CM SyncOp scp 0x%p bufp 0x%p is BUF_CMWRITING want READ", scp, bufp);
+                goto sleep;
+            }
         }
         if (flags & CM_SCACHESYNC_WRITE) {
             /* don't write unless the status is stable and the chunk
@@ -1017,8 +1218,15 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req
                 osi_Log1(afsd_logp, "CM SyncOp scp 0x%p is FETCHING|STORING|SIZESTORING want WRITE", scp);
                 goto sleep;
             }
-            if (bufp && (bufp->cmFlags & (CM_BUF_CMFETCHING | CM_BUF_CMSTORING))) {
-                osi_Log2(afsd_logp, "CM SyncOp scp 0x%p bufp 0x%p is BUF_CMFETCHING|BUF_CMSTORING want WRITE", scp, bufp);
+            if (bufp && (bufp->cmFlags & (CM_BUF_CMFETCHING |
+                                          CM_BUF_CMSTORING |
+                                          CM_BUF_CMWRITING))) {
+                osi_Log3(afsd_logp, "CM SyncOp scp 0x%p bufp 0x%p is %s want WRITE",
+                         scp, bufp,
+                         ((bufp->cmFlags & CM_BUF_CMFETCHING) ? "CM_BUF_CMFETCHING":
+                          ((bufp->cmFlags & CM_BUF_CMSTORING) ? "CM_BUF_CMSTORING" :
+                           ((bufp->cmFlags & CM_BUF_CMWRITING) ? "CM_BUF_CMWRITING" :
+                            "UNKNOWN!!!"))));
                 goto sleep;
             }
         }
@@ -1042,9 +1250,9 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req
                    lock_ReleaseMutex(&bufp->mx);
                 code = cm_GetCallback(scp, userp, reqp, (flags & CM_SCACHESYNC_FORCECB)?1:0);
                 if (bufLocked) {
-                    lock_ReleaseMutex(&scp->mx);
+                    lock_ReleaseWrite(&scp->rw);
                     lock_ObtainMutex(&bufp->mx);
-                    lock_ObtainMutex(&scp->mx);
+                    lock_ObtainWrite(&scp->rw);
                 }
                 if (code) 
                     return code;
@@ -1055,9 +1263,9 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req
 
         if (rights) {
             /* can't check access rights without a callback */
-            osi_assert(flags & CM_SCACHESYNC_NEEDCALLBACK);
+            osi_assertx(flags & CM_SCACHESYNC_NEEDCALLBACK, "!CM_SCACHESYNC_NEEDCALLBACK");
 
-            if ((rights & PRSFS_WRITE) && (scp->flags & CM_SCACHEFLAG_RO))
+            if ((rights & (PRSFS_WRITE|PRSFS_DELETE)) && (scp->flags & CM_SCACHEFLAG_RO))
                 return CM_ERROR_READONLY;
 
             if (cm_HaveAccessRights(scp, userp, rights, &outRights)) {
@@ -1069,9 +1277,9 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req
                 if (bufLocked) lock_ReleaseMutex(&bufp->mx);
                 code = cm_GetAccessRights(scp, userp, reqp);
                 if (bufLocked) {
-                    lock_ReleaseMutex(&scp->mx);
+                    lock_ReleaseWrite(&scp->rw);
                     lock_ObtainMutex(&bufp->mx);
-                    lock_ObtainMutex(&scp->mx);
+                    lock_ObtainWrite(&scp->rw);
                 }
                 if (code) 
                     return code;
@@ -1089,6 +1297,7 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req
         if (flags & CM_SCACHESYNC_NOWAIT) 
             return CM_ERROR_WOULDBLOCK;
 
+        /* These are used for minidump debugging */
        sleep_scp_flags = scp->flags;           /* so we know why we slept */
        sleep_buf_cmflags = bufp ? bufp->cmFlags : 0;
        sleep_scp_bufs = (scp->bufReadsp ? 1 : 0) | (scp->bufWritesp ? 2 : 0);
@@ -1105,15 +1314,20 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req
             scp->flags |= CM_SCACHEFLAG_WAITING;
             scp->waitCount = scp->waitRequests = 1;
         }
-        if (bufLocked) 
-            lock_ReleaseMutex(&bufp->mx);
-        osi_SleepM((LONG_PTR) &scp->flags, &scp->mx);
 
-       smb_UpdateServerPriority();
+        cm_SyncOpAddToWaitQueue(scp, flags, bufp);
+        wakeupCycle = 0;
+        do {
+            if (bufLocked) 
+                lock_ReleaseMutex(&bufp->mx);
+            osi_SleepW((LONG_PTR) &scp->flags, &scp->rw);
+            if (bufLocked) 
+                lock_ObtainMutex(&bufp->mx);
+            lock_ObtainWrite(&scp->rw);
+        } while (!cm_SyncOpCheckContinue(scp, flags, bufp));
+
+       cm_UpdateServerPriority();
 
-        if (bufLocked) 
-            lock_ObtainMutex(&bufp->mx);
-        lock_ObtainMutex(&scp->mx);
         scp->waitCount--;
         osi_Log3(afsd_logp, "CM SyncOp woke! scp 0x%p; still waiting %d threads of %d requests", 
                  scp, scp->waitCount, scp->waitRequests);
@@ -1146,7 +1360,7 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req
         if (bufp) {
             for(qdp = scp->bufReadsp; qdp; qdp = (osi_queueData_t *) osi_QNext(&qdp->q)) {
                 tbufp = osi_GetQData(qdp);
-                osi_assert(tbufp != bufp);
+                osi_assertx(tbufp != bufp, "unexpected cm_buf_t value");
             }
         }
 
@@ -1165,7 +1379,7 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req
         if (bufp) {
             for(qdp = scp->bufWritesp; qdp; qdp = (osi_queueData_t *) osi_QNext(&qdp->q)) {
                 tbufp = osi_GetQData(qdp);
-                osi_assert(tbufp != bufp);
+                osi_assertx(tbufp != bufp, "unexpected cm_buf_t value");
             }
         }
 
@@ -1179,6 +1393,13 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req
         osi_QAdd((osi_queue_t **) &scp->bufWritesp, &qdp->q);
     }
 
+    if (flags & CM_SCACHESYNC_WRITE) {
+        /* mark the buffer as being written to. */
+        if (bufp) {
+            bufp->cmFlags |= CM_BUF_CMWRITING;
+        }
+    }
+
     return 0;
 }
 
@@ -1190,7 +1411,7 @@ void cm_SyncOpDone(cm_scache_t *scp, cm_buf_t *bufp, afs_uint32 flags)
     osi_queueData_t *qdp;
     cm_buf_t *tbufp;
 
-    lock_AssertMutex(&scp->mx);
+    lock_AssertWrite(&scp->rw);
 
     /* now, update the recorded state for RPC-type calls */
     if (flags & CM_SCACHESYNC_FETCHSTATUS)
@@ -1259,6 +1480,14 @@ void cm_SyncOpDone(cm_scache_t *scp, cm_buf_t *bufp, afs_uint32 flags)
         }
     }
 
+    if (flags & CM_SCACHESYNC_WRITE) {
+        if (bufp) {
+            osi_assertx(bufp->cmFlags & CM_BUF_CMWRITING, "!CM_BUF_CMWRITING");
+
+            bufp->cmFlags &= ~CM_BUF_CMWRITING;
+        }
+    }
+
     /* and wakeup anyone who is waiting */
     if (scp->flags & CM_SCACHEFLAG_WAITING) {
         osi_Log1(afsd_logp, "CM SyncOpDone Waking scp 0x%p", scp);
@@ -1279,20 +1508,33 @@ void cm_SyncOpDone(cm_scache_t *scp, cm_buf_t *bufp, afs_uint32 flags)
  * handled after the callback breaking is done, but only one of whose calls
  * started before that, can cause old info to be merged from the first call.
  */
-void cm_MergeStatus(cm_scache_t *scp, AFSFetchStatus *statusp, AFSVolSync *volp,
-                    cm_user_t *userp, afs_uint32 flags)
+void cm_MergeStatus(cm_scache_t *dscp, 
+                   cm_scache_t *scp, AFSFetchStatus *statusp, 
+                   AFSVolSync *volsyncp,
+                    cm_user_t *userp, cm_req_t *reqp, afs_uint32 flags)
 {
+    afs_uint64 dataVersion;
+    struct cm_volume *volp = NULL;
+    struct cm_cell *cellp = NULL;
+
     // yj: i want to create some fake status for the /afs directory and the
     // entries under that directory
 #ifdef AFS_FREELANCE_CLIENT
-    if (cm_freelanceEnabled && scp == cm_data.rootSCachep) {
-        osi_Log0(afsd_logp,"cm_MergeStatus Freelance cm_data.rootSCachep");
+    if (cm_freelanceEnabled && scp->fid.cell==AFS_FAKE_ROOT_CELL_ID &&
+         scp->fid.volume==AFS_FAKE_ROOT_VOL_ID) {
+        if (scp == cm_data.rootSCachep) {
+            osi_Log0(afsd_logp,"cm_MergeStatus Freelance cm_data.rootSCachep");
+            statusp->FileType = CM_SCACHETYPE_DIRECTORY;
+            statusp->Length = cm_fakeDirSize;
+            statusp->Length_hi = 0;
+        } else {
+            statusp->FileType = scp->fileType;
+            statusp->Length = scp->length.LowPart;
+            statusp->Length_hi = scp->length.HighPart;
+        }
         statusp->InterfaceVersion = 0x1;
-        statusp->FileType = CM_SCACHETYPE_DIRECTORY;
         statusp->LinkCount = scp->linkCount;
-        statusp->Length = cm_fakeDirSize;
-        statusp->Length_hi = 0;
-        statusp->DataVersion = cm_data.fakeDirVersion;
+        statusp->DataVersion = (afs_uint32)(cm_data.fakeDirVersion & 0xFFFFFFFF);
         statusp->Author = 0x1;
         statusp->Owner = 0x0;
         statusp->CallerAccess = 0x9;
@@ -1305,36 +1547,63 @@ void cm_MergeStatus(cm_scache_t *scp, AFSFetchStatus *statusp, AFSVolSync *volp,
         statusp->ServerModTime = FakeFreelanceModTime;
         statusp->Group = 0;
         statusp->SyncCounter = 0;
-        statusp->dataVersionHigh = 0;
-       statusp->errorCode = 0;
+        statusp->dataVersionHigh = (afs_uint32)(cm_data.fakeDirVersion >> 32);
+        statusp->lockCount = 0;
+        statusp->errorCode = 0;
     }
 #endif /* AFS_FREELANCE_CLIENT */
 
     if (statusp->errorCode != 0) {     
        scp->flags |= CM_SCACHEFLAG_EACCESS;
        osi_Log2(afsd_logp, "Merge, Failure scp %x code 0x%x", scp, statusp->errorCode);
+
+       scp->fileType = 0;      /* unknown */
+
+       scp->serverModTime = 0;
+       scp->clientModTime = 0;
+       scp->length.LowPart = 0;
+       scp->length.HighPart = 0;
+       scp->serverLength.LowPart = 0;
+       scp->serverLength.HighPart = 0;
+       scp->linkCount = 0;
+       scp->owner = 0;
+       scp->group = 0;
+       scp->unixModeBits = 0;
+       scp->anyAccess = 0;
+       scp->dataVersion = CM_SCACHE_VERSION_BAD;
+        scp->bufDataVersionLow = CM_SCACHE_VERSION_BAD;
+        scp->fsLockCount = 0;
+
+       if (dscp) {
+            scp->parentVnode = dscp->fid.vnode;
+            scp->parentUnique = dscp->fid.unique;
+       } else {
+            scp->parentVnode = 0;
+            scp->parentUnique = 0;
+       }
+       goto done;
     } else {
        scp->flags &= ~CM_SCACHEFLAG_EACCESS;
     }
 
-    if (!(flags & CM_MERGEFLAG_FORCE)
-         && statusp->DataVersion < (unsigned long) scp->dataVersion) {
-        struct cm_cell *cellp;
+    dataVersion = statusp->dataVersionHigh;
+    dataVersion <<= 32;
+    dataVersion |= statusp->DataVersion;
 
-        cellp = cm_FindCellByID(scp->fid.cell);
-        if (scp->cbServerp) {
-            struct cm_volume *volp = NULL;
+    if (!(flags & CM_MERGEFLAG_FORCE) && 
+        dataVersion < scp->dataVersion &&
+        scp->dataVersion != CM_SCACHE_VERSION_BAD) {
 
-            cm_GetVolumeByID(cellp, scp->fid.volume, userp,
-                              (cm_req_t *) NULL, &volp);
+        cellp = cm_FindCellByID(scp->fid.cell, 0);
+        if (scp->cbServerp) {
+            cm_FindVolumeByID(cellp, scp->fid.volume, userp,
+                              reqp, CM_GETVOL_FLAG_CREATE, &volp);
             osi_Log2(afsd_logp, "old data from server %x volume %s",
                       scp->cbServerp->addr.sin_addr.s_addr,
                       volp ? volp->namep : "(unknown)");
-            if (volp)
-                cm_PutVolume(volp);
         }
         osi_Log3(afsd_logp, "Bad merge, scp %x, scp dv %d, RPC dv %d",
-                  scp, scp->dataVersion, statusp->DataVersion);
+                  scp, scp->dataVersion, dataVersion);
         /* we have a number of data fetch/store operations running
          * concurrently, and we can tell which one executed last at the
          * server by its mtime.
@@ -1363,9 +1632,11 @@ void cm_MergeStatus(cm_scache_t *scp, AFSFetchStatus *statusp, AFSVolSync *volp,
          * infinite loop.  So we just grin and bear it.
          */
         if (!(scp->flags & CM_SCACHEFLAG_RO))
-            return;
+            goto done;
     }       
 
+    scp->volumeCreationDate = volsyncp->spare1;       /* volume creation date */
+
     scp->serverModTime = statusp->ServerModTime;
 
     if (!(scp->mask & CM_SCACHEMASK_CLIENTMODTIME)) {
@@ -1401,7 +1672,8 @@ void cm_MergeStatus(cm_scache_t *scp, AFSFetchStatus *statusp, AFSVolSync *volp,
     /* and other stuff */
     scp->parentVnode = statusp->ParentVnode;
     scp->parentUnique = statusp->ParentUnique;
-        
+    scp->fsLockCount = statusp->lockCount;
+
     /* and merge in the private acl cache info, if this is more than the public
      * info; merge in the public stuff in any case.
      */
@@ -1411,19 +1683,110 @@ void cm_MergeStatus(cm_scache_t *scp, AFSFetchStatus *statusp, AFSVolSync *volp,
         cm_AddACLCache(scp, userp, statusp->CallerAccess);
     }
 
-    if ((flags & CM_MERGEFLAG_STOREDATA) &&
-       statusp->DataVersion - scp->dataVersion == 1) {
-       cm_buf_t *bp;
-
-       for (bp = cm_data.buf_fileHashTablepp[BUF_FILEHASH(&scp->fid)]; bp; bp=bp->fileHashp)
+    if (scp->dataVersion != 0 &&
+        ((!(flags & (CM_MERGEFLAG_DIROP|CM_MERGEFLAG_STOREDATA)) && dataVersion != scp->dataVersion) ||
+         ((flags & (CM_MERGEFLAG_DIROP|CM_MERGEFLAG_STOREDATA)) && dataVersion - scp->dataVersion > 1))) {
+        /*
+         * We now know that all of the data buffers that we have associated
+         * with this scp are invalid.  Subsequent operations will go faster
+         * if the buffers are removed from the hash tables.
+         *
+         * We do not remove directory buffers if the dataVersion delta is 1 because
+         * those version numbers will be updated as part of the directory operation.
+         *
+         * We do not remove storedata buffers because they will still be valid.
+         */
+        int i, j;
+        cm_buf_t **lbpp;
+        cm_buf_t *tbp;
+        cm_buf_t *bp, *prevBp, *nextBp;
+
+        lock_ObtainWrite(&buf_globalLock);
+        i = BUF_FILEHASH(&scp->fid);
+        for (bp = cm_data.buf_fileHashTablepp[i]; bp; bp=nextBp)
        {
+            nextBp = bp->fileHashp;     /* saved first: bp may be unlinked below */
+            /*
+             * If the buffer belongs to this stat cache entry and
+             * its mutex can be obtained without blocking, remove it
+             * from the hash tables provided it is unreferenced and
+             * is not being read, being written, or dirty.  If there
+             * are references, the buffer might be updated to the
+             * current version so leave it in place.
+             */
+            if (cm_FidCmp(&scp->fid, &bp->fid) == 0 &&
+                 lock_TryMutex(&bp->mx)) {
+                if (bp->refCount == 0 &&
+                    !(bp->flags & (CM_BUF_READING | CM_BUF_WRITING | CM_BUF_DIRTY))) {
+                    prevBp = bp->fileHashBackp;
+                    bp->fileHashBackp = bp->fileHashp = NULL;
+                    if (prevBp)
+                        prevBp->fileHashp = nextBp;
+                    else
+                        cm_data.buf_fileHashTablepp[i] = nextBp;
+                    if (nextBp)
+                        nextBp->fileHashBackp = prevBp;
+
+                    j = BUF_HASH(&bp->fid, &bp->offset);
+                    lbpp = &(cm_data.buf_scacheHashTablepp[j]);
+                    for(tbp = *lbpp; tbp; lbpp = &tbp->hashp, tbp = *lbpp) {
+                        if (tbp == bp)
+                            break;
+                    }
+
+                    *lbpp = bp->hashp; /* hash out */
+                    bp->hashp = NULL;
+
+                    bp->flags &= ~CM_BUF_INHASH;
+                }
+                lock_ReleaseMutex(&bp->mx);
+            }
        }
+        lock_ReleaseWrite(&buf_globalLock);
+    }
+
+    /* We maintain a range of buffer dataVersion values which are considered 
+     * valid.  This avoids the need to update the dataVersion on each buffer
+     * object during an uncontested storeData operation.  As a result this 
+     * merge status no longer has performance characteristics derived from
+     * the size of the file.
+     */
+    if (((flags & CM_MERGEFLAG_STOREDATA) && dataVersion - scp->dataVersion > 1) || 
+         (!(flags & CM_MERGEFLAG_STOREDATA) && scp->dataVersion != dataVersion) ||
+         scp->bufDataVersionLow == 0)
+        scp->bufDataVersionLow = dataVersion;
+    
+    scp->dataVersion = dataVersion;
+
+    /* 
+     * If someone is waiting for status information, we can wake them up
+     * now even though the entity that issued the FetchStatus may not 
+     * have completed yet.
+     */
+    cm_SyncOpDone(scp, NULL, CM_SCACHESYNC_FETCHSTATUS);
 
+    /*
+     * We just successfully merged status on the stat cache object.
+     * This means that the associated volume must be online.
+     */
+    if (!volp) {
+        if (!cellp)
+            cellp = cm_FindCellByID(scp->fid.cell, 0);
+        cm_FindVolumeByID(cellp, scp->fid.volume, userp, reqp, 0, &volp);
+    }
+    if (volp) {
+        cm_vol_state_t *statep = cm_VolumeStateByID(volp, scp->fid.volume);
+        if (statep->state != vl_online) {
+            lock_ObtainWrite(&volp->rw);
+            cm_VolumeStatusNotification(volp, statep->ID, statep->state, vl_online);
+            statep->state = vl_online;
+            lock_ReleaseWrite(&volp->rw);
+        }
     }
-    scp->dataVersion = statusp->DataVersion;
+  done:
+    if (volp)
+        cm_PutVolume(volp);
+
 }
 
 /* note that our stat cache info is incorrect, so force us eventually
@@ -1436,17 +1799,20 @@ void cm_MergeStatus(cm_scache_t *scp, AFSFetchStatus *statusp, AFSVolSync *volp,
  */
 void cm_DiscardSCache(cm_scache_t *scp)
 {
-    lock_AssertMutex(&scp->mx);
+    lock_AssertWrite(&scp->rw);         /* asserts that scp->rw is write-locked on entry */
     if (scp->cbServerp) {
         cm_PutServer(scp->cbServerp);
        scp->cbServerp = NULL;
     }
     scp->cbExpires = 0;
-    scp->flags &= ~CM_SCACHEFLAG_CALLBACK;
+    scp->flags &= ~(CM_SCACHEFLAG_CALLBACK | CM_SCACHEFLAG_LOCAL);      /* clear both CALLBACK and LOCAL */
     cm_dnlcPurgedp(scp);
     cm_dnlcPurgevp(scp);
     cm_FreeAllACLEnts(scp);
 
+    if (scp->fileType == CM_SCACHETYPE_DFSLINK)
+        cm_VolStatus_Invalidate_DFS_Mapping(scp);       /* presumably drops a cached DFS link mapping -- confirm */
+
     /* Force mount points and symlinks to be re-evaluated */
     scp->mountPointStringp[0] = '\0';
 }
@@ -1463,12 +1829,15 @@ void cm_HoldSCacheNoLockDbg(cm_scache_t *scp, char * file, long line)
 #else
 void cm_HoldSCacheNoLock(cm_scache_t *scp)
 #endif
-{
-    osi_assert(scp != 0);
-    scp->refCount++;
+{     
+    afs_int32 refCount;
+    /* Add one reference to scp; cm_scacheLock itself is only asserted here, not acquired. */
+    osi_assertx(scp != NULL, "null cm_scache_t");
+    lock_AssertAny(&cm_scacheLock);
+    refCount = InterlockedIncrement(&scp->refCount);
 #ifdef DEBUG_REFCOUNT
-    osi_Log2(afsd_logp,"cm_HoldSCacheNoLock scp 0x%p ref %d",scp, scp->refCount);
-    afsi_log("%s:%d cm_HoldSCacheNoLock scp 0x%p, ref %d", file, line, scp, scp->refCount);
+    osi_Log2(afsd_logp,"cm_HoldSCacheNoLock scp 0x%p ref %d",scp, refCount);
+    afsi_log("%s:%d cm_HoldSCacheNoLock scp 0x%p, ref %d", file, line, scp, refCount);
 #endif
 }
 
@@ -1478,14 +1847,16 @@ void cm_HoldSCacheDbg(cm_scache_t *scp, char * file, long line)
 void cm_HoldSCache(cm_scache_t *scp)
 #endif
 {
-    osi_assert(scp != 0);
-    lock_ObtainWrite(&cm_scacheLock);
-    scp->refCount++;
+    afs_int32 refCount;
+    /* Add one reference to scp, holding cm_scacheLock for read around the interlocked increment. */
+    osi_assertx(scp != NULL, "null cm_scache_t");
+    lock_ObtainRead(&cm_scacheLock);
+    refCount = InterlockedIncrement(&scp->refCount);
 #ifdef DEBUG_REFCOUNT
-    osi_Log2(afsd_logp,"cm_HoldSCache scp 0x%p ref %d",scp, scp->refCount);
-    afsi_log("%s:%d cm_HoldSCache scp 0x%p ref %d", file, line, scp, scp->refCount);
+    osi_Log2(afsd_logp,"cm_HoldSCache scp 0x%p ref %d",scp, refCount);
+    afsi_log("%s:%d cm_HoldSCache scp 0x%p ref %d", file, line, scp, refCount);
 #endif
-    lock_ReleaseWrite(&cm_scacheLock);
+    lock_ReleaseRead(&cm_scacheLock);
 }
 
 #ifdef DEBUG_REFCOUNT
@@ -1494,14 +1865,49 @@ void cm_ReleaseSCacheNoLockDbg(cm_scache_t *scp, char * file, long line)
 void cm_ReleaseSCacheNoLock(cm_scache_t *scp)
 #endif
 {
-    osi_assert(scp != NULL);
-    if (scp->refCount == 0)
+    afs_int32 refCount;
+
+    osi_assertx(scp != NULL, "null cm_scache_t");
+    lock_AssertAny(&cm_scacheLock);
+
+    refCount = InterlockedDecrement(&scp->refCount);
+#ifdef DEBUG_REFCOUNT
+    if (refCount < 0)
        osi_Log1(afsd_logp,"cm_ReleaseSCacheNoLock about to panic scp 0x%x",scp);
-    osi_assert(scp->refCount-- >= 0);
+#endif
+    osi_assertx(refCount >= 0, "cm_scache_t refCount 0");
 #ifdef DEBUG_REFCOUNT
-    osi_Log2(afsd_logp,"cm_ReleaseSCacheNoLock scp 0x%p ref %d",scp,scp->refCount);
-    afsi_log("%s:%d cm_ReleaseSCacheNoLock scp 0x%p ref %d", file, line, scp, scp->refCount);
+    osi_Log2(afsd_logp,"cm_ReleaseSCacheNoLock scp 0x%p ref %d",scp, refCount);
+    afsi_log("%s:%d cm_ReleaseSCacheNoLock scp 0x%p ref %d", file, line, scp, refCount);
 #endif
+
+    if (refCount == 0 && (scp->flags & CM_SCACHEFLAG_DELETED)) {
+        int deleted = 0;
+        long      lockstate;
+
+        lockstate = lock_GetRWLockState(&cm_scacheLock);
+        if (lockstate != OSI_RWLOCK_WRITEHELD) 
+            lock_ReleaseRead(&cm_scacheLock);
+        else
+            lock_ReleaseWrite(&cm_scacheLock);
+
+        lock_ObtainWrite(&scp->rw);
+        if (scp->flags & CM_SCACHEFLAG_DELETED)
+            deleted = 1;
+
+        if (refCount == 0 && deleted) {
+            lock_ObtainWrite(&cm_scacheLock);
+            cm_RecycleSCache(scp, 0);
+            if (lockstate != OSI_RWLOCK_WRITEHELD) 
+                lock_ConvertWToR(&cm_scacheLock);
+        } else {
+            if (lockstate != OSI_RWLOCK_WRITEHELD) 
+                lock_ObtainRead(&cm_scacheLock);
+            else
+                lock_ObtainWrite(&cm_scacheLock);
+        }
+        lock_ReleaseWrite(&scp->rw);
+    }
 }
 
 #ifdef DEBUG_REFCOUNT
@@ -1509,18 +1915,35 @@ void cm_ReleaseSCacheDbg(cm_scache_t *scp, char * file, long line)
 #else
 void cm_ReleaseSCache(cm_scache_t *scp)
 #endif
-{
-    osi_assert(scp != NULL);
-    lock_ObtainWrite(&cm_scacheLock);
-    if (scp->refCount == 0)
+{     
+    afs_int32 refCount;
+
+    osi_assertx(scp != NULL, "null cm_scache_t");
+    lock_ObtainRead(&cm_scacheLock);
+    refCount = InterlockedDecrement(&scp->refCount);
+#ifdef DEBUG_REFCOUNT
+    if (refCount < 0)
        osi_Log1(afsd_logp,"cm_ReleaseSCache about to panic scp 0x%x",scp);
-    osi_assert(scp->refCount != 0);
-    scp->refCount--;
+#endif
+    osi_assertx(refCount >= 0, "cm_scache_t refCount 0");
 #ifdef DEBUG_REFCOUNT
-    osi_Log2(afsd_logp,"cm_ReleaseSCache scp 0x%p ref %d",scp,scp->refCount);
-    afsi_log("%s:%d cm_ReleaseSCache scp 0x%p ref %d", file, line, scp, scp->refCount);
+    osi_Log2(afsd_logp,"cm_ReleaseSCache scp 0x%p ref %d",scp, refCount);
+    afsi_log("%s:%d cm_ReleaseSCache scp 0x%p ref %d", file, line, scp, refCount);
 #endif
-    lock_ReleaseWrite(&cm_scacheLock);
+    lock_ReleaseRead(&cm_scacheLock);
+
+    if (scp->flags & CM_SCACHEFLAG_DELETED) {
+        int deleted = 0;
+        lock_ObtainWrite(&scp->rw);
+        if (scp->flags & CM_SCACHEFLAG_DELETED)
+            deleted = 1;
+        if (deleted) {
+            lock_ObtainWrite(&cm_scacheLock);
+            cm_RecycleSCache(scp, 0);
+            lock_ReleaseWrite(&cm_scacheLock);
+        }
+        lock_ReleaseWrite(&scp->rw);
+    }
 }
 
 /* just look for the scp entry to get filetype */
@@ -1532,10 +1955,10 @@ int cm_FindFileType(cm_fid_t *fidp)
         
     hash = CM_SCACHE_HASH(fidp);
         
-    osi_assert(fidp->cell != 0);
+    osi_assertx(fidp->cell != 0, "unassigned cell value");
 
     lock_ObtainWrite(&cm_scacheLock);
-    for (scp=cm_data.hashTablep[hash]; scp; scp=scp->nextp) {
+    for (scp=cm_data.scacheHashTablep[hash]; scp; scp=scp->nextp) {
         if (cm_FidCmp(fidp, &scp->fid) == 0) {
             lock_ReleaseWrite(&cm_scacheLock);
             return scp->fileType;
@@ -1553,46 +1976,120 @@ int cm_DumpSCache(FILE *outputFile, char *cookie, int lock)
 {
     int zilch;
     cm_scache_t *scp;
-    char output[1024];
+    osi_queue_t *q;
+    char output[2048];
     int i;
   
     if (lock)
         lock_ObtainRead(&cm_scacheLock);
   
-    sprintf(output, "%s - dumping scache - cm_data.currentSCaches=%d, cm_data.maxSCaches=%d\r\n", cookie, cm_data.currentSCaches, cm_data.maxSCaches);
+    sprintf(output, "%s - dumping all scache - cm_data.currentSCaches=%d, cm_data.maxSCaches=%d\r\n", cookie, cm_data.currentSCaches, cm_data.maxSCaches);
     WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
   
-    for (scp = cm_data.scacheLRULastp; scp; scp = (cm_scache_t *) osi_QPrev(&scp->q)) 
+    for (scp = cm_data.allSCachesp; scp; scp = scp->allNextp) 
     {
-        if (scp->refCount != 0)
-        {
-            sprintf(output, "%s scp=0x%p, fid (cell=%d, volume=%d, vnode=%d, unique=%d) refCount=%u\r\n", 
-                    cookie, scp, scp->fid.cell, scp->fid.volume, scp->fid.vnode, scp->fid.unique, 
-                    scp->refCount);
+        time_t t;
+        char *srvStr = NULL;
+        afs_uint32 srvStrRpc = TRUE;
+        char *cbt = NULL;
+        char *cdrot = NULL;
+
+        if (scp->cbServerp) {
+            if (!((scp->cbServerp->flags & CM_SERVERFLAG_UUID) &&
+                UuidToString((UUID *)&scp->cbServerp->uuid, &srvStr) == RPC_S_OK)) {
+                afs_asprintf(&srvStr, "%.0I", scp->cbServerp->addr.sin_addr.s_addr);
+                srvStrRpc = FALSE;
+            }
+        }
+        if (scp->cbExpires) {
+            t = scp->cbExpires;
+            cbt = ctime(&t);
+            if (cbt) {
+                cbt = strdup(cbt);
+                cbt[strlen(cbt)-1] = '\0';
+            }
+        }
+        if (scp->volumeCreationDate) {
+            t = scp->volumeCreationDate;
+            cdrot = ctime(&t);
+            if (cdrot) {
+                cdrot = strdup(cdrot);
+                cdrot[strlen(cdrot)-1] = '\0';
+            }
+        }
+        sprintf(output,
+                "%s scp=0x%p, fid (cell=%d, volume=%d, vnode=%d, unique=%d) type=%d dv=%I64d len=0x%I64x "
+                "mp='%s' Locks (server=0x%x shared=%d excl=%d clnt=%d) fsLockCount=%d linkCount=%d anyAccess=0x%x "
+                "flags=0x%x cbServer='%s' cbExpires='%s' volumeCreationDate='%s' refCount=%u\r\n",
+                cookie, scp, scp->fid.cell, scp->fid.volume, scp->fid.vnode, scp->fid.unique,
+                scp->fileType, scp->dataVersion, scp->length.QuadPart, scp->mountPointStringp,
+                scp->serverLock, scp->sharedLocks, scp->exclusiveLocks, scp->clientLocks, scp->fsLockCount,
+                scp->linkCount, scp->anyAccess, scp->flags, srvStr ? srvStr : "<none>", cbt ? cbt : "<none>",
+                cdrot ? cdrot : "<none>", scp->refCount);
+        WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
+
+        if (scp->fileLocksH) {
+            sprintf(output, "  %s - begin dumping scp locks\r\n", cookie);
             WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
+
+            for (q = scp->fileLocksH; q; q = osi_QNext(q)) {
+                cm_file_lock_t * lockp = (cm_file_lock_t *)((char *) q - offsetof(cm_file_lock_t, fileq));
+                sprintf(output, "  %s lockp=0x%p scp=0x%p, cm_userp=0x%p offset=0x%I64x len=0x%08I64x type=0x%x "
+                        "key=0x%I64x flags=0x%x update=0x%I64u\r\n",
+                        cookie, lockp, lockp->scp, lockp->userp, lockp->range.offset, lockp->range.length,
+                        lockp->lockType, lockp->key, lockp->flags, (afs_uint64)lockp->lastUpdate);
+                WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
+            }       
+
+            sprintf(output, "  %s - done dumping scp locks\r\n", cookie);
+            WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
+        }
+
+        if (srvStr) {
+            if (srvStrRpc)
+                RpcStringFree(&srvStr);
+            else
+                free(srvStr);
         }
+        if (cbt)
+            free(cbt);
+        if (cdrot)
+            free(cdrot);
     }
   
-    sprintf(output, "%s - dumping cm_data.hashTable - cm_data.hashTableSize=%d\r\n", cookie, cm_data.hashTableSize);
+    sprintf(output, "%s - Done dumping all scache.\r\n", cookie);
+    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
+    sprintf(output, "%s - dumping cm_data.scacheHashTable - cm_data.scacheHashTableSize=%d\r\n",
+            cookie, cm_data.scacheHashTableSize);
     WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
   
-    for (i = 0; i < cm_data.hashTableSize; i++)
+    for (i = 0; i < cm_data.scacheHashTableSize; i++)
     {
-        for(scp = cm_data.hashTablep[i]; scp; scp=scp->nextp) 
+        for(scp = cm_data.scacheHashTablep[i]; scp; scp=scp->nextp) 
         {
-            if (scp->refCount != 0)
-            {
-                sprintf(output, "%s scp=0x%p, hash=%d, fid (cell=%d, volume=%d, vnode=%d, unique=%d) refCount=%u\r\n", 
-                         cookie, scp, i, scp->fid.cell, scp->fid.volume, scp->fid.vnode, 
-                         scp->fid.unique, scp->refCount);
-                WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
-            }
+            sprintf(output, "%s scp=0x%p, hash=%d, fid (cell=%d, volume=%d, vnode=%d, unique=%d)\r\n", 
+                    cookie, scp, i, scp->fid.cell, scp->fid.volume, scp->fid.vnode, scp->fid.unique);
+            WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
         }
     }
 
-    sprintf(output, "%s - Done dumping scache.\r\n", cookie);
+    sprintf(output, "%s - Done dumping cm_data.scacheHashTable\r\n", cookie);
     WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
-  
+
+    sprintf(output, "%s - begin dumping all file locks\r\n", cookie);
+    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
+
+    for (q = cm_allFileLocks; q; q = osi_QNext(q)) {
+        cm_file_lock_t * lockp = (cm_file_lock_t *)q;
+        sprintf(output, "%s filelockp=0x%p scp=0x%p, cm_userp=0x%p offset=0x%I64x len=0x%08I64x type=0x%x key=0x%I64x flags=0x%x update=0x%I64u\r\n", 
+                 cookie, lockp, lockp->scp, lockp->userp, lockp->range.offset, lockp->range.length, 
+                 lockp->lockType, lockp->key, lockp->flags, (afs_uint64)lockp->lastUpdate);
+        WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
+    }       
+
+    sprintf(output, "%s - done dumping all file locks\r\n", cookie);
+    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
+
     if (lock)
         lock_ReleaseRead(&cm_scacheLock);       
     return (0);