windows-scache-verification-20080307
[openafs.git] / src / WINNT / afsd / cm_scache.c
index 54d6e75..edd0733 100644 (file)
 #include <afs/param.h>
 #include <afs/stds.h>
 
-#ifndef DJGPP
 #include <windows.h>
 #include <winsock2.h>
 #include <nb30.h>
-#endif /* !DJGPP */
 #include <malloc.h>
 #include <string.h>
 #include <stdlib.h>
 #include <osi.h>
 
 #include "afsd.h"
+#include "cm_btree.h"
 
 /*extern void afsi_log(char *pattern, ...);*/
 
 extern osi_hyper_t hzero;
 
-/* hash table stuff */
-cm_scache_t **cm_hashTablep;
-long cm_hashTableSize;
-long cm_maxSCaches;
-long cm_currentSCaches;
-
-/* LRU stuff */
-cm_scache_t *cm_scacheLRUFirstp;
-cm_scache_t *cm_scacheLRULastp;
-
 /* File locks */
 osi_queue_t *cm_allFileLocks;
+osi_queue_t *cm_freeFileLocks;
+unsigned long cm_lockRefreshCycle;
 
 /* lock for globals */
 osi_rwlock_t cm_scacheLock;
@@ -45,352 +36,958 @@ osi_rwlock_t cm_scacheLock;
 /* Dummy scache entry for use with pioctl fids */
 cm_scache_t cm_fakeSCache;
 
+osi_queue_t * cm_allFreeWaiters;        /* protected by cm_scacheLock */
+
 #ifdef AFS_FREELANCE_CLIENT
 extern osi_mutex_t cm_Freelance_Lock;
 #endif
 
 /* must be called with cm_scacheLock write-locked! */
-void cm_AdjustLRU(cm_scache_t *scp)
+void cm_AdjustScacheLRU(cm_scache_t *scp)
 {
-       if (scp == cm_scacheLRULastp)
-               cm_scacheLRULastp = (cm_scache_t *) osi_QPrev(&scp->q);
-       osi_QRemove((osi_queue_t **) &cm_scacheLRUFirstp, &scp->q);
-       osi_QAdd((osi_queue_t **) &cm_scacheLRUFirstp, &scp->q);
-       if (!cm_scacheLRULastp) cm_scacheLRULastp = scp;
+    if (scp == cm_data.scacheLRULastp)
+        cm_data.scacheLRULastp = (cm_scache_t *) osi_QPrev(&scp->q);
+    osi_QRemoveHT((osi_queue_t **) &cm_data.scacheLRUFirstp, (osi_queue_t **) &cm_data.scacheLRULastp, &scp->q);
+    osi_QAdd((osi_queue_t **) &cm_data.scacheLRUFirstp, &scp->q);
+    if (!cm_data.scacheLRULastp) 
+        cm_data.scacheLRULastp = scp;
 }
 
+/* caller holds the scache write lock and the mutex; unlinks scp from its hash bucket */
+void cm_RemoveSCacheFromHashTable(cm_scache_t *scp)
+{
+    cm_scache_t **linkpp;
+    int bucket;
+
+    /* nothing to unlink unless the entry is marked as hashed */
+    if (!(scp->flags & CM_SCACHEFLAG_INHASH))
+        return;
+    /* follow the bucket chain through the link that points at each
+     * entry, stopping at scp or at the end of the chain */
+    bucket = CM_SCACHE_HASH(&scp->fid);
+    linkpp = &cm_data.scacheHashTablep[bucket];
+    while (*linkpp && *linkpp != scp)
+        linkpp = &(*linkpp)->nextp;
+    /* splice scp out; the flag is only cleared when it was found */
+    if (*linkpp) {
+        *linkpp = scp->nextp;
+        scp->flags &= ~CM_SCACHEFLAG_INHASH;
+    }
+}
+
+/* called with cm_scacheLock write-locked; recycles an existing scp:
+ * unhashes it and resets its cached state.  returns 0 on success, or -1
+ * (scp left untouched) if it is referenced or flagged CM_SCACHEFLAG_SMB_FID.
+ * this function ignores all of the locking hierarchy.  */
+long cm_RecycleSCache(cm_scache_t *scp, afs_int32 flags)
+{
+    if (scp->refCount != 0) {
+       return -1;
+    }
+
+    if (scp->flags & CM_SCACHEFLAG_SMB_FID) {
+       osi_Log1(afsd_logp,"cm_RecycleSCache CM_SCACHEFLAG_SMB_FID detected scp 0x%p", scp);
+#ifdef DEBUG
+       osi_panic("cm_RecycleSCache CM_SCACHEFLAG_SMB_FID detected",__FILE__,__LINE__);
+#endif
+       return -1;
+    }
+
+    cm_RemoveSCacheFromHashTable(scp);
+
+#if 0   /* disabled; this is the only code that reads the flags argument */
+    if (flags & CM_SCACHE_RECYCLEFLAG_DESTROY_BUFFERS) {
+       osi_queueData_t *qdp;
+       cm_buf_t *bufp;
+
+       while(qdp = scp->bufWritesp) {
+            bufp = osi_GetQData(qdp);
+           osi_QRemove((osi_queue_t **) &scp->bufWritesp, &qdp->q);
+           osi_QDFree(qdp);
+           if (bufp) {
+               lock_ObtainMutex(&bufp->mx);
+               bufp->cmFlags &= ~CM_BUF_CMSTORING;
+               bufp->flags &= ~CM_BUF_DIRTY;
+                bufp->dirty_offset = 0;
+                bufp->dirty_length = 0;
+               bufp->flags |= CM_BUF_ERROR;
+               bufp->error = VNOVNODE;
+               bufp->dataVersion = -1; /* bad */
+               bufp->dirtyCounter++;
+               if (bufp->flags & CM_BUF_WAITING) {
+                   osi_Log2(afsd_logp, "CM RecycleSCache Waking [scp 0x%x] bufp 0x%x", scp, bufp);
+                   osi_Wakeup((LONG_PTR) bufp);  /* the buffer's address, not the address of this local */
+               }
+               lock_ReleaseMutex(&bufp->mx);
+               buf_Release(bufp);
+           }
+        }
+       while(qdp = scp->bufReadsp) {
+            bufp = osi_GetQData(qdp);
+           osi_QRemove((osi_queue_t **) &scp->bufReadsp, &qdp->q);
+           osi_QDFree(qdp);
+           if (bufp) {
+               lock_ObtainMutex(&bufp->mx);
+               bufp->cmFlags &= ~CM_BUF_CMFETCHING;
+               bufp->flags &= ~CM_BUF_DIRTY;
+                bufp->dirty_offset = 0;
+                bufp->dirty_length = 0;
+               bufp->flags |= CM_BUF_ERROR;
+               bufp->error = VNOVNODE;
+               bufp->dataVersion = -1; /* bad */
+               bufp->dirtyCounter++;
+               if (bufp->flags & CM_BUF_WAITING) {
+                   osi_Log2(afsd_logp, "CM RecycleSCache Waking [scp 0x%x] bufp 0x%x", scp, bufp);
+                   osi_Wakeup((LONG_PTR) bufp);  /* the buffer's address, not the address of this local */
+               }
+               lock_ReleaseMutex(&bufp->mx);
+               buf_Release(bufp);
+           }
+        }
+       buf_CleanDirtyBuffers(scp); 
+    } else {
+       /* look for things that shouldn't still be set */
+       osi_assertx(scp->bufWritesp == NULL, "non-null cm_scache_t bufWritesp");
+       osi_assertx(scp->bufReadsp == NULL, "non-null cm_scache_t bufReadsp");
+    }
+#endif
+
+    /* invalidate so next merge works fine;
+     * also initialize some flags */
+    scp->fileType = 0;
+    scp->flags &= ~(CM_SCACHEFLAG_STATD
+                    | CM_SCACHEFLAG_DELETED
+                    | CM_SCACHEFLAG_RO
+                    | CM_SCACHEFLAG_PURERO
+                    | CM_SCACHEFLAG_OVERQUOTA
+                    | CM_SCACHEFLAG_OUTOFSPACE
+                    | CM_SCACHEFLAG_EACCESS);
+    scp->serverModTime = 0;
+    scp->dataVersion = 0;
+    scp->bufDataVersionLow = 0;
+    scp->bulkStatProgress = hzero;
+    scp->waitCount = 0;
+    scp->waitQueueT = NULL;
+
+    if (scp->cbServerp) {       /* drop the reference held on the callback server */
+        cm_PutServer(scp->cbServerp);
+        scp->cbServerp = NULL;
+    }
+    scp->cbExpires = 0;
+
+    scp->fid.vnode = 0;
+    scp->fid.volume = 0;
+    scp->fid.unique = 0;
+    scp->fid.cell = 0;
+    scp->fid.hash = 0;
+
+    /* remove from dnlc */
+    cm_dnlcPurgedp(scp);
+    cm_dnlcPurgevp(scp);
+
+    /* discard cached status; if non-zero, Close
+     * tried to store this to server but failed */
+    scp->mask = 0;
+
+    /* drop held volume ref */
+    if (scp->volp) {
+       cm_PutVolume(scp->volp);
+       scp->volp = NULL;
+    }
+
+    /* discard symlink info */
+    scp->mountPointStringp[0] = '\0';
+    memset(&scp->mountRootFid, 0, sizeof(cm_fid_t));
+    memset(&scp->dotdotFid, 0, sizeof(cm_fid_t));
+
+    /* reset locking info */
+    scp->fileLocksH = NULL;
+    scp->fileLocksT = NULL;
+    scp->serverLock = (-1);
+    scp->exclusiveLocks = 0;
+    scp->sharedLocks = 0;
+    scp->lockDataVersion = -1;
+
+    /* not locked, but there can be no references to this guy
+     * while we hold the global refcount lock.
+     */
+    cm_FreeAllACLEnts(scp);
+
+#ifdef USE_BPLUS
+    /* destroy directory Bplus Tree */
+    if (scp->dirBplus) {
+        LARGE_INTEGER start, end;
+        QueryPerformanceCounter(&start);        /* time the free for the bplus_free_time statistic */
+        bplus_free_tree++;
+        freeBtree(scp->dirBplus);
+        scp->dirBplus = NULL;
+        QueryPerformanceCounter(&end);
+
+        bplus_free_time += (end.QuadPart - start.QuadPart);
+    }
+#endif
+    return 0;
+}
+
+
 /* called with cm_scacheLock write-locked; find a vnode to recycle.
- * Can allocate a new one if desperate, or if below quota (cm_maxSCaches).
+ * Can allocate a new one if desperate, or if below quota (cm_data.maxSCaches).
  */
 cm_scache_t *cm_GetNewSCache(void)
 {
-       cm_scache_t *scp;
-        int i;
-        cm_scache_t **lscpp;
-        cm_scache_t *tscp;
-
-       if (cm_currentSCaches >= cm_maxSCaches) {
-               for (scp = cm_scacheLRULastp;
-                    scp;
-                    scp = (cm_scache_t *) osi_QPrev(&scp->q)) {
-                 if (scp->refCount == 0) break;
-               }
-                
-                if (scp) {
-                       /* we found an entry, so return it */
-                        if (scp->flags & CM_SCACHEFLAG_INHASH) {
-                               /* hash it out first */
-                                i = CM_SCACHE_HASH(&scp->fid);
-                               lscpp = &cm_hashTablep[i];
-                               for (tscp = *lscpp;
-                                    tscp;
-                                    lscpp = &tscp->nextp, tscp = *lscpp) {
-                                 if (tscp == scp) break;
-                                }
-                                osi_assertx(tscp, "afsd: scache hash screwup");
-                                *lscpp = scp->nextp;
-                                scp->flags &= ~CM_SCACHEFLAG_INHASH;
-                        }
-
-                       /* look for things that shouldn't still be set */
-                        osi_assert(scp->bufWritesp == NULL);
-                        osi_assert(scp->bufReadsp == NULL);
-
-                       /* invalidate so next merge works fine;
-                        * also initialize some flags */
-                        scp->flags &= ~(CM_SCACHEFLAG_STATD
-                                       | CM_SCACHEFLAG_RO
-                                       | CM_SCACHEFLAG_PURERO
-                                       | CM_SCACHEFLAG_OVERQUOTA
-                                       | CM_SCACHEFLAG_OUTOFSPACE);
-                        scp->serverModTime = 0;
-                        scp->dataVersion = 0;
-                       scp->bulkStatProgress = hzero;
-
-                        /* discard callback */
-                        scp->cbServerp = NULL;
-                        scp->cbExpires = 0;
-
-                       /* remove from dnlc */
-                       cm_dnlcPurgedp(scp);
-                       cm_dnlcPurgevp(scp);
-
-                       /* discard cached status; if non-zero, Close
-                        * tried to store this to server but failed */
-                       scp->mask = 0;
-
-                       /* drop held volume ref */
-                       if (scp->volp) {
-                               cm_PutVolume(scp->volp);
-                               scp->volp = NULL;
-                       }
-
-                        /* discard symlink info */
-                        if (scp->mountPointStringp) {
-                               free(scp->mountPointStringp);
-                                scp->mountPointStringp = NULL;
-                       }
-                       if (scp->mountRootFidp) {
-                               free(scp->mountRootFidp);
-                               scp->mountRootFidp = NULL;
-                       }
-                       if (scp->dotdotFidp) {
-                               free(scp->dotdotFidp);
-                               scp->dotdotFidp = NULL;
-                       }
-                        
-                       /* not locked, but there can be no references to this guy
-                         * while we hold the global refcount lock.
-                         */
-                        cm_FreeAllACLEnts(scp);
-                        
+    cm_scache_t *scp;
+    int retry = 0;
+
+#if 0
+    /* first pass - look for deleted objects */
+    for ( scp = cm_data.scacheLRULastp;
+         scp;
+         scp = (cm_scache_t *) osi_QPrev(&scp->q)) 
+    {
+       osi_assertx(scp >= cm_data.scacheBaseAddress && scp < (cm_scache_t *)cm_data.scacheHashTablep,
+                    "invalid cm_scache_t address");
+
+       if (scp->refCount == 0) {
+           if (scp->flags & CM_SCACHEFLAG_DELETED) {
+               osi_Log1(afsd_logp, "GetNewSCache attempting to recycle deleted scp 0x%x", scp);
+               if (!cm_RecycleSCache(scp, CM_SCACHE_RECYCLEFLAG_DESTROY_BUFFERS)) {
+
+                   /* we found an entry, so return it */
+                   /* now remove from the LRU queue and put it back at the
+                    * head of the LRU queue.
+                    */
+                   cm_AdjustScacheLRU(scp);
+
+                   /* and we're done */
+                   return scp;
+               } 
+               osi_Log1(afsd_logp, "GetNewSCache recycled failed scp 0x%x", scp);
+           } else if (!(scp->flags & CM_SCACHEFLAG_INHASH)) {
+               /* we found an entry, so return it */
+               /* now remove from the LRU queue and put it back at the
+               * head of the LRU queue.
+               */
+               cm_AdjustScacheLRU(scp);
+
+               /* and we're done */
+               return scp;
+           }
+       }       
+    }  
+    osi_Log0(afsd_logp, "GetNewSCache no deleted or recycled entries available for reuse");
+#endif 
+
+    if (cm_data.currentSCaches >= cm_data.maxSCaches) {
+       /* There were no deleted scache objects that we could use.  Try to find
+        * one that simply hasn't been used in a while.
+        */
+        for ( scp = cm_data.scacheLRULastp;
+              scp;
+              scp = (cm_scache_t *) osi_QPrev(&scp->q)) 
+        {
+            /* It is possible for the refCount to be zero and for there still
+             * to be outstanding dirty buffers.  If there are dirty buffers,
+             * we must not recycle the scp. */
+            if (scp->refCount == 0 && scp->bufReadsp == NULL && scp->bufWritesp == NULL) {
+                if (!buf_DirtyBuffersExist(&scp->fid)) {
+                    if (!cm_RecycleSCache(scp, 0)) {
+                        /* we found an entry, so return it */
                         /* now remove from the LRU queue and put it back at the
                          * head of the LRU queue.
                          */
-                       cm_AdjustLRU(scp);
-                       
+                        cm_AdjustScacheLRU(scp);
+
                         /* and we're done */
                         return scp;
+                    }
+                } else {
+                    osi_Log1(afsd_logp,"GetNewSCache dirty buffers exist scp 0x%x", scp);
                 }
-       }
+            }  
+        }
+        osi_Log1(afsd_logp, "GetNewSCache all scache entries in use (retry = %d)", retry);
+
+        return NULL;
+    }
         
-        /* if we get here, we should allocate a new scache entry.  We either are below
-         * quota or we have a leak and need to allocate a new one to avoid panicing.
-         */
-        scp = malloc(sizeof(*scp));
-        memset(scp, 0, sizeof(*scp));
-       lock_InitializeMutex(&scp->mx, "cm_scache_t mutex");
-        lock_InitializeRWLock(&scp->bufCreateLock, "cm_scache_t bufCreateLock");
-       
-        /* and put it in the LRU queue */
-        osi_QAdd((osi_queue_t **) &cm_scacheLRUFirstp, &scp->q);
-        if (!cm_scacheLRULastp) cm_scacheLRULastp = scp;
-        cm_currentSCaches++;
-       cm_dnlcPurgedp(scp); /* make doubly sure that this is not in dnlc */
-       cm_dnlcPurgevp(scp); 
-        return scp;
+    /* if we get here we are below quota (the at-quota path above returns
+     * instead of falling through), so take the next unused scache entry
+     * from the preallocated array rather than allocating or panicking. */
+    scp = cm_data.scacheBaseAddress + cm_data.currentSCaches;
+    osi_assertx(scp >= cm_data.scacheBaseAddress && scp < (cm_scache_t *)cm_data.scacheHashTablep,
+                "invalid cm_scache_t address");
+    memset(scp, 0, sizeof(cm_scache_t));
+    scp->magic = CM_SCACHE_MAGIC;
+    lock_InitializeRWLock(&scp->rw, "cm_scache_t rw");
+    lock_InitializeRWLock(&scp->bufCreateLock, "cm_scache_t bufCreateLock");
+#ifdef USE_BPLUS
+    lock_InitializeRWLock(&scp->dirlock, "cm_scache_t dirlock");
+#endif
+    scp->serverLock = -1;
+
+    /* and put it in the LRU queue */
+    osi_QAdd((osi_queue_t **) &cm_data.scacheLRUFirstp, &scp->q);
+    if (!cm_data.scacheLRULastp) 
+        cm_data.scacheLRULastp = scp;
+    cm_data.currentSCaches++;
+    cm_dnlcPurgedp(scp); /* make doubly sure that this is not in dnlc */
+    cm_dnlcPurgevp(scp); 
+    scp->allNextp = cm_data.allSCachesp;
+    cm_data.allSCachesp = scp;
+    return scp;
+}       
+
+void cm_SetFid(cm_fid_t *fidp, afs_uint32 cell, afs_uint32 volume, afs_uint32 vnode, afs_uint32 unique)
+{
+    fidp->unique = unique;
+    fidp->vnode = vnode;
+    fidp->volume = volume;
+    fidp->cell = cell;      /* hash bits below: cell 31-28, volume 27-22, vnode 21-11, unique 10-0 */
+    fidp->hash = (unique & 0x7FF) | ((vnode & 0x7FF) << 11) | ((volume & 0x3F) << 22) | ((cell & 0xF) << 28);
+}
 
 /* like strcmp, only for fids */
-int cm_FidCmp(cm_fid_t *ap, cm_fid_t *bp)
+__inline int cm_FidCmp(cm_fid_t *ap, cm_fid_t *bp)
 {
-        if (ap->vnode != bp->vnode) return 1;
-       if (ap->volume != bp->volume) return 1;
-        if (ap->unique != bp->unique) return 1;
-        if (ap->cell != bp->cell) return 1;
-        return 0;
+    if (ap->hash != bp->hash)
+        return 1;
+    if (ap->vnode != bp->vnode)
+        return 1;
+    if (ap->volume != bp->volume) 
+        return 1;
+    if (ap->unique != bp->unique) 
+        return 1;
+    if (ap->cell != bp->cell) 
+        return 1;
+    return 0;
 }
 
-void cm_fakeSCacheInit()
+void cm_fakeSCacheInit(int newFile)
 {
-       memset(&cm_fakeSCache, 0, sizeof(cm_fakeSCache));
-       lock_InitializeMutex(&cm_fakeSCache.mx, "cm_scache_t mutex");
-       cm_fakeSCache.cbServerp = (struct cm_server *)(-1);
-       /* can leave clientModTime at 0 */
-       cm_fakeSCache.fileType = CM_SCACHETYPE_FILE;
-       cm_fakeSCache.unixModeBits = 0777;
-       cm_fakeSCache.length.LowPart = 1000;
-       cm_fakeSCache.linkCount = 1;
+    if ( newFile ) {
+        memset(&cm_data.fakeSCache, 0, sizeof(cm_scache_t));
+        cm_data.fakeSCache.cbServerp = (struct cm_server *)(-1);
+        /* can leave clientModTime at 0 */
+        cm_data.fakeSCache.fileType = CM_SCACHETYPE_FILE;
+        cm_data.fakeSCache.unixModeBits = 0777;
+        cm_data.fakeSCache.length.LowPart = 1000;
+        cm_data.fakeSCache.linkCount = 1;
+        cm_data.fakeSCache.refCount = 1;
+    }
+    lock_InitializeRWLock(&cm_data.fakeSCache.rw, "cm_scache_t rw");
 }
 
-void cm_InitSCache(long maxSCaches)
+long                    /* negative code of the first failed check, else cm_dnlcValidate()'s result */
+cm_ValidateSCache(void) /* each failure is reported through afsi_log and on stderr */
 {
-       static osi_once_t once;
+    cm_scache_t * scp, *lscp;
+    long i;
+
+    if ( (cm_data.scacheLRUFirstp == NULL && cm_data.scacheLRULastp != NULL) ||
+         (cm_data.scacheLRUFirstp != NULL && cm_data.scacheLRULastp == NULL) ) {
+        afsi_log("cm_ValidateSCache failure: inconsistent LRU pointers");
+        fprintf(stderr, "cm_ValidateSCache failure: inconsistent LRU pointers\n");
+        return -17;
+    }
+    /* pass 1: LRU queue from first to last; lscp trails scp to check the back links */
+    for ( scp = cm_data.scacheLRUFirstp, lscp = NULL, i = 0; 
+          scp;
+          lscp = scp, scp = (cm_scache_t *) osi_QNext(&scp->q), i++ ) {
+        if (scp->magic != CM_SCACHE_MAGIC) {
+            afsi_log("cm_ValidateSCache failure: scp->magic != CM_SCACHE_MAGIC");
+            fprintf(stderr, "cm_ValidateSCache failure: scp->magic != CM_SCACHE_MAGIC\n");
+            return -1;
+        }
+        if (scp->nextp && scp->nextp->magic != CM_SCACHE_MAGIC) {
+            afsi_log("cm_ValidateSCache failure: scp->nextp->magic != CM_SCACHE_MAGIC");
+            fprintf(stderr, "cm_ValidateSCache failure: scp->nextp->magic != CM_SCACHE_MAGIC\n");
+            return -2;
+        }
+        if (scp->randomACLp && scp->randomACLp->magic != CM_ACLENT_MAGIC) {
+            afsi_log("cm_ValidateSCache failure: scp->randomACLp->magic != CM_ACLENT_MAGIC");
+            fprintf(stderr, "cm_ValidateSCache failure: scp->randomACLp->magic != CM_ACLENT_MAGIC\n");
+            return -3;
+        }
+        if (scp->volp && scp->volp->magic != CM_VOLUME_MAGIC) {
+            afsi_log("cm_ValidateSCache failure: scp->volp->magic != CM_VOLUME_MAGIC");
+            fprintf(stderr, "cm_ValidateSCache failure: scp->volp->magic != CM_VOLUME_MAGIC\n");
+            return -4;
+        }
+        if (i > cm_data.currentSCaches ) {      /* more links than entries: the queue must loop */
+            afsi_log("cm_ValidateSCache failure: LRU First queue loops");
+            fprintf(stderr, "cm_ValidateSCache failure: LRU First queue loops\n");
+            return -13;
+        }
+        if (lscp != (cm_scache_t *) osi_QPrev(&scp->q)) {
+            afsi_log("cm_ValidateSCache failure: QPrev(scp) != previous");
+            fprintf(stderr, "cm_ValidateSCache failure: QPrev(scp) != previous\n");
+            return -15;
+        }
+    }
+    /* pass 2: the same checks walking the LRU queue from last to first */
+    for ( scp = cm_data.scacheLRULastp, lscp = NULL, i = 0; scp;
+          lscp = scp, scp = (cm_scache_t *) osi_QPrev(&scp->q), i++ ) {
+        if (scp->magic != CM_SCACHE_MAGIC) {
+            afsi_log("cm_ValidateSCache failure: scp->magic != CM_SCACHE_MAGIC");
+            fprintf(stderr, "cm_ValidateSCache failure: scp->magic != CM_SCACHE_MAGIC\n");
+            return -5;
+        }
+        if (scp->nextp && scp->nextp->magic != CM_SCACHE_MAGIC) {
+            afsi_log("cm_ValidateSCache failure: scp->nextp->magic != CM_SCACHE_MAGIC");
+            fprintf(stderr, "cm_ValidateSCache failure: scp->nextp->magic != CM_SCACHE_MAGIC\n");
+            return -6;
+        }
+        if (scp->randomACLp && scp->randomACLp->magic != CM_ACLENT_MAGIC) {
+            afsi_log("cm_ValidateSCache failure: scp->randomACLp->magic != CM_ACLENT_MAGIC");
+            fprintf(stderr, "cm_ValidateSCache failure: scp->randomACLp->magic != CM_ACLENT_MAGIC\n");
+            return -7;
+        }
+        if (scp->volp && scp->volp->magic != CM_VOLUME_MAGIC) {
+            afsi_log("cm_ValidateSCache failure: scp->volp->magic != CM_VOLUME_MAGIC");
+            fprintf(stderr, "cm_ValidateSCache failure: scp->volp->magic != CM_VOLUME_MAGIC\n");
+            return -8;
+        }
+        if (i > cm_data.currentSCaches ) {      /* more links than entries: the queue must loop */
+            afsi_log("cm_ValidateSCache failure: LRU Last queue loops");
+            fprintf(stderr, "cm_ValidateSCache failure: LRU Last queue loops\n");
+            return -14;
+        }
+        if (lscp != (cm_scache_t *) osi_QNext(&scp->q)) {
+            afsi_log("cm_ValidateSCache failure: QNext(scp) != next");
+            fprintf(stderr, "cm_ValidateSCache failure: QNext(scp) != next\n");
+            return -16;
+        }
+    }
+    /* pass 3: every hash chain; each entry must hash to the bucket that holds it */
+    for ( i=0; i < cm_data.scacheHashTableSize; i++ ) {
+        for ( scp = cm_data.scacheHashTablep[i]; scp; scp = scp->nextp ) {
+            afs_uint32 hash;
+            hash = CM_SCACHE_HASH(&scp->fid);
+            if (scp->magic != CM_SCACHE_MAGIC) {
+                afsi_log("cm_ValidateSCache failure: scp->magic != CM_SCACHE_MAGIC");
+                fprintf(stderr, "cm_ValidateSCache failure: scp->magic != CM_SCACHE_MAGIC\n");
+                return -9;
+            }
+            if (scp->nextp && scp->nextp->magic != CM_SCACHE_MAGIC) {
+                afsi_log("cm_ValidateSCache failure: scp->nextp->magic != CM_SCACHE_MAGIC");
+                fprintf(stderr, "cm_ValidateSCache failure: scp->nextp->magic != CM_SCACHE_MAGIC\n");
+                return -10;
+            }
+            if (scp->randomACLp && scp->randomACLp->magic != CM_ACLENT_MAGIC) {
+                afsi_log("cm_ValidateSCache failure: scp->randomACLp->magic != CM_ACLENT_MAGIC");
+                fprintf(stderr, "cm_ValidateSCache failure: scp->randomACLp->magic != CM_ACLENT_MAGIC\n");
+                return -11;
+            }
+            if (scp->volp && scp->volp->magic != CM_VOLUME_MAGIC) {
+                afsi_log("cm_ValidateSCache failure: scp->volp->magic != CM_VOLUME_MAGIC");
+                fprintf(stderr, "cm_ValidateSCache failure: scp->volp->magic != CM_VOLUME_MAGIC\n");
+                return -12;
+            }
+            if (hash != i) {
+                afsi_log("cm_ValidateSCache failure: scp hash != hash index");
+                fprintf(stderr, "cm_ValidateSCache failure: scp hash != hash index\n");
+                return -13;     /* NOTE(review): same code as the "LRU First queue loops" check */
+            }
+        }
+    }
+
+    return cm_dnlcValidate();
+}
+
+void
+cm_SuspendSCache(void)
+{
+    cm_scache_t *entry;
+    time_t expiry;
+
+    cm_GiveUpAllCallbacksAllServers(TRUE);
+
+    /*
+     * Every server is marked down once the call above returns.
+     * The callbacks are deliberately kept rather than cleared,
+     * so that cached status stays valid for as long as the
+     * servers remain down (for instance when the machine resumes
+     * without network).  Only the expiration time is rewritten,
+     * so that the callbacks are expired when the servers are
+     * marked back up.
+     */
+    expiry = time(NULL) + 1;
+
+    lock_ObtainWrite(&cm_scacheLock);
+    entry = cm_data.allSCachesp;
+    while (entry) {
+        if (entry->cbServerp) {
+            /* a pure-RO entry whose expiry matches its volume's RO expiry moves both */
+            if ((entry->flags & CM_SCACHEFLAG_PURERO) && entry->volp &&
+                entry->volp->cbExpiresRO == entry->cbExpires)
+                entry->volp->cbExpiresRO = expiry;
+            entry->cbExpires = expiry;
+        }
+        entry = entry->allNextp;
+    }
+    lock_ReleaseWrite(&cm_scacheLock);
+}
+
+long                    /* returns the result of cm_dnlcShutdown() */
+cm_ShutdownSCache(void) /* releases per-entry resources of every scache entry on the allSCachesp list */
+{
+    cm_scache_t * scp;
+
+    lock_ObtainWrite(&cm_scacheLock);   /* held for the whole walk of the list */
+
+    for ( scp = cm_data.allSCachesp; scp;
+          scp = scp->allNextp ) {
+        if (scp->randomACLp) {
+            lock_ObtainWrite(&scp->rw); /* ACL entries are freed under the entry's own rw lock */
+            cm_FreeAllACLEnts(scp);
+            lock_ReleaseWrite(&scp->rw);
+        }
+
+        if (scp->cbServerp) {           /* drop the reference held on the callback server */
+            cm_PutServer(scp->cbServerp);
+            scp->cbServerp = NULL;
+        }
+        scp->cbExpires = 0;
+        scp->flags &= ~CM_SCACHEFLAG_CALLBACK;
+
+#ifdef USE_BPLUS
+        if (scp->dirBplus)
+            freeBtree(scp->dirBplus);
+        scp->dirBplus = NULL;
+        scp->dirDataVersion = -1;
+        lock_FinalizeRWLock(&scp->dirlock);
+#endif
+        lock_FinalizeRWLock(&scp->rw);  /* the entry's locks are finalized last, after its state is reset */
+        lock_FinalizeRWLock(&scp->bufCreateLock);
+    }
+    lock_ReleaseWrite(&cm_scacheLock);
+
+    cm_GiveUpAllCallbacksAllServers(FALSE);     /* called only after cm_scacheLock has been released */
+
+    return cm_dnlcShutdown();
+}
+
+void cm_InitSCache(int newFile, long maxSCaches)
+{
+    static osi_once_t once;
         
-        if (osi_Once(&once)) {
-               lock_InitializeRWLock(&cm_scacheLock, "cm_scacheLock");
-                cm_hashTableSize = maxSCaches / 2;
-                cm_hashTablep = malloc(sizeof(cm_scache_t *) * cm_hashTableSize);
-                memset(cm_hashTablep, 0, sizeof(cm_scache_t *) * cm_hashTableSize);
-               cm_allFileLocks = NULL;
-                cm_currentSCaches = 0;
-                cm_maxSCaches = maxSCaches;
-               cm_fakeSCacheInit();
-               cm_dnlcInit();
-               osi_EndOnce(&once);
+    if (osi_Once(&once)) {
+        lock_InitializeRWLock(&cm_scacheLock, "cm_scacheLock");
+        if ( newFile ) {
+            memset(cm_data.scacheHashTablep, 0, sizeof(cm_scache_t *) * cm_data.scacheHashTableSize);
+            cm_data.allSCachesp = NULL;
+            cm_data.currentSCaches = 0;
+            cm_data.maxSCaches = maxSCaches;
+            cm_data.scacheLRUFirstp = cm_data.scacheLRULastp = NULL;
+        } else {
+            cm_scache_t * scp;
+
+            for ( scp = cm_data.allSCachesp; scp;
+                  scp = scp->allNextp ) {
+                lock_InitializeRWLock(&scp->rw, "cm_scache_t rw");
+                lock_InitializeRWLock(&scp->bufCreateLock, "cm_scache_t bufCreateLock");
+#ifdef USE_BPLUS
+                lock_InitializeRWLock(&scp->dirlock, "cm_scache_t dirlock");
+#endif
+                scp->cbServerp = NULL;
+                scp->cbExpires = 0;
+                scp->fileLocksH = NULL;
+                scp->fileLocksT = NULL;
+                scp->serverLock = (-1);
+                scp->lastRefreshCycle = 0;
+                scp->exclusiveLocks = 0;
+                scp->sharedLocks = 0;
+                scp->openReads = 0;
+                scp->openWrites = 0;
+                scp->openShares = 0;
+                scp->openExcls = 0;
+                scp->waitCount = 0;
+#ifdef USE_BPLUS
+                scp->dirBplus = NULL;
+                scp->dirDataVersion = -1;
+#endif
+                scp->waitQueueT = NULL;
+                scp->flags &= ~CM_SCACHEFLAG_WAITING;
+            }
         }
+        cm_allFileLocks = NULL;
+        cm_freeFileLocks = NULL;
+        cm_lockRefreshCycle = 0;
+        cm_fakeSCacheInit(newFile);
+        cm_allFreeWaiters = NULL;
+        cm_dnlcInit(newFile);
+        osi_EndOnce(&once);
+    }
 }
 
 /* version that doesn't bother creating the entry if we don't find it */
 cm_scache_t *cm_FindSCache(cm_fid_t *fidp)
 {
-       long hash;
-        cm_scache_t *scp;
-        
-        hash = CM_SCACHE_HASH(fidp);
-        
-       osi_assert(fidp->cell != 0);
+    long hash;
+    cm_scache_t *scp;
 
-        lock_ObtainWrite(&cm_scacheLock);
-       for(scp=cm_hashTablep[hash]; scp; scp=scp->nextp) {
-               if (cm_FidCmp(fidp, &scp->fid) == 0) {
-                       scp->refCount++;
-                        cm_AdjustLRU(scp);
-                        lock_ReleaseWrite(&cm_scacheLock);
-                       return scp;
-                }
+    hash = CM_SCACHE_HASH(fidp);
+
+    if (fidp->cell == 0) {
+       return NULL;
+    }
+
+    lock_ObtainWrite(&cm_scacheLock);
+    for (scp=cm_data.scacheHashTablep[hash]; scp; scp=scp->nextp) {
+        if (cm_FidCmp(fidp, &scp->fid) == 0) {
+            cm_HoldSCacheNoLock(scp);
+            cm_AdjustScacheLRU(scp);
+            lock_ReleaseWrite(&cm_scacheLock);
+            return scp;
         }
-        lock_ReleaseWrite(&cm_scacheLock);
-        return NULL;
+    }
+    lock_ReleaseWrite(&cm_scacheLock);
+    return NULL;
 }
 
+#ifdef DEBUG_REFCOUNT
+long cm_GetSCacheDbg(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
+                  cm_req_t *reqp, char * file, long line)
+#else
 long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
-       cm_req_t *reqp)
+                  cm_req_t *reqp)
+#endif
 {
-       long hash;
+    long hash;
     cm_scache_t *scp;
     long code;
-    cm_volume_t *volp;
+    cm_volume_t *volp = NULL;
     cm_cell_t *cellp;
-       char* mp;
-       int special; // yj: boolean variable to test if file is on root.afs
-       int isRoot;
+    int special = 0; // yj: boolean variable to test if file is on root.afs
+    int isRoot = 0;
+    extern cm_fid_t cm_rootFid;
         
     hash = CM_SCACHE_HASH(fidp);
         
-       osi_assert(fidp->cell != 0);
+    osi_assertx(fidp->cell != 0, "unassigned cell value");
+
+    if (fidp->cell== cm_data.rootFid.cell && 
+         fidp->volume==cm_data.rootFid.volume &&
+         fidp->vnode==0x0 && fidp->unique==0x0)
+    {
+        osi_Log0(afsd_logp,"cm_GetSCache called with root cell/volume and vnode=0 and unique=0");
+    }
 
-       // yj: check if we have the scp, if so, we don't need
-       // to do anything else
+    // yj: check if we have the scp, if so, we don't need
+    // to do anything else
     lock_ObtainWrite(&cm_scacheLock);
-       for(scp=cm_hashTablep[hash]; scp; scp=scp->nextp) {
-               if (cm_FidCmp(fidp, &scp->fid) == 0) {
-                       scp->refCount++;
+    for (scp=cm_data.scacheHashTablep[hash]; scp; scp=scp->nextp) {
+        if (cm_FidCmp(fidp, &scp->fid) == 0) {
+#ifdef DEBUG_REFCOUNT
+           afsi_log("%s:%d cm_GetSCache (1) outScpp 0x%p ref %d", file, line, scp, scp->refCount);
+           osi_Log1(afsd_logp,"cm_GetSCache (1) outScpp 0x%p", scp);
+#endif
+            cm_HoldSCacheNoLock(scp);
             *outScpp = scp;
-            cm_AdjustLRU(scp);
+            cm_AdjustScacheLRU(scp);
             lock_ReleaseWrite(&cm_scacheLock);
-                       return 0;
+            return 0;
         }
     }
-        
-       // yj: when we get here, it means we don't have an scp
-       // so we need to either load it or fake it, depending
-       // on whether the file is "special", see below.
-
-       // yj: if we're trying to get an scp for a file that's
-       // on root.afs of homecell, we want to handle it specially
-       // because we have to fill in the status stuff 'coz we
-       // don't want trybulkstat to fill it in for us
+
+    // yj: when we get here, it means we don't have an scp
+    // so we need to either load it or fake it, depending
+    // on whether the file is "special", see below.
+
+    // yj: if we're trying to get an scp for a file that's
+    // on root.afs of homecell, we want to handle it specially
+    // because we have to fill in the status stuff 'coz we
+    // don't want trybulkstat to fill it in for us
 #ifdef AFS_FREELANCE_CLIENT
-       special = (fidp->cell==AFS_FAKE_ROOT_CELL_ID && 
+    special = (fidp->cell==AFS_FAKE_ROOT_CELL_ID && 
                fidp->volume==AFS_FAKE_ROOT_VOL_ID &&
-                          !(fidp->vnode==0x1 && fidp->unique==0x1));
-       isRoot = (fidp->cell==AFS_FAKE_ROOT_CELL_ID && 
+               !(fidp->vnode==0x1 && fidp->unique==0x1));
+    isRoot = (fidp->cell==AFS_FAKE_ROOT_CELL_ID && 
               fidp->volume==AFS_FAKE_ROOT_VOL_ID &&
-                         fidp->vnode==0x1 && fidp->unique==0x1);
-       if (cm_freelanceEnabled && isRoot) {
-               osi_Log0(afsd_logp,"cm_getSCache Freelance and isRoot");
-          /* freelance: if we are trying to get the root scp for the first
-             time, we will just put in a place holder entry. */
-               volp = NULL;
-       }
+              fidp->vnode==0x1 && fidp->unique==0x1);
+    if (cm_freelanceEnabled && isRoot) {
+        osi_Log0(afsd_logp,"cm_GetSCache Freelance and isRoot");
+        /* freelance: if we are trying to get the root scp for the first
+         * time, we will just put in a place holder entry. 
+         */
+        volp = NULL;
+    }
          
-       if (cm_freelanceEnabled && special) {
-               osi_Log0(afsd_logp,"cm_getSCache Freelance and special");
-           lock_ObtainMutex(&cm_Freelance_Lock);
-               mp =(cm_localMountPoints+fidp->vnode-2)->mountPointStringp;
-               lock_ReleaseMutex(&cm_Freelance_Lock);
-               
-               scp = cm_GetNewSCache();
-               
-               scp->fid = *fidp;
-               scp->volp = cm_rootSCachep->volp;
-               if (scp->dotdotFidp == (cm_fid_t *) NULL)
-                       scp->dotdotFidp = (cm_fid_t *) malloc (sizeof(cm_fid_t));
-               scp->dotdotFidp->cell=AFS_FAKE_ROOT_CELL_ID;
-               scp->dotdotFidp->volume=AFS_FAKE_ROOT_VOL_ID;
-               scp->dotdotFidp->unique=1;
-               scp->dotdotFidp->vnode=1;
-               scp->flags |= (CM_SCACHEFLAG_PURERO | CM_SCACHEFLAG_RO);
-               scp->nextp=cm_hashTablep[hash];
-               cm_hashTablep[hash]=scp;
-               scp->flags |= CM_SCACHEFLAG_INHASH;
-               scp->refCount = 1;
-               scp->fileType = CM_SCACHETYPE_MOUNTPOINT;
-
-               lock_ObtainMutex(&cm_Freelance_Lock);
-               scp->length.LowPart = strlen(mp)+4;
-               scp->mountPointStringp=malloc(strlen(mp)+1);
-               strcpy(scp->mountPointStringp,mp);
-               lock_ReleaseMutex(&cm_Freelance_Lock);
-
-               scp->owner=0x0;
-               scp->unixModeBits=0x1ff;
-               scp->clientModTime=FakeFreelanceModTime;
-               scp->serverModTime=FakeFreelanceModTime;
-               scp->parentUnique = 0x1;
-               scp->parentVnode=0x1;
-               scp->group=0;
-               scp->dataVersion=0x8;
-               *outScpp = scp;
-               lock_ReleaseWrite(&cm_scacheLock);
-               /*afsi_log("   getscache done");*/
-               return 0;
+    if (cm_freelanceEnabled && special) {
+        char mp[MOUNTPOINTLEN] = "";
+        afs_uint32 fileType;
+
+        osi_Log0(afsd_logp,"cm_GetSCache Freelance and special");
+        lock_ObtainMutex(&cm_Freelance_Lock);
+        if (fidp->vnode >= 2 && fidp->vnode - 2 < cm_noLocalMountPoints) {
+            strncpy(mp,(cm_localMountPoints+fidp->vnode-2)->mountPointStringp, MOUNTPOINTLEN);
+            mp[MOUNTPOINTLEN-1] = '\0';
+            if ( !strnicmp(mp, "msdfs:", strlen("msdfs:")) )
+                fileType = CM_SCACHETYPE_DFSLINK;
+            else
+                fileType = (cm_localMountPoints+fidp->vnode-2)->fileType;
+        } else {
+            fileType = CM_SCACHETYPE_INVALID;
 
+        }
+        lock_ReleaseMutex(&cm_Freelance_Lock);
+
+        scp = cm_GetNewSCache();
+       if (scp == NULL) {
+           osi_Log0(afsd_logp,"cm_GetSCache unable to obtain *new* scache entry");
+            lock_ReleaseWrite(&cm_scacheLock);
+           return CM_ERROR_WOULDBLOCK;
        }
-       // end of yj code
+
+#if not_too_dangerous
+       /* dropping the cm_scacheLock allows more than one thread
+        * to obtain the same cm_scache_t from the LRU list.  Since
+        * the refCount is known to be zero at this point we have to
+        * assume that no one else is using the one that is returned.
+        */
+       lock_ReleaseWrite(&cm_scacheLock);
+       lock_ObtainWrite(&scp->rw);
+       lock_ObtainWrite(&cm_scacheLock);
+#endif
+        scp->fid = *fidp;
+        scp->volp = cm_data.rootSCachep->volp;
+        if (scp->volp)
+           cm_GetVolume(scp->volp);    /* grab an additional reference */
+        scp->dotdotFid.cell=AFS_FAKE_ROOT_CELL_ID;
+        scp->dotdotFid.volume=AFS_FAKE_ROOT_VOL_ID;
+        scp->dotdotFid.unique=1;
+        scp->dotdotFid.vnode=1;
+        scp->flags |= (CM_SCACHEFLAG_PURERO | CM_SCACHEFLAG_RO);
+        scp->nextp=cm_data.scacheHashTablep[hash];
+        cm_data.scacheHashTablep[hash]=scp;
+        scp->flags |= CM_SCACHEFLAG_INHASH;
+        scp->refCount = 1;
+       osi_Log1(afsd_logp,"cm_GetSCache (freelance) sets refCount to 1 scp 0x%x", scp);
+        scp->fileType = fileType;
+        scp->length.LowPart = (DWORD)strlen(mp)+4;
+        scp->length.HighPart = 0;
+        strncpy(scp->mountPointStringp,mp,MOUNTPOINTLEN);
+        scp->owner=0x0;
+        scp->unixModeBits=0777;
+        scp->clientModTime=FakeFreelanceModTime;
+        scp->serverModTime=FakeFreelanceModTime;
+        scp->parentUnique = 0x1;
+        scp->parentVnode=0x1;
+        scp->group=0;
+        scp->dataVersion=cm_data.fakeDirVersion;
+        scp->bufDataVersionLow=cm_data.fakeDirVersion;
+        scp->lockDataVersion=-1; /* no lock yet */
+#if not_too_dangerous
+       lock_ReleaseWrite(&scp->rw);
+#endif
+       *outScpp = scp;
+        lock_ReleaseWrite(&cm_scacheLock);
+#ifdef DEBUG_REFCOUNT
+       afsi_log("%s:%d cm_GetSCache (2) outScpp 0x%p ref %d", file, line, scp, scp->refCount);
+       osi_Log1(afsd_logp,"cm_GetSCache (2) outScpp 0x%p", scp);
+#endif
+        return 0;
+    }
+    // end of yj code
 #endif /* AFS_FREELANCE_CLIENT */
 
     /* otherwise, we need to find the volume */
-       if (!cm_freelanceEnabled || !isRoot) {
+    if (!cm_freelanceEnabled || !isRoot) {
         lock_ReleaseWrite(&cm_scacheLock);     /* for perf. reasons */
-        cellp = cm_FindCellByID(fidp->cell);
+        cellp = cm_FindCellByID(fidp->cell, 0);
         if (!cellp) 
             return CM_ERROR_NOSUCHCELL;
 
-        code = cm_GetVolumeByID(cellp, fidp->volume, userp, reqp, &volp);
+        code = cm_GetVolumeByID(cellp, fidp->volume, userp, reqp, CM_GETVOL_FLAG_CREATE, &volp);
         if (code) 
             return code;
         lock_ObtainWrite(&cm_scacheLock);
-       }
+    }
         
-        /* otherwise, we have the volume, now reverify that the scp doesn't
-         * exist, and proceed.
-         */
-       for(scp=cm_hashTablep[hash]; scp; scp=scp->nextp) {
-               if (cm_FidCmp(fidp, &scp->fid) == 0) {
-                       scp->refCount++;
-            cm_AdjustLRU(scp);
+    /* otherwise, we have the volume, now reverify that the scp doesn't
+     * exist, and proceed.
+     */
+    for (scp=cm_data.scacheHashTablep[hash]; scp; scp=scp->nextp) {
+        if (cm_FidCmp(fidp, &scp->fid) == 0) {
+#ifdef DEBUG_REFCOUNT
+           afsi_log("%s:%d cm_GetSCache (3) outScpp 0x%p ref %d", file, line, scp, scp->refCount);
+           osi_Log1(afsd_logp,"cm_GetSCache (3) outScpp 0x%p", scp);
+#endif
+            cm_HoldSCacheNoLock(scp);
+            osi_assertx(scp->volp == volp, "cm_scache_t volume has unexpected value");
+            cm_AdjustScacheLRU(scp);
             lock_ReleaseWrite(&cm_scacheLock);
-            cm_PutVolume(volp);
+            if (volp)
+                cm_PutVolume(volp);
             *outScpp = scp;
-                       return 0;
+            return 0;
         }
     }
         
     /* now, if we don't have the fid, recycle something */
-       scp = cm_GetNewSCache();
-       osi_assert(!(scp->flags & CM_SCACHEFLAG_INHASH));
-       scp->fid = *fidp;
-       scp->volp = volp;       /* a held reference */
-
-       if (!cm_freelanceEnabled || !isRoot) {
-         /* if this scache entry represents a volume root then we need 
-          * to copy the dotdotFipd from the volume structure where the 
-          * "master" copy is stored (defect 11489)
-          */
-         if(scp->fid.vnode == 1 && scp->fid.unique == 1 && volp->dotdotFidp) {
-           if (scp->dotdotFidp == (cm_fid_t *) NULL)
-             scp->dotdotFidp = (cm_fid_t *) malloc(sizeof(cm_fid_t));
-           *(scp->dotdotFidp) = *volp->dotdotFidp;
-         }
+    scp = cm_GetNewSCache();
+    if (scp == NULL) {
+       osi_Log0(afsd_logp,"cm_GetNewSCache unable to obtain *new* scache entry");
+       lock_ReleaseWrite(&cm_scacheLock);
+       if (volp)
+           cm_PutVolume(volp);
+       return CM_ERROR_WOULDBLOCK;
+    }
+    osi_Log2(afsd_logp,"cm_GetNewSCache returns scp 0x%x flags 0x%x", scp, scp->flags);
+
+    osi_assertx(!(scp->flags & CM_SCACHEFLAG_INHASH), "CM_SCACHEFLAG_INHASH set");
+
+#if not_too_dangerous
+    /* dropping the cm_scacheLock allows more than one thread
+     * to obtain the same cm_scache_t from the LRU list.  Since
+     * the refCount is known to be zero at this point we have to
+     * assume that no one else is using the one that is returned.
+     */
+    lock_ReleaseWrite(&cm_scacheLock);
+    lock_ObtainWrite(&scp->rw);
+    lock_ObtainWrite(&cm_scacheLock);
+#endif
+    scp->fid = *fidp;
+    scp->volp = volp;  /* a held reference */
+
+    if (!cm_freelanceEnabled || !isRoot) {
+        /* if this scache entry represents a volume root then we need 
+         * to copy the dotdotFid from the volume structure where the 
+         * "master" copy is stored (defect 11489)
+         */
+        if (scp->fid.vnode == 1 && scp->fid.unique == 1) {
+           scp->dotdotFid = volp->dotdotFid;
+        }
          
-         if (volp->roID == fidp->volume)
+        if (volp->ro.ID == fidp->volume)
            scp->flags |= (CM_SCACHEFLAG_PURERO | CM_SCACHEFLAG_RO);
-         else if (volp->bkID == fidp->volume)
+        else if (volp->bk.ID == fidp->volume)
            scp->flags |= CM_SCACHEFLAG_RO;
-       }
-       scp->nextp = cm_hashTablep[hash];
-       cm_hashTablep[hash] = scp;
+    }
+    scp->nextp = cm_data.scacheHashTablep[hash];
+    cm_data.scacheHashTablep[hash] = scp;
     scp->flags |= CM_SCACHEFLAG_INHASH;
-       scp->refCount = 1;
+    scp->refCount = 1;
+    osi_Log1(afsd_logp,"cm_GetSCache sets refCount to 1 scp 0x%x", scp);
+#if not_too_dangerous
+    lock_ReleaseWrite(&scp->rw);
+#endif
+
+    /* XXX - The following fields in the cm_scache are 
+     * uninitialized:
+     *   fileType
+     *   parentVnode
+     *   parentUnique
+     */
     lock_ReleaseWrite(&cm_scacheLock);
         
     /* now we have a held scache entry; just return it */
     *outScpp = scp;
+#ifdef DEBUG_REFCOUNT
+    afsi_log("%s:%d cm_GetSCache (4) outScpp 0x%p ref %d", file, line, scp, scp->refCount);
+    osi_Log1(afsd_logp,"cm_GetSCache (4) outScpp 0x%p", scp);
+#endif
     return 0;
 }
 
+/* Look up the cached parent of scp and return it with a reference held.
+ *
+ * The parent fid is built from scp's own cell and volume together with
+ * scp->parentVnode and scp->parentUnique.  The scache hash chain for that
+ * fid is then searched while cm_scacheLock is write-locked.
+ *
+ * Returns NULL when the computed parent fid is identical to scp's own fid,
+ * or when no entry with that fid is found in the hash chain.  A non-NULL
+ * result has been held with cm_HoldSCacheNoLock(); the caller owns that
+ * reference.
+ */
+cm_scache_t * cm_FindSCacheParent(cm_scache_t * scp)
+{
+    int i;
+    cm_fid_t    parent_fid;
+    cm_scache_t * pscp = NULL;
+
+    lock_ObtainWrite(&cm_scacheLock);
+    cm_SetFid(&parent_fid, scp->fid.cell, scp->fid.volume, scp->parentVnode, scp->parentUnique);
+
+    /* cm_FidCmp() is non-zero when the fids differ; an entry whose
+     * parent fid equals its own fid is never looked up.
+     */
+    if (cm_FidCmp(&scp->fid, &parent_fid)) {
+       i = CM_SCACHE_HASH(&parent_fid);
+       for (pscp = cm_data.scacheHashTablep[i]; pscp; pscp = pscp->nextp) {
+           if (!cm_FidCmp(&pscp->fid, &parent_fid)) {
+               cm_HoldSCacheNoLock(pscp);
+               break;
+           }
+       }
+       /* pscp is NULL here if the chain was exhausted without a match */
+    }
+
+    lock_ReleaseWrite(&cm_scacheLock);
+
+    return pscp;
+}
+
+/* Record the calling thread as a waiter on scp for the given sync request.
+ *
+ * scp   - scache entry being waited on; a reference is taken with
+ *         cm_HoldSCacheNoLock() and dropped again by
+ *         cm_SyncOpCheckContinue() when the waiter is dequeued.
+ * flags - the request flags; stored in the waiter and later compared for
+ *         exact equality by cm_SyncOpCheckContinue().
+ * bufp  - buffer associated with the request (stored as given).
+ *
+ * Requests whose flags are exactly CM_SCACHESYNC_ASYNCSTORE are not queued.
+ * Waiter records are taken from cm_allFreeWaiters when one is available and
+ * allocated otherwise.  All queue manipulation is done with cm_scacheLock
+ * write-locked.
+ */
+void cm_SyncOpAddToWaitQueue(cm_scache_t * scp, afs_int32 flags, cm_buf_t * bufp)
+{
+    cm_scache_waiter_t * w;
+
+    /* Do not use the queue for asynchronous store operations */
+    if (flags == CM_SCACHESYNC_ASYNCSTORE)
+        return;
+
+    lock_ObtainWrite(&cm_scacheLock);
+    if (cm_allFreeWaiters == NULL) {
+        w = malloc(sizeof(*w));
+        /* malloc() may fail; stop here instead of letting memset()
+         * write through a NULL pointer.
+         */
+        osi_assertx(w != NULL, "null cm_scache_waiter_t");
+        memset(w, 0, sizeof(*w));
+    } else {
+        /* recycle a record that cm_SyncOpCheckContinue() zeroed and
+         * parked on the free list
+         */
+        w = (cm_scache_waiter_t *) cm_allFreeWaiters;
+        osi_QRemove(&cm_allFreeWaiters, (osi_queue_t *) w);
+    }
+
+    w->threadId = thrd_Current();
+    w->scp = scp;
+    cm_HoldSCacheNoLock(scp);
+    w->flags = flags;
+    w->bufp = bufp;
+
+    /* NOTE(review): osi_QAddT presumably appends at the tail, giving FIFO
+     * order relative to the head-first scan in cm_SyncOpCheckContinue();
+     * confirm against the osi queue implementation.
+     */
+    osi_QAddT(&scp->waitQueueH, &scp->waitQueueT, (osi_queue_t *) w);
+    lock_ReleaseWrite(&cm_scacheLock);
+
+    osi_Log2(afsd_logp, "cm_SyncOpAddToWaitQueue : Adding thread to wait queue scp 0x%p w 0x%p", scp, w);
+}
+
+/* Decide whether the calling thread may continue with a queued sync request.
+ *
+ * The wait queue of scp is scanned from its head for the first waiter whose
+ * flags and bufp are equal to the arguments.  If that waiter was queued by
+ * the calling thread it is removed from the queue, the scache reference it
+ * carried is released, the record is zeroed and put on cm_allFreeWaiters,
+ * and 1 is returned.  If the matching waiter belongs to another thread,
+ * 0 is returned and the queue is left untouched.
+ *
+ * Requests whose flags are exactly CM_SCACHESYNC_ASYNCSTORE bypass the
+ * queue and always return 1.  Asserts if no matching waiter is queued.
+ */
+int cm_SyncOpCheckContinue(cm_scache_t * scp, afs_int32 flags, cm_buf_t * bufp)
+{
+    cm_scache_waiter_t * waiterp;
+    int ownedByCaller;
+
+    /* Asynchronous store operations never go through the queue */
+    if (flags == CM_SCACHESYNC_ASYNCSTORE)
+        return 1;
+
+    osi_Log0(afsd_logp, "cm_SyncOpCheckContinue checking for continuation");
+
+    /* locate the first waiter queued for this exact (flags, bufp) pair */
+    lock_ObtainRead(&cm_scacheLock);
+    waiterp = (cm_scache_waiter_t *)scp->waitQueueH;
+    while (waiterp && !(waiterp->flags == flags && waiterp->bufp == bufp))
+        waiterp = (cm_scache_waiter_t *)osi_QNext((osi_queue_t *) waiterp);
+
+    osi_assertx(waiterp != NULL, "null cm_scache_waiter_t");
+    ownedByCaller = (waiterp->threadId == thrd_Current());
+    lock_ReleaseRead(&cm_scacheLock);
+
+    if (ownedByCaller) {
+        osi_Log1(afsd_logp, "cm_SyncOpCheckContinue HIT: Waiter 0x%p", waiterp);
+
+        /* dequeue our record, drop the reference taken when it was
+         * queued, and recycle the record onto the free list
+         */
+        lock_ObtainWrite(&cm_scacheLock);
+        osi_QRemoveHT(&scp->waitQueueH, &scp->waitQueueT, (osi_queue_t *) waiterp);
+        cm_ReleaseSCacheNoLock(scp);
+        memset(waiterp, 0, sizeof(*waiterp));
+        osi_QAdd(&cm_allFreeWaiters, (osi_queue_t *) waiterp);
+        lock_ReleaseWrite(&cm_scacheLock);
+
+        return 1;
+    }
+
+    /* someone else is ahead of us for this request */
+    osi_Log1(afsd_logp, "cm_SyncOpCheckContinue MISS: Waiter 0x%p", waiterp);
+    return 0;
+}
+
+
 /* synchronize a fetch, store, read, write, fetch status or store status.
  * Called with scache mutex held, and returns with it held, but temporarily
  * drops it during the fetch.
@@ -446,344 +1043,446 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
  * is to serialize all StoreData RPC's.  This is the reason we defined
  * CM_SCACHESYNC_STOREDATA_EXCL and CM_SCACHEFLAG_DATASTORING.
  */
-long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *up, cm_req_t *reqp,
-       long rights, long flags)
+long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *reqp,
+               afs_uint32 rights, afs_uint32 flags)
 {
-       osi_queueData_t *qdp;
-        long code;
-        cm_buf_t *tbufp;
-        long outRights;
-        int bufLocked;
-
-       /* lookup this first */
-       bufLocked = flags & CM_SCACHESYNC_BUFLOCKED;
-
-       /* some minor assertions */
-       if (flags & (CM_SCACHESYNC_STOREDATA | CM_SCACHESYNC_FETCHDATA
-                       | CM_SCACHESYNC_READ | CM_SCACHESYNC_WRITE
-                       | CM_SCACHESYNC_SETSIZE)) {
-               if (bufp) {
-                       osi_assert(bufp->refCount > 0);
-                       /*
-                       osi_assert(cm_FidCmp(&bufp->fid, &scp->fid) == 0);
-                        */
-               }
-       }
-       else osi_assert(bufp == NULL);
-
-       /* Do the access check.  Now we don't really do the access check
-        * atomically, since the caller doesn't expect the parent dir to be
-        * returned locked, and that is what we'd have to do to prevent a
-        * callback breaking message on the parent due to a setacl call from
-        * being processed while we're running.  So, instead, we check things
-         * here, and if things look fine with the access, we proceed to finish
-        * the rest of this check.  Sort of a hack, but probably good enough.
-         */
-
-       while (1) {
-               if (flags & CM_SCACHESYNC_FETCHSTATUS) {
-                       /* if we're bringing in a new status block, ensure that
-                        * we aren't already doing so, and that no one is
-                        * changing the status concurrently, either.  We need
-                        * to do this, even if the status is of a different
-                         * type, since we don't have the ability to figure out,
-                        * in the AFS 3 protocols, which status-changing
-                        * operation ran first, or even which order a read and
-                        * a write occurred in.
-                         */
-                       if (scp->flags & (CM_SCACHEFLAG_FETCHING | CM_SCACHEFLAG_STORING
-                                         | CM_SCACHEFLAG_SIZESTORING | CM_SCACHEFLAG_GETCALLBACK))
-                               goto sleep;
-                }
-                if (flags & (CM_SCACHESYNC_STORESIZE | CM_SCACHESYNC_STORESTATUS
-                               | CM_SCACHESYNC_SETSIZE | CM_SCACHESYNC_GETCALLBACK)) {
-                       /* if we're going to make an RPC to change the status, make sure
-                         * that no one is bringing in or sending out the status.
-                         */
-                       if (scp->flags & (CM_SCACHEFLAG_FETCHING | CM_SCACHEFLAG_STORING
-                                         | CM_SCACHEFLAG_SIZESTORING | CM_SCACHEFLAG_GETCALLBACK))
-                               goto sleep;
-                       if (scp->bufReadsp || scp->bufWritesp) goto sleep;
-                }
-                if (flags & CM_SCACHESYNC_FETCHDATA) {
-                       /* if we're bringing in a new chunk of data, make sure that
-                         * nothing is happening to that chunk, and that we aren't
-                         * changing the basic file status info, either.
-                         */
-                       if (scp->flags & (CM_SCACHEFLAG_FETCHING | CM_SCACHEFLAG_STORING
-                                         | CM_SCACHEFLAG_SIZESTORING | CM_SCACHEFLAG_GETCALLBACK))
-                               goto sleep;
-                        if (bufp && (bufp->cmFlags & (CM_BUF_CMFETCHING | CM_BUF_CMSTORING)))
-                               goto sleep;
-                }
-                if (flags & CM_SCACHESYNC_STOREDATA) {
-                       /* same as fetch data */
-                       if (scp->flags & (CM_SCACHEFLAG_FETCHING | CM_SCACHEFLAG_STORING
-                                         | CM_SCACHEFLAG_SIZESTORING | CM_SCACHEFLAG_GETCALLBACK))
-                               goto sleep;
-                        if (bufp && (bufp->cmFlags & (CM_BUF_CMFETCHING | CM_BUF_CMSTORING)))
-                               goto sleep;
-                }
+    osi_queueData_t *qdp;
+    long code;
+    cm_buf_t *tbufp;
+    afs_uint32 outRights;
+    int bufLocked;
+    afs_uint32 sleep_scp_flags = 0;
+    afs_uint32 sleep_buf_cmflags = 0;
+    afs_uint32 sleep_scp_bufs = 0;
+    int wakeupCycle;
+
+    /* lookup this first */
+    bufLocked = flags & CM_SCACHESYNC_BUFLOCKED;
+
+    if (bufp)
+        osi_assertx(bufp->refCount > 0, "cm_buf_t refCount 0");
+
+
+    /* Do the access check.  Now we don't really do the access check
+     * atomically, since the caller doesn't expect the parent dir to be
+     * returned locked, and that is what we'd have to do to prevent a
+     * callback breaking message on the parent due to a setacl call from
+     * being processed while we're running.  So, instead, we check things
+     * here, and if things look fine with the access, we proceed to finish
+     * the rest of this check.  Sort of a hack, but probably good enough.
+     */
+
+    while (1) {
+        if (flags & CM_SCACHESYNC_FETCHSTATUS) {
+            /* if we're bringing in a new status block, ensure that
+             * we aren't already doing so, and that no one is
+             * changing the status concurrently, either.  We need
+             * to do this, even if the status is of a different
+             * type, since we don't have the ability to figure out,
+             * in the AFS 3 protocols, which status-changing
+             * operation ran first, or even which order a read and
+             * a write occurred in.
+             */
+            if (scp->flags & (CM_SCACHEFLAG_FETCHING | CM_SCACHEFLAG_STORING
+                               | CM_SCACHEFLAG_SIZESTORING | CM_SCACHEFLAG_GETCALLBACK)) {
+                osi_Log1(afsd_logp, "CM SyncOp scp 0x%p is FETCHING|STORING|SIZESTORING|GETCALLBACK want FETCHSTATUS", scp);
+                goto sleep;
+            }
+        }
+        if (flags & (CM_SCACHESYNC_STORESIZE | CM_SCACHESYNC_STORESTATUS
+                      | CM_SCACHESYNC_SETSIZE | CM_SCACHESYNC_GETCALLBACK)) {
+            /* if we're going to make an RPC to change the status, make sure
+             * that no one is bringing in or sending out the status.
+             */
+            if (scp->flags & (CM_SCACHEFLAG_FETCHING | CM_SCACHEFLAG_STORING |
+                              CM_SCACHEFLAG_SIZESTORING | CM_SCACHEFLAG_GETCALLBACK)) {
+                osi_Log1(afsd_logp, "CM SyncOp scp 0x%p is FETCHING|STORING|SIZESTORING|GETCALLBACK want STORESIZE|STORESTATUS|SETSIZE|GETCALLBACK", scp);
+                goto sleep;
+            }
+            if (scp->bufReadsp || scp->bufWritesp) {
+                osi_Log1(afsd_logp, "CM SyncOp scp 0x%p is bufRead|bufWrite want STORESIZE|STORESTATUS|SETSIZE|GETCALLBACK", scp);
+                goto sleep;
+            }
+        }
+        if (flags & CM_SCACHESYNC_FETCHDATA) {
+            /* if we're bringing in a new chunk of data, make sure that
+             * nothing is happening to that chunk, and that we aren't
+             * changing the basic file status info, either.
+             */
+            if (scp->flags & (CM_SCACHEFLAG_FETCHING | CM_SCACHEFLAG_STORING
+                               | CM_SCACHEFLAG_SIZESTORING | CM_SCACHEFLAG_GETCALLBACK)) {
+                osi_Log1(afsd_logp, "CM SyncOp scp 0x%p is FETCHING|STORING|SIZESTORING|GETCALLBACK want FETCHDATA", scp);
+                goto sleep;
+            }
+            if (bufp && (bufp->cmFlags & (CM_BUF_CMFETCHING | CM_BUF_CMSTORING | CM_BUF_CMWRITING))) {
+                osi_Log2(afsd_logp, "CM SyncOp scp 0x%p bufp 0x%p is BUF_CMFETCHING|BUF_CMSTORING|BUF_CMWRITING want FETCHDATA", scp, bufp);
+                goto sleep;
+            }
+        }
+        if (flags & CM_SCACHESYNC_STOREDATA) {
+            /* same as fetch data */
+            if (scp->flags & (CM_SCACHEFLAG_FETCHING | CM_SCACHEFLAG_STORING
+                               | CM_SCACHEFLAG_SIZESTORING | CM_SCACHEFLAG_GETCALLBACK)) {
+                osi_Log1(afsd_logp, "CM SyncOp scp 0x%p is FETCHING|STORING|SIZESTORING|GETCALLBACK want STOREDATA", scp);
+                goto sleep;
+            }
+            if (bufp && (bufp->cmFlags & (CM_BUF_CMFETCHING | CM_BUF_CMSTORING | CM_BUF_CMWRITING))) {
+                osi_Log2(afsd_logp, "CM SyncOp scp 0x%p bufp 0x%p is BUF_CMFETCHING|BUF_CMSTORING|BUF_CMWRITING want STOREDATA", scp, bufp);
+                goto sleep;
+            }
+        }
 
-               if (flags & CM_SCACHESYNC_STOREDATA_EXCL) {
-                       /* Don't allow concurrent StoreData RPC's */
-                       if (scp->flags & CM_SCACHEFLAG_DATASTORING)
-                               goto sleep;
-               }
+        if (flags & CM_SCACHESYNC_STOREDATA_EXCL) {
+            /* Don't allow concurrent StoreData RPC's */
+            if (scp->flags & CM_SCACHEFLAG_DATASTORING) {
+                osi_Log1(afsd_logp, "CM SyncOp scp 0x%p is DATASTORING want STOREDATA_EXCL", scp);
+                goto sleep;
+            }
+        }
 
-               if (flags & CM_SCACHESYNC_ASYNCSTORE) {
-                       /* Don't allow more than one BKG store request */
-                       if (scp->flags & CM_SCACHEFLAG_ASYNCSTORING)
-                               goto sleep;
-               }
+        if (flags & CM_SCACHESYNC_ASYNCSTORE) {
+            /* Don't allow more than one BKG store request */
+            if (scp->flags & CM_SCACHEFLAG_ASYNCSTORING) {
+                osi_Log1(afsd_logp, "CM SyncOp scp 0x%p is ASYNCSTORING want ASYNCSTORE", scp);
+                goto sleep;
+            }
+        }
 
-               if (flags & CM_SCACHESYNC_LOCK) {
-                       /* Don't allow concurrent fiddling with lock lists */
-                       if (scp->flags & CM_SCACHEFLAG_LOCKING)
-                               goto sleep;
-               }
+        if (flags & CM_SCACHESYNC_LOCK) {
+            /* Don't allow concurrent fiddling with lock lists */
+            if (scp->flags & CM_SCACHEFLAG_LOCKING) {
+                osi_Log1(afsd_logp, "CM SyncOp scp 0x%p is LOCKING want LOCK", scp);
+                goto sleep;
+            }
+        }
 
-                /* now the operations that don't correspond to making RPCs */
-                if (flags & CM_SCACHESYNC_GETSTATUS) {
-                       /* we can use the status that's here, if we're not
-                        * bringing in new status.
-                         */
-                       if (scp->flags & (CM_SCACHEFLAG_FETCHING))
-                               goto sleep;
-                }
-               if (flags & CM_SCACHESYNC_SETSTATUS) {
-                       /* we can make a change to the local status, as long as
-                        * the status isn't changing now.
-                        *
-                         * If we're fetching or storing a chunk of data, we can
-                        * change the status locally, since the fetch/store
-                        * operations don't change any of the data that we're
-                        * changing here.
-                         */
-                       if (scp->flags & (CM_SCACHEFLAG_FETCHING | CM_SCACHEFLAG_STORING
-                                         | CM_SCACHEFLAG_SIZESTORING))
-                               goto sleep;
-                }
-                if (flags & CM_SCACHESYNC_READ) {
-                       /* we're going to read the data, make sure that the
-                        * status is available, and that the data is here.  It
-                        * is OK to read while storing the data back.
-                         */
-                       if (scp->flags & CM_SCACHEFLAG_FETCHING)
-                               goto sleep;
-                        if (bufp && ((bufp->cmFlags
-                                        & (CM_BUF_CMFETCHING
-                                            | CM_BUF_CMFULLYFETCHED))
-                                       == CM_BUF_CMFETCHING))
-                               goto sleep;
-                }
-               if (flags & CM_SCACHESYNC_WRITE) {
-                       /* don't write unless the status is stable and the chunk
-                         * is stable.
-                         */
-                       if (scp->flags & (CM_SCACHEFLAG_FETCHING | CM_SCACHEFLAG_STORING
-                                         | CM_SCACHEFLAG_SIZESTORING))
-                               goto sleep;
-                        if (bufp && (bufp->cmFlags & (CM_BUF_CMFETCHING | CM_BUF_CMSTORING)))
-                               goto sleep;
-                }
+        /* now the operations that don't correspond to making RPCs */
+        if (flags & CM_SCACHESYNC_GETSTATUS) {
+            /* we can use the status that's here, if we're not
+             * bringing in new status.
+             */
+            if (scp->flags & (CM_SCACHEFLAG_FETCHING)) {
+                osi_Log1(afsd_logp, "CM SyncOp scp 0x%p is FETCHING want GETSTATUS", scp);
+                goto sleep;
+            }
+        }
+        if (flags & CM_SCACHESYNC_SETSTATUS) {
+            /* we can make a change to the local status, as long as
+             * the status isn't changing now.
+             *
+             * If we're fetching or storing a chunk of data, we can
+             * change the status locally, since the fetch/store
+             * operations don't change any of the data that we're
+             * changing here.
+             */
+            if (scp->flags & (CM_SCACHEFLAG_FETCHING | CM_SCACHEFLAG_STORING | CM_SCACHEFLAG_SIZESTORING)) {
+                osi_Log1(afsd_logp, "CM SyncOp scp 0x%p is FETCHING|STORING|SIZESTORING want SETSTATUS", scp);
+                goto sleep;
+            }
+        }
+        if (flags & CM_SCACHESYNC_READ) {
+            /* we're going to read the data, make sure that the
+             * status is available, and that the data is here.  It
+             * is OK to read while storing the data back.
+             */
+            if (scp->flags & CM_SCACHEFLAG_FETCHING) {
+                osi_Log1(afsd_logp, "CM SyncOp scp 0x%p is FETCHING want READ", scp);
+                goto sleep;
+            }
+            if (bufp && ((bufp->cmFlags & (CM_BUF_CMFETCHING | CM_BUF_CMFULLYFETCHED)) == CM_BUF_CMFETCHING)) {
+                osi_Log2(afsd_logp, "CM SyncOp scp 0x%p bufp 0x%p is BUF_CMFETCHING want READ", scp, bufp);
+                goto sleep;
+            }
+            if (bufp && (bufp->cmFlags & CM_BUF_CMWRITING)) {
+                osi_Log2(afsd_logp, "CM SyncOp scp 0x%p bufp 0x%p is BUF_CMWRITING want READ", scp, bufp);
+                goto sleep;
+            }
+        }
+        if (flags & CM_SCACHESYNC_WRITE) {
+            /* don't write unless the status is stable and the chunk
+             * is stable.
+             */
+            if (scp->flags & (CM_SCACHEFLAG_FETCHING | CM_SCACHEFLAG_STORING
+                               | CM_SCACHEFLAG_SIZESTORING)) {
+                osi_Log1(afsd_logp, "CM SyncOp scp 0x%p is FETCHING|STORING|SIZESTORING want WRITE", scp);
+                goto sleep;
+            }
+            if (bufp && (bufp->cmFlags & (CM_BUF_CMFETCHING |
+                                          CM_BUF_CMSTORING |
+                                          CM_BUF_CMWRITING))) {
+                osi_Log3(afsd_logp, "CM SyncOp scp 0x%p bufp 0x%p is %s want WRITE",
+                         scp, bufp,
+                         ((bufp->cmFlags & CM_BUF_CMFETCHING) ? "CM_BUF_CMFETCHING":
+                          ((bufp->cmFlags & CM_BUF_CMSTORING) ? "CM_BUF_CMSTORING" :
+                           ((bufp->cmFlags & CM_BUF_CMWRITING) ? "CM_BUF_CMWRITING" :
+                            "UNKNOWN!!!"))));
+                goto sleep;
+            }
+        }
 
-               // yj: modified this so that callback only checked if we're
-               // not checking something on /afs
-               if (  (flags & CM_SCACHESYNC_NEEDCALLBACK)
+        // yj: modified this so that callback only checked if we're
+        // not checking something on /afs
+        /* fix the conditional to match the one in cm_HaveCallback */
+        if ((flags & CM_SCACHESYNC_NEEDCALLBACK)
 #ifdef AFS_FREELANCE_CLIENT
-                       && (!cm_freelanceEnabled || !(!(scp->fid.vnode==0x1 &&
-                                                        scp->fid.unique==0x1) &&
-                                                        scp->fid.cell==AFS_FAKE_ROOT_CELL_ID &&
-                                                        scp->fid.volume==AFS_FAKE_ROOT_VOL_ID))
+             && (!cm_freelanceEnabled || 
+                  !(scp->fid.vnode==0x1 && scp->fid.unique==0x1) ||
+                  scp->fid.cell!=AFS_FAKE_ROOT_CELL_ID ||
+                  scp->fid.volume!=AFS_FAKE_ROOT_VOL_ID ||
+                  cm_fakeDirCallback < 2)
 #endif /* AFS_FREELANCE_CLIENT */
-                   ) {
-                       if (!cm_HaveCallback(scp)) {
-                               osi_Log1(afsd_logp, "CM SyncOp getting callback on scp %x",
-                                       (long) scp);
-                               if (bufLocked) lock_ReleaseMutex(&bufp->mx);
-                               code = cm_GetCallback(scp, up, reqp, 0);
-                                if (bufLocked) {
-                                       lock_ReleaseMutex(&scp->mx);
-                                        lock_ObtainMutex(&bufp->mx);
-                                        lock_ObtainMutex(&scp->mx);
-                                }
-                                if (code) return code;
-                               continue;
-                        }
+             ) {
+            if ((flags & CM_SCACHESYNC_FORCECB) || !cm_HaveCallback(scp)) {
+                osi_Log1(afsd_logp, "CM SyncOp getting callback on scp 0x%p",
+                          scp);
+                if (bufLocked) 
+                   lock_ReleaseMutex(&bufp->mx);
+                code = cm_GetCallback(scp, userp, reqp, (flags & CM_SCACHESYNC_FORCECB)?1:0);
+                if (bufLocked) {
+                    lock_ReleaseWrite(&scp->rw);
+                    lock_ObtainMutex(&bufp->mx);
+                    lock_ObtainWrite(&scp->rw);
                 }
-                
-               if (rights) {
-                       /* can't check access rights without a callback */
-                       osi_assert(flags & CM_SCACHESYNC_NEEDCALLBACK);
-
-                       if ((rights & PRSFS_WRITE) && (scp->flags & CM_SCACHEFLAG_RO))
-                               return CM_ERROR_READONLY;
-
-                       if (cm_HaveAccessRights(scp, up, rights, &outRights)) {
-                               if (~outRights & rights) return CM_ERROR_NOACCESS;
-                        }
-                        else {
-                               /* we don't know the required access rights */
-                               if (bufLocked) lock_ReleaseMutex(&bufp->mx);
-                                code = cm_GetAccessRights(scp, up, reqp);
-                                if (code) return code;
-                                if (bufLocked) {
-                                       lock_ReleaseMutex(&scp->mx);
-                                        lock_ObtainMutex(&bufp->mx);
-                                        lock_ObtainMutex(&scp->mx);
-                                }
-                                continue;
-                        }
+                if (code) 
+                    return code;
+               flags &= ~CM_SCACHESYNC_FORCECB;        /* only force once */
+                continue;
+            }
+        }
+
+        if (rights) {
+            /* can't check access rights without a callback */
+            osi_assertx(flags & CM_SCACHESYNC_NEEDCALLBACK, "!CM_SCACHESYNC_NEEDCALLBACK");
+
+            if ((rights & PRSFS_WRITE) && (scp->flags & CM_SCACHEFLAG_RO))
+                return CM_ERROR_READONLY;
+
+            if (cm_HaveAccessRights(scp, userp, rights, &outRights)) {
+                if (~outRights & rights) 
+                   return CM_ERROR_NOACCESS;
+            }
+            else {
+                /* we don't know the required access rights */
+                if (bufLocked) lock_ReleaseMutex(&bufp->mx);
+                code = cm_GetAccessRights(scp, userp, reqp);
+                if (bufLocked) {
+                    lock_ReleaseWrite(&scp->rw);
+                    lock_ObtainMutex(&bufp->mx);
+                    lock_ObtainWrite(&scp->rw);
                 }
+                if (code) 
+                    return code;
+                continue;
+            }
+        }
 
-                /* if we get here, we're happy */
-                break;
-
-sleep:
-               /* first check if we're not supposed to wait: fail 
-                 * in this case, returning with everything still locked.
-                 */
-               if (flags & CM_SCACHESYNC_NOWAIT) return CM_ERROR_WOULDBLOCK;
-
-               /* wait here, then try again */
-               osi_Log1(afsd_logp, "CM SyncOp sleeping scp %x", (long) scp);
-        if ( scp->flags & CM_SCACHEFLAG_WAITING ) 
-            osi_Log1(afsd_logp, "CM SyncOp CM_SCACHEFLAG_WAITING already set for 0x%x", scp);
-        else 
-            osi_Log1(afsd_logp, "CM SyncOp CM_SCACHEFLAG_WAITING set for 0x%x", scp);
-               scp->flags |= CM_SCACHEFLAG_WAITING;
-               if (bufLocked) lock_ReleaseMutex(&bufp->mx);
-        osi_SleepM((long) &scp->flags, &scp->mx);
-        osi_Log0(afsd_logp, "CM SyncOp woke!");
-        if (bufLocked) lock_ObtainMutex(&bufp->mx);
-        lock_ObtainMutex(&scp->mx);
-        } /* big while loop */
-        
-        /* now, update the recorded state for RPC-type calls */
-        if (flags & CM_SCACHESYNC_FETCHSTATUS)
-               scp->flags |= CM_SCACHEFLAG_FETCHING;
-       if (flags & CM_SCACHESYNC_STORESTATUS)
-               scp->flags |= CM_SCACHEFLAG_STORING;
-       if (flags & CM_SCACHESYNC_STORESIZE)
-               scp->flags |= CM_SCACHEFLAG_SIZESTORING;
-       if (flags & CM_SCACHESYNC_GETCALLBACK)
-               scp->flags |= CM_SCACHEFLAG_GETCALLBACK;
-       if (flags & CM_SCACHESYNC_STOREDATA_EXCL)
-               scp->flags |= CM_SCACHEFLAG_DATASTORING;
-       if (flags & CM_SCACHESYNC_ASYNCSTORE)
-               scp->flags |= CM_SCACHEFLAG_ASYNCSTORING;
-       if (flags & CM_SCACHESYNC_LOCK)
-               scp->flags |= CM_SCACHEFLAG_LOCKING;
-
-       /* now update the buffer pointer */
-        if (flags & CM_SCACHESYNC_FETCHDATA) {
-               /* ensure that the buffer isn't already in the I/O list */
-               if (bufp) {
-                       for(qdp = scp->bufReadsp; qdp; qdp = (osi_queueData_t *) osi_QNext(&qdp->q)) {
-                               tbufp = osi_GetQData(qdp);
-                               osi_assert(tbufp != bufp);
-                       }
-               }
-                
-               /* queue a held reference to the buffer in the "reading" I/O list */
-                qdp = osi_QDAlloc();
-                osi_SetQData(qdp, bufp);
-               if (bufp) {
-                       buf_Hold(bufp);
-                       bufp->cmFlags |= CM_BUF_CMFETCHING;
-               }
-                osi_QAdd((osi_queue_t **) &scp->bufReadsp, &qdp->q);
+        /* if we get here, we're happy */
+        break;
+
+      sleep:
+        /* first check if we're not supposed to wait: fail 
+         * in this case, returning with everything still locked.
+         */
+        if (flags & CM_SCACHESYNC_NOWAIT) 
+            return CM_ERROR_WOULDBLOCK;
+
+        /* These are used for minidump debugging */
+       sleep_scp_flags = scp->flags;           /* so we know why we slept */
+       sleep_buf_cmflags = bufp ? bufp->cmFlags : 0;
+       sleep_scp_bufs = (scp->bufReadsp ? 1 : 0) | (scp->bufWritesp ? 2 : 0);
+
+        /* wait here, then try again */
+        osi_Log1(afsd_logp, "CM SyncOp sleeping scp 0x%p", scp);
+        if ( scp->flags & CM_SCACHEFLAG_WAITING ) {
+            scp->waitCount++;
+            scp->waitRequests++;
+            osi_Log3(afsd_logp, "CM SyncOp CM_SCACHEFLAG_WAITING already set for 0x%p; %d threads; %d requests", 
+                     scp, scp->waitCount, scp->waitRequests);
+        } else {
+            osi_Log1(afsd_logp, "CM SyncOp CM_SCACHEFLAG_WAITING set for 0x%p", scp);
+            scp->flags |= CM_SCACHEFLAG_WAITING;
+            scp->waitCount = scp->waitRequests = 1;
         }
 
-        if (flags & CM_SCACHESYNC_STOREDATA) {
-               /* ensure that the buffer isn't already in the I/O list */
-               if (bufp) {
-                       for(qdp = scp->bufWritesp; qdp; qdp = (osi_queueData_t *) osi_QNext(&qdp->q)) {
-                               tbufp = osi_GetQData(qdp);
-                               osi_assert(tbufp != bufp);
-                       }
-               }
-                
-               /* queue a held reference to the buffer in the "writing" I/O list */
-                qdp = osi_QDAlloc();
-                osi_SetQData(qdp, bufp);
-               if (bufp) {
-                       buf_Hold(bufp);
-                       bufp->cmFlags |= CM_BUF_CMSTORING;
-               }
-                osi_QAdd((osi_queue_t **) &scp->bufWritesp, &qdp->q);
+        cm_SyncOpAddToWaitQueue(scp, flags, bufp);
+        wakeupCycle = 0;
+        do {
+            if (bufLocked) 
+                lock_ReleaseMutex(&bufp->mx);
+            osi_SleepW((LONG_PTR) &scp->flags, &scp->rw);
+            if (bufLocked) 
+                lock_ObtainMutex(&bufp->mx);
+            lock_ObtainWrite(&scp->rw);
+        } while (!cm_SyncOpCheckContinue(scp, flags, bufp));
+
+       smb_UpdateServerPriority();
+
+        scp->waitCount--;
+        osi_Log3(afsd_logp, "CM SyncOp woke! scp 0x%p; still waiting %d threads of %d requests", 
+                 scp, scp->waitCount, scp->waitRequests);
+        if (scp->waitCount == 0) {
+            osi_Log1(afsd_logp, "CM SyncOp CM_SCACHEFLAG_WAITING reset for 0x%p", scp);
+            scp->flags &= ~CM_SCACHEFLAG_WAITING;
+            scp->waitRequests = 0;
         }
+    } /* big while loop */
         
-        return 0;
+    /* now, update the recorded state for RPC-type calls */
+    if (flags & CM_SCACHESYNC_FETCHSTATUS)
+        scp->flags |= CM_SCACHEFLAG_FETCHING;
+    if (flags & CM_SCACHESYNC_STORESTATUS)
+        scp->flags |= CM_SCACHEFLAG_STORING;
+    if (flags & CM_SCACHESYNC_STORESIZE)
+        scp->flags |= CM_SCACHEFLAG_SIZESTORING;
+    if (flags & CM_SCACHESYNC_GETCALLBACK)
+        scp->flags |= CM_SCACHEFLAG_GETCALLBACK;
+    if (flags & CM_SCACHESYNC_STOREDATA_EXCL)
+        scp->flags |= CM_SCACHEFLAG_DATASTORING;
+    if (flags & CM_SCACHESYNC_ASYNCSTORE)
+        scp->flags |= CM_SCACHEFLAG_ASYNCSTORING;
+    if (flags & CM_SCACHESYNC_LOCK)
+        scp->flags |= CM_SCACHEFLAG_LOCKING;
+
+    /* now update the buffer pointer */
+    if (flags & CM_SCACHESYNC_FETCHDATA) {
+        /* ensure that the buffer isn't already in the I/O list */
+        if (bufp) {
+            for(qdp = scp->bufReadsp; qdp; qdp = (osi_queueData_t *) osi_QNext(&qdp->q)) {
+                tbufp = osi_GetQData(qdp);
+                osi_assertx(tbufp != bufp, "unexpected cm_buf_t value");
+            }
+        }
+
+        /* queue a held reference to the buffer in the "reading" I/O list */
+        qdp = osi_QDAlloc();
+        osi_SetQData(qdp, bufp);
+        if (bufp) {
+            buf_Hold(bufp);
+            bufp->cmFlags |= CM_BUF_CMFETCHING;
+        }
+        osi_QAdd((osi_queue_t **) &scp->bufReadsp, &qdp->q);
+    }
+
+    if (flags & CM_SCACHESYNC_STOREDATA) {
+        /* ensure that the buffer isn't already in the I/O list */
+        if (bufp) {
+            for(qdp = scp->bufWritesp; qdp; qdp = (osi_queueData_t *) osi_QNext(&qdp->q)) {
+                tbufp = osi_GetQData(qdp);
+                osi_assertx(tbufp != bufp, "unexpected cm_buf_t value");
+            }
+        }
+
+        /* queue a held reference to the buffer in the "writing" I/O list */
+        qdp = osi_QDAlloc();
+        osi_SetQData(qdp, bufp);
+        if (bufp) {
+            buf_Hold(bufp);
+            bufp->cmFlags |= CM_BUF_CMSTORING;
+        }
+        osi_QAdd((osi_queue_t **) &scp->bufWritesp, &qdp->q);
+    }
+
+    if (flags & CM_SCACHESYNC_WRITE) {
+        /* mark the buffer as being written to. */
+        if (bufp) {
+            bufp->cmFlags |= CM_BUF_CMWRITING;
+        }
+    }
+
+    return 0;
 }
 
 /* for those syncops that setup for RPCs.
  * Called with scache locked.
  */
-void cm_SyncOpDone(cm_scache_t *scp, cm_buf_t *bufp, long flags)
+void cm_SyncOpDone(cm_scache_t *scp, cm_buf_t *bufp, afs_uint32 flags)
 {
-       osi_queueData_t *qdp;
-       cm_buf_t *tbufp;
-
-        /* now, update the recorded state for RPC-type calls */
-        if (flags & CM_SCACHESYNC_FETCHSTATUS)
-               scp->flags &= ~CM_SCACHEFLAG_FETCHING;
-       if (flags & CM_SCACHESYNC_STORESTATUS)
-               scp->flags &= ~CM_SCACHEFLAG_STORING;
-       if (flags & CM_SCACHESYNC_STORESIZE)
-               scp->flags &= ~CM_SCACHEFLAG_SIZESTORING;
-       if (flags & CM_SCACHESYNC_GETCALLBACK)
-               scp->flags &= ~CM_SCACHEFLAG_GETCALLBACK;
-       if (flags & CM_SCACHESYNC_STOREDATA_EXCL)
-               scp->flags &= ~CM_SCACHEFLAG_DATASTORING;
-       if (flags & CM_SCACHESYNC_ASYNCSTORE)
-               scp->flags &= ~CM_SCACHEFLAG_ASYNCSTORING;
-       if (flags & CM_SCACHESYNC_LOCK)
-               scp->flags &= ~CM_SCACHEFLAG_LOCKING;
-
-       /* now update the buffer pointer */
-        if (flags & CM_SCACHESYNC_FETCHDATA) {
-               /* ensure that the buffer isn't already in the I/O list */
-               for(qdp = scp->bufReadsp; qdp; qdp = (osi_queueData_t *) osi_QNext(&qdp->q)) {
-                       tbufp = osi_GetQData(qdp);
-                       if (tbufp == bufp) break;
-                }
-                osi_assert(qdp != NULL);
-               osi_assert(osi_GetQData(qdp) == bufp);
-               osi_QRemove((osi_queue_t **) &scp->bufReadsp, &qdp->q);
-                osi_QDFree(qdp);
-               if (bufp) {
-                       bufp->cmFlags &=
-                         ~(CM_BUF_CMFETCHING | CM_BUF_CMFULLYFETCHED);
-                       buf_Release(bufp);
-               }
+    osi_queueData_t *qdp;
+    cm_buf_t *tbufp;
+    /* Undo the state cm_SyncOp recorded for these flags (bufp may be NULL), then wake any sleepers. */
+    lock_AssertWrite(&scp->rw);
+
+    /* now, update the recorded state for RPC-type calls */
+    if (flags & CM_SCACHESYNC_FETCHSTATUS)
+        scp->flags &= ~CM_SCACHEFLAG_FETCHING;
+    if (flags & CM_SCACHESYNC_STORESTATUS)
+        scp->flags &= ~CM_SCACHEFLAG_STORING;
+    if (flags & CM_SCACHESYNC_STORESIZE)
+        scp->flags &= ~CM_SCACHEFLAG_SIZESTORING;
+    if (flags & CM_SCACHESYNC_GETCALLBACK)
+        scp->flags &= ~CM_SCACHEFLAG_GETCALLBACK;
+    if (flags & CM_SCACHESYNC_STOREDATA_EXCL)
+        scp->flags &= ~CM_SCACHEFLAG_DATASTORING;
+    if (flags & CM_SCACHESYNC_ASYNCSTORE)
+        scp->flags &= ~CM_SCACHEFLAG_ASYNCSTORING;
+    if (flags & CM_SCACHESYNC_LOCK)
+        scp->flags &= ~CM_SCACHEFLAG_LOCKING;
+
+    /* a fetch finished: dequeue bufp from the "reading" I/O list */
+    if (flags & CM_SCACHESYNC_FETCHDATA) {
+       int release = 0;        /* set only if a queue entry (and its buffer hold) was found */
+
+       /* locate the queue entry that refers to bufp, if there is one */
+        for(qdp = scp->bufReadsp; qdp; qdp = (osi_queueData_t *) osi_QNext(&qdp->q)) {
+            tbufp = osi_GetQData(qdp);
+            if (tbufp == bufp) 
+               break;
+        }
+       if (qdp) {
+           osi_QRemove((osi_queue_t **) &scp->bufReadsp, &qdp->q);
+           osi_QDFree(qdp);
+           release = 1;
+       }
+        if (bufp) {
+            bufp->cmFlags &= ~(CM_BUF_CMFETCHING | CM_BUF_CMFULLYFETCHED);
+            if (bufp->flags & CM_BUF_WAITING) {
+                osi_Log2(afsd_logp, "CM SyncOpDone Waking [scp 0x%p] bufp 0x%p", scp, bufp);
+                osi_Wakeup((LONG_PTR) bufp);    /* wake on the buffer object; &bufp was only this call's own parameter slot. NOTE(review): confirm buffer waiters sleep on the cm_buf_t address */
+            }
+           if (release)
+               buf_Release(bufp);
         }
+    }
 
-       /* now update the buffer pointer */
-        if (flags & CM_SCACHESYNC_STOREDATA) {
-               /* ensure that the buffer isn't already in the I/O list */
-               for(qdp = scp->bufWritesp; qdp; qdp = (osi_queueData_t *) osi_QNext(&qdp->q)) {
-                       tbufp = osi_GetQData(qdp);
-                       if (tbufp == bufp) break;
-                }
-                osi_assert(qdp != NULL);
-               osi_assert(osi_GetQData(qdp) == bufp);
-               osi_QRemove((osi_queue_t **) &scp->bufWritesp, &qdp->q);
-                osi_QDFree(qdp);
-               if (bufp) {
-                       bufp->cmFlags &= ~CM_BUF_CMSTORING;
-                       buf_Release(bufp);
-               }
+    /* a store finished: dequeue bufp from the "writing" I/O list */
+    if (flags & CM_SCACHESYNC_STOREDATA) {
+       int release = 0;        /* set only if a queue entry (and its buffer hold) was found */
+        /* locate the queue entry that refers to bufp, if there is one */
+        for(qdp = scp->bufWritesp; qdp; qdp = (osi_queueData_t *) osi_QNext(&qdp->q)) {
+            tbufp = osi_GetQData(qdp);
+            if (tbufp == bufp) 
+               break;
         }
-        
-        /* and wakeup anyone who is waiting */
-        if (scp->flags & CM_SCACHEFLAG_WAITING) {
-            osi_Log1(afsd_logp, "CM SyncOp CM_SCACHEFLAG_WAITING reset for 0x%x", scp);
-            scp->flags &= ~CM_SCACHEFLAG_WAITING;
-            osi_Wakeup((long) &scp->flags);
+       if (qdp) {
+           osi_QRemove((osi_queue_t **) &scp->bufWritesp, &qdp->q);
+           osi_QDFree(qdp);
+           release = 1;
+       }
+        if (bufp) {
+            bufp->cmFlags &= ~CM_BUF_CMSTORING;
+            if (bufp->flags & CM_BUF_WAITING) {
+                osi_Log2(afsd_logp, "CM SyncOpDone Waking [scp 0x%p] bufp 0x%p", scp, bufp);
+                osi_Wakeup((LONG_PTR) bufp);    /* wake on the buffer object; &bufp was only this call's own parameter slot. NOTE(review): confirm buffer waiters sleep on the cm_buf_t address */
+            }
+           if (release)
+               buf_Release(bufp);
         }
-}
+    }
+
+    if (flags & CM_SCACHESYNC_WRITE) {
+        if (bufp) {
+            osi_assertx(bufp->cmFlags & CM_BUF_CMWRITING, "!CM_BUF_CMWRITING");
+
+            bufp->cmFlags &= ~CM_BUF_CMWRITING;
+        }
+    }
+
+    /* wake the cm_SyncOp sleepers; CM_SCACHEFLAG_WAITING itself is cleared by the last waiter in cm_SyncOp */
+    if (scp->flags & CM_SCACHEFLAG_WAITING) {
+        osi_Log1(afsd_logp, "CM SyncOpDone Waking scp 0x%p", scp);
+        osi_Wakeup((LONG_PTR) &scp->flags);
+    }
+}       
 
 /* merge in a response from an RPC.  The scp must be locked, and the callback
  * is optional.
@@ -798,122 +1497,242 @@ void cm_SyncOpDone(cm_scache_t *scp, cm_buf_t *bufp, long flags)
  * handled after the callback breaking is done, but only one of whose calls
  * started before that, can cause old info to be merged from the first call.
  */
-void cm_MergeStatus(cm_scache_t *scp, AFSFetchStatus *statusp, AFSVolSync *volp,
-       cm_user_t *userp, int flags)
+void cm_MergeStatus(cm_scache_t *dscp, 
+                   cm_scache_t *scp, AFSFetchStatus *statusp, 
+                   AFSVolSync *volsyncp,
+                    cm_user_t *userp, afs_uint32 flags)
 {
-       // yj: i want to create some fake status for the /afs directory and the
-       // entries under that directory
+    afs_uint64 dataVersion;     /* 64-bit version assembled from statusp->dataVersionHigh and statusp->DataVersion */
+    /* Fold *statusp into scp; dscp (may be NULL) only supplies the parent fid on the error path. */
+    // yj: i want to create some fake status for the /afs directory and the
+    // entries under that directory
 #ifdef AFS_FREELANCE_CLIENT
-       if (cm_freelanceEnabled && scp == cm_rootSCachep) {
-               osi_Log0(afsd_logp,"cm_MergeStatus Freelance cm_rootSCachep");
-               statusp->InterfaceVersion = 0x1;
-               statusp->FileType = 0x2;
-               statusp->LinkCount = scp->linkCount;
-               statusp->Length = cm_fakeDirSize;
-               statusp->DataVersion = cm_fakeDirVersion;
-               statusp->Author = 0x1;
-               statusp->Owner = 0x0;
-               statusp->CallerAccess = 0x9;
-               statusp->AnonymousAccess = 0x9;
-               statusp->UnixModeBits = 0x1ff;
-               statusp->ParentVnode = 0x1;
-               statusp->ParentUnique = 0x1;
-               statusp->ResidencyMask = 0;
-               statusp->ClientModTime = FakeFreelanceModTime;
-               statusp->ServerModTime = FakeFreelanceModTime;
-               statusp->Group = 0;
-               statusp->SyncCounter = 0;
-               statusp->dataVersionHigh = 0;
-       }
+    if (cm_freelanceEnabled && scp == cm_data.rootSCachep) {
+        osi_Log0(afsd_logp,"cm_MergeStatus Freelance cm_data.rootSCachep");
+        statusp->InterfaceVersion = 0x1;
+        statusp->FileType = CM_SCACHETYPE_DIRECTORY;
+        statusp->LinkCount = scp->linkCount;
+        statusp->Length = cm_fakeDirSize;
+        statusp->Length_hi = 0;
+        statusp->DataVersion = (afs_uint32)(cm_data.fakeDirVersion & 0xFFFFFFFF);
+        statusp->Author = 0x1;
+        statusp->Owner = 0x0;
+        statusp->CallerAccess = 0x9;
+        statusp->AnonymousAccess = 0x9;
+        statusp->UnixModeBits = 0777;
+        statusp->ParentVnode = 0x1;
+        statusp->ParentUnique = 0x1;
+        statusp->ResidencyMask = 0;
+        statusp->ClientModTime = FakeFreelanceModTime;
+        statusp->ServerModTime = FakeFreelanceModTime;
+        statusp->Group = 0;
+        statusp->SyncCounter = 0;
+        statusp->dataVersionHigh = (afs_uint32)(cm_data.fakeDirVersion >> 32);
+        statusp->errorCode = 0;
+    }
 #endif /* AFS_FREELANCE_CLIENT */
 
-       if (!(flags & CM_MERGEFLAG_FORCE)
-                       && statusp->DataVersion < (unsigned long) scp->dataVersion) {
-               struct cm_cell *cellp;
-               struct cm_volume *volp;
-
-               cellp = cm_FindCellByID(scp->fid.cell);
-               cm_GetVolumeByID(cellp, scp->fid.volume, userp,
-                                (cm_req_t *) NULL, &volp);
-               if (scp->cbServerp)
-                       osi_Log2(afsd_logp, "old data from server %x volume %s",
-                                scp->cbServerp->addr.sin_addr.s_addr,
-                                volp->namep);
-               osi_Log3(afsd_logp, "Bad merge, scp %x, scp dv %d, RPC dv %d",
-                        scp, scp->dataVersion, statusp->DataVersion);
-               /* we have a number of data fetch/store operations running
-                * concurrently, and we can tell which one executed last at the
-                * server by its mtime.
-                * Choose the one with the largest mtime, and ignore the rest.
-                *
-                * These concurrent calls are incompatible with setting the
-                * mtime, so we won't have a locally changed mtime here.
-                 *
-                 * We could also have ACL info for a different user than usual,
-                * in which case we have to do that part of the merge, anyway.
-                * We won't have to worry about the info being old, since we
-                * won't have concurrent calls
-                 * that change file status running from this machine.
-                *
-                * Added 3/17/98:  if we see data version regression on an RO
-                * file, it's probably due to a server holding an out-of-date
-                * replica, rather than to concurrent RPC's.  Failures to
-                * release replicas are now flagged by the volserver, but only
-                * since AFS 3.4 5.22, so there are plenty of clients getting
-                * out-of-date replicas out there.
-                *
-                * If we discover an out-of-date replica, by this time it's too
-                * late to go to another server and retry.  Also, we can't
-                * reject the merge, because then there is no way for
-                * GetAccess to do its work, and the caller gets into an
-                * infinite loop.  So we just grin and bear it.
-                */
-               if (!(scp->flags & CM_SCACHEFLAG_RO))
-                       return;
-       }
-        scp->serverModTime = statusp->ServerModTime;
+    if (statusp->errorCode != 0) {     /* failed status: flag EACCESS, zero the cached status, and return early */
+       scp->flags |= CM_SCACHEFLAG_EACCESS;
+       osi_Log2(afsd_logp, "Merge, Failure scp %x code 0x%x", scp, statusp->errorCode);
 
-       if (!(scp->mask & CM_SCACHEMASK_CLIENTMODTIME)) {
-               scp->clientModTime = statusp->ClientModTime;
-       }
-        if (!(scp->mask & CM_SCACHEMASK_LENGTH)) {
-               scp->length.LowPart = statusp->Length;
-                scp->length.HighPart = 0;
-       }
+       scp->fileType = 0;      /* unknown */
 
-       scp->serverLength.LowPart = statusp->Length;
+       scp->serverModTime = 0;
+       scp->clientModTime = 0;
+       scp->length.LowPart = 0;
+       scp->length.HighPart = 0;
+       scp->serverLength.LowPart = 0;
        scp->serverLength.HighPart = 0;
+       scp->linkCount = 0;
+       scp->owner = 0;
+       scp->group = 0;
+       scp->unixModeBits = 0;
+       scp->anyAccess = 0;
+       scp->dataVersion = 0;
+        scp->bufDataVersionLow = 0;
+
+       if (dscp) {
+            scp->parentVnode = dscp->fid.vnode;
+            scp->parentUnique = dscp->fid.unique;
+       } else {
+            scp->parentVnode = 0;
+            scp->parentUnique = 0;
+       }
+       return;
+    } else {
+       scp->flags &= ~CM_SCACHEFLAG_EACCESS;
+    }
 
-       scp->linkCount = statusp->LinkCount;
-        scp->dataVersion = statusp->DataVersion;
-        scp->owner = statusp->Owner;
-        scp->group = statusp->Group;
-        scp->unixModeBits = statusp->UnixModeBits & 07777;
-        
-        if (statusp->FileType == File)
-               scp->fileType = CM_SCACHETYPE_FILE;
-       else if (statusp->FileType == Directory)
-               scp->fileType = CM_SCACHETYPE_DIRECTORY;
-       else if (statusp->FileType == SymbolicLink) {
-               if ((scp->unixModeBits & 0111) == 0)
-                       scp->fileType = CM_SCACHETYPE_MOUNTPOINT;
-               else
-                       scp->fileType = CM_SCACHETYPE_SYMLINK;
+    dataVersion = statusp->dataVersionHigh;
+    dataVersion <<= 32;
+    dataVersion |= statusp->DataVersion;
+
+    if (!(flags & CM_MERGEFLAG_FORCE) && dataVersion < scp->dataVersion) {
+        struct cm_cell *cellp;
+
+        cellp = cm_FindCellByID(scp->fid.cell, 0);
+        if (scp->cbServerp) {
+            struct cm_volume *volp = NULL;
+
+            cm_GetVolumeByID(cellp, scp->fid.volume, userp,
+                              (cm_req_t *) NULL, CM_GETVOL_FLAG_CREATE, &volp);
+            osi_Log2(afsd_logp, "old data from server %x volume %s",
+                      scp->cbServerp->addr.sin_addr.s_addr,
+                      volp ? volp->namep : "(unknown)");
+            if (volp)
+                cm_PutVolume(volp);
         }
-        else scp->fileType = 0;        /* invalid */
+        osi_Log3(afsd_logp, "Bad merge, scp %x, scp dv %d, RPC dv %d",
+                  scp, scp->dataVersion, dataVersion);
+        /* we have a number of data fetch/store operations running
+         * concurrently, and we can tell which one executed last at the
+         * server by its mtime.
+         * Choose the one with the largest mtime, and ignore the rest.
+         *
+         * These concurrent calls are incompatible with setting the
+         * mtime, so we won't have a locally changed mtime here.
+         *
+         * We could also have ACL info for a different user than usual,
+         * in which case we have to do that part of the merge, anyway.
+         * We won't have to worry about the info being old, since we
+         * won't have concurrent calls
+         * that change file status running from this machine.
+         *
+         * Added 3/17/98:  if we see data version regression on an RO
+         * file, it's probably due to a server holding an out-of-date
+         * replica, rather than to concurrent RPC's.  Failures to
+         * release replicas are now flagged by the volserver, but only
+         * since AFS 3.4 5.22, so there are plenty of clients getting
+         * out-of-date replicas out there.
+         *
+         * If we discover an out-of-date replica, by this time it's too
+         * late to go to another server and retry.  Also, we can't
+         * reject the merge, because then there is no way for
+         * GetAccess to do its work, and the caller gets into an
+         * infinite loop.  So we just grin and bear it.
+         */
+        if (!(scp->flags & CM_SCACHEFLAG_RO))
+            return;
+    }       
+
+    scp->serverModTime = statusp->ServerModTime;
 
-        /* and other stuff */
-        scp->parentVnode = statusp->ParentVnode;
-        scp->parentUnique = statusp->ParentUnique;
+    if (!(scp->mask & CM_SCACHEMASK_CLIENTMODTIME)) {
+        scp->clientModTime = statusp->ClientModTime;
+    }
+    if (!(scp->mask & CM_SCACHEMASK_LENGTH)) {
+        scp->length.LowPart = statusp->Length;
+        scp->length.HighPart = statusp->Length_hi;
+    }
+
+    scp->serverLength.LowPart = statusp->Length;
+    scp->serverLength.HighPart = statusp->Length_hi;
+
+    scp->linkCount = statusp->LinkCount;
+    scp->owner = statusp->Owner;
+    scp->group = statusp->Group;
+    scp->unixModeBits = statusp->UnixModeBits & 07777;
+
+    if (statusp->FileType == File)
+        scp->fileType = CM_SCACHETYPE_FILE;
+    else if (statusp->FileType == Directory)
+        scp->fileType = CM_SCACHETYPE_DIRECTORY;
+    else if (statusp->FileType == SymbolicLink) {
+        if ((scp->unixModeBits & 0111) == 0)    /* a symlink with no execute bits is classified as a mount point */
+            scp->fileType = CM_SCACHETYPE_MOUNTPOINT;
+        else
+            scp->fileType = CM_SCACHETYPE_SYMLINK;
+    }       
+    else {
+        osi_Log2(afsd_logp, "Merge, Invalid File Type (%d), scp %x", statusp->FileType, scp);
+        scp->fileType = CM_SCACHETYPE_INVALID; /* invalid */
+    }
+    /* and other stuff */
+    scp->parentVnode = statusp->ParentVnode;
+    scp->parentUnique = statusp->ParentUnique;
         
-        /* and merge in the private acl cache info, if this is more than the public
-         * info; merge in the public stuff in any case.
+    /* and merge in the private acl cache info, if this is more than the public
+     * info; merge in the public stuff in any case.
+     */
+    scp->anyAccess = statusp->AnonymousAccess;
+
+    if (userp != NULL) {
+        cm_AddACLCache(scp, userp, statusp->CallerAccess);
+    }
+
+    if (scp->dataVersion != 0 &&
+        ((!(flags & CM_MERGEFLAG_DIROP) && dataVersion != scp->dataVersion) ||
+         ((flags & CM_MERGEFLAG_DIROP) && dataVersion - scp->dataVersion > 1))) {
+        /* 
+         * We now know that all of the data buffers that we have associated
+         * with this scp are invalid.  Subsequent operations will go faster
+         * if the buffers are removed from the hash tables.
+         *
+         * We do not remove directory buffers if the dataVersion delta is 1 because
+         * those version numbers will be updated as part of the directory operation.
          */
-       scp->anyAccess = statusp->AnonymousAccess;
+        int i, j;
+        cm_buf_t **lbpp;
+        cm_buf_t *tbp;
+        cm_buf_t *bp, *prevBp, *nextBp;
+
+        lock_ObtainWrite(&buf_globalLock);
+        i = BUF_FILEHASH(&scp->fid);
+        for (bp = cm_data.buf_fileHashTablepp[i]; bp; bp=nextBp)
+       {
+            nextBp = bp->fileHashp;
+            /* 
+             * if the buffer belongs to this stat cache entry
+             * and the buffer mutex can be obtained, remove the
+             * buffer from the hash tables when it is unreferenced
+             * and not READING, WRITING or DIRTY.  If there are
+             * references, the buffer might be updated to the
+             * current version so leave it in place.
+             */
+            if (cm_FidCmp(&scp->fid, &bp->fid) == 0 &&
+                 lock_TryMutex(&bp->mx)) {
+                if (bp->refCount == 0 && 
+                    !(bp->flags & (CM_BUF_READING | CM_BUF_WRITING | CM_BUF_DIRTY))) {    /* mask parenthesized: '&' binds tighter than '|' */
+                    prevBp = bp->fileHashBackp;
+                    bp->fileHashBackp = bp->fileHashp = NULL;
+                    if (prevBp)
+                        prevBp->fileHashp = nextBp;
+                    else
+                        cm_data.buf_fileHashTablepp[i] = nextBp;
+                    if (nextBp)
+                        nextBp->fileHashBackp = prevBp;
+
+                    j = BUF_HASH(&bp->fid, &bp->offset);
+                    lbpp = &(cm_data.buf_scacheHashTablepp[j]);
+                    for(tbp = *lbpp; tbp; lbpp = &tbp->hashp, tbp = *lbpp) {
+                        if (tbp == bp) 
+                            break;
+                    }
+
+                    *lbpp = bp->hashp; /* hash out */
+                    bp->hashp = NULL;
+
+                    bp->flags &= ~CM_BUF_INHASH;
+                }
+                lock_ReleaseMutex(&bp->mx);
+            }
+       }
+        lock_ReleaseWrite(&buf_globalLock);
+    }
 
-        if (userp != NULL) {
-               cm_AddACLCache(scp, userp, statusp->CallerAccess);
-        }
+    /* We maintain a range of buffer dataVersion values which are considered 
+     * valid.  This avoids the need to update the dataVersion on each buffer
+     * object during an uncontested storeData operation.  As a result this 
+     * merge status no longer has performance characteristics derived from
+     * the size of the file.
+     */
+    if (((flags & CM_MERGEFLAG_STOREDATA) && dataVersion - scp->dataVersion > 1) || 
+         (!(flags & CM_MERGEFLAG_STOREDATA) && scp->dataVersion != dataVersion) ||
+         scp->bufDataVersionLow == 0)
+        scp->bufDataVersionLow = dataVersion;
+    
+    scp->dataVersion = dataVersion;
 }
 
 /* note that our stat cache info is incorrect, so force us eventually
@@ -926,110 +1745,178 @@ void cm_MergeStatus(cm_scache_t *scp, AFSFetchStatus *statusp, AFSVolSync *volp,
  */
 void cm_DiscardSCache(cm_scache_t *scp)
 {
-       lock_AssertMutex(&scp->mx);
+    /* The caller must already hold scp->rw write-locked; this only asserts it. */
+    lock_AssertWrite(&scp->rw);
+    /* Hand the callback server back via cm_PutServer (presumably dropping
+     * the reference this entry held - confirm) and forget the pointer. */
+    if (scp->cbServerp) {
+        cm_PutServer(scp->cbServerp);
        scp->cbServerp = NULL;
-        scp->cbExpires = 0;
-       cm_dnlcPurgedp(scp);
-        cm_FreeAllACLEnts(scp);
+    }
+    /* Clear the callback expiration time and the CALLBACK flag. */
+    scp->cbExpires = 0;
+    scp->flags &= ~CM_SCACHEFLAG_CALLBACK;
+    /* Purge name-cache references to this entry (presumably both as a
+     * directory and as a lookup target - confirm against cm_dnlc) and
+     * free every cached ACL entry. */
+    cm_dnlcPurgedp(scp);
+    cm_dnlcPurgevp(scp);
+    cm_FreeAllACLEnts(scp);
+
+    /* DFS links additionally get their DFS mapping invalidated. */
+    if (scp->fileType == CM_SCACHETYPE_DFSLINK)
+        cm_VolStatus_Invalidate_DFS_Mapping(scp);
+
+    /* Force mount points and symlinks to be re-evaluated */
+    scp->mountPointStringp[0] = '\0';
 }
 
+/* Fill *afsFidp from *fidp by copying the volume, vnode and unique fields.
+ * No other field of the cm_fid_t (e.g. its cell) is carried over.
+ */
 void cm_AFSFidFromFid(AFSFid *afsFidp, cm_fid_t *fidp)
 {
-       afsFidp->Volume = fidp->volume;
-        afsFidp->Vnode = fidp->vnode;
-        afsFidp->Unique = fidp->unique;
+    afsFidp->Volume = fidp->volume;
+    afsFidp->Vnode = fidp->vnode;
+    afsFidp->Unique = fidp->unique;
+}       
+
+/* Take one reference on scp without acquiring cm_scacheLock.
+ * The caller must already hold cm_scacheLock (checked by lock_AssertAny;
+ * presumably either read or write mode is accepted - confirm).
+ * The count is bumped with InterlockedIncrement.
+ * With DEBUG_REFCOUNT the function is compiled as cm_HoldSCacheNoLockDbg
+ * and logs the new count together with the caller's file and line.
+ */
+#ifdef DEBUG_REFCOUNT
+void cm_HoldSCacheNoLockDbg(cm_scache_t *scp, char * file, long line)
+#else
+void cm_HoldSCacheNoLock(cm_scache_t *scp)
+#endif
+{     
+    afs_int32 refCount;
+
+    osi_assertx(scp != NULL, "null cm_scache_t");
+    lock_AssertAny(&cm_scacheLock);
+    refCount = InterlockedIncrement(&scp->refCount);
+#ifdef DEBUG_REFCOUNT
+    osi_Log2(afsd_logp,"cm_HoldSCacheNoLock scp 0x%p ref %d",scp, refCount);
+    afsi_log("%s:%d cm_HoldSCacheNoLock scp 0x%p, ref %d", file, line, scp, refCount);
+#endif
 }
 
+/* Take one reference on scp, acquiring cm_scacheLock in read mode around
+ * the update.  The count itself is bumped with InterlockedIncrement.
+ * Unlike the code it replaces, this version no longer asserts that the
+ * count was already positive; it only asserts that scp is non-NULL.
+ * With DEBUG_REFCOUNT the function is compiled as cm_HoldSCacheDbg and
+ * logs the new count together with the caller's file and line.
+ */
+#ifdef DEBUG_REFCOUNT
+void cm_HoldSCacheDbg(cm_scache_t *scp, char * file, long line)
+#else
 void cm_HoldSCache(cm_scache_t *scp)
+#endif
 {
-       lock_ObtainWrite(&cm_scacheLock);
-       osi_assert(scp->refCount > 0);
-       scp->refCount++;
-       lock_ReleaseWrite(&cm_scacheLock);
+    afs_int32 refCount;
+
+    osi_assertx(scp != NULL, "null cm_scache_t");
+    lock_ObtainRead(&cm_scacheLock);
+    refCount = InterlockedIncrement(&scp->refCount);
+#ifdef DEBUG_REFCOUNT
+    osi_Log2(afsd_logp,"cm_HoldSCache scp 0x%p ref %d",scp, refCount);
+    afsi_log("%s:%d cm_HoldSCache scp 0x%p ref %d", file, line, scp, refCount);
+#endif
+    lock_ReleaseRead(&cm_scacheLock);
 }
 
-void cm_ReleaseSCache(cm_scache_t *scp)
+/* Drop one reference on scp without acquiring cm_scacheLock.
+ * The caller must already hold cm_scacheLock (checked by lock_AssertAny;
+ * presumably either read or write mode is accepted - confirm).
+ * The count is lowered with InterlockedDecrement and the function asserts
+ * that the result did not go negative, i.e. that a reference was held.
+ * With DEBUG_REFCOUNT the function is compiled as
+ * cm_ReleaseSCacheNoLockDbg and logs the new count together with the
+ * caller's file and line.
+ */
+#ifdef DEBUG_REFCOUNT
+void cm_ReleaseSCacheNoLockDbg(cm_scache_t *scp, char * file, long line)
+#else
+void cm_ReleaseSCacheNoLock(cm_scache_t *scp)
+#endif
 {
-       lock_ObtainWrite(&cm_scacheLock);
-       osi_assert(scp->refCount-- > 0);
-       lock_ReleaseWrite(&cm_scacheLock);
+    afs_int32 refCount;
+    osi_assertx(scp != NULL, "null cm_scache_t");
+    lock_AssertAny(&cm_scacheLock);
+    refCount = InterlockedDecrement(&scp->refCount);
+#ifdef DEBUG_REFCOUNT
+    /* scp is a pointer: log it with %p like the other messages here so it
+     * is not truncated on 64-bit builds. */
+    if (refCount < 0)
+       osi_Log1(afsd_logp,"cm_ReleaseSCacheNoLock about to panic scp 0x%p",scp);
+#endif
+    osi_assertx(refCount >= 0, "cm_scache_t refCount 0");
+#ifdef DEBUG_REFCOUNT
+    osi_Log2(afsd_logp,"cm_ReleaseSCacheNoLock scp 0x%p ref %d",scp, refCount);
+    afsi_log("%s:%d cm_ReleaseSCacheNoLock scp 0x%p ref %d", file, line, scp, refCount);
+#endif
+}
+
+/* Drop one reference on scp, acquiring cm_scacheLock in read mode around
+ * the update.  The count is lowered with InterlockedDecrement and the
+ * function asserts that the result did not go negative, i.e. that a
+ * reference was actually held.
+ * With DEBUG_REFCOUNT the function is compiled as cm_ReleaseSCacheDbg and
+ * logs the new count together with the caller's file and line.
+ */
+#ifdef DEBUG_REFCOUNT
+void cm_ReleaseSCacheDbg(cm_scache_t *scp, char * file, long line)
+#else
+void cm_ReleaseSCache(cm_scache_t *scp)
+#endif
+{     
+    afs_int32 refCount;
+
+    osi_assertx(scp != NULL, "null cm_scache_t");
+    lock_ObtainRead(&cm_scacheLock);
+    refCount = InterlockedDecrement(&scp->refCount);
+#ifdef DEBUG_REFCOUNT
+    /* scp is a pointer: log it with %p like the other messages here so it
+     * is not truncated on 64-bit builds. */
+    if (refCount < 0)
+       osi_Log1(afsd_logp,"cm_ReleaseSCache about to panic scp 0x%p",scp);
+#endif
+    osi_assertx(refCount >= 0, "cm_scache_t refCount 0");
+#ifdef DEBUG_REFCOUNT
+    osi_Log2(afsd_logp,"cm_ReleaseSCache scp 0x%p ref %d",scp, refCount);
+    afsi_log("%s:%d cm_ReleaseSCache scp 0x%p ref %d", file, line, scp, refCount);
+#endif
+    lock_ReleaseRead(&cm_scacheLock);
 }
 
 /* just look for the scp entry to get filetype */
 /* doesn't need to be perfectly accurate, so locking doesn't matter too much */
 int cm_FindFileType(cm_fid_t *fidp)
 {
-        long hash;
-        cm_scache_t *scp;
+    long hash;
+    cm_scache_t *scp;
         
-        hash = CM_SCACHE_HASH(fidp);
+    hash = CM_SCACHE_HASH(fidp);
         
-        osi_assert(fidp->cell != 0);
+    osi_assertx(fidp->cell != 0, "unassigned cell value");
 
-        lock_ObtainWrite(&cm_scacheLock);
-        for(scp=cm_hashTablep[hash]; scp; scp=scp->nextp) {
-                if (cm_FidCmp(fidp, &scp->fid) == 0) {
-                  /*scp->refCount++;*/
-                  /*cm_AdjustLRU(scp);*/
-                  lock_ReleaseWrite(&cm_scacheLock);
-                  return scp->fileType;
-                }
+    lock_ObtainWrite(&cm_scacheLock);
+    for (scp=cm_data.scacheHashTablep[hash]; scp; scp=scp->nextp) {
+        if (cm_FidCmp(fidp, &scp->fid) == 0) {
+            lock_ReleaseWrite(&cm_scacheLock);
+            return scp->fileType;
         }
-        lock_ReleaseWrite(&cm_scacheLock);
-        return 0;
+    }
+    lock_ReleaseWrite(&cm_scacheLock);
+    return 0;
 }
 
 /* dump all scp's that have reference count > 0 to a file. 
  * cookie is used to identify this batch for easy parsing, 
  * and it a string provided by a caller 
  */
+/* As of this change every entry is written, not only those with a
+ * non-zero reference count: first each entry on the cm_data.allSCachesp
+ * list with full details, then every cm_data.scacheHashTablep bucket
+ * chain (fid and bucket index only).  Lines end in "\r\n".
+ *
+ * outputFile - destination passed straight to WriteFile().
+ *              NOTE(review): declared FILE * but used as a WriteFile
+ *              handle - presumably callers pass a Win32 HANDLE; confirm.
+ * cookie     - caller-supplied prefix placed at the start of every line.
+ * lock       - non-zero: cm_scacheLock is read-locked for the duration of
+ *              the dump; zero: the lock is left untouched.
+ *
+ * Always returns 0; WriteFile results are not checked.
+ */
-int cm_DumpSCache(FILE *outputFile, char *cookie)
+int cm_DumpSCache(FILE *outputFile, char *cookie, int lock)
 {
     int zilch;
     cm_scache_t *scp;
-    char output[1024];
+    /* Scratch buffer for one formatted line.
+     * NOTE(review): the sprintf calls below are unbounded; a very long
+     * cookie or mount point string could exceed this buffer - confirm the
+     * limits on both. */
+    char output[2048];
     int i;
   
-    lock_ObtainRead(&cm_scacheLock);
+    if (lock)
+        lock_ObtainRead(&cm_scacheLock);
   
-    sprintf(output, "%s - dumping scache - cm_currentSCaches=%d, cm_maxSCaches=%d\n", cookie, cm_currentSCaches, cm_maxSCaches);
-    WriteFile(outputFile, output, strlen(output), &zilch, NULL);
+    sprintf(output, "%s - dumping all scache - cm_data.currentSCaches=%d, cm_data.maxSCaches=%d\r\n", cookie, cm_data.currentSCaches, cm_data.maxSCaches);
+    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
   
-    for (scp = cm_scacheLRULastp; scp; scp = (cm_scache_t *) osi_QPrev(&scp->q)) 
+    /* Pass 1: every entry on the allSCachesp list, with full details. */
+    for (scp = cm_data.allSCachesp; scp; scp = scp->allNextp) 
     {
-        if (scp->refCount != 0)
-        {
-            sprintf(output, "%s fid (cell=%d, volume=%d, vnode=%d, unique=%d) refCount=%d\n", 
-                    cookie, scp->fid.cell, scp->fid.volume, scp->fid.vnode, scp->fid.unique, 
-                    scp->refCount);
-            WriteFile(outputFile, output, strlen(output), &zilch, NULL);
-        }
+        sprintf(output, "%s scp=0x%p, fid (cell=%d, volume=%d, vnode=%d, unique=%d) volp=0x%p type=%d dv=%I64d len=0x%I64x mp='%s' flags=0x%x cb=0x%x refCount=%u\r\n", 
+                cookie, scp, scp->fid.cell, scp->fid.volume, scp->fid.vnode, scp->fid.unique, 
+                scp->volp, scp->fileType, scp->dataVersion, scp->length.QuadPart, scp->mountPointStringp, scp->flags,
+                (unsigned long)scp->cbExpires, scp->refCount);
+        WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
     }
   
-    sprintf(output, "%s - dumping cm_hashTable - cm_hashTableSize=%d\n", cookie, cm_hashTableSize);
-    WriteFile(outputFile, output, strlen(output), &zilch, NULL);
+    sprintf(output, "%s - Done dumping all scache.\r\n", cookie);
+    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
+    sprintf(output, "%s - dumping cm_data.scacheHashTable - cm_data.scacheHashTableSize=%d\r\n", cookie, cm_data.scacheHashTableSize);
+    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
   
-    for (i = 0; i < cm_hashTableSize; i++)
+    /* Pass 2: walk each hash bucket chain; i is the bucket index printed
+     * as "hash=". */
+    for (i = 0; i < cm_data.scacheHashTableSize; i++)
     {
-        for(scp = cm_hashTablep[i]; scp; scp=scp->nextp) 
+        for(scp = cm_data.scacheHashTablep[i]; scp; scp=scp->nextp) 
         {
-            if (scp)
-            {
-                if (scp->refCount)
-                {
-                    sprintf(output, "%s scp=0x%08X, hash=%d, fid (cell=%d, volume=%d, vnode=%d, unique=%d) refCount=%d\n", 
-                            cookie, (void *)scp, i, scp->fid.cell, scp->fid.volume, scp->fid.vnode, 
-                            scp->fid.unique, scp->refCount);
-                    WriteFile(outputFile, output, strlen(output), &zilch, NULL);
-                }
-            }
+            sprintf(output, "%s scp=0x%p, hash=%d, fid (cell=%d, volume=%d, vnode=%d, unique=%d)\r\n", 
+                    cookie, scp, i, scp->fid.cell, scp->fid.volume, scp->fid.vnode, scp->fid.unique);
+            WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
         }
     }
 
-    sprintf(output, "%s - Done dumping scache.\n", cookie);
-    WriteFile(outputFile, output, strlen(output), &zilch, NULL);
+    sprintf(output, "%s - Done dumping cm_data.scacheHashTable\r\n", cookie);
+    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
   
-    lock_ReleaseRead(&cm_scacheLock);       
+    if (lock)
+        lock_ReleaseRead(&cm_scacheLock);       
     return (0);     
 }