Windows: cm_GetSCache do not release unheld lock
[openafs.git] / src / WINNT / afsd / cm_scache.c
index 5719838..dcbe79b 100644 (file)
@@ -23,6 +23,7 @@
 
 #include "afsd.h"
 #include "cm_btree.h"
+#include <afs/unified_afs.h>
 
 /*extern void afsi_log(char *pattern, ...);*/
 
@@ -154,64 +155,11 @@ long cm_RecycleSCache(cm_scache_t *scp, afs_int32 flags)
        return -1;
     }
 
-    cm_RemoveSCacheFromHashTable(scp);
-
-#if 0
-    if (flags & CM_SCACHE_RECYCLEFLAG_DESTROY_BUFFERS) {
-       osi_queueData_t *qdp;
-       cm_buf_t *bufp;
-
-       while(qdp = scp->bufWritesp) {
-            bufp = osi_GetQData(qdp);
-           osi_QRemove((osi_queue_t **) &scp->bufWritesp, &qdp->q);
-           osi_QDFree(qdp);
-           if (bufp) {
-               lock_ObtainMutex(&bufp->mx);
-               _InterlockedAnd(&bufp->cmFlags, ~CM_BUF_CMSTORING);
-               _InterlockedAnd(&bufp->flags, ~CM_BUF_DIRTY);
-                bufp->dirty_offset = 0;
-                bufp->dirty_length = 0;
-               _InterlockedOr(&bufp->flags, CM_BUF_ERROR);
-               bufp->error = VNOVNODE;
-               bufp->dataVersion = CM_BUF_VERSION_BAD; /* bad */
-               bufp->dirtyCounter++;
-               if (bufp->flags & CM_BUF_WAITING) {
-                   osi_Log2(afsd_logp, "CM RecycleSCache Waking [scp 0x%p] bufp 0x%x", scp, bufp);
-                   osi_Wakeup((long) &bufp);
-               }
-               lock_ReleaseMutex(&bufp->mx);
-               buf_Release(bufp);
-           }
-        }
-       while(qdp = scp->bufReadsp) {
-            bufp = osi_GetQData(qdp);
-           osi_QRemove((osi_queue_t **) &scp->bufReadsp, &qdp->q);
-           osi_QDFree(qdp);
-           if (bufp) {
-               lock_ObtainMutex(&bufp->mx);
-               _InterlockedAnd(&bufp->cmFlags, ~CM_BUF_CMFETCHING);
-               _InterlockedAnd(&bufp->flags, ~CM_BUF_DIRTY);
-                bufp->dirty_offset = 0;
-                bufp->dirty_length = 0;
-               _InterlockedOr(&bufp->flags, CM_BUF_ERROR);
-               bufp->error = VNOVNODE;
-               bufp->dataVersion = CM_BUF_VERSION_BAD; /* bad */
-               bufp->dirtyCounter++;
-               if (bufp->flags & CM_BUF_WAITING) {
-                   osi_Log2(afsd_logp, "CM RecycleSCache Waking [scp 0x%p] bufp 0x%x", scp, bufp);
-                   osi_Wakeup((long) &bufp);
-               }
-               lock_ReleaseMutex(&bufp->mx);
-               buf_Release(bufp);
-           }
-        }
-       buf_CleanDirtyBuffers(scp);
-    } else {
-       /* look for things that shouldn't still be set */
-       osi_assertx(scp->bufWritesp == NULL, "non-null cm_scache_t bufWritesp");
-       osi_assertx(scp->bufReadsp == NULL, "non-null cm_scache_t bufReadsp");
+    if (scp->redirBufCount != 0) {
+        return -1;
     }
-#endif
+
+    cm_RemoveSCacheFromHashTable(scp);
 
     /* invalidate so next merge works fine;
      * also initialize some flags */
@@ -281,93 +229,90 @@ long cm_RecycleSCache(cm_scache_t *scp, afs_int32 flags)
  * Can allocate a new one if desperate, or if below quota (cm_data.maxSCaches).
  * returns scp->rw write-locked.
  */
-cm_scache_t *cm_GetNewSCache(void)
+cm_scache_t *
+cm_GetNewSCache(afs_uint32 locked)
 {
-    cm_scache_t *scp;
+    cm_scache_t *scp = NULL;
     int retry = 0;
 
-    lock_AssertWrite(&cm_scacheLock);
-#if 0
-    /* first pass - look for deleted objects */
-    for ( scp = cm_data.scacheLRULastp;
-         scp;
-         scp = (cm_scache_t *) osi_QPrev(&scp->q))
-    {
-       osi_assertx(scp >= cm_data.scacheBaseAddress && scp < (cm_scache_t *)cm_data.scacheHashTablep,
-                    "invalid cm_scache_t address");
-
-       if (scp->refCount == 0) {
-           if (scp->flags & CM_SCACHEFLAG_DELETED) {
-                if (!lock_TryWrite(&scp->rw))
-                    continue;
-
-               osi_Log1(afsd_logp, "GetNewSCache attempting to recycle deleted scp 0x%p", scp);
-               if (!cm_RecycleSCache(scp, CM_SCACHE_RECYCLEFLAG_DESTROY_BUFFERS)) {
-
-                   /* we found an entry, so return it */
-                   /* now remove from the LRU queue and put it back at the
-                    * head of the LRU queue.
-                    */
-                   cm_AdjustScacheLRU(scp);
-
-                   /* and we're done */
-                   return scp;
-               }
-                lock_ReleaseWrite(&scp->rw);
-               osi_Log1(afsd_logp, "GetNewSCache recycled failed scp 0x%p", scp);
-           } else if (!(scp->flags & CM_SCACHEFLAG_INHASH)) {
-                if (!lock_TryWrite(&scp->rw))
-                    continue;
-
-               /* we found an entry, so return it */
-               /* now remove from the LRU queue and put it back at the
-               * head of the LRU queue.
-               */
-               cm_AdjustScacheLRU(scp);
-
-               /* and we're done */
-               return scp;
-           }
-       }
-    }
-    osi_Log0(afsd_logp, "GetNewSCache no deleted or recycled entries available for reuse");
-#endif
+    if (locked)
+        lock_AssertWrite(&cm_scacheLock);
+    else
+        lock_ObtainWrite(&cm_scacheLock);
 
     if (cm_data.currentSCaches >= cm_data.maxSCaches) {
        /* There were no deleted scache objects that we could use.  Try to find
         * one that simply hasn't been used in a while.
         */
-        for ( scp = cm_data.scacheLRULastp;
-              scp;
-              scp = (cm_scache_t *) osi_QPrev(&scp->q))
-        {
-            /* It is possible for the refCount to be zero and for there still
-             * to be outstanding dirty buffers.  If there are dirty buffers,
-             * we must not recycle the scp. */
-            if (scp->refCount == 0 && scp->bufReadsp == NULL && scp->bufWritesp == NULL) {
-                if (!buf_DirtyBuffersExist(&scp->fid)) {
-                    if (!lock_TryWrite(&scp->rw))
-                        continue;
-
-                    if (!cm_RecycleSCache(scp, 0)) {
-                        /* we found an entry, so return it */
-                        /* now remove from the LRU queue and put it back at the
-                         * head of the LRU queue.
-                         */
-                        cm_AdjustScacheLRU(scp);
-
-                        /* and we're done */
-                        return scp;
+        for (retry = 0 ; retry < 2; retry++) {
+            for ( scp = cm_data.scacheLRULastp;
+                  scp;
+                  scp = (cm_scache_t *) osi_QPrev(&scp->q))
+            {
+                /* It is possible for the refCount to be zero and for there still
+                 * to be outstanding dirty buffers.  If there are dirty buffers,
+                 * we must not recycle the scp.
+                 *
+                 * If the object is in use by the redirector, then avoid recycling
+                 * it unless we have to.
+                 */
+                if (scp->refCount == 0 && scp->bufReadsp == NULL && scp->bufWritesp == NULL) {
+                    afs_uint32 buf_dirty = 0;
+                    afs_uint32 buf_rdr = 0;
+
+                    lock_ReleaseWrite(&cm_scacheLock);
+                    buf_dirty = buf_DirtyBuffersExist(&scp->fid);
+                    if (!buf_dirty)
+                        buf_rdr = buf_RDRBuffersExist(&scp->fid);
+                    lock_ObtainWrite(&cm_scacheLock);
+
+                    if (!buf_dirty && !buf_rdr) {
+                        cm_fid_t   fid;
+                        afs_uint32 fileType;
+
+                        if (!lock_TryWrite(&scp->rw))
+                            continue;
+
+                        /* Found a likely candidate.  Save type and fid in case we succeed */
+                        fid = scp->fid;
+                        fileType = scp->fileType;
+
+                        if (!cm_RecycleSCache(scp, 0)) {
+                            /* we found an entry, so return it.
+                             * remove from the LRU queue and put it back at the
+                             * head of the LRU queue.
+                             */
+                            cm_AdjustScacheLRU(scp);
+
+                            if (RDR_Initialized) {
+                                /*
+                                 * We drop the cm_scacheLock because it may be required to
+                                 * satisfy an ioctl request from the redirector.  It should
+                                 * be safe to hold the scp->rw lock here because at this
+                                 * point the object has just been recycled so the fid
+                                 * is null and there are no requests that could possibly
+                                 * be issued by the redirector that would depend upon it.
+                                 */
+                                lock_ReleaseWrite(&cm_scacheLock);
+                                RDR_InvalidateObject( fid.cell, fid.volume, fid.vnode,
+                                                      fid.unique, fid.hash,
+                                                      fileType, AFS_INVALIDATE_EXPIRED);
+                                lock_ObtainWrite(&cm_scacheLock);
+                            }
+
+                            /* and we're done */
+                            osi_assertx(!(scp->flags & CM_SCACHEFLAG_INHASH), "CM_SCACHEFLAG_INHASH set");
+                            goto done;
+                        }
+                        lock_ReleaseWrite(&scp->rw);
+                    } else {
+                        osi_Log1(afsd_logp,"GetNewSCache dirty buffers exist scp 0x%p", scp);
                     }
-                    lock_ReleaseWrite(&scp->rw);
-                } else {
-                    osi_Log1(afsd_logp,"GetNewSCache dirty buffers exist scp 0x%x", scp);
                 }
             }
+            osi_Log1(afsd_logp, "GetNewSCache all scache entries in use (retry = %d)", retry);
         }
-        osi_Log1(afsd_logp, "GetNewSCache all scache entries in use (retry = %d)", retry);
-
-        return NULL;
+        goto done;
     }
 
     /* if we get here, we should allocate a new scache entry.  We either are below
@@ -384,17 +329,21 @@ cm_scache_t *cm_GetNewSCache(void)
 #ifdef USE_BPLUS
     lock_InitializeRWLock(&scp->dirlock, "cm_scache_t dirlock", LOCK_HIERARCHY_SCACHE_DIRLOCK);
 #endif
+    lock_InitializeMutex(&scp->redirMx, "cm_scache_t redirMx", LOCK_HIERARCHY_SCACHE_REDIRMX);
     scp->serverLock = -1;
 
     /* and put it in the LRU queue */
-    osi_QAdd((osi_queue_t **) &cm_data.scacheLRUFirstp, &scp->q);
-    if (!cm_data.scacheLRULastp)
-        cm_data.scacheLRULastp = scp;
+    osi_QAddH((osi_queue_t **) &cm_data.scacheLRUFirstp, (osi_queue_t **)&cm_data.scacheLRULastp, &scp->q);
     cm_data.currentSCaches++;
     cm_dnlcPurgedp(scp); /* make doubly sure that this is not in dnlc */
     cm_dnlcPurgevp(scp);
     scp->allNextp = cm_data.allSCachesp;
     cm_data.allSCachesp = scp;
+
+  done:
+    if (!locked)
+        lock_ReleaseWrite(&cm_scacheLock);
+
     return scp;
 }
 
@@ -404,7 +353,7 @@ void cm_SetFid(cm_fid_t *fidp, afs_uint32 cell, afs_uint32 volume, afs_uint32 vn
     fidp->volume = volume;
     fidp->vnode = vnode;
     fidp->unique = unique;
-    fidp->hash = ((cell & 0xF) << 28) | ((volume & 0x3F) << 22) | ((vnode & 0x7FF) << 11) | (unique & 0x7FF);
+    CM_FID_GEN_HASH(fidp);
 }
 
 /* like strcmp, only for fids */
@@ -442,6 +391,7 @@ void cm_fakeSCacheInit(int newFile)
     lock_InitializeRWLock(&cm_data.fakeSCache.rw, "cm_scache_t rw", LOCK_HIERARCHY_SCACHE);
     lock_InitializeRWLock(&cm_data.fakeSCache.bufCreateLock, "cm_scache_t bufCreateLock", LOCK_HIERARCHY_SCACHE_BUFCREATE);
     lock_InitializeRWLock(&cm_data.fakeSCache.dirlock, "cm_scache_t dirlock", LOCK_HIERARCHY_SCACHE_DIRLOCK);
+    lock_InitializeMutex(&cm_data.fakeSCache.redirMx, "cm_scache_t redirMx", LOCK_HIERARCHY_SCACHE_REDIRMX);
 }
 
 long
@@ -624,6 +574,7 @@ cm_ShutdownSCache(void)
 #endif
         lock_FinalizeRWLock(&scp->rw);
         lock_FinalizeRWLock(&scp->bufCreateLock);
+        lock_FinalizeMutex(&scp->redirMx);
     }
     lock_ReleaseWrite(&cm_scacheLock);
 
@@ -666,12 +617,18 @@ void cm_InitSCache(int newFile, long maxSCaches)
                 scp->openShares = 0;
                 scp->openExcls = 0;
                 scp->waitCount = 0;
+                scp->activeRPCs = 0;
 #ifdef USE_BPLUS
                 scp->dirBplus = NULL;
                 scp->dirDataVersion = CM_SCACHE_VERSION_BAD;
 #endif
                 scp->waitQueueT = NULL;
-                _InterlockedAnd(&scp->flags, ~CM_SCACHEFLAG_WAITING);
+                _InterlockedAnd(&scp->flags, ~(CM_SCACHEFLAG_CALLBACK | CM_SCACHEFLAG_WAITING | CM_SCACHEFLAG_RDR_IN_USE));
+
+                scp->redirBufCount = 0;
+                scp->redirQueueT = NULL;
+                scp->redirQueueH = NULL;
+                lock_InitializeMutex(&scp->redirMx, "cm_scache_t redirMx", LOCK_HIERARCHY_SCACHE_REDIRMX);
             }
         }
         cm_allFileLocks = NULL;
@@ -720,12 +677,14 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
 {
     long hash;
     cm_scache_t *scp = NULL;
+    cm_scache_t *newScp = NULL;
     long code;
     cm_volume_t *volp = NULL;
     cm_cell_t *cellp;
     int special = 0; // yj: boolean variable to test if file is on root.afs
     int isRoot = 0;
     extern cm_fid_t cm_rootFid;
+    afs_int32 refCount;
 
     hash = CM_SCACHE_HASH(fidp);
 
@@ -743,7 +702,7 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
 
     // yj: check if we have the scp, if so, we don't need
     // to do anything else
-    lock_ObtainWrite(&cm_scacheLock);
+    lock_ObtainRead(&cm_scacheLock);
     for (scp=cm_data.scacheHashTablep[hash]; scp; scp=scp->nextp) {
         if (cm_FidCmp(fidp, &scp->fid) == 0) {
 #ifdef DEBUG_REFCOUNT
@@ -757,11 +716,13 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
 #endif
             cm_HoldSCacheNoLock(scp);
             *outScpp = scp;
+            lock_ConvertRToW(&cm_scacheLock);
             cm_AdjustScacheLRU(scp);
             lock_ReleaseWrite(&cm_scacheLock);
             return 0;
         }
     }
+    lock_ReleaseRead(&cm_scacheLock);
 
     // yj: when we get here, it means we don't have an scp
     // so we need to either load it or fake it, depending
@@ -781,7 +742,6 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
     }
 
     if (cm_freelanceEnabled && special) {
-        lock_ReleaseWrite(&cm_scacheLock);
         osi_Log0(afsd_logp,"cm_GetSCache Freelance and special");
 
         if (cm_getLocalMountPointChange()) {
@@ -789,18 +749,14 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
             cm_reInitLocalMountPoints();
         }
 
-        lock_ObtainWrite(&cm_scacheLock);
         if (scp == NULL) {
-            scp = cm_GetNewSCache();    /* returns scp->rw held */
+            scp = cm_GetNewSCache(FALSE);    /* returns scp->rw held */
             if (scp == NULL) {
                 osi_Log0(afsd_logp,"cm_GetSCache unable to obtain *new* scache entry");
-                lock_ReleaseWrite(&cm_scacheLock);
                 return CM_ERROR_WOULDBLOCK;
             }
         } else {
-            lock_ReleaseWrite(&cm_scacheLock);
             lock_ObtainWrite(&scp->rw);
-            lock_ObtainWrite(&cm_scacheLock);
         }
         scp->fid = *fidp;
         scp->dotdotFid.cell=AFS_FAKE_ROOT_CELL_ID;
@@ -808,13 +764,15 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
         scp->dotdotFid.unique=1;
         scp->dotdotFid.vnode=1;
         _InterlockedOr(&scp->flags, (CM_SCACHEFLAG_PURERO | CM_SCACHEFLAG_RO));
+        lock_ObtainWrite(&cm_scacheLock);
         if (!(scp->flags & CM_SCACHEFLAG_INHASH)) {
             scp->nextp = cm_data.scacheHashTablep[hash];
             cm_data.scacheHashTablep[hash] = scp;
             _InterlockedOr(&scp->flags, CM_SCACHEFLAG_INHASH);
         }
-        scp->refCount = 1;
-       osi_Log1(afsd_logp,"cm_GetSCache (freelance) sets refCount to 1 scp 0x%p", scp);
+        refCount = InterlockedIncrement(&scp->refCount);
+       osi_Log2(afsd_logp,"cm_GetSCache (freelance) sets refCount to 1 scp 0x%p refCount %d", scp, refCount);
+        lock_ReleaseWrite(&cm_scacheLock);
 
         /* must be called after the scp->fid is set */
         cm_FreelanceFetchMountPointString(scp);
@@ -834,7 +792,6 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
         scp->lockDataVersion=CM_SCACHE_VERSION_BAD; /* no lock yet */
         scp->fsLockCount=0;
         lock_ReleaseWrite(&scp->rw);
-        lock_ReleaseWrite(&cm_scacheLock);
        *outScpp = scp;
 #ifdef DEBUG_REFCOUNT
        afsi_log("%s:%d cm_GetSCache (2) scp 0x%p ref %d", file, line, scp, scp->refCount);
@@ -845,54 +802,72 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
     // end of yj code
 #endif /* AFS_FREELANCE_CLIENT */
 
+    /* we don't have the fid, recycle something */
+    newScp = cm_GetNewSCache(FALSE);    /* returns scp->rw held */
+    if (newScp == NULL) {
+       osi_Log0(afsd_logp,"cm_GetNewSCache unable to obtain *new* scache entry");
+       return CM_ERROR_WOULDBLOCK;
+    }
+#ifdef DEBUG_REFCOUNT
+    afsi_log("%s:%d cm_GetNewSCache returns scp 0x%p flags 0x%x", file, line, newScp, newScp->flags);
+#endif
+    osi_Log2(afsd_logp,"cm_GetNewSCache returns scp 0x%p flags 0x%x", newScp, newScp->flags);
+
     /* otherwise, we need to find the volume */
     if (!cm_freelanceEnabled || !isRoot) {
-        lock_ReleaseWrite(&cm_scacheLock);     /* for perf. reasons */
         cellp = cm_FindCellByID(fidp->cell, 0);
-        if (!cellp)
+        if (!cellp) {
+            /* put back newScp so it can be reused */
+            lock_ObtainWrite(&cm_scacheLock);
+            newScp->flags |= CM_SCACHEFLAG_DELETED;
+            cm_AdjustScacheLRU(newScp);
+            lock_ReleaseWrite(&newScp->rw);
+            lock_ReleaseWrite(&cm_scacheLock);
             return CM_ERROR_NOSUCHCELL;
+        }
 
         code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp, CM_GETVOL_FLAG_CREATE, &volp);
-        if (code)
+        if (code) {
+            /* put back newScp so it can be reused */
+            lock_ObtainWrite(&cm_scacheLock);
+            newScp->flags |= CM_SCACHEFLAG_DELETED;
+            cm_AdjustScacheLRU(newScp);
+            lock_ReleaseWrite(&newScp->rw);
+            lock_ReleaseWrite(&cm_scacheLock);
             return code;
-        lock_ObtainWrite(&cm_scacheLock);
+        }
     }
 
-    /* otherwise, we have the volume, now reverify that the scp doesn't
-     * exist, and proceed.
+    /*
+     * otherwise, we have the volume, now reverify that the scp doesn't
+     * exist, and proceed.  Make sure that we hold the cm_scacheLock
+     * write-locked until the scp is put into the hash table in order
+     * to avoid a race.
      */
+    lock_ObtainWrite(&cm_scacheLock);
     for (scp=cm_data.scacheHashTablep[hash]; scp; scp=scp->nextp) {
         if (cm_FidCmp(fidp, &scp->fid) == 0) {
 #ifdef DEBUG_REFCOUNT
            afsi_log("%s:%d cm_GetSCache (3) scp 0x%p ref %d", file, line, scp, scp->refCount);
            osi_Log1(afsd_logp,"cm_GetSCache (3) scp 0x%p", scp);
 #endif
+            if (volp)
+                cm_PutVolume(volp);
             cm_HoldSCacheNoLock(scp);
             cm_AdjustScacheLRU(scp);
+
+            /* put back newScp so it can be reused */
+            newScp->flags |= CM_SCACHEFLAG_DELETED;
+            cm_AdjustScacheLRU(newScp);
+            lock_ReleaseWrite(&newScp->rw);
             lock_ReleaseWrite(&cm_scacheLock);
-            if (volp)
-                cm_PutVolume(volp);
+
             *outScpp = scp;
             return 0;
         }
     }
 
-    /* now, if we don't have the fid, recycle something */
-    scp = cm_GetNewSCache();    /* returns scp->rw held */
-    if (scp == NULL) {
-       osi_Log0(afsd_logp,"cm_GetNewSCache unable to obtain *new* scache entry");
-       lock_ReleaseWrite(&cm_scacheLock);
-       if (volp)
-           cm_PutVolume(volp);
-       return CM_ERROR_WOULDBLOCK;
-    }
-#ifdef DEBUG_REFCOUNT
-    afsi_log("%s:%d cm_GetNewSCache returns scp 0x%p flags 0x%x", file, line, scp, scp->flags);
-#endif
-    osi_Log2(afsd_logp,"cm_GetNewSCache returns scp 0x%p flags 0x%x", scp, scp->flags);
-
-    osi_assertx(!(scp->flags & CM_SCACHEFLAG_INHASH), "CM_SCACHEFLAG_INHASH set");
-
+    scp = newScp;
     scp->fid = *fidp;
     if (!cm_freelanceEnabled || !isRoot) {
         /* if this scache entry represents a volume root then we need
@@ -914,15 +889,17 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
     }
     if (volp)
         cm_PutVolume(volp);
+
     scp->nextp = cm_data.scacheHashTablep[hash];
     cm_data.scacheHashTablep[hash] = scp;
     _InterlockedOr(&scp->flags, CM_SCACHEFLAG_INHASH);
+    refCount = InterlockedIncrement(&scp->refCount);
+    lock_ReleaseWrite(&cm_scacheLock);
     lock_ReleaseWrite(&scp->rw);
-    scp->refCount = 1;
 #ifdef DEBUG_REFCOUNT
-    afsi_log("%s:%d cm_GetSCache sets refCount to 1 scp 0x%p", file, line, scp);
+    afsi_log("%s:%d cm_GetSCache sets refCount to 1 scp 0x%p refCount %d", file, line, scp, refCount);
 #endif
-    osi_Log1(afsd_logp,"cm_GetSCache sets refCount to 1 scp 0x%p", scp);
+    osi_Log2(afsd_logp,"cm_GetSCache sets refCount to 1 scp 0x%p refCount %d", scp, refCount);
 
     /* XXX - The following fields in the cm_scache are
      * uninitialized:
@@ -937,7 +914,6 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
     afsi_log("%s:%d cm_GetSCache (4) scp 0x%p ref %d", file, line, scp, scp->refCount);
     osi_Log1(afsd_logp,"cm_GetSCache (4) scp 0x%p", scp);
 #endif
-    lock_ReleaseWrite(&cm_scacheLock);
     return 0;
 }
 
@@ -1085,6 +1061,9 @@ int cm_SyncOpCheckContinue(cm_scache_t * scp, afs_int32 flags, cm_buf_t * bufp)
  * possibly resulting in a bogus truncation.  The simplest way to avoid this
  * is to serialize all StoreData RPC's.  This is the reason we defined
  * CM_SCACHESYNC_STOREDATA_EXCL and CM_SCACHEFLAG_DATASTORING.
+ *
+ * CM_SCACHESYNC_BULKREAD is used to permit synchronization of multiple bulk
+ * readers which may be requesting overlapping ranges.
  */
 long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *reqp,
                afs_uint32 rights, afs_uint32 flags)
@@ -1293,7 +1272,7 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req
             if ((rights & (PRSFS_WRITE|PRSFS_DELETE)) && (scp->flags & CM_SCACHEFLAG_RO))
                 return CM_ERROR_READONLY;
 
-            if (cm_HaveAccessRights(scp, userp, rights, &outRights)) {
+            if (cm_HaveAccessRights(scp, userp, reqp, rights, &outRights)) {
                 if (~outRights & rights)
                    return CM_ERROR_NOACCESS;
             }
@@ -1312,6 +1291,14 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req
             }
         }
 
+        if (flags & CM_SCACHESYNC_BULKREAD) {
+            /* Don't allow concurrent fiddling with lock lists */
+            if (scp->flags & CM_SCACHEFLAG_BULKREADING) {
+                osi_Log1(afsd_logp, "CM SyncOp scp 0x%p is BULKREADING want BULKREAD", scp);
+                goto sleep;
+            }
+        }
+
         /* if we get here, we're happy */
         break;
 
@@ -1380,6 +1367,8 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req
         _InterlockedOr(&scp->flags, CM_SCACHEFLAG_ASYNCSTORING);
     if (flags & CM_SCACHESYNC_LOCK)
         _InterlockedOr(&scp->flags, CM_SCACHEFLAG_LOCKING);
+    if (flags & CM_SCACHESYNC_BULKREAD)
+        _InterlockedOr(&scp->flags, CM_SCACHEFLAG_BULKREADING);
 
     /* now update the buffer pointer */
     if (bufp && (flags & CM_SCACHESYNC_FETCHDATA)) {
@@ -1451,6 +1440,8 @@ void cm_SyncOpDone(cm_scache_t *scp, cm_buf_t *bufp, afs_uint32 flags)
         _InterlockedAnd(&scp->flags, ~CM_SCACHEFLAG_ASYNCSTORING);
     if (flags & CM_SCACHESYNC_LOCK)
         _InterlockedAnd(&scp->flags, ~CM_SCACHEFLAG_LOCKING);
+    if (flags & CM_SCACHESYNC_BULKREAD)
+        _InterlockedAnd(&scp->flags, ~CM_SCACHEFLAG_BULKREADING);
 
     /* now update the buffer pointer */
     if (bufp && (flags & CM_SCACHESYNC_FETCHDATA)) {
@@ -1532,9 +1523,13 @@ void cm_MergeStatus(cm_scache_t *dscp,
     afs_uint64 dataVersion;
     struct cm_volume *volp = NULL;
     struct cm_cell *cellp = NULL;
+    int rdr_invalidate = 0;
+    afs_uint32 activeRPCs;
 
     lock_AssertWrite(&scp->rw);
 
+    activeRPCs = 1 + InterlockedDecrement(&scp->activeRPCs);
+
     // yj: i want to create some fake status for the /afs directory and the
     // entries under that directory
 #ifdef AFS_FREELANCE_CLIENT
@@ -1572,10 +1567,20 @@ void cm_MergeStatus(cm_scache_t *dscp,
 #endif /* AFS_FREELANCE_CLIENT */
 
     if (statusp->errorCode != 0) {
-        _InterlockedOr(&scp->flags, CM_SCACHEFLAG_EACCESS);
-       osi_Log2(afsd_logp, "Merge, Failure scp 0x%p code 0x%x", scp, statusp->errorCode);
+       _InterlockedOr(&scp->flags, CM_SCACHEFLAG_EACCESS);
+        switch (statusp->errorCode) {
+        case EACCES:
+        case UAEACCES:
+        case EPERM:
+        case UAEPERM:
+            _InterlockedOr(&scp->flags, CM_SCACHEFLAG_EACCESS);
+        }
+        osi_Log2(afsd_logp, "Merge, Failure scp 0x%p code 0x%x", scp, statusp->errorCode);
 
-       scp->fileType = 0;      /* unknown */
+        if (scp->fid.vnode & 0x1)
+            scp->fileType = CM_SCACHETYPE_DIRECTORY;
+        else
+            scp->fileType = 0; /* unknown */
 
        scp->serverModTime = 0;
        scp->clientModTime = 0;
@@ -1691,7 +1696,9 @@ void cm_MergeStatus(cm_scache_t *dscp,
     /* and other stuff */
     scp->parentVnode = statusp->ParentVnode;
     scp->parentUnique = statusp->ParentUnique;
-    scp->fsLockCount = statusp->lockCount;
+
+    /* -1 is a write lock; any positive values are read locks */
+    scp->fsLockCount = (afs_int32)statusp->lockCount;
 
     /* and merge in the private acl cache info, if this is more than the public
      * info; merge in the public stuff in any case.
@@ -1704,13 +1711,13 @@ void cm_MergeStatus(cm_scache_t *dscp,
 
     if (scp->dataVersion != 0 &&
         (!(flags & (CM_MERGEFLAG_DIROP|CM_MERGEFLAG_STOREDATA)) && dataVersion != scp->dataVersion ||
-         (flags & (CM_MERGEFLAG_DIROP|CM_MERGEFLAG_STOREDATA)) && dataVersion - scp->dataVersion > 1)) {
+         (flags & (CM_MERGEFLAG_DIROP|CM_MERGEFLAG_STOREDATA)) && dataVersion - scp->dataVersion > activeRPCs)) {
         /*
          * We now know that all of the data buffers that we have associated
          * with this scp are invalid.  Subsequent operations will go faster
          * if the buffers are removed from the hash tables.
          *
-         * We do not remove directory buffers if the dataVersion delta is 1 because
+         * We do not remove directory buffers if the dataVersion delta is 'activeRPCs' because
          * those version numbers will be updated as part of the directory operation.
          *
          * We do not remove storedata buffers because they will still be valid.
@@ -1736,7 +1743,8 @@ void cm_MergeStatus(cm_scache_t *dscp,
             if (cm_FidCmp(&scp->fid, &bp->fid) == 0 &&
                  lock_TryMutex(&bp->mx)) {
                 if (bp->refCount == 0 &&
-                    !(bp->flags & CM_BUF_READING | CM_BUF_WRITING | CM_BUF_DIRTY)) {
+                    !(bp->flags & (CM_BUF_READING | CM_BUF_WRITING | CM_BUF_DIRTY)) &&
+                    !(bp->qFlags & CM_BUF_QREDIR)) {
                     prevBp = bp->fileHashBackp;
                     bp->fileHashBackp = bp->fileHashp = NULL;
                     if (prevBp)
@@ -1748,11 +1756,14 @@ void cm_MergeStatus(cm_scache_t *dscp,
 
                     j = BUF_HASH(&bp->fid, &bp->offset);
                     lbpp = &(cm_data.buf_scacheHashTablepp[j]);
-                    for(tbp = *lbpp; tbp; lbpp = &tbp->hashp, tbp = *lbpp) {
+                    for(tbp = *lbpp; tbp; lbpp = &tbp->hashp, tbp = tbp->hashp) {
                         if (tbp == bp)
                             break;
                     }
 
+                    /* we better find it */
+                    osi_assertx(tbp != NULL, "cm_MergeStatus: buf_scacheHashTablepp table screwup");
+
                     *lbpp = bp->hashp; /* hash out */
                     bp->hashp = NULL;
 
@@ -1770,20 +1781,39 @@ void cm_MergeStatus(cm_scache_t *dscp,
      * does not update a mountpoint or symlink by altering the contents of
      * the file data; but the Unix CM does.
      */
-    if (scp->dataVersion != dataVersion && !(flags & CM_MERGEFLAG_FETCHDATA))
+    if (scp->dataVersion != dataVersion && !(flags & CM_MERGEFLAG_FETCHDATA)) {
         scp->mountPointStringp[0] = '\0';
 
+        osi_Log5(afsd_logp, "cm_MergeStatus data version change scp 0x%p cell %u vol %u vn %u uniq %u",
+                 scp, scp->fid.cell, scp->fid.volume, scp->fid.vnode, scp->fid.unique);
+
+        osi_Log4(afsd_logp, ".... oldDV 0x%x:%x -> newDV 0x%x:%x",
+                 (afs_uint32)((scp->dataVersion >> 32) & 0xFFFFFFFF),
+                 (afs_uint32)(scp->dataVersion & 0xFFFFFFFF),
+                 (afs_uint32)((dataVersion >> 32) & 0xFFFFFFFF),
+                 (afs_uint32)(dataVersion & 0xFFFFFFFF));
+    }
+
     /* We maintain a range of buffer dataVersion values which are considered
      * valid.  This avoids the need to update the dataVersion on each buffer
      * object during an uncontested storeData operation.  As a result this
      * merge status no longer has performance characteristics derived from
      * the size of the file.
      */
-    if (((flags & CM_MERGEFLAG_STOREDATA) && dataVersion - scp->dataVersion > 1) ||
+    if (((flags & CM_MERGEFLAG_STOREDATA) && dataVersion - scp->dataVersion > activeRPCs) ||
          (!(flags & CM_MERGEFLAG_STOREDATA) && scp->dataVersion != dataVersion) ||
          scp->bufDataVersionLow == 0)
         scp->bufDataVersionLow = dataVersion;
 
+    if (RDR_Initialized && scp->dataVersion != CM_SCACHE_VERSION_BAD) {
+        if ( ( !(reqp->flags & CM_REQ_SOURCE_REDIR) || !(flags & (CM_MERGEFLAG_DIROP|CM_MERGEFLAG_STOREDATA))) &&
+             scp->dataVersion != dataVersion && (dataVersion - scp->dataVersion > activeRPCs - 1)) {
+            rdr_invalidate = 1;
+        } else if ( (reqp->flags & CM_REQ_SOURCE_REDIR) && (flags & (CM_MERGEFLAG_DIROP|CM_MERGEFLAG_STOREDATA)) &&
+                    dataVersion - scp->dataVersion > activeRPCs) {
+            rdr_invalidate = 1;
+        }
+    }
     scp->dataVersion = dataVersion;
 
     /*
@@ -1811,10 +1841,23 @@ void cm_MergeStatus(cm_scache_t *dscp,
             lock_ReleaseWrite(&volp->rw);
         }
     }
+
   done:
     if (volp)
         cm_PutVolume(volp);
 
+    /*
+     * The scache rw lock cannot be held across the invalidation.
+     * Doing so can result in deadlocks with other threads processing
+     * requests initiated by the afs redirector.
+     */
+    if (rdr_invalidate) {
+        lock_ReleaseWrite(&scp->rw);
+        RDR_InvalidateObject(scp->fid.cell, scp->fid.volume, scp->fid.vnode,
+                             scp->fid.unique, scp->fid.hash,
+                             scp->fileType, AFS_INVALIDATE_DATA_VERSION);
+        lock_ObtainWrite(&scp->rw);
+    }
 }
 
 /* note that our stat cache info is incorrect, so force us eventually
@@ -1834,7 +1877,7 @@ void cm_DiscardSCache(cm_scache_t *scp)
     }
     scp->cbExpires = 0;
     scp->volumeCreationDate = 0;
-    _InterlockedAnd(&scp->flags, ~(CM_SCACHEFLAG_CALLBACK | CM_SCACHEFLAG_LOCAL));
+    _InterlockedAnd(&scp->flags, ~(CM_SCACHEFLAG_CALLBACK | CM_SCACHEFLAG_LOCAL | CM_SCACHEFLAG_RDR_IN_USE));
     cm_dnlcPurgedp(scp);
     cm_dnlcPurgevp(scp);
     cm_FreeAllACLEnts(scp);
@@ -2064,7 +2107,7 @@ int cm_DumpSCache(FILE *outputFile, char *cookie, int lock)
             WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
 
             for (q = scp->fileLocksH; q; q = osi_QNext(q)) {
-                cm_file_lock_t * lockp = (cm_file_lock_t *)((char *) q - offsetof(cm_file_lock_t, fileq));
+                cm_file_lock_t * lockp = fileq_to_cm_file_lock_t(q);
                 sprintf(output, "  %s lockp=0x%p scp=0x%p, cm_userp=0x%p offset=0x%I64x len=0x%08I64x type=0x%x "
                         "key=0x%I64x flags=0x%x update=0x%I64u\r\n",
                         cookie, lockp, lockp->scp, lockp->userp, lockp->range.offset, lockp->range.length,