windows-freelance-deadlock-20080809
[openafs.git] / src / WINNT / afsd / cm_scache.c
index 0a2e3fc..5b7b521 100644 (file)
@@ -45,21 +45,20 @@ extern osi_mutex_t cm_Freelance_Lock;
 /* must be called with cm_scacheLock write-locked! */
 void cm_AdjustScacheLRU(cm_scache_t *scp)
 {
-    if (scp == cm_data.scacheLRULastp)
-        cm_data.scacheLRULastp = (cm_scache_t *) osi_QPrev(&scp->q);
+    lock_AssertWrite(&cm_scacheLock);   /* check the documented precondition: caller holds cm_scacheLock for write */
     osi_QRemoveHT((osi_queue_t **) &cm_data.scacheLRUFirstp, (osi_queue_t **) &cm_data.scacheLRULastp, &scp->q);
-    osi_QAdd((osi_queue_t **) &cm_data.scacheLRUFirstp, &scp->q);
-    if (!cm_data.scacheLRULastp) 
-        cm_data.scacheLRULastp = scp;
+    osi_QAddH((osi_queue_t **) &cm_data.scacheLRUFirstp, (osi_queue_t **) &cm_data.scacheLRULastp, &scp->q);   /* requeue scp; QAddH is handed both the First and Last pointers, replacing the manual scacheLRULastp fix-ups removed above */
 }
 
-/* call with scache write-locked and mutex held */
+/* call with cm_scacheLock write-locked and scp rw held */
 void cm_RemoveSCacheFromHashTable(cm_scache_t *scp)
 {
     cm_scache_t **lscpp;
     cm_scache_t *tscp;
     int i;
        
+    lock_AssertWrite(&cm_scacheLock);
+    lock_AssertWrite(&scp->rw);
     if (scp->flags & CM_SCACHEFLAG_INHASH) {
        /* hash it out first */
        i = CM_SCACHE_HASH(&scp->fid);
@@ -68,6 +67,7 @@ void cm_RemoveSCacheFromHashTable(cm_scache_t *scp)
             lscpp = &tscp->nextp, tscp = tscp->nextp) {
            if (tscp == scp) {
                *lscpp = scp->nextp;
+                scp->nextp = NULL;
                scp->flags &= ~CM_SCACHEFLAG_INHASH;
                break;
            }
@@ -93,7 +93,9 @@ long cm_RecycleSCache(cm_scache_t *scp, afs_int32 flags)
        return -1;
     }
 
+    lock_ObtainWrite(&scp->rw);
     cm_RemoveSCacheFromHashTable(scp);
+    lock_ReleaseWrite(&scp->rw);
 
 #if 0
     if (flags & CM_SCACHE_RECYCLEFLAG_DESTROY_BUFFERS) {
@@ -164,6 +166,7 @@ long cm_RecycleSCache(cm_scache_t *scp, afs_int32 flags)
                     | CM_SCACHEFLAG_EACCESS);
     scp->serverModTime = 0;
     scp->dataVersion = 0;
+    scp->bufDataVersionLow = 0;
     scp->bulkStatProgress = hzero;
     scp->waitCount = 0;
     scp->waitQueueT = NULL;
@@ -178,6 +181,7 @@ long cm_RecycleSCache(cm_scache_t *scp, afs_int32 flags)
     scp->fid.volume = 0;
     scp->fid.unique = 0;
     scp->fid.cell = 0;
+    scp->fid.hash = 0;
 
     /* remove from dnlc */
     cm_dnlcPurgedp(scp);
@@ -187,12 +191,6 @@ long cm_RecycleSCache(cm_scache_t *scp, afs_int32 flags)
      * tried to store this to server but failed */
     scp->mask = 0;
 
-    /* drop held volume ref */
-    if (scp->volp) {
-       cm_PutVolume(scp->volp);
-       scp->volp = NULL;
-    }
-
     /* discard symlink info */
     scp->mountPointStringp[0] = '\0';
     memset(&scp->mountRootFid, 0, sizeof(cm_fid_t));
@@ -236,6 +234,7 @@ cm_scache_t *cm_GetNewSCache(void)
     cm_scache_t *scp;
     int retry = 0;
 
+    lock_AssertWrite(&cm_scacheLock);
 #if 0
     /* first pass - look for deleted objects */
     for ( scp = cm_data.scacheLRULastp;
@@ -316,7 +315,7 @@ cm_scache_t *cm_GetNewSCache(void)
                 "invalid cm_scache_t address");
     memset(scp, 0, sizeof(cm_scache_t));
     scp->magic = CM_SCACHE_MAGIC;
-    lock_InitializeMutex(&scp->mx, "cm_scache_t mutex");
+    lock_InitializeRWLock(&scp->rw, "cm_scache_t rw");
     lock_InitializeRWLock(&scp->bufCreateLock, "cm_scache_t bufCreateLock");
 #ifdef USE_BPLUS
     lock_InitializeRWLock(&scp->dirlock, "cm_scache_t dirlock");
@@ -335,10 +334,21 @@ cm_scache_t *cm_GetNewSCache(void)
     return scp;
 }       
 
+void cm_SetFid(cm_fid_t *fidp, afs_uint32 cell, afs_uint32 volume, afs_uint32 vnode, afs_uint32 unique)
+{   /* Store the four components in *fidp and cache a 32-bit hash derived from them in fidp->hash. */
+    fidp->cell = cell;
+    fidp->volume = volume;
+    fidp->vnode = vnode;
+    fidp->unique = unique;   /* hash below packs: cell low 4 bits -> 31..28, volume low 6 -> 27..22, vnode low 11 -> 21..11, unique low 11 -> 10..0 */
+    fidp->hash = ((cell & 0xF) << 28) | ((volume & 0x3F) << 22) | ((vnode & 0x7FF) << 11) | (unique & 0x7FF);
+}
+
 /* like strcmp, only for fids */
-int cm_FidCmp(cm_fid_t *ap, cm_fid_t *bp)
+__inline int cm_FidCmp(cm_fid_t *ap, cm_fid_t *bp)
 {
-    if (ap->vnode != bp->vnode) 
+    if (ap->hash != bp->hash)
+        return 1;
+    if (ap->vnode != bp->vnode)
         return 1;
     if (ap->volume != bp->volume) 
         return 1;
@@ -361,7 +371,7 @@ void cm_fakeSCacheInit(int newFile)
         cm_data.fakeSCache.linkCount = 1;
         cm_data.fakeSCache.refCount = 1;
     }
-    lock_InitializeMutex(&cm_data.fakeSCache.mx, "cm_scache_t mutex");
+    lock_InitializeRWLock(&cm_data.fakeSCache.rw, "cm_scache_t rw");
 }
 
 long
@@ -395,11 +405,6 @@ cm_ValidateSCache(void)
             fprintf(stderr, "cm_ValidateSCache failure: scp->randomACLp->magic != CM_ACLENT_MAGIC\n");
             return -3;
         }
-        if (scp->volp && scp->volp->magic != CM_VOLUME_MAGIC) {
-            afsi_log("cm_ValidateSCache failure: scp->volp->magic != CM_VOLUME_MAGIC");
-            fprintf(stderr, "cm_ValidateSCache failure: scp->volp->magic != CM_VOLUME_MAGIC\n");
-            return -4;
-        }
         if (i > cm_data.currentSCaches ) {
             afsi_log("cm_ValidateSCache failure: LRU First queue loops");
             fprintf(stderr, "cm_ValidateSCache failure: LUR First queue loops\n");
@@ -429,11 +434,6 @@ cm_ValidateSCache(void)
             fprintf(stderr, "cm_ValidateSCache failure: scp->randomACLp->magic != CM_ACLENT_MAGIC\n");
             return -7;
         }
-        if (scp->volp && scp->volp->magic != CM_VOLUME_MAGIC) {
-            afsi_log("cm_ValidateSCache failure: scp->volp->magic != CM_VOLUME_MAGIC");
-            fprintf(stderr, "cm_ValidateSCache failure: scp->volp->magic != CM_VOLUME_MAGIC\n");
-            return -8;
-        }
         if (i > cm_data.currentSCaches ) {
             afsi_log("cm_ValidateSCache failure: LRU Last queue loops");
             fprintf(stderr, "cm_ValidateSCache failure: LUR Last queue loops\n");
@@ -448,6 +448,8 @@ cm_ValidateSCache(void)
 
     for ( i=0; i < cm_data.scacheHashTableSize; i++ ) {
         for ( scp = cm_data.scacheHashTablep[i]; scp; scp = scp->nextp ) {
+            afs_uint32 hash;
+            hash = CM_SCACHE_HASH(&scp->fid);
             if (scp->magic != CM_SCACHE_MAGIC) {
                 afsi_log("cm_ValidateSCache failure: scp->magic != CM_SCACHE_MAGIC");
                 fprintf(stderr, "cm_ValidateSCache failure: scp->magic != CM_SCACHE_MAGIC\n");
@@ -463,10 +465,10 @@ cm_ValidateSCache(void)
                 fprintf(stderr, "cm_ValidateSCache failure: scp->randomACLp->magic != CM_ACLENT_MAGIC\n");
                 return -11;
             }
-            if (scp->volp && scp->volp->magic != CM_VOLUME_MAGIC) {
-                afsi_log("cm_ValidateSCache failure: scp->volp->magic != CM_VOLUME_MAGIC");
-                fprintf(stderr, "cm_ValidateSCache failure: scp->volp->magic != CM_VOLUME_MAGIC\n");
-                return -12;
+            if (hash != i) {
+                afsi_log("cm_ValidateSCache failure: scp hash != hash index");
+                fprintf(stderr, "cm_ValidateSCache failure: scp hash != hash index\n");
+                return -13;
             }
         }
     }
@@ -497,9 +499,12 @@ cm_SuspendSCache(void)
     lock_ObtainWrite(&cm_scacheLock);
     for ( scp = cm_data.allSCachesp; scp; scp = scp->allNextp ) {
         if (scp->cbServerp) {
-            if (scp->flags & CM_SCACHEFLAG_PURERO && scp->volp) {
-                if (scp->volp->cbExpiresRO == scp->cbExpires) {
-                    scp->volp->cbExpiresRO = now+1;
+            if (scp->flags & CM_SCACHEFLAG_PURERO) {
+                cm_volume_t *volp = cm_GetVolumeByFID(&scp->fid);
+                if (volp) {
+                    if (volp->cbExpiresRO == scp->cbExpires)
+                        volp->cbExpiresRO = now+1;
+                    cm_PutVolume(volp);
                 }
             }
             scp->cbExpires = now+1;
@@ -518,9 +523,9 @@ cm_ShutdownSCache(void)
     for ( scp = cm_data.allSCachesp; scp;
           scp = scp->allNextp ) {
         if (scp->randomACLp) {
-            lock_ObtainMutex(&scp->mx);
+            lock_ObtainWrite(&scp->rw);
             cm_FreeAllACLEnts(scp);
-            lock_ReleaseMutex(&scp->mx);
+            lock_ReleaseWrite(&scp->rw);
         }
 
         if (scp->cbServerp) {
@@ -537,7 +542,7 @@ cm_ShutdownSCache(void)
         scp->dirDataVersion = -1;
         lock_FinalizeRWLock(&scp->dirlock);
 #endif
-        lock_FinalizeMutex(&scp->mx);
+        lock_FinalizeRWLock(&scp->rw);
         lock_FinalizeRWLock(&scp->bufCreateLock);
     }
     lock_ReleaseWrite(&cm_scacheLock);
@@ -564,7 +569,7 @@ void cm_InitSCache(int newFile, long maxSCaches)
 
             for ( scp = cm_data.allSCachesp; scp;
                   scp = scp->allNextp ) {
-                lock_InitializeMutex(&scp->mx, "cm_scache_t mutex");
+                lock_InitializeRWLock(&scp->rw, "cm_scache_t rw");
                 lock_InitializeRWLock(&scp->bufCreateLock, "cm_scache_t bufCreateLock");
 #ifdef USE_BPLUS
                 lock_InitializeRWLock(&scp->dirlock, "cm_scache_t dirlock");
@@ -612,16 +617,17 @@ cm_scache_t *cm_FindSCache(cm_fid_t *fidp)
        return NULL;
     }
 
-    lock_ObtainWrite(&cm_scacheLock);
+    lock_ObtainRead(&cm_scacheLock);
     for (scp=cm_data.scacheHashTablep[hash]; scp; scp=scp->nextp) {
         if (cm_FidCmp(fidp, &scp->fid) == 0) {
             cm_HoldSCacheNoLock(scp);
+            lock_ConvertRToW(&cm_scacheLock);
             cm_AdjustScacheLRU(scp);
             lock_ReleaseWrite(&cm_scacheLock);
             return scp;
         }
     }
-    lock_ReleaseWrite(&cm_scacheLock);
+    lock_ReleaseRead(&cm_scacheLock);
     return NULL;
 }
 
@@ -634,25 +640,26 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
 #endif
 {
     long hash;
-    cm_scache_t *scp;
+    cm_scache_t *scp = NULL;
     long code;
     cm_volume_t *volp = NULL;
     cm_cell_t *cellp;
-    char* mp = NULL;
-    int special; // yj: boolean variable to test if file is on root.afs
-    int isRoot;
+    int special = 0; // yj: boolean variable to test if file is on root.afs
+    int isRoot = 0;
     extern cm_fid_t cm_rootFid;
         
     hash = CM_SCACHE_HASH(fidp);
         
     osi_assertx(fidp->cell != 0, "unassigned cell value");
 
-    if (fidp->cell== cm_data.rootFid.cell && 
-         fidp->volume==cm_data.rootFid.volume &&
-         fidp->vnode==0x0 && fidp->unique==0x0)
-    {
-        osi_Log0(afsd_logp,"cm_GetSCache called with root cell/volume and vnode=0 and unique=0");
-    }
+#ifdef AFS_FREELANCE_CLIENT
+    special = (fidp->cell==AFS_FAKE_ROOT_CELL_ID && 
+               fidp->volume==AFS_FAKE_ROOT_VOL_ID &&
+               !(fidp->vnode==0x1 && fidp->unique==0x1));
+    isRoot = (fidp->cell==AFS_FAKE_ROOT_CELL_ID && 
+              fidp->volume==AFS_FAKE_ROOT_VOL_ID &&
+              fidp->vnode==0x1 && fidp->unique==0x1);
+#endif
 
     // yj: check if we have the scp, if so, we don't need
     // to do anything else
@@ -663,6 +670,11 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
            afsi_log("%s:%d cm_GetSCache (1) outScpp 0x%p ref %d", file, line, scp, scp->refCount);
            osi_Log1(afsd_logp,"cm_GetSCache (1) outScpp 0x%p", scp);
 #endif
+#ifdef AFS_FREELANCE_CLIENT
+            if (cm_freelanceEnabled && special && 
+                cm_data.fakeDirVersion != scp->dataVersion)
+                break;
+#endif
             cm_HoldSCacheNoLock(scp);
             *outScpp = scp;
             cm_AdjustScacheLRU(scp);
@@ -680,12 +692,6 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
     // because we have to fill in the status stuff 'coz we
     // don't want trybulkstat to fill it in for us
 #ifdef AFS_FREELANCE_CLIENT
-    special = (fidp->cell==AFS_FAKE_ROOT_CELL_ID && 
-               fidp->volume==AFS_FAKE_ROOT_VOL_ID &&
-               !(fidp->vnode==0x1 && fidp->unique==0x1));
-    isRoot = (fidp->cell==AFS_FAKE_ROOT_CELL_ID && 
-              fidp->volume==AFS_FAKE_ROOT_VOL_ID &&
-              fidp->vnode==0x1 && fidp->unique==0x1);
     if (cm_freelanceEnabled && isRoot) {
         osi_Log0(afsd_logp,"cm_GetSCache Freelance and isRoot");
         /* freelance: if we are trying to get the root scp for the first
@@ -695,15 +701,33 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
     }
          
     if (cm_freelanceEnabled && special) {
+        char mp[MOUNTPOINTLEN] = "";
+        afs_uint32 fileType;
+
+        lock_ReleaseWrite(&cm_scacheLock);
         osi_Log0(afsd_logp,"cm_GetSCache Freelance and special");
-        if (fidp->vnode > 1 && fidp->vnode <= cm_noLocalMountPoints + 2) {
-           lock_ObtainMutex(&cm_Freelance_Lock);
-            mp =(cm_localMountPoints+fidp->vnode-2)->mountPointStringp;
-            lock_ReleaseMutex(&cm_Freelance_Lock);
+
+        if (cm_getLocalMountPointChange()) {   // check for changes
+            cm_clearLocalMountPointChange();    // clear the changefile
+            cm_reInitLocalMountPoints();       // start reinit
+        }
+
+        lock_ObtainMutex(&cm_Freelance_Lock);
+        if (fidp->vnode >= 2 && fidp->vnode - 2 < cm_noLocalMountPoints) {
+            strncpy(mp,(cm_localMountPoints+fidp->vnode-2)->mountPointStringp, MOUNTPOINTLEN);
+            mp[MOUNTPOINTLEN-1] = '\0';
+            if ( !strnicmp(mp, "msdfs:", strlen("msdfs:")) )
+                fileType = CM_SCACHETYPE_DFSLINK;
+            else
+                fileType = (cm_localMountPoints+fidp->vnode-2)->fileType;
         } else {
-            mp = "";
+            fileType = CM_SCACHETYPE_INVALID;
+
         }
-        scp = cm_GetNewSCache();
+        lock_ReleaseMutex(&cm_Freelance_Lock);
+        lock_ObtainWrite(&cm_scacheLock);
+        if (scp == NULL)
+            scp = cm_GetNewSCache();
        if (scp == NULL) {
            osi_Log0(afsd_logp,"cm_GetSCache unable to obtain *new* scache entry");
             lock_ReleaseWrite(&cm_scacheLock);
@@ -717,34 +741,26 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
         * assume that no one else is using the one this is returned.
         */
        lock_ReleaseWrite(&cm_scacheLock);
-       lock_ObtainMutex(&scp->mx);
+       lock_ObtainWrite(&scp->rw);
        lock_ObtainWrite(&cm_scacheLock);
 #endif
         scp->fid = *fidp;
-        scp->volp = cm_data.rootSCachep->volp;
-       cm_GetVolume(scp->volp);        /* grab an additional reference */
         scp->dotdotFid.cell=AFS_FAKE_ROOT_CELL_ID;
         scp->dotdotFid.volume=AFS_FAKE_ROOT_VOL_ID;
         scp->dotdotFid.unique=1;
         scp->dotdotFid.vnode=1;
         scp->flags |= (CM_SCACHEFLAG_PURERO | CM_SCACHEFLAG_RO);
-        scp->nextp=cm_data.scacheHashTablep[hash];
-        cm_data.scacheHashTablep[hash]=scp;
-        scp->flags |= CM_SCACHEFLAG_INHASH;
+        if (!(scp->flags & CM_SCACHEFLAG_INHASH)) {
+            scp->nextp = cm_data.scacheHashTablep[hash];
+            cm_data.scacheHashTablep[hash] = scp;
+            scp->flags |= CM_SCACHEFLAG_INHASH;
+        }
         scp->refCount = 1;
        osi_Log1(afsd_logp,"cm_GetSCache (freelance) sets refCount to 1 scp 0x%x", scp);
-        if (fidp->vnode > 1 && fidp->vnode <= cm_noLocalMountPoints + 2)
-            scp->fileType = (cm_localMountPoints+fidp->vnode-2)->fileType;
-        else 
-            scp->fileType = CM_SCACHETYPE_INVALID;
-
-        lock_ObtainMutex(&cm_Freelance_Lock);
+        scp->fileType = fileType;
         scp->length.LowPart = (DWORD)strlen(mp)+4;
         scp->length.HighPart = 0;
         strncpy(scp->mountPointStringp,mp,MOUNTPOINTLEN);
-        scp->mountPointStringp[MOUNTPOINTLEN-1] = '\0';
-        lock_ReleaseMutex(&cm_Freelance_Lock);
-
         scp->owner=0x0;
         scp->unixModeBits=0777;
         scp->clientModTime=FakeFreelanceModTime;
@@ -753,9 +769,10 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
         scp->parentVnode=0x1;
         scp->group=0;
         scp->dataVersion=cm_data.fakeDirVersion;
+        scp->bufDataVersionLow=cm_data.fakeDirVersion;
         scp->lockDataVersion=-1; /* no lock yet */
 #if not_too_dangerous
-       lock_ReleaseMutex(&scp->mx);
+       lock_ReleaseWrite(&scp->rw);
 #endif
        *outScpp = scp;
         lock_ReleaseWrite(&cm_scacheLock);
@@ -771,11 +788,11 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
     /* otherwise, we need to find the volume */
     if (!cm_freelanceEnabled || !isRoot) {
         lock_ReleaseWrite(&cm_scacheLock);     /* for perf. reasons */
-        cellp = cm_FindCellByID(fidp->cell);
+        cellp = cm_FindCellByID(fidp->cell, 0);
         if (!cellp) 
             return CM_ERROR_NOSUCHCELL;
 
-        code = cm_GetVolumeByID(cellp, fidp->volume, userp, reqp, CM_GETVOL_FLAG_CREATE, &volp);
+        code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp, CM_GETVOL_FLAG_CREATE, &volp);
         if (code) 
             return code;
         lock_ObtainWrite(&cm_scacheLock);
@@ -791,7 +808,6 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
            osi_Log1(afsd_logp,"cm_GetSCache (3) outScpp 0x%p", scp);
 #endif
             cm_HoldSCacheNoLock(scp);
-            osi_assertx(scp->volp == volp, "cm_scache_t volume has unexpected value");
             cm_AdjustScacheLRU(scp);
             lock_ReleaseWrite(&cm_scacheLock);
             if (volp)
@@ -821,33 +837,37 @@ long cm_GetSCache(cm_fid_t *fidp, cm_scache_t **outScpp, cm_user_t *userp,
      * assume that no one else is using the one this is returned.
      */
     lock_ReleaseWrite(&cm_scacheLock);
-    lock_ObtainMutex(&scp->mx);
+    lock_ObtainWrite(&scp->rw);
     lock_ObtainWrite(&cm_scacheLock);
 #endif
     scp->fid = *fidp;
-    scp->volp = volp;  /* a held reference */
-
     if (!cm_freelanceEnabled || !isRoot) {
         /* if this scache entry represents a volume root then we need 
          * to copy the dotdotFipd from the volume structure where the 
          * "master" copy is stored (defect 11489)
          */
-        if (scp->fid.vnode == 1 && scp->fid.unique == 1) {
-           scp->dotdotFid = volp->dotdotFid;
-        }
-         
-        if (volp->ro.ID == fidp->volume)
+        if (volp->vol[ROVOL].ID == fidp->volume) {
            scp->flags |= (CM_SCACHEFLAG_PURERO | CM_SCACHEFLAG_RO);
-        else if (volp->bk.ID == fidp->volume)
+            if (scp->fid.vnode == 1 && scp->fid.unique == 1)
+                scp->dotdotFid = cm_VolumeStateByType(volp, ROVOL)->dotdotFid;
+        } else if (volp->vol[BACKVOL].ID == fidp->volume) {
            scp->flags |= CM_SCACHEFLAG_RO;
+            if (scp->fid.vnode == 1 && scp->fid.unique == 1)
+                scp->dotdotFid = cm_VolumeStateByType(volp, BACKVOL)->dotdotFid;
+        } else {
+            if (scp->fid.vnode == 1 && scp->fid.unique == 1)
+                scp->dotdotFid = cm_VolumeStateByType(volp, RWVOL)->dotdotFid;
+        }
     }
+    if (volp)
+        cm_PutVolume(volp);
     scp->nextp = cm_data.scacheHashTablep[hash];
     cm_data.scacheHashTablep[hash] = scp;
     scp->flags |= CM_SCACHEFLAG_INHASH;
     scp->refCount = 1;
     osi_Log1(afsd_logp,"cm_GetSCache sets refCount to 1 scp 0x%x", scp);
 #if not_too_dangerous
-    lock_ReleaseMutex(&scp->mx);
+    lock_ReleaseWrite(&scp->rw);
 #endif
 
     /* XXX - The following fields in the cm_scache are 
@@ -876,10 +896,8 @@ cm_scache_t * cm_FindSCacheParent(cm_scache_t * scp)
     cm_fid_t    parent_fid;
     cm_scache_t * pscp = NULL;
 
-    lock_ObtainRead(&cm_scacheLock);
-    parent_fid = scp->fid;
-    parent_fid.vnode = scp->parentVnode;
-    parent_fid.unique = scp->parentUnique;
+    lock_ObtainWrite(&cm_scacheLock);
+    cm_SetFid(&parent_fid, scp->fid.cell, scp->fid.volume, scp->parentVnode, scp->parentUnique);
 
     if (cm_FidCmp(&scp->fid, &parent_fid)) {
        i = CM_SCACHE_HASH(&parent_fid);
@@ -891,7 +909,7 @@ cm_scache_t * cm_FindSCacheParent(cm_scache_t * scp)
        }
     }
 
-    lock_ReleaseRead(&cm_scacheLock);
+    lock_ReleaseWrite(&cm_scacheLock);
 
     return pscp;
 }
@@ -1026,6 +1044,9 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req
     afs_uint32 sleep_buf_cmflags = 0;
     afs_uint32 sleep_scp_bufs = 0;
     int wakeupCycle;
+    int getAccessRights = 1;
+
+    lock_AssertWrite(&scp->rw);
 
     /* lookup this first */
     bufLocked = flags & CM_SCACHESYNC_BUFLOCKED;
@@ -1210,9 +1231,9 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req
                    lock_ReleaseMutex(&bufp->mx);
                 code = cm_GetCallback(scp, userp, reqp, (flags & CM_SCACHESYNC_FORCECB)?1:0);
                 if (bufLocked) {
-                    lock_ReleaseMutex(&scp->mx);
+                    lock_ReleaseWrite(&scp->rw);
                     lock_ObtainMutex(&bufp->mx);
-                    lock_ObtainMutex(&scp->mx);
+                    lock_ObtainWrite(&scp->rw);
                 }
                 if (code) 
                     return code;
@@ -1225,10 +1246,10 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req
             /* can't check access rights without a callback */
             osi_assertx(flags & CM_SCACHESYNC_NEEDCALLBACK, "!CM_SCACHESYNC_NEEDCALLBACK");
 
-            if ((rights & PRSFS_WRITE) && (scp->flags & CM_SCACHEFLAG_RO))
+            if ((rights & (PRSFS_WRITE|PRSFS_DELETE)) && (scp->flags & CM_SCACHEFLAG_RO))
                 return CM_ERROR_READONLY;
 
-            if (cm_HaveAccessRights(scp, userp, rights, &outRights)) {
+            if (cm_HaveAccessRights(scp, userp, rights, &outRights) || !getAccessRights) {
                 if (~outRights & rights) 
                    return CM_ERROR_NOACCESS;
             }
@@ -1237,12 +1258,13 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req
                 if (bufLocked) lock_ReleaseMutex(&bufp->mx);
                 code = cm_GetAccessRights(scp, userp, reqp);
                 if (bufLocked) {
-                    lock_ReleaseMutex(&scp->mx);
+                    lock_ReleaseWrite(&scp->rw);
                     lock_ObtainMutex(&bufp->mx);
-                    lock_ObtainMutex(&scp->mx);
+                    lock_ObtainWrite(&scp->rw);
                 }
                 if (code) 
                     return code;
+                getAccessRights = 0;    /* do not repeat */
                 continue;
             }
         }
@@ -1275,22 +1297,19 @@ long cm_SyncOp(cm_scache_t *scp, cm_buf_t *bufp, cm_user_t *userp, cm_req_t *req
             scp->waitCount = scp->waitRequests = 1;
         }
 
-        if (bufLocked) 
-            lock_ReleaseMutex(&bufp->mx);
-
         cm_SyncOpAddToWaitQueue(scp, flags, bufp);
         wakeupCycle = 0;
         do {
-            if (wakeupCycle++ != 0)
-                lock_ObtainMutex(&scp->mx);
-            osi_SleepM((LONG_PTR) &scp->flags, &scp->mx);
+            if (bufLocked) 
+                lock_ReleaseMutex(&bufp->mx);
+            osi_SleepW((LONG_PTR) &scp->flags, &scp->rw);
+            if (bufLocked) 
+                lock_ObtainMutex(&bufp->mx);
+            lock_ObtainWrite(&scp->rw);
         } while (!cm_SyncOpCheckContinue(scp, flags, bufp));
 
        smb_UpdateServerPriority();
 
-        if (bufLocked) 
-            lock_ObtainMutex(&bufp->mx);
-        lock_ObtainMutex(&scp->mx);
         scp->waitCount--;
         osi_Log3(afsd_logp, "CM SyncOp woke! scp 0x%p; still waiting %d threads of %d requests", 
                  scp, scp->waitCount, scp->waitRequests);
@@ -1374,7 +1393,7 @@ void cm_SyncOpDone(cm_scache_t *scp, cm_buf_t *bufp, afs_uint32 flags)
     osi_queueData_t *qdp;
     cm_buf_t *tbufp;
 
-    lock_AssertMutex(&scp->mx);
+    lock_AssertWrite(&scp->rw);
 
     /* now, update the recorded state for RPC-type calls */
     if (flags & CM_SCACHESYNC_FETCHSTATUS)
@@ -1502,7 +1521,7 @@ void cm_MergeStatus(cm_scache_t *dscp,
         statusp->Group = 0;
         statusp->SyncCounter = 0;
         statusp->dataVersionHigh = (afs_uint32)(cm_data.fakeDirVersion >> 32);
-       statusp->errorCode = 0;
+        statusp->errorCode = 0;
     }
 #endif /* AFS_FREELANCE_CLIENT */
 
@@ -1524,6 +1543,7 @@ void cm_MergeStatus(cm_scache_t *dscp,
        scp->unixModeBits = 0;
        scp->anyAccess = 0;
        scp->dataVersion = 0;
+        scp->bufDataVersionLow = 0;
 
        if (dscp) {
             scp->parentVnode = dscp->fid.vnode;
@@ -1544,11 +1564,11 @@ void cm_MergeStatus(cm_scache_t *dscp,
     if (!(flags & CM_MERGEFLAG_FORCE) && dataVersion < scp->dataVersion) {
         struct cm_cell *cellp;
 
-        cellp = cm_FindCellByID(scp->fid.cell);
+        cellp = cm_FindCellByID(scp->fid.cell, 0);
         if (scp->cbServerp) {
             struct cm_volume *volp = NULL;
 
-            cm_GetVolumeByID(cellp, scp->fid.volume, userp,
+            cm_FindVolumeByID(cellp, scp->fid.volume, userp,
                               (cm_req_t *) NULL, CM_GETVOL_FLAG_CREATE, &volp);
             osi_Log2(afsd_logp, "old data from server %x volume %s",
                       scp->cbServerp->addr.sin_addr.s_addr,
@@ -1556,7 +1576,7 @@ void cm_MergeStatus(cm_scache_t *dscp,
             if (volp)
                 cm_PutVolume(volp);
         }
-        osi_Log3(afsd_logp, "Bad merge, scp %x, scp dv %I64d, RPC dv %I64d",
+        osi_Log3(afsd_logp, "Bad merge, scp %x, scp dv %d, RPC dv %d",
                   scp, scp->dataVersion, dataVersion);
         /* we have a number of data fetch/store operations running
          * concurrently, and we can tell which one executed last at the
@@ -1634,11 +1654,9 @@ void cm_MergeStatus(cm_scache_t *dscp,
         cm_AddACLCache(scp, userp, statusp->CallerAccess);
     }
 
-    if ((flags & CM_MERGEFLAG_STOREDATA) && dataVersion - scp->dataVersion == 1) {
-        buf_ForceDataVersion(scp, scp->dataVersion, dataVersion);
-    } else if (scp->dataVersion != 0 && 
-        (!(flags & CM_MERGEFLAG_DIROP) && dataVersion != scp->dataVersion ||
-         (flags & CM_MERGEFLAG_DIROP) && dataVersion - scp->dataVersion > 1)) {
+    if (scp->dataVersion != 0 &&
+        (!(flags & (CM_MERGEFLAG_DIROP|CM_MERGEFLAG_STOREDATA)) && dataVersion != scp->dataVersion ||
+         (flags & (CM_MERGEFLAG_DIROP|CM_MERGEFLAG_STOREDATA)) && dataVersion - scp->dataVersion > 1)) {
         /* 
          * We now know that all of the data buffers that we have associated
          * with this scp are invalid.  Subsequent operations will go faster
@@ -1646,6 +1664,8 @@ void cm_MergeStatus(cm_scache_t *dscp,
          *
          * We do not remove directory buffers if the dataVersion delta is 1 because
          * those version numbers will be updated as part of the directory operation.
+         *
+         * We do not remove storedata buffers because they will still be valid.
          */
         int i, j;
         cm_buf_t **lbpp;
@@ -1657,34 +1677,64 @@ void cm_MergeStatus(cm_scache_t *dscp,
                for (bp = cm_data.buf_fileHashTablepp[i]; bp; bp=nextBp)
        {
             nextBp = bp->fileHashp;
+            /* 
+             * If the buffer belongs to this stat cache entry and its
+             * mutex can be obtained, hash it out only when it has no
+             * references and none of READING/WRITING/DIRTY is set.
+             * The flag mask must be parenthesized: '&' binds tighter
+             * than '|'.  A referenced or busy buffer might be updated
+             * to the current version, so leave it in place.
+             */
+            if (cm_FidCmp(&scp->fid, &bp->fid) == 0 &&
+                 lock_TryMutex(&bp->mx)) {
+                if (bp->refCount == 0 && 
+                    !(bp->flags & (CM_BUF_READING | CM_BUF_WRITING | CM_BUF_DIRTY))) {
+                    prevBp = bp->fileHashBackp;
+                    bp->fileHashBackp = bp->fileHashp = NULL;
+                    if (prevBp)
+                        prevBp->fileHashp = nextBp;
+                    else
+                        cm_data.buf_fileHashTablepp[i] = nextBp;
+                    if (nextBp)
+                        nextBp->fileHashBackp = prevBp;
+
+                    j = BUF_HASH(&bp->fid, &bp->offset);
+                    lbpp = &(cm_data.buf_scacheHashTablepp[j]);
+                    for(tbp = *lbpp; tbp; lbpp = &tbp->hashp, tbp = *lbpp) {
+                        if (tbp == bp) 
+                            break;
+                    }
 
-            if (cm_FidCmp(&scp->fid, &bp->fid) == 0) {
-                prevBp = bp->fileHashBackp;
-                bp->fileHashBackp = bp->fileHashp = NULL;
-                if (prevBp)
-                    prevBp->fileHashp = nextBp;
-                else
-                    cm_data.buf_fileHashTablepp[i] = nextBp;
-                if (nextBp)
-                    nextBp->fileHashBackp = prevBp;
-
-                j = BUF_HASH(&bp->fid, &bp->offset);
-                lbpp = &(cm_data.buf_scacheHashTablepp[j]);
-                for(tbp = *lbpp; tbp; lbpp = &tbp->hashp, tbp = *lbpp) {
-                    if (tbp == bp) 
-                        break;
-                }
-
-                *lbpp = bp->hashp;     /* hash out */
-                bp->hashp = NULL;
+                    *lbpp = bp->hashp; /* hash out */
+                    bp->hashp = NULL;
 
-                bp->flags &= ~CM_BUF_INHASH;
+                    bp->flags &= ~CM_BUF_INHASH;
+                }
+                lock_ReleaseMutex(&bp->mx);
             }
        }
-
         lock_ReleaseWrite(&buf_globalLock);
     }
+
+    /* We maintain a range of buffer dataVersion values which are considered 
+     * valid.  This avoids the need to update the dataVersion on each buffer
+     * object during an uncontested storeData operation.  As a result this 
+     * merge status no longer has performance characteristics derived from
+     * the size of the file.
+     */
+    if (((flags & CM_MERGEFLAG_STOREDATA) && dataVersion - scp->dataVersion > 1) || 
+         (!(flags & CM_MERGEFLAG_STOREDATA) && scp->dataVersion != dataVersion) ||
+         scp->bufDataVersionLow == 0)
+        scp->bufDataVersionLow = dataVersion;
+    
     scp->dataVersion = dataVersion;
+
+    /* 
+     * If someone is waiting for status information, we can wake them up
+     * now even though the entity that issued the FetchStatus may not 
+     * have completed yet.
+     */
+    cm_SyncOpDone(scp, NULL, CM_SCACHESYNC_FETCHSTATUS);
 }
 
 /* note that our stat cache info is incorrect, so force us eventually
@@ -1697,7 +1747,7 @@ void cm_MergeStatus(cm_scache_t *dscp,
  */
 void cm_DiscardSCache(cm_scache_t *scp)
 {
-    lock_AssertMutex(&scp->mx);
+    lock_AssertWrite(&scp->rw);
     if (scp->cbServerp) {
         cm_PutServer(scp->cbServerp);
        scp->cbServerp = NULL;
@@ -1708,6 +1758,9 @@ void cm_DiscardSCache(cm_scache_t *scp)
     cm_dnlcPurgevp(scp);
     cm_FreeAllACLEnts(scp);
 
+    if (scp->fileType == CM_SCACHETYPE_DFSLINK)
+        cm_VolStatus_Invalidate_DFS_Mapping(scp);
+
     /* Force mount points and symlinks to be re-evaluated */
     scp->mountPointStringp[0] = '\0';
 }
@@ -1724,12 +1777,15 @@ void cm_HoldSCacheNoLockDbg(cm_scache_t *scp, char * file, long line)
 #else
 void cm_HoldSCacheNoLock(cm_scache_t *scp)
 #endif
-{
+{     
+    afs_int32 refCount;
+
     osi_assertx(scp != NULL, "null cm_scache_t");
-    scp->refCount++;
+    lock_AssertAny(&cm_scacheLock);
+    refCount = InterlockedIncrement(&scp->refCount);
 #ifdef DEBUG_REFCOUNT
-    osi_Log2(afsd_logp,"cm_HoldSCacheNoLock scp 0x%p ref %d",scp, scp->refCount);
-    afsi_log("%s:%d cm_HoldSCacheNoLock scp 0x%p, ref %d", file, line, scp, scp->refCount);
+    osi_Log2(afsd_logp,"cm_HoldSCacheNoLock scp 0x%p ref %d",scp, refCount);
+    afsi_log("%s:%d cm_HoldSCacheNoLock scp 0x%p, ref %d", file, line, scp, refCount);
 #endif
 }
 
@@ -1739,14 +1795,16 @@ void cm_HoldSCacheDbg(cm_scache_t *scp, char * file, long line)
 void cm_HoldSCache(cm_scache_t *scp)
 #endif
 {
+    afs_int32 refCount;
+
     osi_assertx(scp != NULL, "null cm_scache_t");
-    lock_ObtainWrite(&cm_scacheLock);
-    scp->refCount++;
+    lock_ObtainRead(&cm_scacheLock);
+    refCount = InterlockedIncrement(&scp->refCount);
 #ifdef DEBUG_REFCOUNT
-    osi_Log2(afsd_logp,"cm_HoldSCache scp 0x%p ref %d",scp, scp->refCount);
-    afsi_log("%s:%d cm_HoldSCache scp 0x%p ref %d", file, line, scp, scp->refCount);
+    osi_Log2(afsd_logp,"cm_HoldSCache scp 0x%p ref %d",scp, refCount);
+    afsi_log("%s:%d cm_HoldSCache scp 0x%p ref %d", file, line, scp, refCount);
 #endif
-    lock_ReleaseWrite(&cm_scacheLock);
+    lock_ReleaseRead(&cm_scacheLock);
 }
 
 #ifdef DEBUG_REFCOUNT
@@ -1755,14 +1813,37 @@ void cm_ReleaseSCacheNoLockDbg(cm_scache_t *scp, char * file, long line)
 void cm_ReleaseSCacheNoLock(cm_scache_t *scp)
 #endif
 {
+    afs_int32 refCount;
+    long      lockstate;
+
     osi_assertx(scp != NULL, "null cm_scache_t");
-    if (scp->refCount == 0)
+    lock_AssertAny(&cm_scacheLock);
+
+    lockstate = lock_GetRWLockState(&cm_scacheLock);
+    refCount = InterlockedDecrement(&scp->refCount);
+#ifdef DEBUG_REFCOUNT
+    if (refCount < 0)
        osi_Log1(afsd_logp,"cm_ReleaseSCacheNoLock about to panic scp 0x%x",scp);
-    osi_assertx(scp->refCount-- >= 0, "cm_scache_t refCount 0");
+#endif
+    osi_assertx(refCount >= 0, "cm_scache_t refCount 0");
 #ifdef DEBUG_REFCOUNT
-    osi_Log2(afsd_logp,"cm_ReleaseSCacheNoLock scp 0x%p ref %d",scp,scp->refCount);
-    afsi_log("%s:%d cm_ReleaseSCacheNoLock scp 0x%p ref %d", file, line, scp, scp->refCount);
+    osi_Log2(afsd_logp,"cm_ReleaseSCacheNoLock scp 0x%p ref %d",scp, refCount);
+    afsi_log("%s:%d cm_ReleaseSCacheNoLock scp 0x%p ref %d", file, line, scp, refCount);
 #endif
+
+    if (refCount == 0 && (scp->flags & CM_SCACHEFLAG_DELETED)) {
+        int deleted = 0;
+        if (lockstate != OSI_RWLOCK_WRITEHELD) 
+            lock_ConvertRToW(&cm_scacheLock);
+        lock_ObtainWrite(&scp->rw);
+        if (scp->flags & CM_SCACHEFLAG_DELETED)
+            deleted = 1;
+        lock_ReleaseWrite(&scp->rw);
+        if (refCount == 0 && deleted)
+            cm_RecycleSCache(scp, 0);
+        if (lockstate != OSI_RWLOCK_WRITEHELD) 
+            lock_ConvertWToR(&cm_scacheLock);
+    }
 }
 
 #ifdef DEBUG_REFCOUNT
@@ -1770,18 +1851,36 @@ void cm_ReleaseSCacheDbg(cm_scache_t *scp, char * file, long line)
 #else
 void cm_ReleaseSCache(cm_scache_t *scp)
 #endif
-{
+{     
+    afs_int32 refCount;
+
     osi_assertx(scp != NULL, "null cm_scache_t");
-    lock_ObtainWrite(&cm_scacheLock);
-    if (scp->refCount == 0)
+    lock_ObtainRead(&cm_scacheLock);
+    refCount = InterlockedDecrement(&scp->refCount);
+#ifdef DEBUG_REFCOUNT
+    if (refCount < 0)
        osi_Log1(afsd_logp,"cm_ReleaseSCache about to panic scp 0x%x",scp);
-    osi_assertx(scp->refCount != 0, "cm_scache_t refCount 0");
-    scp->refCount--;
+#endif
+    osi_assertx(refCount >= 0, "cm_scache_t refCount 0");
 #ifdef DEBUG_REFCOUNT
-    osi_Log2(afsd_logp,"cm_ReleaseSCache scp 0x%p ref %d",scp,scp->refCount);
-    afsi_log("%s:%d cm_ReleaseSCache scp 0x%p ref %d", file, line, scp, scp->refCount);
+    osi_Log2(afsd_logp,"cm_ReleaseSCache scp 0x%p ref %d",scp, refCount);
+    afsi_log("%s:%d cm_ReleaseSCache scp 0x%p ref %d", file, line, scp, refCount);
 #endif
-    lock_ReleaseWrite(&cm_scacheLock);
+    /* Recycle a deleted scp only on the last release, as cm_ReleaseSCacheNoLock does. */
+    if (refCount == 0 && (scp->flags & CM_SCACHEFLAG_DELETED)) {
+        int deleted = 0;
+        lock_ObtainWrite(&scp->rw);
+        if (scp->flags & CM_SCACHEFLAG_DELETED)
+            deleted = 1;
+        lock_ReleaseWrite(&scp->rw);
+        if (deleted) {
+            lock_ConvertRToW(&cm_scacheLock);
+            cm_RecycleSCache(scp, 0);
+            lock_ConvertWToR(&cm_scacheLock);
+        }
+    }
+
+    lock_ReleaseRead(&cm_scacheLock);
 }
 
 /* just look for the scp entry to get filetype */
@@ -1825,9 +1924,9 @@ int cm_DumpSCache(FILE *outputFile, char *cookie, int lock)
   
     for (scp = cm_data.allSCachesp; scp; scp = scp->allNextp) 
     {
-        sprintf(output, "%s scp=0x%p, fid (cell=%d, volume=%d, vnode=%d, unique=%d) volp=0x%p type=%d dv=%I64d len=0x%I64x mp='%s' flags=0x%x cb=0x%x refCount=%u\r\n", 
+        sprintf(output, "%s scp=0x%p, fid (cell=%d, volume=%d, vnode=%d, unique=%d) type=%d dv=%I64d len=0x%I64x mp='%s' flags=0x%x cb=0x%x refCount=%u\r\n", 
                 cookie, scp, scp->fid.cell, scp->fid.volume, scp->fid.vnode, scp->fid.unique, 
-                scp->volp, scp->fileType, scp->dataVersion, scp->length.QuadPart, scp->mountPointStringp, scp->flags,
+                scp->fileType, scp->dataVersion, scp->length.QuadPart, scp->mountPointStringp, scp->flags,
                 (unsigned long)scp->cbExpires, scp->refCount);
         WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
     }