From: Andrew Deason
Date: Tue, 15 Feb 2011 18:04:32 +0000 (-0600)
Subject: libafs: Drop xvcache for AllocCBR
X-Git-Tag: openafs-devel-1_7_1~526
X-Git-Url: http://git.openafs.org/?p=openafs.git;a=commitdiff_plain;h=76158df491f47de56d1febe1d1d2d17d316c9a74

libafs: Drop xvcache for AllocCBR

Normally when we AllocCBR, we are holding xvcache write-locked, since it
is called from FlushVCache. Before a309e274632993c5aeec04c6e090f5ac95837a40,
when AllocCBR needed to flush CBRs due to a lack of space, we hit the net,
giving up callbacks on fileservers. This can cause a problem if one of
those fileservers needs to contact us in order to complete that request,
since the callback service thread may be waiting for xvcache, causing a
deadlock (that is eventually broken by network timeouts).

To avoid this, drop xvcache if AllocCBR looks like it does not have
sufficient space. Fix all callers of afs_FlushVCache to handle the case
where we sleep, since with this change afs_FlushVCache can sleep on all
platforms.

This partially reverts a309e274632993c5aeec04c6e090f5ac95837a40, which
contained an alternative method of avoiding the xvcache lock in this
situation. This commit restores much of that code path to how it used to
be, except that it allows dropping xvcache for AllocCBR. This should make
the change to our prior behavior smaller and simpler, and thus safer and
more consistent with existing clients.

This reintroduces the hard limit on how much space we allocate for CBRs,
although the part of a309e274632993c5aeec04c6e090f5ac95837a40 that raised
this limit is retained.

Change-Id: Id4aaa941b3908f59390873e83e23429041c0828f
Reviewed-on: http://gerrit.openafs.org/3958
Reviewed-by: Derrick Brashear
Tested-by: BuildBot
---
diff --git a/src/afs/IRIX/osi_vfsops.c b/src/afs/IRIX/osi_vfsops.c
index d8bfda3..7aa2a31 100644
--- a/src/afs/IRIX/osi_vfsops.c
+++ b/src/afs/IRIX/osi_vfsops.c
@@ -216,11 +216,12 @@ afs_unmount(OSI_VFS_ARG(afsp), flags, cr)
      * EBUSY if any still in use
      */
     ObtainWriteLock(&afs_xvcache, 172);
+ retry:
     for (tq = VLRU.prev; tq != &VLRU; tq = uq) {
         tvc = QTOV(tq);
         uq = QPrev(tq);
         vp = (vnode_t *) tvc;
-        if (error = afs_FlushVCache(tvc, &fv_slept))
+        if (error = afs_FlushVCache(tvc, &fv_slept)) {
             if (vp->v_flag & VROOT) {
                 rootvp = vp;
                 continue;
@@ -228,6 +229,10 @@ afs_unmount(OSI_VFS_ARG(afsp), flags, cr)
             ReleaseWriteLock(&afs_xvcache);
             return error;
         }
+        }
+        if (fv_slept) {
+            goto retry;
+        }
     }
 
     /*
diff --git a/src/afs/LINUX/osi_vfsops.c b/src/afs/LINUX/osi_vfsops.c
index 8abe6b1..04ddafc 100644
--- a/src/afs/LINUX/osi_vfsops.c
+++ b/src/afs/LINUX/osi_vfsops.c
@@ -477,13 +477,17 @@ osi_linux_free_inode_pages(void)
     struct vcache *tvc, *nvc;
     extern struct vcache *afs_vhashT[VCSIZE];
 
+ retry:
     for (i = 0; i < VCSIZE; i++) {
         for (tvc = afs_vhashT[i]; tvc; ) {
             int slept;
             nvc = tvc->hnext;
-            if (afs_FlushVCache(tvc, &slept))   /* slept always 0 for linux? */
+            if (afs_FlushVCache(tvc, &slept))
                 printf("Failed to invalidate all pages on inode 0x%p\n", tvc);
+            if (slept) {
+                goto retry;
+            }
             tvc = nvc;
         }
     }
diff --git a/src/afs/LINUX24/osi_vfsops.c b/src/afs/LINUX24/osi_vfsops.c
index 81aa3a0..702177c 100644
--- a/src/afs/LINUX24/osi_vfsops.c
+++ b/src/afs/LINUX24/osi_vfsops.c
@@ -465,13 +465,17 @@ osi_linux_free_inode_pages(void)
     struct vcache *tvc, *nvc;
     extern struct vcache *afs_vhashT[VCSIZE];
 
+ retry:
     for (i = 0; i < VCSIZE; i++) {
         for (tvc = afs_vhashT[i]; tvc; ) {
             int slept;
             nvc = tvc->hnext;
-            if (afs_FlushVCache(tvc, &slept))   /* slept always 0 for linux? */
+            if (afs_FlushVCache(tvc, &slept))
                 printf("Failed to invalidate all pages on inode 0x%p\n", tvc);
+            if (slept) {
+                goto retry;
+            }
             tvc = nvc;
         }
     }
diff --git a/src/afs/afs.h b/src/afs/afs.h
index 73d5947..ac06998 100644
--- a/src/afs/afs.h
+++ b/src/afs/afs.h
@@ -260,7 +260,6 @@ struct afs_cbr {
     struct afs_cbr *hash_next;
 
     struct AFSFid fid;
-    unsigned int dynalloc:1;
 };
 
 #ifdef AFS_LINUX22_ENV
diff --git a/src/afs/afs_call.c b/src/afs/afs_call.c
index 1db4b89..0ed56ef 100644
--- a/src/afs/afs_call.c
+++ b/src/afs/afs_call.c
@@ -1366,7 +1366,6 @@ afs_shutdown(void)
 
     if (afs_shuttingdown)
         return;
-    afs_FlushVCBs(2);   /* Reasonable effort to free dynamically allocated callback returns */
     afs_shuttingdown = 1;
 
     if (afs_cold_shutdown)
diff --git a/src/afs/afs_vcache.c b/src/afs/afs_vcache.c
index 55128d8..45118d7 100644
--- a/src/afs/afs_vcache.c
+++ b/src/afs/afs_vcache.c
@@ -83,7 +83,7 @@ static int afs_nextVcacheSlot = 0;
 static struct afs_slotlist *afs_freeSlotList = NULL;
 
 /* Forward declarations */
-static afs_int32 afs_QueueVCB(struct vcache *avc);
+static afs_int32 afs_QueueVCB(struct vcache *avc, int *slept);
 
 /*!
  * Generate an index into the hash table for a given Fid.
@@ -206,7 +206,7 @@ afs_FlushVCache(struct vcache *avc, int *slept)
 #endif
     afs_FreeAllAxs(&(avc->Access));
     if (!afs_shuttingdown)
-        afs_QueueVCB(avc);
+        afs_QueueVCB(avc, slept);
     ObtainWriteLock(&afs_xcbhash, 460);
     afs_DequeueCallback(avc);   /* remove it from queued callbacks list */
     avc->f.states &= ~(CStatd | CUnique);
@@ -301,14 +301,10 @@ afs_AllocCBR(void)
     struct afs_cbr *tsp;
     int i;
 
-    if (!afs_cbrSpace) {
-        afs_osi_CancelWait(&AFS_WaitHandler);   /* trigger FlushVCBs asap */
-
+    while (!afs_cbrSpace) {
         if (afs_stats_cmperf.CallBackAlloced >= sizeof(afs_cbrHeads)/sizeof(afs_cbrHeads[0])) {
             /* don't allocate more than 16 * AFS_NCBRS for now */
-            tsp = (struct afs_cbr *)osi_AllocSmallSpace(sizeof(*tsp));
-            tsp->dynalloc = 1;
-            tsp->next = NULL;
+            afs_FlushVCBs(0);
             afs_stats_cmperf.CallBackFlushes++;
         } else {
             /* try allocating */
@@ -316,18 +312,15 @@
             osi_Assert(tsp != NULL);
             for (i = 0; i < AFS_NCBRS - 1; i++) {
                 tsp[i].next = &tsp[i + 1];
-                tsp[i].dynalloc = 0;
             }
             tsp[AFS_NCBRS - 1].next = 0;
-            tsp[AFS_NCBRS - 1].dynalloc = 0;
-            afs_cbrSpace = tsp->next;
+            afs_cbrSpace = tsp;
             afs_cbrHeads[afs_stats_cmperf.CallBackAlloced] = tsp;
             afs_stats_cmperf.CallBackAlloced++;
         }
-    } else {
-        tsp = afs_cbrSpace;
-        afs_cbrSpace = tsp->next;
     }
+    tsp = afs_cbrSpace;
+    afs_cbrSpace = tsp->next;
     return tsp;
 }
@@ -351,12 +344,8 @@ afs_FreeCBR(struct afs_cbr *asp)
     if (asp->hash_next)
         asp->hash_next->hash_pprev = asp->hash_pprev;
 
-    if (asp->dynalloc) {
-        osi_FreeSmallSpace(asp);
-    } else {
-        asp->next = afs_cbrSpace;
-        afs_cbrSpace = asp;
-    }
+    asp->next = afs_cbrSpace;
+    afs_cbrSpace = asp;
     return 0;
 }
@@ -533,17 +522,20 @@ afs_FlushVCBs(afs_int32 lockit)
  * Environment:
  *      Locks the xvcb lock.
  *      Called when the xvcache lock is already held.
+ *      RACE: afs_xvcache may be dropped and reacquired
  *
  * \param avc vcache entry
+ * \param slept Set to 1 if we dropped afs_xvcache
 *
 * \return 1 if queued, 0 otherwise
 */
 static afs_int32
-afs_QueueVCB(struct vcache *avc)
+afs_QueueVCB(struct vcache *avc, int *slept)
 {
     int queued = 0;
     struct server *tsp;
     struct afs_cbr *tcbp;
+    int reacquire = 0;
 
     AFS_STATCNT(afs_QueueVCB);
 
@@ -560,6 +552,15 @@ afs_QueueVCB(struct vcache *avc)
     /* The callback is really just a struct server ptr. */
     tsp = (struct server *)(avc->callback);
 
+    if (!afs_cbrSpace) {
+        /* If we don't have CBR space, AllocCBR may block or hit the net for
+         * clearing up CBRs. Hitting the net may involve a fileserver
+         * needing to contact us, so we must drop xvcache so we don't block
+         * those requests from going through. */
+        reacquire = *slept = 1;
+        ReleaseWriteLock(&afs_xvcache);
+    }
+
     /* we now have a pointer to the server, so we just allocate
      * a queue entry and queue it.
      */
@@ -579,6 +580,11 @@ done:
     /* now release locks and return */
     ReleaseWriteLock(&afs_xvcb);
+
+    if (reacquire) {
+        /* make sure this is after dropping xvcb, for locking order */
+        ObtainWriteLock(&afs_xvcache, 279);
+    }
     return queued;
 }
@@ -3114,13 +3120,18 @@ afs_DisconGiveUpCallbacks(void)
     ObtainWriteLock(&afs_xvcache, 1002); /* XXX - should be a unique number */
 
+ retry:
     /* Somehow, walk the set of vcaches, with each one coming out as tvc */
     for (i = 0; i < VCSIZE; i++) {
         for (tvc = afs_vhashT[i]; tvc; tvc = tvc->hnext) {
-            if (afs_QueueVCB(tvc)) {
+            int slept = 0;
+            if (afs_QueueVCB(tvc, &slept)) {
                 tvc->callback = NULL;
                 nq++;
             }
+            if (slept) {
+                goto retry;
+            }
         }
     }
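
Illustrative sketch (not part of the commit): the standalone program below models
the caller-side pattern that the hunks above add to afs_unmount,
osi_linux_free_inode_pages, and afs_DisconGiveUpCallbacks -- if the flush/queue
routine reports that it slept (dropped the big lock), the walk of the shared hash
chains is restarted. It uses a pthread mutex in place of afs_xvcache and toy types
in place of the real vcache structures; names such as flush_vcache, vhashT, and
pool are hypothetical stand-ins, not OpenAFS APIs.

/* sketch.c -- illustrative only; build with: cc -pthread sketch.c */
#include <pthread.h>
#include <stdio.h>

#define VCSIZE 4                    /* toy stand-in for the real hash size */

struct vcache {                     /* toy stand-in for the real struct vcache */
    struct vcache *hnext;
    int id;
};

static pthread_mutex_t xvcache = PTHREAD_MUTEX_INITIALIZER; /* plays afs_xvcache */
static struct vcache *vhashT[VCSIZE];                        /* plays afs_vhashT */

/*
 * Stand-in for afs_FlushVCache: with this commit it may drop and retake
 * the "xvcache" lock (e.g. when AllocCBR has to hit the net), and it
 * reports that through *slept.
 */
static int
flush_vcache(struct vcache *avc, int *slept)
{
    static int dropped_once;

    *slept = 0;
    if (!dropped_once && avc->id % 3 == 0) {
        dropped_once = 1;
        *slept = 1;
        pthread_mutex_unlock(&xvcache);
        /* ... a network round trip would happen here ... */
        pthread_mutex_lock(&xvcache);
    }
    return 0;
}

int
main(void)
{
    static struct vcache pool[8];
    struct vcache *tvc, *nvc;
    int i;

    /* build a couple of toy hash chains */
    for (i = 0; i < 8; i++) {
        pool[i].id = i;
        pool[i].hnext = vhashT[i % VCSIZE];
        vhashT[i % VCSIZE] = &pool[i];
    }

    pthread_mutex_lock(&xvcache);
 retry:
    for (i = 0; i < VCSIZE; i++) {
        for (tvc = vhashT[i]; tvc; tvc = nvc) {
            int slept;

            nvc = tvc->hnext;
            if (flush_vcache(tvc, &slept))
                printf("failed to flush vcache %d\n", tvc->id);
            if (slept) {
                /* The lock was dropped, so the chains may have changed
                 * under us; restart the walk, as the patched callers do. */
                goto retry;
            }
        }
    }
    pthread_mutex_unlock(&xvcache);
    return 0;
}

The only point of the sketch is the shape of the loop: once the lock protecting
the shared structures has been released, the walk cannot safely continue from the
saved pointer and has to start over, which is exactly what the new fv_slept/slept
checks in the patched callers enforce.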