From c904d9a3889b7dd8c8795feaa9e5e4979f681f53 Mon Sep 17 00:00:00 2001
From: Jeffrey Altman <jaltman@your-file-system.com>
Date: Tue, 15 Nov 2011 18:40:21 -0500
Subject: [PATCH] Windows: Fairness for background operations

The background daemon worker pool is responsible for processing
background Store and Fetch operations.  With the SMB interface
primary store and fetch operations are performed in the SMB worker
thread which makes sense since those operations must be synchronous
to the incoming request.

With the AFS redirector interface almost all of the work is performed
by the background daemon worker pool.  It is therefore critical that
the workers not get stuck in a state that starves applications.
For example, copy of a file that is larger than the cache to \\AFS
will result in a background store request for each chunk size of
the file.  If each worker thread grabs one to process, only one will
make progress and the rest will block.  If a cleanup operation
(aka handle close) occurs the entire file will be flushed to the
server synchronously in the redirector worker thread.  That thread
will cause of the background daemon threads to block.

Any subsequent fetch data requests that get queued behind the list
of stores will in turn block until they clear.  This behavior is not
fair.

This patchset adds a new test to the cm_BkgDaemon() request
selection loop, cm_RequestWillBlock().  If a request will block it
is skipped.  If there are no requests to process that would not have
blocked, the worker will sleep for 25ms instead of the usual 1s.

For BkgStore operations, the CM_SCACHEFLAG_DATASTORING flag is
used to indicating a blocking state.

For BkgFetch and PreFetch operations, the CM_BUF_WRITING and
CM_BUF_READING flags on the first cm_buf_t of the range is used
to indicate a blocking state.

Change-Id: I95d9d1f14dbe0c7d717e6a7253ccfb10a9fac851
Reviewed-on: http://gerrit.openafs.org/6056
Tested-by: BuildBot <buildbot@rampaginggeek.com>
Tested-by: Jeffrey Altman <jaltman@secure-endpoints.com>
Reviewed-by: Jeffrey Altman <jaltman@secure-endpoints.com>
---
 src/WINNT/afsd/cm_daemon.c | 74 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 69 insertions(+), 5 deletions(-)

diff --git a/src/WINNT/afsd/cm_daemon.c b/src/WINNT/afsd/cm_daemon.c
index cdcfbf2..bb99dc0 100644
--- a/src/WINNT/afsd/cm_daemon.c
+++ b/src/WINNT/afsd/cm_daemon.c
@@ -90,6 +90,47 @@ void cm_IpAddrDaemon(long parm)
     thrd_SetEvent(cm_IPAddrDaemon_ShutdownEvent);
 }
 
+afs_int32 cm_RequestWillBlock(cm_bkgRequest_t *rp)
+{
+    afs_int32 willBlock = 0;
+
+    if (rp->procp == cm_BkgStore) {
+        /*
+         * If the datastoring flag is set, it means that another
+         * thread is already performing an exclusive store operation
+         * on this file.  The exclusive state will be cleared once
+         * the file server locks the vnode.  Therefore, at most two
+         * threads can be actively involved in storing data at a time
+         * on a file.
+         */
+        lock_ObtainRead(&rp->scp);
+        willBlock = (rp->scp->flags & CM_SCACHEFLAG_DATASTORING);
+        lock_ReleaseRead(&rp->scp);
+    }
+    else if (rp->procp == RDR_BkgFetch || rp->procp == cm_BkgPrefetch) {
+        /*
+         * Attempt to determine if there is a conflict on the requested
+         * range of the file.  If the first in the range does not exist
+         * in the cache assume there is no conflict.  If the buffer does
+         * exist, check to see if an I/O operation is in progress
+         * by using the writing and reading flags as an indicator.
+         */
+        osi_hyper_t base;
+        cm_buf_t *bufp = NULL;
+
+        base.LowPart = rp->p1;
+        base.HighPart = rp->p2;
+
+        bufp = buf_Find(&rp->scp->fid, &base);
+        if (bufp) {
+            willBlock = (bufp->flags & (CM_BUF_WRITING|CM_BUF_READING));
+            buf_Release(bufp);
+        }
+    }
+
+    return willBlock;
+}
+
 void cm_BkgDaemon(void * parm)
 {
     cm_bkgRequest_t *rp;
@@ -107,6 +148,8 @@ void cm_BkgDaemon(void * parm)
 
     lock_ObtainWrite(&cm_daemonLock);
     while (daemon_ShutdownFlag == 0) {
+        int willBlock = 0;
+
         if (powerStateSuspended) {
             Sleep(1000);
             continue;
@@ -120,14 +163,35 @@ void cm_BkgDaemon(void * parm)
         /* we found a request */
         for (rp = cm_bkgListEndp; rp; rp = (cm_bkgRequest_t *) osi_QPrev(&rp->q))
 	{
-	    if (cm_ServerAvailable(&rp->scp->fid, rp->userp) ||
-                rp->scp->flags & CM_SCACHEFLAG_DELETED)
+            if (rp->scp->flags & CM_SCACHEFLAG_DELETED)
 		break;
+
+            /*
+             * If the request has active I/O such that this worker would
+             * be forced to block, leave the request in the queue and move
+             * on to one that might be available for servicing.
+             */
+            if (cm_RequestWillBlock(rp)) {
+                willBlock++;
+                continue;
+            }
+
+            if (cm_ServerAvailable(&rp->scp->fid, rp->userp))
+                break;
 	}
-	if (rp == NULL) {
-	    /* we couldn't find a request that we could process at the current time */
+
+        if (rp == NULL) {
+	    /*
+             * Couldn't find a request that we could process at the
+             * current time.  If there were requests that would cause
+             * the worker to block, sleep for 25ms so it can promptly
+             * respond when it is available.  Otherwise, sleep for 1s.
+             *
+             * This polling cycle needs to be replaced with a proper
+             * producer/consumer dynamic worker pool.
+             */
 	    lock_ReleaseWrite(&cm_daemonLock);
-	    Sleep(1000);
+	    Sleep(willBlock ? 25 : 1000);
 	    lock_ObtainWrite(&cm_daemonLock);
 	    continue;
 	}
-- 
1.9.4