Windows: refactor cm_CheckCBExpiration multihomed
author: Jeffrey Altman <jaltman@your-file-system.com>
Sun, 16 Jan 2011 20:49:02 +0000 (15:49 -0500)
committer: Jeffrey Altman <jaltman@openafs.org>
Tue, 18 Jan 2011 17:54:44 +0000 (09:54 -0800)
cm_CheckCBExpiration() is refactored to make it easier
to read the decision process.  cm_CheckCBExpiration()
determines when a callback is no longer usable and as a
result the object status info should be discarded.

The Windows cache manager preserves status info past
callback expiration if all of the sources of a volume
became inaccessible prior to the callback expiration
time.  The cache manager was improperly preserving the
status info for objects whose callback was issued by
a multi-homed file server when only the interface that
issued the callback was down.

A separate cm_server_t object is used to represent
each file server interface.  When one interface goes
down and others are left up, the cache manager will
now replace the reference to the down cm_server_t with
one that is up.  This substitution is performed as a side effect
of computing the effective downTime in cm_CBServersDownTime().

Change-Id: Ia6336a3bdd0219497fd47460accddd0cd2629f00
Reviewed-on: http://gerrit.openafs.org/3674
Tested-by: BuildBot <buildbot@rampaginggeek.com>
Reviewed-by: Derrick Brashear <shadow@dementia.org>
Tested-by: Jeffrey Altman <jaltman@openafs.org>
Reviewed-by: Jeffrey Altman <jaltman@openafs.org>

src/WINNT/afsd/cm_callback.c

index 9199e34..d328fac 100644 (file)
@@ -1855,43 +1855,74 @@ long cm_GetCallback(cm_scache_t *scp, struct cm_user *userp,
 }
 
 
-/* called with cm_scacheLock held */
-long cm_CBServersUp(cm_scache_t *scp, time_t * downTime)
+/*
+ * cm_CBServersDownTime() returns 1 if the downTime parameter is valid.
+ *
+ * Servers with multiple interfaces have multiple cm_server_t objects
+ * which share the same UUID.  If one interface is down but others are up,
+ * the server should not be considered down.  The returned downTime should
+ * be the largest non-zero value if down or zero if up.  If the cbServerp
+ * is down, it is updated to refer to an interface that is up (if one exists).
+ *
+ * called with cm_scacheLock held
+ */
+static long
+cm_CBServersDownTime(cm_scache_t *scp, cm_volume_t *volp, time_t * pdownTime)
 {
     cm_vol_state_t *statep;
-    cm_volume_t * volp;
-    afs_uint32 volID = scp->fid.volume;
     cm_serverRef_t *tsrp;
-    int found;
+    int alldown = 1;
+    time_t downTime = 0;
+    cm_server_t * upserver = NULL;
+    cm_server_t * downserver;
 
-    *downTime = 0;
+    *pdownTime = 0;
 
     if (scp->cbServerp == NULL)
         return 1;
 
-    volp = cm_GetVolumeByFID(&scp->fid);
-    if (!volp)
+    if (!(scp->cbServerp->flags & CM_SERVERFLAG_DOWN))
         return 1;
 
-    statep = cm_VolumeStateByID(volp, volID);
-    cm_PutVolume(volp);
-    if (statep->state == vl_online)
-        return 1;
+    statep = cm_VolumeStateByID(volp, scp->fid.volume);
+    if (statep) {
+        for (tsrp = statep->serversp; tsrp; tsrp=tsrp->next) {
+            if (tsrp->status == srv_deleted)
+                continue;
+
+            if (!cm_ServerEqual(tsrp->server, scp->cbServerp))
+                continue;
 
-    for (found = 0,tsrp = statep->serversp; tsrp; tsrp=tsrp->next) {
-        if (tsrp->status == srv_deleted)
-            continue;
-        if (cm_ServerEqual(tsrp->server, scp->cbServerp))
-            found = 1;
-        if (tsrp->server->downTime > *downTime)
-            *downTime = tsrp->server->downTime;
+            if (!(tsrp->server->flags & CM_SERVERFLAG_DOWN)) {
+                alldown = 0;
+                if (!upserver) {
+                    upserver = tsrp->server;
+                    cm_GetServer(upserver);
+                }
+            }
+
+            if (tsrp->server->downTime > downTime)
+                downTime = tsrp->server->downTime;
+        }
+    } else {
+        downTime = scp->cbServerp->downTime;
     }
 
     /* if the cbServerp does not match the current volume server list
      * we report the callback server as up so the callback can be 
      * expired.
      */
-    return(found ? 0 : 1);
+
+    if (alldown) {
+        *pdownTime = downTime;
+    } else {
+        lock_ObtainWrite(&scp->rw);
+        downserver = scp->cbServerp;
+        scp->cbServerp = upserver;
+        lock_ReleaseWrite(&scp->rw);
+        cm_PutServer(downserver);
+    }
+    return 1;
 }
 
 /* called periodically by cm_daemon to shut down use of expired callbacks */
@@ -1899,6 +1930,8 @@ void cm_CheckCBExpiration(void)
 {
     afs_uint32 i;
     cm_scache_t *scp;
+    cm_volume_t *volp = NULL;
+    enum volstatus volstate;
     time_t now, downTime;
         
     osi_Log0(afsd_logp, "CheckCBExpiration");
@@ -1907,42 +1940,78 @@ void cm_CheckCBExpiration(void)
     lock_ObtainWrite(&cm_scacheLock);
     for (i=0; i<cm_data.scacheHashTableSize; i++) {
         for (scp = cm_data.scacheHashTablep[i]; scp; scp=scp->nextp) {
+            if (volp) {
+                cm_PutVolume(volp);
+                volp = NULL;
+            }
+
+            /*
+             * If this is not a PURERO object and there is no callback
+             * or it hasn't expired, there is nothing to do
+             */
+            if (!(scp->flags & CM_SCACHEFLAG_PURERO) &&
+                (scp->cbServerp == NULL || scp->cbExpires == 0 || now < scp->cbExpires))
+                continue;
+
+            /*
+             * Determine the volume state and update the callback info
+             * to the latest if it is a PURERO object.
+             */
+            volp = cm_GetVolumeByFID(&scp->fid);
+            volstate = vl_unknown;
             downTime = 0;
-            if (scp->flags & CM_SCACHEFLAG_PURERO) {
-                cm_volume_t *volp = cm_GetVolumeByFID(&scp->fid);
-                if (volp) {
-                    if (volp->cbExpiresRO > scp->cbExpires &&
-                        scp->cbExpires > 0) 
-                    {
-                        scp->cbExpires = volp->cbExpiresRO;
-                        if (volp->cbServerpRO != scp->cbServerp) {
-                            if (scp->cbServerp)
-                               cm_PutServer(scp->cbServerp);
-                           cm_GetServer(volp->cbServerpRO);
-                           scp->cbServerp = volp->cbServerpRO;
-                        }
-                    }        
-                    cm_PutVolume(volp);
+            if (volp) {
+                if ((scp->flags & CM_SCACHEFLAG_PURERO) &&
+                    volp->cbExpiresRO > scp->cbExpires && scp->cbExpires > 0)
+                {
+                    lock_ObtainWrite(&scp->rw);
+                    scp->cbExpires = volp->cbExpiresRO;
+                    if (volp->cbServerpRO != scp->cbServerp) {
+                        if (scp->cbServerp)
+                            cm_PutServer(scp->cbServerp);
+                        cm_GetServer(volp->cbServerpRO);
+                        scp->cbServerp = volp->cbServerpRO;
+                    }
+                    lock_ReleaseWrite(&scp->rw);
                 }
+                volstate = cm_GetVolumeStatus(volp, scp->fid.volume);
             }
-            if (scp->cbServerp && scp->cbExpires > 0 && now > scp->cbExpires && 
-                 (cm_CBServersUp(scp, &downTime) || downTime == 0 || downTime >= scp->cbExpires)) 
-            {
-                cm_HoldSCacheNoLock(scp);
-                lock_ReleaseWrite(&cm_scacheLock);
-                
-                osi_Log4(afsd_logp, "Callback Expiration Discarding SCache scp 0x%p vol %u vn %u uniq %u",
-                          scp, scp->fid.volume, scp->fid.vnode, scp->fid.unique);
-                lock_ObtainWrite(&scp->rw);
-                cm_DiscardSCache(scp);
-                lock_ReleaseWrite(&scp->rw);
-                cm_CallbackNotifyChange(scp);
 
-                lock_ObtainWrite(&cm_scacheLock);
-                cm_ReleaseSCacheNoLock(scp);
-            }
+            /* If there is no callback or it hasn't expired, there is nothing to do */
+            if (scp->cbServerp == NULL || scp->cbExpires == 0 || now < scp->cbExpires)
+                continue;
+
+            /* If the volume is known not to be online, do not expire the callback */
+            if (volstate != vl_online)
+                continue;
+
+            /*
+             * If all the servers are down and the callback expired after the
+             * issuing server went down, do not expire the callback
+             */
+            if (cm_CBServersDownTime(scp, volp, &downTime) && downTime && downTime < scp->cbExpires)
+                continue;
+
+            /* The callback has expired, discard the status info */
+            cm_HoldSCacheNoLock(scp);
+            lock_ReleaseWrite(&cm_scacheLock);
+
+            osi_Log4(afsd_logp, "Callback Expiration Discarding SCache scp 0x%p vol %u vn %u uniq %u",
+                     scp, scp->fid.volume, scp->fid.vnode, scp->fid.unique);
+            lock_ObtainWrite(&scp->rw);
+            cm_DiscardSCache(scp);
+            lock_ReleaseWrite(&scp->rw);
+
+            cm_CallbackNotifyChange(scp);
+
+            lock_ObtainWrite(&cm_scacheLock);
+            cm_ReleaseSCacheNoLock(scp);
         }
     }
+    if (volp) {
+        cm_PutVolume(volp);
+        volp = NULL;
+    }
     lock_ReleaseWrite(&cm_scacheLock);
 
     osi_Log0(afsd_logp, "CheckCBExpiration Complete");