Windows: cm_Analyze: do not sleep when no retry is permitted

[openafs.git] / src / WINNT / afsd / cm_conn.c
diff --git a/src/WINNT/afsd/cm_conn.c b/src/WINNT/afsd/cm_conn.c

index 2577827..b007f40 100644 (file)
--- a/src/WINNT/afsd/cm_conn.c
+++ b/src/WINNT/afsd/cm_conn.c
@@ -394,7 +394,11 @@ cm_Analyze(cm_conn_t *connp,
         }
     }
 
-    if (errorCode == CM_ERROR_TIMEDOUT) {
+    if (errorCode == 0) {
+       if (connp)
+           _InterlockedAnd(&connp->flags, ~CM_CONN_FLAG_NEW);
+    }
+    else if (errorCode == CM_ERROR_TIMEDOUT) {
        osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_TIMEDOUT");
         if ( timeLeft > 5 ) {
             thrd_Sleep(3000);
@@ -415,6 +419,9 @@ cm_Analyze(cm_conn_t *connp,
             thrd_Sleep(1000);
             retry = 1;
         }
+
+       if (connp)
+           _InterlockedAnd(&connp->flags, ~CM_CONN_FLAG_NEW);
     }
 
     /* if there is nosuchvolume, then we have a situation in which a
@@ -512,7 +519,9 @@ cm_Analyze(cm_conn_t *connp,
                  * Do not perform a cm_CheckOfflineVolume() if cm_Analyze()
                  * was called by cm_CheckOfflineVolumeState().
                  */
-                if (!(reqp->flags & CM_REQ_OFFLINE_VOL_CHK) && timeLeft > 7) {
+               if (!(reqp->flags & (CM_REQ_OFFLINE_VOL_CHK|CM_REQ_NORETRY)) &&
+                   timeLeft > 7)
+               {
                     thrd_Sleep(5000);
 
                     /* cm_CheckOfflineVolume() resets the serverRef state */
@@ -557,9 +566,14 @@ cm_Analyze(cm_conn_t *connp,
                     volServerspp = NULL;
                 }
 
-                if (timeLeft > 7) {
-                    thrd_Sleep(5000);
-                    statep = cm_VolumeStateByID(volp, fidp->volume);
+               /*
+                * Retry all replicas for up to 5 minutes (at most 20
+                * attempts), waiting 15 seconds between attempts.
+                */
+               if (timeLeft > 20 && !(reqp->flags & CM_REQ_NORETRY) &&
+                   reqp->volbusyCount++ < 20)
+               {
+                   thrd_Sleep(15000);
                     retry = 1;
                 }
                 cm_UpdateVolumeStatus(volp, fidp->volume);
@@ -572,19 +586,21 @@ cm_Analyze(cm_conn_t *connp,
         } else {    /* VL Server query */
             osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLBUSY (VL Server).");
 
-            if (timeLeft > 7) {
+           if (timeLeft > 7 && !(reqp->flags & CM_REQ_NORETRY) && vlServerspp)
+           {
                 thrd_Sleep(5000);
 
-                if (vlServerspp) {
-                    cm_ResetServerBusyStatus(vlServerspp);
-                    retry = 1;
-                }
+               cm_ResetServerBusyStatus(vlServerspp);
+               retry = 1;
             }
         }
     }
 
     /* special codes:  VBUSY and VRESTARTING */
     else if (errorCode == VBUSY || errorCode == VRESTARTING) {
+       if (connp)
+           _InterlockedAnd(&connp->flags, ~CM_CONN_FLAG_NEW);
+
         if (fidp) {
             code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp,
                                       CM_GETVOL_FLAG_NO_LRU_UPDATE,
@@ -650,6 +666,9 @@ cm_Analyze(cm_conn_t *connp,
     else if (errorCode == VNOVOL || errorCode == VMOVED || errorCode == VOFFLINE ||
              errorCode == VSALVAGE || errorCode == VIO)
     {
+       if (connp)
+           _InterlockedAnd(&connp->flags, ~CM_CONN_FLAG_NEW);
+
         /* In case of timeout */
         reqp->volumeError = errorCode;
 
@@ -823,6 +842,9 @@ cm_Analyze(cm_conn_t *connp,
         if ( timeLeft > 2 )
             retry = 1;
     } else if ( errorCode == VNOVNODE ) {
+       if (connp)
+           _InterlockedAnd(&connp->flags, ~CM_CONN_FLAG_NEW);
+
        if ( fidp ) {
            osi_Log4(afsd_logp, "cm_Analyze passed VNOVNODE cell %u vol %u vn %u uniq %u.",
                      fidp->cell, fidp->volume, fidp->vnode, fidp->unique);
@@ -947,6 +969,9 @@ cm_Analyze(cm_conn_t *connp,
          * is currently busy on the server.  Unconditionally
          * retry the request so an alternate call channel can be used.
          */
+       if (connp)
+           _InterlockedAnd(&connp->flags, ~CM_CONN_FLAG_NEW);
+
         if (serverp)
             sprintf(addr, "%d.%d.%d.%d",
                     ((serverp->addr.sin_addr.s_addr & 0xff)),
@@ -970,6 +995,9 @@ cm_Analyze(cm_conn_t *connp,
          * The RPC was not serviced so it can be retried and any
          * existing status information is still valid.
          */
+       if (connp)
+           _InterlockedAnd(&connp->flags, ~CM_CONN_FLAG_NEW);
+
         if (fidp) {
             if (serverp)
                 sprintf(addr, "%d.%d.%d.%d",
@@ -1010,6 +1038,9 @@ cm_Analyze(cm_conn_t *connp,
          * client should fail over to another server.  If this is a
          * request against a single source, the client may retry once.
          */
+       if (connp)
+           _InterlockedAnd(&connp->flags, ~CM_CONN_FLAG_NEW);
+
         if (serverp)
             sprintf(addr, "%d.%d.%d.%d",
                     ((serverp->addr.sin_addr.s_addr & 0xff)),
@@ -1078,7 +1109,8 @@ cm_Analyze(cm_conn_t *connp,
                  (reqp->flags & CM_REQ_NEW_CONN_FORCED ? "yes" : "no"));
 
         if (serverp) {
-            if ((reqp->flags & CM_REQ_NEW_CONN_FORCED)) {
+           if ((connp->flags & CM_CONN_FLAG_NEW) ||
+               (reqp->flags & CM_REQ_NEW_CONN_FORCED)) {
                 lock_ObtainMutex(&serverp->mx);
                 if (!(serverp->flags & CM_SERVERFLAG_DOWN)) {
                     _InterlockedOr(&serverp->flags, CM_SERVERFLAG_DOWN);
@@ -1128,7 +1160,8 @@ cm_Analyze(cm_conn_t *connp,
                  (reqp->flags & CM_REQ_NEW_CONN_FORCED ? "yes" : "no"));
 
         if (serverp) {
-            if (reqp->flags & CM_REQ_NEW_CONN_FORCED) {
+           if ((connp->flags & CM_CONN_FLAG_NEW) ||
+               (reqp->flags & CM_REQ_NEW_CONN_FORCED)) {
                 reqp->errorServp = serverp;
                 reqp->tokenError = errorCode;
             } else {
@@ -1182,6 +1215,9 @@ cm_Analyze(cm_conn_t *connp,
         osi_Log2(afsd_logp, "cm_Analyze: rxkad error code 0x%x (%s)",
                   errorCode, s);
 
+       if (connp)
+           _InterlockedAnd(&connp->flags, ~CM_CONN_FLAG_NEW);
+
         if (serverp) {
             reqp->errorServp = serverp;
             reqp->tokenError = errorCode;
@@ -1195,16 +1231,24 @@ cm_Analyze(cm_conn_t *connp,
          * to answer our query.  Therefore, we will retry the request
          * and force the use of another server.
          */
+       if (connp)
+           _InterlockedAnd(&connp->flags, ~CM_CONN_FLAG_NEW);
+
         if (serverp) {
             reqp->errorServp = serverp;
             reqp->tokenError = errorCode;
             retry = 1;
         }
     } else if (errorCode == VICECONNBAD || errorCode == VICETOKENDEAD) {
-       cm_ForceNewConnections(serverp);
+        reqp->flags |= CM_REQ_NEW_CONN_FORCED;
+        forcing_new = 1;
+        cm_ForceNewConnections(serverp);
         if ( timeLeft > 2 )
             retry = 1;
     } else {
+       if (connp)
+           _InterlockedAnd(&connp->flags, ~CM_CONN_FLAG_NEW);
+
         if (errorCode) {
             char * s = "unknown error";
             switch ( errorCode ) {
@@ -1338,6 +1382,7 @@ cm_Analyze(cm_conn_t *connp,
         cm_PutConn(connp);
 
     /*
+
      * clear the volume updated flag if we succeed.
      * this way the flag will not prevent a subsequent volume
      * from being updated if necessary.
@@ -1563,7 +1608,7 @@ static void cm_NewRXConnection(cm_conn_t *tcp, cm_ucell_t *ucellp,
      * Setting idle dead timeout to a non-zero value activates RX_CALL_IDLE errors
      */
     if (replicated) {
-        tcp->flags &= CM_CONN_FLAG_REPLICATION;
+       _InterlockedOr(&tcp->flags, CM_CONN_FLAG_REPLICATION);
         rx_SetConnIdleDeadTime(tcp->rxconnp, ReplicaIdleDeadtimeout);
     } else {
         rx_SetConnIdleDeadTime(tcp->rxconnp, IdleDeadtimeout);
@@ -1589,6 +1634,9 @@ static void cm_NewRXConnection(cm_conn_t *tcp, cm_ucell_t *ucellp,
     tcp->ucgen = ucellp->gen;
     if (secObjp)
         rxs_Release(secObjp);   /* Decrement the initial refCount */
+
+    _InterlockedAnd(&tcp->flags, ~CM_CONN_FLAG_FORCE_NEW);
+    _InterlockedOr(&tcp->flags, CM_CONN_FLAG_NEW);
 }
 
 long cm_ConnByServer(cm_server_t *serverp, cm_user_t *userp, afs_uint32 replicated, cm_conn_t **connpp)
@@ -1652,7 +1700,6 @@ long cm_ConnByServer(cm_server_t *serverp, cm_user_t *userp, afs_uint32 replicat
                 osi_Log0(afsd_logp, "cm_ConnByServer replace connection due to token update");
             else
                 osi_Log0(afsd_logp, "cm_ConnByServer replace connection due to crypt change");
-            tcp->flags &= ~CM_CONN_FLAG_FORCE_NEW;
             rx_SetConnSecondsUntilNatPing(tcp->rxconnp, 0);
             rx_DestroyConnection(tcp->rxconnp);
             cm_NewRXConnection(tcp, ucellp, serverp, replicated);
@@ -1775,7 +1822,7 @@ void cm_ForceNewConnections(cm_server_t *serverp)
     lock_ObtainWrite(&cm_connLock);
     for (tcp = serverp->connsp; tcp; tcp=tcp->nextp) {
        lock_ObtainMutex(&tcp->mx);
-       tcp->flags |= CM_CONN_FLAG_FORCE_NEW;
+       _InterlockedOr(&tcp->flags, CM_CONN_FLAG_FORCE_NEW);
        lock_ReleaseMutex(&tcp->mx);
     }
     lock_ReleaseWrite(&cm_connLock);