down-before-busy-20040723
[openafs.git] / src / WINNT / afsd / cm_conn.c
index 33e8daf..b1ee331 100644 (file)
@@ -28,6 +28,8 @@
 osi_rwlock_t cm_connLock;
 
 long RDRtimeout = CM_CONN_DEFAULTRDRTIMEOUT;
+long ConnDeadtimeout = CM_CONN_CONNDEADTIME;
+long HardDeadtimeout = CM_CONN_HARDDEADTIME;
 
 #define LANMAN_WKS_PARAM_KEY "SYSTEM\\CurrentControlSet\\Services\\lanmanworkstation\\parameters"
 #define LANMAN_WKS_SESSION_TIMEOUT "SessTimeout"
@@ -52,8 +54,11 @@ void cm_InitConn(void)
                lock_InitializeRWLock(&cm_connLock, "connection global lock");
 
         /* keisa - read timeout value for lanmanworkstation  service.
-         * It is used as hardtimeout for connections. 
-         * Default value is 45 
+         * jaltman - as per 
+         *   http://support.microsoft.com:80/support/kb/articles/Q102/0/67.asp&NoWebContent=1
+         * the SessTimeout is a minimum timeout, not a maximum timeout.  Therefore, 
+         * I believe that the default should not be short.  Instead, we should wait until
+         * RX times out before reporting a timeout to the SMB client.
          */
                code = RegOpenKeyEx(HKEY_LOCAL_MACHINE, LANMAN_WKS_PARAM_KEY,
                             0, KEY_QUERY_VALUE, &parmKey);
@@ -66,13 +71,17 @@ void cm_InitConn(void)
             {
                 afsi_log("lanmanworkstation : SessTimeout %d", sessTimeout);
                 RDRtimeout = sessTimeout;
-            }
-                   else
-            {
-                RDRtimeout = CM_CONN_DEFAULTRDRTIMEOUT;
+                if ( ConnDeadtimeout < RDRtimeout + 15 ) {
+                    ConnDeadtimeout = RDRtimeout + 15;
+                    afsi_log("ConnDeadTimeout increased to %d", ConnDeadtimeout);
+                }
+                if ( HardDeadtimeout < 2 * ConnDeadtimeout ) {
+                    HardDeadtimeout = 2 * ConnDeadtimeout;
+                    afsi_log("HardDeadTimeout increased to %d", HardDeadtimeout);
+                }
             }
         }
-               
+
         osi_EndOnce(&once);
     }
 }
@@ -137,6 +146,7 @@ long cm_GetServerList(struct cm_fid *fidp, struct cm_user *userp,
  *
  * volSyncp and/or cbrp may also be NULL.
  */
+int
 cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
        struct cm_fid *fidp,
        AFSVolSync *volSyncp, cm_callbackRequest_t *cbrp, long errorCode)
@@ -163,7 +173,7 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
        if (reqp->flags & CM_REQ_NORETRY)
                goto out;
 
-       /* if timeout - check that is did not exceed the SMB timeout
+       /* if timeout - check that it did not exceed the SMB timeout
           and retry */
        if (errorCode == CM_ERROR_TIMEDOUT)
     {
@@ -191,17 +201,33 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
            osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLOFFLINE.");
            thrd_Sleep(5000);
            /* cm_ForceUpdateVolume marks all servers as non_busy */
-           cm_ForceUpdateVolume(fidp, userp, reqp);
+               /* No, it doesn't.  It won't do anything if all of
+                * the servers are marked as DOWN.  So clear the DOWN
+                * flag and reset the busy state as well.
+                */
+               cm_GetServerList(fidp, userp, reqp, &serversp);
+               lock_ObtainWrite(&cm_serverLock);
+               for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
+               tsrp->server->flags &= ~CM_SERVERFLAG_DOWN;
+                       if (tsrp->status == busy)
+                               tsrp->status = not_busy;
+               }
+        lock_ReleaseWrite(&cm_serverLock);
+
+        if (fidp != NULL)
+            cm_ForceUpdateVolume(fidp, userp, reqp);
            retry = 1;
        }
 
        /* if all servers are busy, mark them non-busy and start over */
        if (errorCode == CM_ERROR_ALLBUSY) {
                cm_GetServerList(fidp, userp, reqp, &serversp);
+               lock_ObtainWrite(&cm_serverLock);
                for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
                        if (tsrp->status == busy)
                                tsrp->status = not_busy;
                }
+        lock_ReleaseWrite(&cm_serverLock);
                thrd_Sleep(5000);
                retry = 1;
        }
@@ -209,6 +235,7 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
        /* special codes:  VBUSY and VRESTARTING */
        if (errorCode == VBUSY || errorCode == VRESTARTING) {
                cm_GetServerList(fidp, userp, reqp, &serversp);
+               lock_ObtainWrite(&cm_serverLock);
                for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
                        if (tsrp->server == serverp
                            && tsrp->status == not_busy) {
@@ -216,15 +243,13 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
                                break;
                        }
                }
+        lock_ReleaseWrite(&cm_serverLock);
                retry = 1;
        }
 
        /* special codes:  missing volumes */
        if (errorCode == VNOVOL || errorCode == VMOVED || errorCode == VOFFLINE
            || errorCode == VSALVAGE || errorCode == VNOSERVICE) {
-               long newSum;
-               int same;
-
                /* Log server being offline for this volume */
                osi_Log4(afsd_logp, "cm_Analyze found server %d.%d.%d.%d marked offline for a volume",
                         ((serverp->addr.sin_addr.s_addr & 0xff)),
@@ -321,7 +346,7 @@ long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp,
        cm_serverRef_t *tsrp;
         cm_server_t *tsp;
         long firstError = 0;
-       int someBusy = 0, someOffline = 0;
+       int someBusy = 0, someOffline = 0, allBusy = 1, allDown = 1;
        long timeUsed, timeLeft, hardTimeLeft;
 #ifdef DJGPP
         struct timeval now;
@@ -337,13 +362,8 @@ long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp,
 #endif
         
        /* leave 5 seconds margin of safety */
-       timeLeft = RDRtimeout - timeUsed - 5;
-       hardTimeLeft = timeLeft;
-
-       /* Time enough to do an RPC? */
-       if (timeLeft < 1) {
-               return CM_ERROR_TIMEDOUT;
-       }
+       timeLeft =  ConnDeadtimeout - timeUsed - 5;
+       hardTimeLeft = HardDeadtimeout - timeUsed - 5;
 
        lock_ObtainWrite(&cm_serverLock);
 
@@ -352,20 +372,22 @@ long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp,
         tsp->refCount++;
         lock_ReleaseWrite(&cm_serverLock);
         if (!(tsp->flags & CM_SERVERFLAG_DOWN)) {
+            allDown = 0;
             if (tsrp->status == busy)
                 someBusy = 1;
             else if (tsrp->status == offline)
                 someOffline = 1;
             else {
+                               allBusy = 0;
                 code = cm_ConnByServer(tsp, usersp, connpp);
                 if (code == 0) {
                     cm_PutServer(tsp);
                     /* Set RPC timeout */
-                    if (timeLeft > CM_CONN_CONNDEADTIME)
-                        timeLeft = CM_CONN_CONNDEADTIME;
+                    if (timeLeft > ConnDeadtimeout)
+                        timeLeft = ConnDeadtimeout;
 
-                    if (hardTimeLeft > CM_CONN_HARDDEADTIME) 
-                        hardTimeLeft = CM_CONN_HARDDEADTIME;
+                    if (hardTimeLeft > HardDeadtimeout) 
+                        hardTimeLeft = HardDeadtimeout;
 
                     lock_ObtainMutex(&(*connpp)->mx);
                     rx_SetConnDeadTime((*connpp)->callp,
@@ -379,19 +401,24 @@ long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp,
                 if (firstError == 0) 
                     firstError = code;
             }
-        }
+               } 
         lock_ObtainWrite(&cm_serverLock);
         osi_assert(tsp->refCount-- > 0);
     }   
 
        lock_ReleaseWrite(&cm_serverLock);
        if (firstError == 0) {
-               if (someBusy) firstError = CM_ERROR_ALLBUSY;
-               else if (someOffline) firstError = CM_ERROR_ALLOFFLINE;
-               else if (serversp) firstError = CM_ERROR_TIMEDOUT;
-               /* Only return CM_ERROR_NOSUCHVOLUME if there are no
-                  servers for this volume */
-               else firstError = CM_ERROR_NOSUCHVOLUME;
+               if (allDown) 
+                       firstError = CM_ERROR_ALLOFFLINE;
+               else if (allBusy) 
+                       firstError = CM_ERROR_ALLBUSY;
+               else if (serversp == NULL) 
+                       /* Only return CM_ERROR_NOSUCHVOLUME if there are no
+                        * servers for this volume 
+                        */
+                       firstError = CM_ERROR_NOSUCHVOLUME;
+               else
+                       firstError = CM_ERROR_TIMEDOUT;
        }
        osi_Log1(afsd_logp, "cm_ConnByMServers returning %x", firstError);
     return firstError;
@@ -401,8 +428,8 @@ long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp,
 void cm_GCConnections(cm_server_t *serverp)
 {
        cm_conn_t *tcp;
-        cm_conn_t **lcpp;
-        cm_user_t *userp;
+    cm_conn_t **lcpp;
+    cm_user_t *userp;
 
        lock_ObtainWrite(&cm_connLock);
        lcpp = &serverp->connsp;
@@ -410,38 +437,38 @@ void cm_GCConnections(cm_server_t *serverp)
                userp = tcp->userp;
                if (userp && tcp->refCount == 0 && (userp->vcRefs == 0)) {
                        /* do the deletion of this guy */
-                        cm_ReleaseUser(userp);
-                        *lcpp = tcp->nextp;
+            cm_ReleaseUser(userp);
+            *lcpp = tcp->nextp;
                        rx_DestroyConnection(tcp->callp);
-                        lock_FinalizeMutex(&tcp->mx);
-                        free(tcp);
-                }
-                else {
+            lock_FinalizeMutex(&tcp->mx);
+            free(tcp);
+        }
+        else {
                        /* just advance to the next */
-                        lcpp = &tcp->nextp;
-                }
+            lcpp = &tcp->nextp;
         }
+    }
        lock_ReleaseWrite(&cm_connLock);
 }
 
 static void cm_NewRXConnection(cm_conn_t *tcp, cm_ucell_t *ucellp,
        cm_server_t *serverp)
 {
-        unsigned short port;
-        int serviceID;
-        int secIndex;
-        struct rx_securityClass *secObjp;
+    unsigned short port;
+    int serviceID;
+    int secIndex;
+    struct rx_securityClass *secObjp;
        afs_int32 level;
 
        if (serverp->type == CM_SERVER_VLDB) {
                port = htons(7003);
-                serviceID = 52;
-        }
-        else {
+        serviceID = 52;
+    }
+    else {
                osi_assert(serverp->type == CM_SERVER_FILE);
-                port = htons(7000);
-                serviceID = 1;
-        }
+        port = htons(7000);
+        serviceID = 1;
+    }
        if (ucellp->flags & CM_UCELLFLAG_RXKAD) {
                secIndex = 2;
                if (cryptall) {
@@ -450,67 +477,69 @@ static void cm_NewRXConnection(cm_conn_t *tcp, cm_ucell_t *ucellp,
                } else {
                        level = rxkad_clear;
                }
-                secObjp = rxkad_NewClientSecurityObject(level,
-                       &ucellp->sessionKey, ucellp->kvno,
-                       ucellp->ticketLen, ucellp->ticketp);
-        }
-        else {
-               /* normal auth */
-                secIndex = 0;
-                secObjp = rxnull_NewClientSecurityObject();
-        }
+        secObjp = rxkad_NewClientSecurityObject(level,
+                                                &ucellp->sessionKey, ucellp->kvno,
+                                                ucellp->ticketLen, ucellp->ticketp);    
+    }
+    else {
+        /* normal auth */
+        secIndex = 0;
+        secObjp = rxnull_NewClientSecurityObject();
+    }
        osi_assert(secObjp != NULL);
-        tcp->callp = rx_NewConnection(serverp->addr.sin_addr.s_addr,
-                       port,
-               serviceID,
-               secObjp,
-                secIndex);
-       rx_SetConnDeadTime(tcp->callp, CM_CONN_CONNDEADTIME);
-       rx_SetConnHardDeadTime(tcp->callp, CM_CONN_HARDDEADTIME);
+    tcp->callp = rx_NewConnection(serverp->addr.sin_addr.s_addr,
+                                  port,
+                                  serviceID,
+                                  secObjp,
+                                  secIndex);
+       rx_SetConnDeadTime(tcp->callp, ConnDeadtimeout);
+       rx_SetConnHardDeadTime(tcp->callp, HardDeadtimeout);
        tcp->ucgen = ucellp->gen;
+    if (secObjp)
+        rxs_Release(secObjp);   /* Decrement the initial refCount */
 }
 
 long cm_ConnByServer(cm_server_t *serverp, cm_user_t *userp, cm_conn_t **connpp)
 {
        cm_conn_t *tcp;
-        cm_ucell_t *ucellp;
+    cm_ucell_t *ucellp;
 
        lock_ObtainMutex(&userp->mx);
        lock_ObtainWrite(&cm_connLock);
        for(tcp = serverp->connsp; tcp; tcp=tcp->nextp) {
                if (tcp->userp == userp) break;
-        }
+    }
        /* find ucell structure */
-        ucellp = cm_GetUCell(userp, serverp->cellp);
+    ucellp = cm_GetUCell(userp, serverp->cellp);
        if (!tcp) {
                tcp = malloc(sizeof(*tcp));
-                memset(tcp, 0, sizeof(*tcp));
-                tcp->nextp = serverp->connsp;
-                serverp->connsp = tcp;
-                tcp->userp = userp;
-                cm_HoldUser(userp);
-                lock_InitializeMutex(&tcp->mx, "cm_conn_t mutex");
-                tcp->serverp = serverp;
+        memset(tcp, 0, sizeof(*tcp));
+        tcp->nextp = serverp->connsp;
+        serverp->connsp = tcp;
+        cm_HoldUser(userp);
+        tcp->userp = userp;
+        lock_InitializeMutex(&tcp->mx, "cm_conn_t mutex");
+        tcp->serverp = serverp;
                tcp->cryptlevel = rxkad_clear;
                cm_NewRXConnection(tcp, ucellp, serverp);
                tcp->refCount = 1;
-        }
+    }
        else {
                if ((tcp->ucgen < ucellp->gen) || (tcp->cryptlevel != cryptall))
                {
                        rx_DestroyConnection(tcp->callp);
                        cm_NewRXConnection(tcp, ucellp, serverp);
                }
-               tcp->refCount++;
+        tcp->refCount++;
        }
        lock_ReleaseWrite(&cm_connLock);
-        lock_ReleaseMutex(&userp->mx);
+    lock_ReleaseMutex(&userp->mx);
 
        /* return this pointer to our caller */
-        osi_Log1(afsd_logp, "cm_ConnByServer returning conn 0x%x", (long) tcp);
+    osi_Log1(afsd_logp, "cm_ConnByServer returning conn 0x%x", (long) tcp);
        *connpp = tcp;
 
-        return 0;
+    return 0;
 }
 
 long cm_Conn(struct cm_fid *fidp, struct cm_user *userp, cm_req_t *reqp,