windows-ipaddr-change-server-deadlock-20080130
[openafs.git] / src / WINNT / afsd / cm_server.c
index f04aec9..a64cfed 100644 (file)
 #include <afs/param.h>
 #include <afs/stds.h>
 
-#ifndef DJGPP
 #include <windows.h>
 #include <winsock2.h>
 #include <nb30.h>
-#else
-#include <sys/socket.h>
-#endif /* !DJGPP */
 #include <stdlib.h>
 #include <malloc.h>
 #include <string.h>
 
 #include "afsd.h"
+#include <WINNT\syscfg.h>
 #include <osi.h>
 #include <rx/rx.h>
 
@@ -34,13 +31,13 @@ cm_ForceNewConnectionsAllServers(void)
 {
     cm_server_t *tsp;
 
-    lock_ObtainRead(&cm_serverLock);
+    lock_ObtainWrite(&cm_serverLock);
     for (tsp = cm_allServersp; tsp; tsp = tsp->allNextp) {
         cm_GetServerNoLock(tsp);
        cm_ForceNewConnections(tsp);
         cm_PutServerNoLock(tsp);
     }
-    lock_ReleaseRead(&cm_serverLock);
+    lock_ReleaseWrite(&cm_serverLock);
 }
 
 void 
@@ -49,11 +46,12 @@ cm_PingServer(cm_server_t *tsp)
     long code;
     int wasDown = 0;
     cm_conn_t *connp;
-    struct rx_connection * callp;
+    struct rx_connection * rxconnp;
     long secs;
     long usecs;
     Capabilities caps = {0, 0};
     char hoststr[16];
+    cm_req_t req;
 
     lock_ObtainMutex(&tsp->mx);
     if (tsp->flags & CM_SERVERFLAG_PINGING) {
@@ -86,21 +84,21 @@ cm_PingServer(cm_server_t *tsp)
                  wasDown ? "down" : "up",
                  tsp->capabilities);
 
+        rxconnp = cm_GetRxConn(connp);
        if (wasDown)
-           rx_SetConnDeadTime(connp->callp, 10);
+           rx_SetConnDeadTime(rxconnp, 10);
        if (tsp->type == CM_SERVER_VLDB) {
-           code = VL_ProbeServer(connp->callp);
+           code = VL_ProbeServer(rxconnp);
        }
        else {
            /* file server */
-           callp = cm_GetRxConn(connp);
-           code = RXAFS_GetCapabilities(callp, &caps);
+           code = RXAFS_GetCapabilities(rxconnp, &caps);
            if (code == RXGEN_OPCODE)
-               code = RXAFS_GetTime(callp, &secs, &usecs);
-           rx_PutConnection(callp);
+               code = RXAFS_GetTime(rxconnp, &secs, &usecs);
        }
        if (wasDown)
-           rx_SetConnDeadTime(connp->callp, ConnDeadtimeout);
+           rx_SetConnDeadTime(rxconnp, ConnDeadtimeout);
+        rx_PutConnection(rxconnp);
        cm_PutConn(connp);
     }  /* got an unauthenticated connection to this server */
 
@@ -108,6 +106,7 @@ cm_PingServer(cm_server_t *tsp)
     if (code >= 0) {
        /* mark server as up */
        tsp->flags &= ~CM_SERVERFLAG_DOWN;
+        tsp->downTime = 0;
 
        /* we currently handle 32-bits of capabilities */
        if (caps.Capabilities_len > 0) {
@@ -123,9 +122,36 @@ cm_PingServer(cm_server_t *tsp)
                  osi_LogSaveString(afsd_logp, hoststr), 
                  tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
                  tsp->capabilities);
+
+        /* Now update the volume status if necessary */
+        if (wasDown) {
+            cm_server_vols_t * tsrvp;
+            cm_volume_t * volp;
+            int i;
+
+            for (tsrvp = tsp->vols; tsrvp; tsrvp = tsrvp->nextp) {
+                for (i=0; i<NUM_SERVER_VOLS; i++) {
+                    if (tsrvp->ids[i] != 0) {
+                        cm_InitReq(&req);
+
+                        lock_ReleaseMutex(&tsp->mx);
+                        code = cm_GetVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
+                                                &req, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp);
+                        lock_ObtainMutex(&tsp->mx);
+                        if (code == 0) {
+                            cm_UpdateVolumeStatus(volp, tsrvp->ids[i]);
+                            cm_PutVolume(volp);
+                        }
+                    }
+                }
+            }
+        }
     } else {
        /* mark server as down */
-       tsp->flags |= CM_SERVERFLAG_DOWN;
+        if (!(tsp->flags & CM_SERVERFLAG_DOWN)) {
+            tsp->flags |= CM_SERVERFLAG_DOWN;
+            tsp->downTime = osi_Time();
+        }
        if (code != VRESTARTING)
            cm_ForceNewConnections(tsp);
 
@@ -133,6 +159,30 @@ cm_PingServer(cm_server_t *tsp)
                  osi_LogSaveString(afsd_logp, hoststr), 
                  tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
                  tsp->capabilities);
+
+        /* Now update the volume status if necessary */
+        if (!wasDown) {
+            cm_server_vols_t * tsrvp;
+            cm_volume_t * volp;
+            int i;
+
+            for (tsrvp = tsp->vols; tsrvp; tsrvp = tsrvp->nextp) {
+                for (i=0; i<NUM_SERVER_VOLS; i++) {
+                    if (tsrvp->ids[i] != 0) {
+                        cm_InitReq(&req);
+
+                        lock_ReleaseMutex(&tsp->mx);
+                        code = cm_GetVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
+                                                &req, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp);
+                        lock_ObtainMutex(&tsp->mx);
+                        if (code == 0) {
+                            cm_UpdateVolumeStatus(volp, tsrvp->ids[i]);
+                            cm_PutVolume(volp);
+                        }
+                    }
+                }
+            }
+        }
     }
 
     if (tsp->waitCount == 0)
@@ -152,6 +202,7 @@ void cm_CheckServers(long flags, cm_cell_t *cellp)
     cm_server_t *tsp;
     int doPing;
     int isDown;
+    int isFS;
 
     lock_ObtainWrite(&cm_serverLock);
     for (tsp = cm_allServersp; tsp; tsp = tsp->allNextp) {
@@ -163,6 +214,7 @@ void cm_CheckServers(long flags, cm_cell_t *cellp)
 
         doPing = 0;
         isDown = tsp->flags & CM_SERVERFLAG_DOWN;
+        isFS   = tsp->type == CM_SERVER_FILE;
 
         /* only do the ping if the cell matches the requested cell, or we're
          * matching all cells (cellp == NULL), and if we've requested to ping
@@ -170,7 +222,11 @@ void cm_CheckServers(long flags, cm_cell_t *cellp)
          */
         if ((cellp == NULL || cellp == tsp->cellp) &&
              ((isDown && (flags & CM_FLAG_CHECKDOWNSERVERS)) ||
-               (!isDown && (flags & CM_FLAG_CHECKUPSERVERS)))) {
+               (!isDown && (flags & CM_FLAG_CHECKUPSERVERS))) &&
+             ((!(flags & CM_FLAG_CHECKVLDBSERVERS) || 
+               !isFS && (flags & CM_FLAG_CHECKVLDBSERVERS)) &&
+              (!(flags & CM_FLAG_CHECKFILESERVERS) || 
+                 isFS && (flags & CM_FLAG_CHECKFILESERVERS)))) {
             doPing = 1;
         }      /* we're supposed to check this up/down server */
         lock_ReleaseMutex(&tsp->mx);
@@ -217,13 +273,13 @@ void cm_GetServerNoLock(cm_server_t *serverp)
 void cm_PutServer(cm_server_t *serverp)
 {
     lock_ObtainWrite(&cm_serverLock);
-    osi_assert(serverp->refCount-- > 0);
+    osi_assertx(serverp->refCount-- > 0, "cm_server_t refCount 0");
     lock_ReleaseWrite(&cm_serverLock);
 }
 
 void cm_PutServerNoLock(cm_server_t *serverp)
 {
-    osi_assert(serverp->refCount-- > 0);
+    osi_assertx(serverp->refCount-- > 0, "cm_server_t refCount 0");
 }
 
 void cm_SetServerNo64Bit(cm_server_t * serverp, int no64bit)
@@ -305,28 +361,47 @@ void cm_SetServerPrefs(cm_server_t * serverp)
     } /* and of for loop */
 }
 
-cm_server_t *cm_NewServer(struct sockaddr_in *socketp, int type, cm_cell_t *cellp) {
+cm_server_t *cm_NewServer(struct sockaddr_in *socketp, int type, cm_cell_t *cellp, afs_uint32 flags) {
+    /*
+     * Create a server record for the address in socketp, link it at the
+     * head of the global cm_allServersp list and return it.  Returns
+     * NULL when the allocation fails.  The new entry starts with a
+     * reference count of 1 and is flagged as down; unless
+     * CM_FLAG_NOPROBE is set in flags, cm_PingServer() is called on it
+     * before returning.
+     */
     cm_server_t *tsp;
 
-    osi_assert(socketp->sin_family == AF_INET);
+    osi_assertx(socketp->sin_family == AF_INET, "unexpected socket family");
 
     tsp = malloc(sizeof(*tsp));
-    memset(tsp, 0, sizeof(*tsp));
-    tsp->type = type;
-    tsp->cellp = cellp;
-    tsp->refCount = 1;
-    lock_InitializeMutex(&tsp->mx, "cm_server_t mutex");
-    tsp->addr = *socketp;
-    tsp->flags = CM_SERVERFLAG_DOWN;   /* assume down; ping will mark up if available */
-
-    cm_SetServerPrefs(tsp); 
-
-    lock_ObtainWrite(&cm_serverLock);  /* get server lock */
-    tsp->allNextp = cm_allServersp;
-    cm_allServersp = tsp;
-    lock_ReleaseWrite(&cm_serverLock);         /* release server lock */
-
-    cm_PingServer(tsp);                        /* Obtain Capabilities and check up/down state */
+    if (tsp == NULL)
+        return NULL;
+
+    memset(tsp, 0, sizeof(*tsp));
+    tsp->type = type;
+    tsp->cellp = cellp;
+    tsp->refCount = 1;
+    lock_InitializeMutex(&tsp->mx, "cm_server_t mutex");
+    tsp->addr = *socketp;
+    /* assume down; ping will mark up if available */
+    tsp->flags = CM_SERVERFLAG_DOWN;
+
+    cm_SetServerPrefs(tsp); 
+
+    /* link the new entry at the head of the global list */
+    lock_ObtainWrite(&cm_serverLock);
+    tsp->allNextp = cm_allServersp;
+    cm_allServersp = tsp;
+    lock_ReleaseWrite(&cm_serverLock);
+
+    /* Obtain Capabilities and check up/down state */
+    if ((flags & CM_FLAG_NOPROBE) == 0)
+        cm_PingServer(tsp);
+
+    return tsp;
+}
+
+cm_server_t *
+cm_FindServerByIP(afs_uint32 ipaddr, int type)
+{
+    cm_server_t *tsp;
+
+    /* Walk the global server list under a read lock and stop at the
+     * first entry whose type and address both match.  tsp is NULL when
+     * the list is exhausted without a match.
+     * NOTE(review): no reference is taken on the entry before the lock
+     * is released -- confirm that callers do not need one. */
+    lock_ObtainRead(&cm_serverLock);
+    tsp = cm_allServersp;
+    while (tsp != NULL) {
+        if (tsp->type == type && tsp->addr.sin_addr.S_un.S_addr == ipaddr)
+            break;
+        tsp = tsp->allNextp;
+    }
+    lock_ReleaseRead(&cm_serverLock);
+
     return tsp;
 }
 
@@ -335,7 +410,7 @@ cm_server_t *cm_FindServer(struct sockaddr_in *addrp, int type)
 {
     cm_server_t *tsp;
 
-    osi_assert(addrp->sin_family == AF_INET);
+    osi_assertx(addrp->sin_family == AF_INET, "unexpected socket value");
         
     lock_ObtainWrite(&cm_serverLock);
     for (tsp = cm_allServersp; tsp; tsp=tsp->allNextp) {
@@ -355,17 +430,73 @@ cm_server_t *cm_FindServer(struct sockaddr_in *addrp, int type)
     return tsp;
 }       
 
-cm_serverRef_t *cm_NewServerRef(cm_server_t *serverp)
+cm_server_vols_t *cm_NewServerVols(void) {
+    /* Allocate one zero-filled cm_server_vols_t page; the result is
+     * NULL when the allocation fails. */
+    cm_server_vols_t *pagep = calloc(1, sizeof(*pagep));
+
+    return pagep;
+}
+
+cm_serverRef_t *cm_NewServerRef(cm_server_t *serverp, afs_uint32 volID)
 {
+    /*
+     * Build a new server reference for serverp with status
+     * srv_not_busy, no successor and a reference count of 1.  When
+     * volID is non-zero it is also recorded in the server's list of
+     * volume IDs.  Returns NULL if the reference cannot be allocated.
+     */
     cm_serverRef_t *tsrp;
+    cm_server_vols_t **tsrvpp = NULL;
+    afs_uint32 *slotp = NULL;
+    int found = 0;
 
     cm_GetServer(serverp);
     tsrp = malloc(sizeof(*tsrp));
+    if (tsrp == NULL) {
+        /* balance the cm_GetServer() call above before giving up */
+        cm_PutServer(serverp);
+        return NULL;
+    }
     tsrp->server = serverp;
-    tsrp->status = not_busy;
+    tsrp->status = srv_not_busy;
     tsrp->next = NULL;
+    tsrp->volID = volID;
     tsrp->refCount = 1;
 
+    /* if we have a non-zero volID, we need to add it to the list
+     * of volumes maintained by the server.  There are two phases:
+     * (1) see if the volID is already in the list and (2) insert
+     * it into the first empty slot if it is not.
+     */
+    if (volID) {
+        lock_ObtainMutex(&serverp->mx);
+
+        /* phase 1: scan every page, remembering the first free slot;
+         * if the scan ends without a match, tsrvpp is left pointing
+         * at the NULL link that terminates the list */
+        tsrvpp = &serverp->vols;
+        while (*tsrvpp) {
+            int i;
+
+            for (i=0; i<NUM_SERVER_VOLS; i++) {
+                if ((*tsrvpp)->ids[i] == volID) {
+                    found = 1;
+                    break;
+                } else if (!slotp && (*tsrvpp)->ids[i] == 0) {
+                    slotp = &(*tsrvpp)->ids[i];
+                }
+            }
+
+            if (found)
+                break;
+
+            tsrvpp = &(*tsrvpp)->nextp;
+        }
+
+        /* phase 2: record the ID unless it is already present */
+        if (!found) {
+            if (slotp) {
+                *slotp = volID;
+            } else {
+                /* if we didn't find an empty slot in a current
+                 * page we must need a new page; if that allocation
+                 * fails the ID is simply not recorded */
+                *tsrvpp = cm_NewServerVols();
+                if (*tsrvpp)
+                    (*tsrvpp)->ids[0] = volID;
+            }
+        }
+
+        lock_ReleaseMutex(&serverp->mx);
+    }
+
     return tsrp;
 }
 
@@ -515,6 +646,8 @@ void cm_RandomizeServer(cm_serverRef_t** list)
 /* call cm_FreeServer while holding a write lock on cm_serverLock */
 void cm_FreeServer(cm_server_t* serverp)
 {
+    cm_server_vols_t * tsrvp, *nextp;
+
     cm_PutServerNoLock(serverp);
     if (serverp->refCount == 0)
     {
@@ -538,12 +671,37 @@ void cm_FreeServer(cm_server_t* serverp)
                    }
                }
             }
+
+            /* free the volid list */
+            for ( tsrvp = serverp->vols; tsrvp; tsrvp = nextp) {
+                nextp = tsrvp->nextp;
+                free(tsrvp);
+            }
+
            free(serverp);
         }
     }
 }
 
-void cm_FreeServerList(cm_serverRef_t** list)
+void cm_RemoveVolumeFromServer(cm_server_t * serverp, afs_uint32 volID)
+{
+    /*
+     * Clear volID from the volume-ID pages hanging off serverp->vols.
+     * A zero volID is ignored because zero is the value that marks an
+     * unused slot.  cm_NewServerRef() searches for an existing entry
+     * before inserting, so the ID is stored at most once and the scan
+     * can stop at the first match instead of visiting later pages.
+     * NOTE(review): serverp->mx is not taken here, unlike the insertion
+     * path in cm_NewServerRef() -- confirm callers serialize access.
+     */
+    cm_server_vols_t * tsrvp;
+    int i;
+
+    if (volID == 0)
+        return;
+
+    for (tsrvp = serverp->vols; tsrvp; tsrvp = tsrvp->nextp) {
+        for (i=0; i<NUM_SERVER_VOLS; i++) {
+            if (tsrvp->ids[i] == volID) {
+                tsrvp->ids[i] = 0;
+                return;
+            }
+        }
+    }
+}
+
+void cm_FreeServerList(cm_serverRef_t** list, afs_uint32 flags)
 {
     cm_serverRef_t  **current = list;
     cm_serverRef_t  **nextp = 0;
@@ -556,14 +714,21 @@ void cm_FreeServerList(cm_serverRef_t** list)
         nextp = &(*current)->next;
         if (--((*current)->refCount) == 0) {
             next = *nextp;
+
+            if ((*current)->volID)
+                cm_RemoveVolumeFromServer((*current)->server, (*current)->volID);
             cm_FreeServer((*current)->server);
             free(*current);
             *current = next;
         } else {
-           current = nextp;
+            if (flags & CM_FREESERVERLIST_DELETE) {
+                (*current)->status = srv_deleted;
+                if ((*current)->volID)
+                    cm_RemoveVolumeFromServer((*current)->server, (*current)->volID);
+            }
+            current = nextp;
         }
     }
   
     lock_ReleaseWrite(&cm_serverLock);
 }
-