windows-server-find-by-uuid-20090614
[openafs.git] / src / WINNT / afsd / cm_server.c
index 27bcfa4..68fa4b4 100644 (file)
 
 #include "afsd.h"
 #include <WINNT\syscfg.h>
+#include <WINNT/afsreg.h>
 #include <osi.h>
 #include <rx/rx.h>
 
 osi_rwlock_t cm_serverLock;
+osi_rwlock_t cm_syscfgLock;
 
 cm_server_t *cm_allServersp;
 afs_uint32   cm_numFileServers = 0;
@@ -33,13 +35,15 @@ cm_ForceNewConnectionsAllServers(void)
 {
     cm_server_t *tsp;
 
-    lock_ObtainWrite(&cm_serverLock);
+    lock_ObtainRead(&cm_serverLock);
     for (tsp = cm_allServersp; tsp; tsp = tsp->allNextp) {
         cm_GetServerNoLock(tsp);
+        lock_ReleaseRead(&cm_serverLock);
        cm_ForceNewConnections(tsp);
+        lock_ObtainRead(&cm_serverLock);
         cm_PutServerNoLock(tsp);
     }
-    lock_ReleaseWrite(&cm_serverLock);
+    lock_ReleaseRead(&cm_serverLock);
 }
 
 void 
@@ -113,7 +117,7 @@ cm_PingServer(cm_server_t *tsp)
        /* we currently handle 32-bits of capabilities */
        if (caps.Capabilities_len > 0) {
            tsp->capabilities = caps.Capabilities_val[0];
-           free(caps.Capabilities_val);
+           xdr_free(caps.Capabilities_val, caps.Capabilities_len);
            caps.Capabilities_len = 0;
            caps.Capabilities_val = 0;
        } else {
@@ -137,7 +141,7 @@ cm_PingServer(cm_server_t *tsp)
                         cm_InitReq(&req);
 
                         lock_ReleaseMutex(&tsp->mx);
-                        code = cm_GetVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
+                        code = cm_FindVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
                                                 &req, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp);
                         lock_ObtainMutex(&tsp->mx);
                         if (code == 0) {
@@ -154,9 +158,11 @@ cm_PingServer(cm_server_t *tsp)
             tsp->flags |= CM_SERVERFLAG_DOWN;
             tsp->downTime = time(NULL);
         }
-       if (code != VRESTARTING)
+       if (code != VRESTARTING) {
+            lock_ReleaseMutex(&tsp->mx);
            cm_ForceNewConnections(tsp);
-
+            lock_ObtainMutex(&tsp->mx);
+        }
        osi_Log3(afsd_logp, "cm_PingServer server %s (%s) is down with caps 0x%x",
                  osi_LogSaveString(afsd_logp, hoststr), 
                  tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
@@ -174,7 +180,7 @@ cm_PingServer(cm_server_t *tsp)
                         cm_InitReq(&req);
 
                         lock_ReleaseMutex(&tsp->mx);
-                        code = cm_GetVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
+                        code = cm_FindVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
                                                 &req, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp);
                         lock_ObtainMutex(&tsp->mx);
                         if (code == 0) {
@@ -194,9 +200,7 @@ cm_PingServer(cm_server_t *tsp)
     lock_ReleaseMutex(&tsp->mx);
 }
 
-#define MULTI_CHECKSERVERS 1
-#ifndef MULTI_CHECKSERVERS
-void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
+static void cm_CheckServersSingular(afs_uint32 flags, cm_cell_t *cellp)
 {
     /* ping all file servers, up or down, with unauthenticated connection,
      * to find out whether we have all our callbacks from the server still.
@@ -207,10 +211,10 @@ void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
     int isDown;
     int isFS;
 
-    lock_ObtainWrite(&cm_serverLock);
+    lock_ObtainRead(&cm_serverLock);
     for (tsp = cm_allServersp; tsp; tsp = tsp->allNextp) {
         cm_GetServerNoLock(tsp);
-        lock_ReleaseWrite(&cm_serverLock);
+        lock_ReleaseRead(&cm_serverLock);
 
         /* now process the server */
         lock_ObtainMutex(&tsp->mx);
@@ -245,13 +249,13 @@ void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
          */
         cm_GCConnections(tsp);
 
-        lock_ObtainWrite(&cm_serverLock);
+        lock_ObtainRead(&cm_serverLock);
         cm_PutServerNoLock(tsp);
     }
-    lock_ReleaseWrite(&cm_serverLock);
+    lock_ReleaseRead(&cm_serverLock);
 }       
-#else /* MULTI_CHECKSERVERS */
-void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
+
+static void cm_CheckServersMulti(afs_uint32 flags, cm_cell_t *cellp)
 {
     /* 
      * The goal of this function is to probe simultaneously 
@@ -271,7 +275,7 @@ void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
     cm_conn_t **conns = NULL;
     struct rx_connection **rxconns = NULL;
     cm_req_t req;
-    afs_int32 i, j, nconns = 0;
+    afs_int32 i, j, nconns = 0, maxconns;
     afs_int32 *conntimer, *results;
     Capabilities *caps = NULL;
     cm_server_t ** serversp, *tsp;
@@ -283,31 +287,32 @@ void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
     char hoststr[16];
 
     cm_InitReq(&req);
+    maxconns = max(cm_numFileServers,cm_numVldbServers);
+    if (maxconns == 0)
+        return;
 
-    j = max(cm_numFileServers,cm_numVldbServers);
-    conns = (cm_conn_t **)malloc(j * sizeof(cm_conn_t *));
-    rxconns = (struct rx_connection **)malloc(j * sizeof(struct rx_connection *));
-    conntimer = (afs_int32 *)malloc(j * sizeof (afs_int32));
-    deltas = (time_t *)malloc(j * sizeof (time_t));
-    results = (afs_int32 *)malloc(j * sizeof (afs_int32));
-    serversp = (cm_server_t **)malloc(j * sizeof(cm_server_t *));
-    caps = (Capabilities *)malloc(j * sizeof(Capabilities));
+    conns = (cm_conn_t **)malloc(maxconns * sizeof(cm_conn_t *));
+    rxconns = (struct rx_connection **)malloc(maxconns * sizeof(struct rx_connection *));
+    conntimer = (afs_int32 *)malloc(maxconns * sizeof (afs_int32));
+    deltas = (time_t *)malloc(maxconns * sizeof (time_t));
+    results = (afs_int32 *)malloc(maxconns * sizeof (afs_int32));
+    serversp = (cm_server_t **)malloc(maxconns * sizeof(cm_server_t *));
+    caps = (Capabilities *)malloc(maxconns * sizeof(Capabilities));
 
-    memset(caps, 0, j * sizeof(Capabilities));
+    memset(caps, 0, maxconns * sizeof(Capabilities));
 
     if ((flags & CM_FLAG_CHECKFILESERVERS) || 
         !(flags & (CM_FLAG_CHECKFILESERVERS|CM_FLAG_CHECKVLDBSERVERS)))
     {
-        lock_ObtainWrite(&cm_serverLock);
-        nconns = 0;
-        for (nconns=0, tsp = cm_allServersp; tsp; tsp = tsp->allNextp) {
+        lock_ObtainRead(&cm_serverLock);
+        for (nconns=0, tsp = cm_allServersp; tsp && nconns < maxconns; tsp = tsp->allNextp) {
             if (tsp->type != CM_SERVER_FILE || 
                 tsp->cellp == NULL ||           /* SetPref only */
                 cellp && cellp != tsp->cellp)
                 continue;
 
             cm_GetServerNoLock(tsp);
-            lock_ReleaseWrite(&cm_serverLock);
+            lock_ReleaseRead(&cm_serverLock);
 
             lock_ObtainMutex(&tsp->mx);
             isDown = tsp->flags & CM_SERVERFLAG_DOWN;
@@ -316,7 +321,8 @@ void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
                 !((isDown && (flags & CM_FLAG_CHECKDOWNSERVERS)) ||
                    (!isDown && (flags & CM_FLAG_CHECKUPSERVERS)))) {
                 lock_ReleaseMutex(&tsp->mx);
-                lock_ObtainWrite(&cm_serverLock);
+                lock_ObtainRead(&cm_serverLock);
+                cm_PutServerNoLock(tsp);
                 continue;
             }
 
@@ -326,27 +332,28 @@ void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
             serversp[nconns] = tsp;
             code = cm_ConnByServer(tsp, cm_rootUserp, &conns[nconns]);
             if (code) {
-                   lock_ObtainWrite(&cm_serverLock);
+                lock_ObtainRead(&cm_serverLock);
                 cm_PutServerNoLock(tsp);
                 continue;
             }
-            lock_ObtainWrite(&cm_serverLock);
-                       rxconns[nconns] = cm_GetRxConn(conns[nconns]);
+            lock_ObtainRead(&cm_serverLock);
+            rxconns[nconns] = cm_GetRxConn(conns[nconns]);
             if (conntimer[nconns] = (isDown ? 1 : 0))
                 rx_SetConnDeadTime(rxconns[nconns], 10);
 
             nconns++;
         }
-        lock_ReleaseWrite(&cm_serverLock);
-
-        /* Perform the multi call */
-        start = time(NULL);
-        multi_Rx(rxconns,nconns)
-        {
-            multi_RXAFS_GetCapabilities(&caps[multi_i]);
-            results[multi_i]=multi_error;
-        } multi_End;
-
+        lock_ReleaseRead(&cm_serverLock);
+
+        if (nconns) {
+            /* Perform the multi call */
+            start = time(NULL);
+            multi_Rx(rxconns,nconns)
+            {
+                multi_RXAFS_GetCapabilities(&caps[multi_i]);
+                results[multi_i]=multi_error;
+            } multi_End;
+        }
 
         /* Process results of servers that support RXAFS_GetCapabilities */
         for (i=0; i<nconns; i++) {
@@ -373,7 +380,7 @@ void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
                 /* we currently handle 32-bits of capabilities */
                 if (caps[i].Capabilities_len > 0) {
                     tsp->capabilities = caps[i].Capabilities_val[0];
-                    free(caps[i].Capabilities_val);
+                    xdr_free(caps[i].Capabilities_val, caps[i].Capabilities_len);
                     caps[i].Capabilities_len = 0;
                     caps[i].Capabilities_val = 0;
                 } else {
@@ -398,7 +405,7 @@ void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
                                 cm_InitReq(&req);
 
                                 lock_ReleaseMutex(&tsp->mx);
-                                code = cm_GetVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
+                                code = cm_FindVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
                                                          &req, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp);
                                 lock_ObtainMutex(&tsp->mx);
                                 if (code == 0) {
@@ -415,9 +422,11 @@ void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
                     tsp->flags |= CM_SERVERFLAG_DOWN;
                     tsp->downTime = time(NULL);
                 }
-                if (code != VRESTARTING)
+                if (code != VRESTARTING) {
+                    lock_ReleaseMutex(&tsp->mx);
                     cm_ForceNewConnections(tsp);
-
+                    lock_ObtainMutex(&tsp->mx);
+                }
                 afs_inet_ntoa_r(tsp->addr.sin_addr.S_un.S_addr, hoststr);
                 osi_Log3(afsd_logp, "cm_MultiPingServer server %s (%s) is down with caps 0x%x",
                           osi_LogSaveString(afsd_logp, hoststr), 
@@ -436,7 +445,7 @@ void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
                                 cm_InitReq(&req);
 
                                 lock_ReleaseMutex(&tsp->mx);
-                                code = cm_GetVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
+                                code = cm_FindVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
                                                          &req, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp);
                                 lock_ObtainMutex(&tsp->mx);
                                 if (code == 0) {
@@ -475,18 +484,19 @@ void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
         }
         nconns = j;
 
-        /* Perform the multi call */
-        start = time(NULL);
-        multi_Rx(rxconns,nconns)
-        {
-            secs = usecs = 0;
-            multi_RXAFS_GetTime(&secs, &usecs);
-            end = time(NULL);
-            results[multi_i]=multi_error;
-            if ((start == end) && !multi_error)
-                deltas[multi_i] = end - secs;
-        } multi_End;
-
+        if (nconns) {
+            /* Perform the multi call */
+            start = time(NULL);
+            multi_Rx(rxconns,nconns)
+            {
+                secs = usecs = 0;
+                multi_RXAFS_GetTime(&secs, &usecs);
+                end = time(NULL);
+                results[multi_i]=multi_error;
+                if ((start == end) && !multi_error)
+                    deltas[multi_i] = end - secs;
+            } multi_End;
+        }
 
         /* Process Results of servers that only support RXAFS_GetTime */
         for (i=0; i<nconns; i++) {
@@ -526,7 +536,7 @@ void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
                                 cm_InitReq(&req);
 
                                 lock_ReleaseMutex(&tsp->mx);
-                                code = cm_GetVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
+                                code = cm_FindVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
                                                          &req, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp);
                                 lock_ObtainMutex(&tsp->mx);
                                 if (code == 0) {
@@ -543,9 +553,11 @@ void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
                     tsp->flags |= CM_SERVERFLAG_DOWN;
                     tsp->downTime = time(NULL);
                 }
-                if (code != VRESTARTING)
+                if (code != VRESTARTING) {
+                    lock_ReleaseMutex(&tsp->mx);
                     cm_ForceNewConnections(tsp);
-
+                    lock_ObtainMutex(&tsp->mx);
+                }
                 afs_inet_ntoa_r(tsp->addr.sin_addr.S_un.S_addr, hoststr);
                 osi_Log3(afsd_logp, "cm_MultiPingServer server %s (%s) is down with caps 0x%x",
                           osi_LogSaveString(afsd_logp, hoststr), 
@@ -564,7 +576,7 @@ void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
                                 cm_InitReq(&req);
 
                                 lock_ReleaseMutex(&tsp->mx);
-                                code = cm_GetVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
+                                code = cm_FindVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
                                                          &req, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp);
                                 lock_ObtainMutex(&tsp->mx);
                                 if (code == 0) {
@@ -591,16 +603,15 @@ void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
     if ((flags & CM_FLAG_CHECKVLDBSERVERS) || 
         !(flags & (CM_FLAG_CHECKFILESERVERS|CM_FLAG_CHECKVLDBSERVERS)))
     {
-        lock_ObtainWrite(&cm_serverLock);
-        nconns = 0;
-        for (nconns=0, tsp = cm_allServersp; tsp; tsp = tsp->allNextp) {
+        lock_ObtainRead(&cm_serverLock);
+        for (nconns=0, tsp = cm_allServersp; tsp && nconns < maxconns; tsp = tsp->allNextp) {
             if (tsp->type != CM_SERVER_VLDB ||
                 tsp->cellp == NULL ||           /* SetPref only */
                 cellp && cellp != tsp->cellp)
                 continue;
 
             cm_GetServerNoLock(tsp);
-            lock_ReleaseWrite(&cm_serverLock);
+            lock_ReleaseRead(&cm_serverLock);
 
             lock_ObtainMutex(&tsp->mx);
             isDown = tsp->flags & CM_SERVERFLAG_DOWN;
@@ -609,7 +620,8 @@ void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
                 !((isDown && (flags & CM_FLAG_CHECKDOWNSERVERS)) ||
                    (!isDown && (flags & CM_FLAG_CHECKUPSERVERS)))) {
                 lock_ReleaseMutex(&tsp->mx);
-                lock_ObtainWrite(&cm_serverLock);
+                lock_ObtainRead(&cm_serverLock);
+                cm_PutServerNoLock(tsp);
                 continue;
             }
 
@@ -619,11 +631,11 @@ void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
             serversp[nconns] = tsp;
             code = cm_ConnByServer(tsp, cm_rootUserp, &conns[nconns]);
             if (code) {
-                   lock_ObtainWrite(&cm_serverLock);
+                lock_ObtainRead(&cm_serverLock);
                 cm_PutServerNoLock(tsp);
                 continue;
             }
-            lock_ObtainWrite(&cm_serverLock);
+            lock_ObtainRead(&cm_serverLock);
             rxconns[nconns] = cm_GetRxConn(conns[nconns]);
             conntimer[nconns] = (isDown ? 1 : 0);
             if (isDown)
@@ -631,18 +643,19 @@ void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
 
             nconns++;
         }
-        lock_ReleaseWrite(&cm_serverLock);
-
-        /* Perform the multi call */
-        start = time(NULL);
-        multi_Rx(rxconns,nconns)
-        {
-            multi_VL_ProbeServer();
-            results[multi_i]=multi_error;
-        } multi_End;
-
+        lock_ReleaseRead(&cm_serverLock);
+
+        if (nconns) {
+            /* Perform the multi call */
+            start = time(NULL);
+            multi_Rx(rxconns,nconns)
+            {
+                multi_VL_ProbeServer();
+                results[multi_i]=multi_error;
+            } multi_End;
+        }
 
-        /* Process results of servers that support RXAFS_GetCapabilities */
+        /* Process results of servers that support VL_ProbeServer */
         for (i=0; i<nconns; i++) {
             if (conntimer[i])
                 rx_SetConnDeadTime(rxconns[i], ConnDeadtimeout);
@@ -666,68 +679,22 @@ void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
                           osi_LogSaveString(afsd_logp, hoststr), 
                           tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
                           tsp->capabilities);
-
-                /* Now update the volume status if necessary */
-                if (wasDown) {
-                    cm_server_vols_t * tsrvp;
-                    cm_volume_t * volp;
-                    int i;
-
-                    for (tsrvp = tsp->vols; tsrvp; tsrvp = tsrvp->nextp) {
-                        for (i=0; i<NUM_SERVER_VOLS; i++) {
-                            if (tsrvp->ids[i] != 0) {
-                                cm_InitReq(&req);
-
-                                lock_ReleaseMutex(&tsp->mx);
-                                code = cm_GetVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
-                                                         &req, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp);
-                                lock_ObtainMutex(&tsp->mx);
-                                if (code == 0) {
-                                    cm_UpdateVolumeStatus(volp, tsrvp->ids[i]);
-                                    cm_PutVolume(volp);
-                                }
-                            }
-                        }
-                    }
-                }
             } else {
                 /* mark server as down */
                 if (!(tsp->flags & CM_SERVERFLAG_DOWN)) {
                     tsp->flags |= CM_SERVERFLAG_DOWN;
                     tsp->downTime = time(NULL);
                 }
-                if (code != VRESTARTING)
+                if (code != VRESTARTING) {
+                    lock_ReleaseMutex(&tsp->mx);
                     cm_ForceNewConnections(tsp);
-
+                    lock_ObtainMutex(&tsp->mx);
+                }
                 afs_inet_ntoa_r(tsp->addr.sin_addr.S_un.S_addr, hoststr);
                 osi_Log3(afsd_logp, "cm_MultiPingServer server %s (%s) is down with caps 0x%x",
                           osi_LogSaveString(afsd_logp, hoststr), 
                           tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
                           tsp->capabilities);
-
-                /* Now update the volume status if necessary */
-                if (!wasDown) {
-                    cm_server_vols_t * tsrvp;
-                    cm_volume_t * volp;
-                    int i;
-
-                    for (tsrvp = tsp->vols; tsrvp; tsrvp = tsrvp->nextp) {
-                        for (i=0; i<NUM_SERVER_VOLS; i++) {
-                            if (tsrvp->ids[i] != 0) {
-                                cm_InitReq(&req);
-
-                                lock_ReleaseMutex(&tsp->mx);
-                                code = cm_GetVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
-                                                         &req, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp);
-                                lock_ObtainMutex(&tsp->mx);
-                                if (code == 0) {
-                                    cm_UpdateVolumeStatus(volp, tsrvp->ids[i]);
-                                    cm_PutVolume(volp);
-                                }
-                            }
-                        }
-                    }
-                }
             }
 
             if (tsp->waitCount == 0)
@@ -746,42 +713,83 @@ void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
     free(conntimer);
     free(deltas);
     free(results);
+    free(serversp);
     free(caps);
 }
-#endif /* MULTI_CHECKSERVERS */
+/* Check servers via the multi or singular routine, selected by the "MultiCheckServers" registry value; flags and cellp are passed through unchanged. */
+void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
+{
+    DWORD code;
+    HKEY parmKey;
+    DWORD dummyLen;
+    DWORD multi = 1;    /* initial value; stays 1 unless the registry query below overwrites it */
+
+    code = RegOpenKeyEx(HKEY_LOCAL_MACHINE, AFSREG_CLT_SVC_PARAM_SUBKEY,
+                         0, KEY_QUERY_VALUE, &parmKey);
+    if (code == ERROR_SUCCESS) {
+        dummyLen = sizeof(multi);
+        code = RegQueryValueEx(parmKey, "MultiCheckServers", NULL, NULL,
+                                (BYTE *) &multi, &dummyLen);    /* the result stored in code is not examined */
+        RegCloseKey (parmKey);
+    }
+
+    if (multi)          /* non-zero selects cm_CheckServersMulti */
+        cm_CheckServersMulti(flags, cellp);
+    else
+        cm_CheckServersSingular(flags, cellp);
+}
 
 void cm_InitServer(void)
 {
     static osi_once_t once;
         
     if (osi_Once(&once)) {
-        lock_InitializeRWLock(&cm_serverLock, "cm_serverLock");
+        lock_InitializeRWLock(&cm_serverLock, "cm_serverLock", LOCK_HIERARCHY_SERVER_GLOBAL);   /* lock held while walking cm_allServersp */
+        lock_InitializeRWLock(&cm_syscfgLock, "cm_syscfgLock", LOCK_HIERARCHY_SYSCFG_GLOBAL);   /* lock for the cached interface tables (cm_IPAddr etc.) */
         osi_EndOnce(&once);
     }
 }
 
+/* Cached client interface info, filled in by syscfg_GetIFInfo(); protected by cm_syscfgLock (rw) */
+int cm_noIPAddr;         /* number of client network interfaces */
+int cm_IPAddr[CM_MAXINTERFACE_ADDR];    /* client's IP address in host order */
+int cm_SubnetMask[CM_MAXINTERFACE_ADDR];/* client's subnet mask in host order */
+int cm_NetMtu[CM_MAXINTERFACE_ADDR];    /* client's MTU sizes */
+int cm_NetFlags[CM_MAXINTERFACE_ADDR];  /* network flags */
+int cm_LanAdapterChangeDetected = 1;    /* non-zero: the tables above must be re-read; starts set so the first use loads them */
+/* Mark the cached interface info as stale; cm_SetServerPrefs() re-reads it the next time it sees the flag set. */
+void cm_SetLanAdapterChangeDetected(void)
+{
+    lock_ObtainWrite(&cm_syscfgLock);       /* the flag is written, so take the lock exclusively */
+    cm_LanAdapterChangeDetected = 1;
+    lock_ReleaseWrite(&cm_syscfgLock);
+}
+
 void cm_GetServer(cm_server_t *serverp)
 {
-    lock_ObtainWrite(&cm_serverLock);
-    serverp->refCount++;
-    lock_ReleaseWrite(&cm_serverLock);
+    lock_ObtainRead(&cm_serverLock);             /* shared lock only; the count itself is changed with an interlocked op */
+    InterlockedIncrement(&serverp->refCount);    /* take one reference on serverp */
+    lock_ReleaseRead(&cm_serverLock);
 }
 
 void cm_GetServerNoLock(cm_server_t *serverp)
 {
-    serverp->refCount++;
+    InterlockedIncrement(&serverp->refCount);    /* take one reference; this variant acquires no lock itself */
 }
 
 void cm_PutServer(cm_server_t *serverp)
 {
-    lock_ObtainWrite(&cm_serverLock);
-    osi_assertx(serverp->refCount-- > 0, "cm_server_t refCount 0");
-    lock_ReleaseWrite(&cm_serverLock);
+    afs_int32 refCount;                          /* count as it stands after the decrement */
+    lock_ObtainRead(&cm_serverLock);             /* shared lock only; the count itself is changed with an interlocked op */
+    refCount = InterlockedDecrement(&serverp->refCount);    /* drop one reference on serverp */
+    osi_assertx(refCount >= 0, "cm_server_t refCount underflow");   /* negative means more puts than gets */
+    lock_ReleaseRead(&cm_serverLock);
 }
 
 void cm_PutServerNoLock(cm_server_t *serverp)
 {
-    osi_assertx(serverp->refCount-- > 0, "cm_server_t refCount 0");
+    afs_int32 refCount = InterlockedDecrement(&serverp->refCount);  /* drop one reference; this variant acquires no lock itself */
+    osi_assertx(refCount >= 0, "cm_server_t refCount underflow");   /* negative means more puts than gets */
 }
 
 void cm_SetServerNo64Bit(cm_server_t * serverp, int no64bit)
@@ -810,19 +818,23 @@ void cm_SetServerPrefs(cm_server_t * serverp)
     unsigned long      myAddr, myNet, mySubnet;/* in host byte order */
     unsigned long      netMask;
     int                i;
-
-    int cm_noIPAddr;         /* number of client network interfaces */
-    int cm_IPAddr[CM_MAXINTERFACE_ADDR];    /* client's IP address in host order */
-    int cm_SubnetMask[CM_MAXINTERFACE_ADDR];/* client's subnet mask in host order*/
-    int cm_NetMtu[CM_MAXINTERFACE_ADDR];    /* client's MTU sizes */
-    int cm_NetFlags[CM_MAXINTERFACE_ADDR];  /* network flags */
     long code;
-
-    /* get network related info */
-    cm_noIPAddr = CM_MAXINTERFACE_ADDR;
-    code = syscfg_GetIFInfo(&cm_noIPAddr,
-                           cm_IPAddr, cm_SubnetMask,
-                           cm_NetMtu, cm_NetFlags);
+    int writeLock = 0;
+
+    lock_ObtainRead(&cm_syscfgLock);
+    if (cm_LanAdapterChangeDetected) {
+        lock_ConvertRToW(&cm_syscfgLock);
+        writeLock = 1;
+        if (cm_LanAdapterChangeDetected) {
+            /* get network related info */
+            cm_noIPAddr = CM_MAXINTERFACE_ADDR;
+            code = syscfg_GetIFInfo(&cm_noIPAddr,
+                                     cm_IPAddr, cm_SubnetMask,
+                                     cm_NetMtu, cm_NetFlags);
+            cm_LanAdapterChangeDetected = 0;
+        }
+        lock_ConvertWToR(&cm_syscfgLock);
+    }
 
     serverAddr = ntohl(serverp->addr.sin_addr.s_addr);
     serverp->ipRank  = CM_IPRANK_LOW;  /* default setings */
@@ -858,12 +870,14 @@ void cm_SetServerPrefs(cm_server_t * serverp)
            else serverp->ipRank = min(serverp->ipRank,CM_IPRANK_MED);
            /* same net */
        }       
-       /* random between 0..15*/
-       serverp->ipRank += min(serverp->ipRank, rand() % 0x000f);
     } /* and of for loop */
+
+    /* random between 0..15*/
+    serverp->ipRank += (rand() % 0x000f);
+    lock_ReleaseRead(&cm_syscfgLock);
 }
 
-cm_server_t *cm_NewServer(struct sockaddr_in *socketp, int type, cm_cell_t *cellp, afs_uint32 flags) {
+cm_server_t *cm_NewServer(struct sockaddr_in *socketp, int type, cm_cell_t *cellp, afsUUID *uuidp, afs_uint32 flags) {
     cm_server_t *tsp;
 
     osi_assertx(socketp->sin_family == AF_INET, "unexpected socket family");
@@ -873,8 +887,12 @@ cm_server_t *cm_NewServer(struct sockaddr_in *socketp, int type, cm_cell_t *cell
         memset(tsp, 0, sizeof(*tsp));
         tsp->type = type;
         tsp->cellp = cellp;
+        if (uuidp && !afs_uuid_is_nil(uuidp)) {
+            tsp->uuid = *uuidp;
+            tsp->flags |= CM_SERVERFLAG_UUID;
+        }
         tsp->refCount = 1;
-        lock_InitializeMutex(&tsp->mx, "cm_server_t mutex");
+        lock_InitializeMutex(&tsp->mx, "cm_server_t mutex", LOCK_HIERARCHY_SERVER);
         tsp->addr = *socketp;
 
         cm_SetServerPrefs(tsp); 
@@ -895,7 +913,7 @@ cm_server_t *cm_NewServer(struct sockaddr_in *socketp, int type, cm_cell_t *cell
         lock_ReleaseWrite(&cm_serverLock);     /* release server lock */
 
         if ( !(flags & CM_FLAG_NOPROBE) ) {
-            tsp->flags = CM_SERVERFLAG_DOWN;   /* assume down; ping will mark up if available */
+            tsp->flags |= CM_SERVERFLAG_DOWN;  /* assume down; ping will mark up if available */
             cm_PingServer(tsp);                        /* Obtain Capabilities and check up/down state */
         }
     }
@@ -913,6 +931,31 @@ cm_FindServerByIP(afs_uint32 ipaddr, int type)
             tsp->addr.sin_addr.S_un.S_addr == ipaddr)
             break;
     }
+
+    /* bump ref count if we found the server */
+    if (tsp) 
+        cm_GetServerNoLock(tsp);
+
+    lock_ReleaseRead(&cm_serverLock);
+
+    return tsp;
+}
+/* Return the first server of the given type whose UUID equals *serverUuid, with its refCount bumped, or NULL if none matches. */
+cm_server_t *
+cm_FindServerByUuid(afsUUID *serverUuid, int type)
+{
+    cm_server_t *tsp;
+
+    lock_ObtainRead(&cm_serverLock);
+    for (tsp = cm_allServersp; tsp; tsp = tsp->allNextp) {
+        if (tsp->type == type && afs_uuid_equal(&tsp->uuid, serverUuid))    /* stop on a match, not on a mismatch */
+            break;
+    }
+
+    /* bump ref count if we found the server */
+    if (tsp) 
+        cm_GetServerNoLock(tsp);
+
     lock_ReleaseRead(&cm_serverLock);
 
     return tsp;
@@ -925,7 +968,7 @@ cm_server_t *cm_FindServer(struct sockaddr_in *addrp, int type)
 
     osi_assertx(addrp->sin_family == AF_INET, "unexpected socket value");
         
-    lock_ObtainWrite(&cm_serverLock);
+    lock_ObtainRead(&cm_serverLock);
     for (tsp = cm_allServersp; tsp; tsp=tsp->allNextp) {
         if (tsp->type == type &&
             tsp->addr.sin_addr.s_addr == addrp->sin_addr.s_addr) 
@@ -937,7 +980,7 @@ cm_server_t *cm_FindServer(struct sockaddr_in *addrp, int type)
         cm_GetServerNoLock(tsp);
 
     /* drop big table lock */
-    lock_ReleaseWrite(&cm_serverLock);
+    lock_ReleaseRead(&cm_serverLock);
        
     /* return what we found */
     return tsp;
@@ -1019,8 +1062,10 @@ LONG_PTR cm_ChecksumServerList(cm_serverRef_t *serversp)
     int first = 1;
     cm_serverRef_t *tsrp;
 
-    lock_ObtainWrite(&cm_serverLock);
+    lock_ObtainRead(&cm_serverLock);
     for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
+        if (tsrp->status == srv_deleted)
+            continue;
         if (first)
             first = 0;
         else
@@ -1028,13 +1073,13 @@ LONG_PTR cm_ChecksumServerList(cm_serverRef_t *serversp)
         sum ^= (LONG_PTR) tsrp->server;
     }
 
-    lock_ReleaseWrite(&cm_serverLock);
+    lock_ReleaseRead(&cm_serverLock);
     return sum;
 }
 
 /*
 ** Insert a server into the server list keeping the list sorted in 
-** asending order of ipRank. 
+** ascending order of ipRank. 
 ** 
 ** The refCount of the cm_serverRef_t is increased
 */
@@ -1160,16 +1205,32 @@ void cm_RandomizeServer(cm_serverRef_t** list)
 void cm_FreeServer(cm_server_t* serverp)
 {
     cm_server_vols_t * tsrvp, *nextp;
+    int delserver = 0;
 
     cm_PutServerNoLock(serverp);
     if (serverp->refCount == 0)
     {
-        /* we need to check to ensure that all of the connections
+        /* 
+         * we need to check to ensure that all of the connections
          * for this server have a 0 refCount; otherwise, they will
          * not be garbage collected 
+         *
+         * must drop the cm_serverLock because cm_GCConnections
+         * obtains the cm_connLock and that comes first in the 
+         * lock hierarchy.  
          */
+        lock_ReleaseWrite(&cm_serverLock);
         cm_GCConnections(serverp);  /* connsp */
+        lock_ObtainWrite(&cm_serverLock);
+    }
 
+
+    /* 
+     * Once we have the cm_serverLock locked check to make
+     * sure the refCount is still zero before removing the 
+     * server entirely.
+     */
+    if (serverp->refCount == 0) {
        if (!(serverp->flags & CM_SERVERFLAG_PREF_SET)) {
             switch (serverp->type) {
             case CM_SERVER_VLDB:
@@ -1205,6 +1266,7 @@ void cm_FreeServer(cm_server_t* serverp)
     }
 }
 
+/* Called with cm_serverLock write locked */
 void cm_RemoveVolumeFromServer(cm_server_t * serverp, afs_uint32 volID)
 {
     cm_server_vols_t * tsrvp;
@@ -1229,6 +1291,9 @@ void cm_FreeServerList(cm_serverRef_t** list, afs_uint32 flags)
     cm_serverRef_t  **nextp = 0;
     cm_serverRef_t  * next = 0;
 
+       if (*list == NULL)
+               return;
+
     lock_ObtainWrite(&cm_serverLock);
 
     while (*current)
@@ -1254,3 +1319,62 @@ void cm_FreeServerList(cm_serverRef_t** list, afs_uint32 flags)
   
     lock_ReleaseWrite(&cm_serverLock);
 }
+
+/* Write one line per entry of cm_allServersp to outputFile (handed straight to
+ * WriteFile), framed by a header and a "Done" line.  cookie is a caller-supplied
+ * string that prefixes every line so the batch is easy to find when parsing.
+ * If lock is non-zero, cm_serverLock is read-held for the walk.  Returns 0. */
+int cm_DumpServers(FILE *outputFile, char *cookie, int lock)
+{
+    DWORD zilch;            /* receives WriteFile's byte count; the value is not used */
+    cm_server_t *tsp;
+    char output[1024];      /* NOTE(review): filled with unbounded sprintf; assumes cookie and cell names are short */
+    char uuidstr[128];
+    char hoststr[16];
+
+    if (lock)
+        lock_ObtainRead(&cm_serverLock);
+  
+    sprintf(output, "%s - dumping servers - cm_numFileServers=%d, cm_numVldbServers=%d\r\n", 
+            cookie, cm_numFileServers, cm_numVldbServers);
+    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
+  
+    for (tsp = cm_allServersp; tsp; tsp=tsp->allNextp)
+    {
+        char * type;
+        char * down;
+
+        switch (tsp->type) {
+        case CM_SERVER_VLDB:
+            type = "vldb";
+            break;
+        case CM_SERVER_FILE:
+            type = "file";
+            break;
+        default:
+            type = "unknown";
+        }
+
+        afsUUID_to_string(&tsp->uuid, uuidstr, sizeof(uuidstr));
+        afs_inet_ntoa_r(tsp->addr.sin_addr.s_addr, hoststr);
+        down = ctime(&tsp->downTime);           /* ctime may return NULL for a time it cannot represent */
+        if (down && *down)
+            down[strlen(down)-1] = '\0';        /* drop the trailing newline ctime appends */
+        sprintf(output, "%s - tsp=0x%p cell=%s addr=%-15s uuid=%s type=%s caps=0x%x flags=0x%x waitCount=%u rank=%u downTime=\"%s\" refCount=%u\r\n",
+                 cookie, tsp, tsp->cellp ? tsp->cellp->name : "", hoststr, uuidstr, type, 
+                 tsp->capabilities, tsp->flags, tsp->waitCount, tsp->ipRank, 
+                 (tsp->flags & CM_SERVERFLAG_DOWN) ?  (down ? down : "unknown") : "up",
+                 tsp->refCount);
+        WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
+    }
+    sprintf(output, "%s - Done dumping servers.\r\n", cookie);
+    WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
+  
+    if (lock)
+       lock_ReleaseRead(&cm_serverLock);
+
+    return (0);     
+}
+
+
+