windows-multi-checkservers-20080205
author Jeffrey Altman <jaltman@secure-endpoints.com>
Tue, 5 Feb 2008 17:37:21 +0000 (17:37 +0000)
committer Jeffrey Altman <jaltman@secure-endpoints.com>
Tue, 5 Feb 2008 17:37:21 +0000 (17:37 +0000)
LICENSE MIT

Instead of probing servers one at a time, use multi_rx versions of
RXAFS_GetCapabilities, RXAFS_GetTime and VL_ProbeServer to permit
simultaneous probing of servers in constant time regardless of the
number of servers.

src/WINNT/afsd/NTMakefile
src/WINNT/afsd/cm.h
src/WINNT/afsd/cm_server.c
src/WINNT/afsd/cm_server.h

index e45f16c..bdce860 100644 (file)
@@ -372,13 +372,13 @@ AFSD_SDKLIBS =\
 
 AFSD_EXELIBS =\
        $(DESTDIR)\lib\libosi.lib \
-       $(DESTDIR)\lib\afsrpc.lib \
-       $(DESTDIR)\lib\afsauthent.lib \
        $(DESTDIR)\lib\afs\mtafsvldb.lib \
        $(DESTDIR)\lib\afs\mtafsint.lib \
+       $(DESTDIR)\lib\afsrpc.lib \
+       $(DESTDIR)\lib\afs\afsutil.lib \
+       $(DESTDIR)\lib\afsauthent.lib \
        $(DESTDIR)\lib\libafsconf.lib \
        $(DESTDIR)\lib\afs\afsreg.lib \
-       $(DESTDIR)\lib\afs\afsutil.lib \
        $(DESTDIR)\lib\afspthread.lib \
         $(LANAHELPERLIB)
 
index 4f327b5..a1776da 100644 (file)
 #define AFS_PTHREAD_ENV 1
 #endif
 #include <rx/rx.h>
-
-/* from .xg file */
-/* FIXME: these were "long" but Windows NT wants "int" */
-int VL_GetEntryByID(struct rx_connection *, afs_int32, afs_int32, struct vldbentry *);
-int VL_GetEntryByNameO(struct rx_connection *, char *, struct vldbentry *);
-int VL_ProbeServer(struct rx_connection *);
-int VL_GetEntryBYIDN(struct rx_connection *, afs_int32, afs_int32, struct nvldbentry *);
-int VL_GetEntryByNameN(struct rx_connection *, char *, struct nvldbentry *);
-
-/* from .xg file */
-int StartRXAFS_FetchData (struct rx_call *,
-       struct AFSFid *Fid,
-       afs_int32 Pos, 
-       afs_int32 Length);
-int EndRXAFS_FetchData (struct rx_call *,
-       struct AFSFetchStatus *OutStatus, 
-       struct AFSCallBack *CallBack, 
-       struct AFSVolSync *Sync);
-
-int RXAFS_FetchACL(struct rx_connection *,
-       struct AFSFid *Fid, 
-       struct AFSOpaque *AccessList, 
-       struct AFSFetchStatus *OutStatus, 
-       struct AFSVolSync *Sync);
-
-int RXAFS_FetchStatus (struct rx_connection *,
-       struct AFSFid *Fid, 
-       struct AFSFetchStatus *OutStatus, 
-       struct AFSCallBack *CallBack, 
-       struct AFSVolSync *Sync);
-
-int StartRXAFS_StoreData (struct rx_call *,
-       struct AFSFid *Fid, 
-       struct AFSStoreStatus *InStatus, 
-       afs_uint32 Pos, 
-       afs_uint32 Length, 
-       afs_uint32 FileLength);
-
-int EndRXAFS_StoreData(struct rx_call *,
-       struct AFSFetchStatus *OutStatus, 
-       struct AFSVolSync *Sync);
-
-int StartRXAFS_FetchData64(struct rx_call *z_call,
-       struct AFSFid * Fid,
-       afs_int64 Pos,
-       afs_int64 Length);
-
-int EndRXAFS_FetchData64(struct rx_call *z_call,
-        struct AFSFetchStatus * OutStatus,
-       struct AFSCallBack * CallBack,
-       struct AFSVolSync * Sync);
-
-afs_int32 SRXAFS_FetchData64(struct rx_call *z_call,
-       struct AFSFid * Fid,
-       afs_int64 Pos,
-       afs_int64 Length,
-       struct AFSFetchStatus * OutStatus,
-       struct AFSCallBack * CallBack,
-       struct AFSVolSync * Sync);
-
-int StartRXAFS_StoreData64(struct rx_call *z_call,
-       struct AFSFid * Fid,
-       struct AFSStoreStatus * InStatus,
-       afs_uint64 Pos,
-       afs_uint64 Length,
-       afs_uint64 FileLength);
-
-int EndRXAFS_StoreData64(struct rx_call *z_call,
-       struct AFSFetchStatus * OutStatus,
-       struct AFSVolSync * Sync);
-
-afs_int32 SRXAFS_StoreData64(struct rx_call *z_call,
-       struct AFSFid * Fid,
-       struct AFSStoreStatus * InStatus,
-       afs_uint64 Pos,
-       afs_uint64 Length,
-       afs_uint64 FileLength,
-       struct AFSFetchStatus * OutStatus,
-       struct AFSVolSync * Sync);
-
-int RXAFS_StoreACL (struct rx_connection *,
-       struct AFSFid *Fid, 
-       struct AFSOpaque *AccessList,  
-       struct AFSFetchStatus *OutStatus, 
-       struct AFSVolSync *Sync);
-
-int RXAFS_StoreStatus(struct rx_connection *,
-       struct AFSFid *Fid, 
-       struct AFSStoreStatus *InStatus, 
-       struct AFSFetchStatus *OutStatus, 
-       struct AFSVolSync *Sync);
-
-int RXAFS_RemoveFile (struct rx_connection *,
-       struct AFSFid *DirFid, 
-       char *namep,
-       struct AFSFetchStatus *OutStatus, 
-       struct AFSVolSync *Sync);
-
-int RXAFS_CreateFile (struct rx_connection *,
-       struct AFSFid *DirFid, 
-       char *Name,
-       struct AFSStoreStatus *InStatus, 
-       struct AFSFid *OutFid, 
-       struct AFSFetchStatus *OutFidStatus, 
-       struct AFSFetchStatus *OutDirStatus, 
-       struct AFSCallBack *CallBack, 
-       struct AFSVolSync *Sync);
-
-int RXAFS_Rename (struct rx_connection *,
-       struct AFSFid *OldDirFid, 
-       char *OldName,
-       struct AFSFid *NewDirFid, 
-       char *NewName,
-       struct AFSFetchStatus *OutOldDirStatus, 
-       struct AFSFetchStatus *OutNewDirStatus, 
-       struct AFSVolSync *Sync);
-
-int RXAFS_Symlink (struct rx_connection *,
-       struct AFSFid *DirFid, 
-       char *name,
-       char *LinkContents,
-       struct AFSStoreStatus *InStatus,
-       struct AFSFid *OutFid, 
-       struct AFSFetchStatus *OutFidStatus, 
-       struct AFSFetchStatus *OutDirStatus, 
-       struct AFSVolSync *Sync);
-
-int RXAFS_Link (struct rx_connection *,
-       struct AFSFid *DirFid, 
-       char *Name,
-       struct AFSFid *ExistingFid, 
-       struct AFSFetchStatus *OutFidStatus, 
-       struct AFSFetchStatus *OutDirStatus, 
-       struct AFSVolSync *Sync);
-
-int RXAFS_MakeDir (struct rx_connection *,
-       struct AFSFid *DirFid, 
-       char *name,
-       struct AFSStoreStatus *InStatus, 
-       struct AFSFid *OutFid, 
-       struct AFSFetchStatus *OutFidStatus, 
-       struct AFSFetchStatus *OutDirStatus, 
-       struct AFSCallBack *CallBack, 
-       struct AFSVolSync *Sync);
-
-int RXAFS_RemoveDir (struct rx_connection *,
-       struct AFSFid *DirFid, 
-       char *Name,
-       struct AFSFetchStatus *OutDirStatus, 
-       struct AFSVolSync *Sync);
-
-int RXAFS_GetStatistics (struct rx_connection *,
-       struct ViceStatistics *Statistics);
-
-int RXAFS_GiveUpCallBacks (struct rx_connection *,
-       struct AFSCBFids *Fids_Array,
-       struct AFSCBs *CallBacks_Array);
-
-int RXAFS_GetVolumeInfo (struct rx_connection *,
-       char *VolumeName,
-       struct VolumeInfo *Volumeinfo);
-
-int RXAFS_GetVolumeStatus (struct rx_connection *,
-       afs_int32 Volumeid, 
-       struct AFSFetchVolumeStatus *Volumestatus, 
-       char **name,
-        char **offlineMsg,
-        char **motd);
-
-int RXAFS_SetVolumeStatus (struct rx_connection *,
-       afs_int32 Volumeid, 
-       struct AFSStoreVolumeStatus *Volumestatus,
-       char *name,
-       char *olm,
-       char *motd);
-
-int RXAFS_GetRootVolume (struct rx_connection *,
-       char **VolumeName);
-
-int RXAFS_CheckToken (struct rx_connection *,
-       afs_int32 ViceId,
-       struct AFSOpaque *token);
-
-int RXAFS_GetTime (struct rx_connection *,
-       afs_uint32 *Seconds, 
-       afs_uint32 *USeconds);
-
-int RXAFS_BulkStatus (struct rx_connection *,
-       struct AFSCBFids *FidsArray,
-       struct AFSBulkStats *StatArray,
-       struct AFSCBs *CBArray,
-       struct AFSVolSync *Sync);
-
-int RXAFS_SetLock (struct rx_connection *,
-       struct AFSFid *Fid, 
-       int Type, 
-       struct AFSVolSync *Sync);
-
-int RXAFS_ExtendLock (struct rx_connection *,
-       struct AFSFid *Fid, 
-       struct AFSVolSync *Sync);
-
-int RXAFS_ReleaseLock (struct rx_connection *,
-       struct AFSFid *Fid, 
-       struct AFSVolSync *Sync);
-
-/* This interface is to supported the AFS/DFS Protocol Translator */
-int RXAFS_Lookup (struct rx_connection *,
-       struct AFSFid *DirFid,
-       char *Name,
-       struct AFSFid *OutFid,
-       struct AFSFetchStatus *OutFidStatus,
-       struct AFSFetchStatus *OutDirStatus,
-       struct AFSCallBack *CallBack,
-       struct AFSVolSync *Sync);
+#include <afs/vldbint.h>
+#include <afs/afsint.h>
 
 #define CM_DEFAULT_CALLBACKPORT         7001
 
index 3ff4db4..7f23b78 100644 (file)
@@ -25,6 +25,8 @@
 osi_rwlock_t cm_serverLock;
 
 cm_server_t *cm_allServersp;
+afs_uint32   cm_numFileServers = 0;
+afs_uint32   cm_numVldbServers = 0;
 
 void
 cm_ForceNewConnectionsAllServers(void)
@@ -192,8 +194,9 @@ cm_PingServer(cm_server_t *tsp)
     lock_ReleaseMutex(&tsp->mx);
 }
 
-
-void cm_CheckServers(long flags, cm_cell_t *cellp)
+#define MULTI_CHECKSERVERS 1
+#ifndef MULTI_CHECKSERVERS
+void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
 {
     /* ping all file servers, up or down, with unauthenticated connection,
      * to find out whether we have all our callbacks from the server still.
@@ -247,6 +250,497 @@ void cm_CheckServers(long flags, cm_cell_t *cellp)
     }
     lock_ReleaseWrite(&cm_serverLock);
 }       
+#else /* MULTI_CHECKSERVERS */
+void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
+{
+    /* 
+     * The goal of this function is to simultaneously
+     * probe all of the up/down servers (vldb/file) as 
+     * specified by flags in the minimum number of RPCs.
+     * Effectively that means use one multi_RXAFS_GetCapabilities()
+     * followed by possibly one multi_RXAFS_GetTime() and 
+     * one multi_VL_ProbeServer().
+     *
+     * To make this work we must construct the list of vldb
+     * and file servers that are to be probed as well as the
+     * associated data structures.
+     */
+
+    int srvAddrCount = 0;
+    struct srvAddr **addrs = NULL;
+    cm_conn_t **conns = NULL;
+    int nconns = 0;
+    struct rx_connection **rxconns = NULL;
+    cm_req_t req;
+    afs_uint32 i, j;
+    afs_int32 *conntimer, *results;
+    Capabilities *caps = NULL;
+    cm_server_t ** serversp, *tsp;
+    afs_uint32 isDown, wasDown;
+    afs_uint32 code;
+    time_t start, end, *deltas;
+    afs_int32 secs;
+    afs_int32 usecs;
+    char hoststr[16];
+
+    cm_InitReq(&req);
+
+    j = max(cm_numFileServers,cm_numVldbServers);
+    conns = (cm_conn_t **)malloc(j * sizeof(cm_conn_t *));
+    rxconns = (struct rx_connection **)malloc(j * sizeof(struct rx_connection *));
+    conntimer = (afs_int32 *)malloc(j * sizeof (afs_int32));
+    deltas = (time_t *)malloc(j * sizeof (time_t));
+    results = (afs_int32 *)malloc(j * sizeof (afs_int32));
+    serversp = (cm_server_t **)malloc(j * sizeof(cm_server_t *));
+    caps = (Capabilities *)malloc(j * sizeof(Capabilities));
+
+    memset(caps, 0, j * sizeof(Capabilities));
+
+    if (!(flags & CM_FLAG_CHECKVLDBSERVERS)) {
+        lock_ObtainWrite(&cm_serverLock);
+        nconns = 0;
+        for (nconns=0, tsp = cm_allServersp; tsp; tsp = tsp->allNextp) {
+            if (tsp->type != CM_SERVER_FILE || 
+                tsp->cellp == NULL ||           /* SetPref only */
+                cellp && cellp != tsp->cellp)
+                continue;
+
+            cm_GetServerNoLock(tsp);
+            lock_ReleaseWrite(&cm_serverLock);
+
+            lock_ObtainMutex(&tsp->mx);
+            isDown = tsp->flags & CM_SERVERFLAG_DOWN;
+
+            if ((tsp->flags & CM_SERVERFLAG_PINGING) ||
+                !((isDown && (flags & CM_FLAG_CHECKDOWNSERVERS)) ||
+                   (!isDown && (flags & CM_FLAG_CHECKUPSERVERS)))) {
+                lock_ReleaseMutex(&tsp->mx);
+                lock_ObtainWrite(&cm_serverLock);
+                continue;
+            }
+
+            tsp->flags |= CM_SERVERFLAG_PINGING;
+            lock_ReleaseMutex(&tsp->mx);
+
+            serversp[nconns] = tsp;
+            code = cm_ConnByServer(tsp, cm_rootUserp, &conns[nconns]);
+            if (code) {
+                   lock_ObtainWrite(&cm_serverLock);
+                cm_PutServerNoLock(tsp);
+                continue;
+            }
+            lock_ObtainWrite(&cm_serverLock);
+                       rxconns[nconns] = cm_GetRxConn(conns[nconns]);
+            if (conntimer[nconns] = (isDown ? 1 : 0))
+                rx_SetConnDeadTime(rxconns[nconns], 10);
+
+            nconns++;
+        }
+        lock_ReleaseWrite(&cm_serverLock);
+
+        /* Perform the multi call */
+        start = time(NULL);
+        multi_Rx(rxconns,nconns)
+        {
+            multi_RXAFS_GetCapabilities(&caps[multi_i]);
+            results[multi_i]=multi_error;
+        } multi_End;
+
+
+        /* Process results of servers that support RXAFS_GetCapabilities */
+        for (i=0; i<nconns; i++) {
+            /* Leave the servers that did not support GetCapabilities alone */
+            if (results[i] == RXGEN_OPCODE)
+                continue;
+
+            if (conntimer[i])
+                rx_SetConnDeadTime(rxconns[i], ConnDeadtimeout);
+            rx_PutConnection(rxconns[i]);
+            cm_PutConn(conns[i]);
+
+            tsp = serversp[i];
+            cm_GCConnections(tsp);
+
+            lock_ObtainMutex(&tsp->mx);
+            wasDown = tsp->flags & CM_SERVERFLAG_DOWN;
+
+            if (results[i] >= 0)  {
+                /* mark server as up */
+                tsp->flags &= ~CM_SERVERFLAG_DOWN;
+                tsp->downTime = 0;
+
+                /* we currently handle 32-bits of capabilities */
+                if (caps[i].Capabilities_len > 0) {
+                    tsp->capabilities = caps[i].Capabilities_val[0];
+                    free(caps[i].Capabilities_val);
+                    caps[i].Capabilities_len = 0;
+                    caps[i].Capabilities_val = 0;
+                } else {
+                    tsp->capabilities = 0;
+                }
+
+                osi_Log3(afsd_logp, "cm_MultiPingServer server %s (%s) is up with caps 0x%x",
+                          osi_LogSaveString(afsd_logp, hoststr), 
+                          tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
+                          tsp->capabilities);
+
+                /* Now update the volume status if necessary */
+                if (wasDown) {
+                    cm_server_vols_t * tsrvp;
+                    cm_volume_t * volp;
+                    int i;
+
+                    for (tsrvp = tsp->vols; tsrvp; tsrvp = tsrvp->nextp) {
+                        for (i=0; i<NUM_SERVER_VOLS; i++) {
+                            if (tsrvp->ids[i] != 0) {
+                                cm_InitReq(&req);
+
+                                lock_ReleaseMutex(&tsp->mx);
+                                code = cm_GetVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
+                                                         &req, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp);
+                                lock_ObtainMutex(&tsp->mx);
+                                if (code == 0) {
+                                    cm_UpdateVolumeStatus(volp, tsrvp->ids[i]);
+                                    cm_PutVolume(volp);
+                                }
+                            }
+                        }
+                    }
+                }
+            } else {
+                /* mark server as down */
+                if (!(tsp->flags & CM_SERVERFLAG_DOWN)) {
+                    tsp->flags |= CM_SERVERFLAG_DOWN;
+                    tsp->downTime = time(NULL);
+                }
+                if (code != VRESTARTING)
+                    cm_ForceNewConnections(tsp);
+
+                osi_Log3(afsd_logp, "cm_MultiPingServer server %s (%s) is down with caps 0x%x",
+                          osi_LogSaveString(afsd_logp, hoststr), 
+                          tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
+                          tsp->capabilities);
+
+                /* Now update the volume status if necessary */
+                if (!wasDown) {
+                    cm_server_vols_t * tsrvp;
+                    cm_volume_t * volp;
+                    int i;
+
+                    for (tsrvp = tsp->vols; tsrvp; tsrvp = tsrvp->nextp) {
+                        for (i=0; i<NUM_SERVER_VOLS; i++) {
+                            if (tsrvp->ids[i] != 0) {
+                                cm_InitReq(&req);
+
+                                lock_ReleaseMutex(&tsp->mx);
+                                code = cm_GetVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
+                                                         &req, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp);
+                                lock_ObtainMutex(&tsp->mx);
+                                if (code == 0) {
+                                    cm_UpdateVolumeStatus(volp, tsrvp->ids[i]);
+                                    cm_PutVolume(volp);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            if (tsp->waitCount == 0)
+                tsp->flags &= ~CM_SERVERFLAG_PINGING;
+            else 
+                osi_Wakeup((LONG_PTR)tsp);
+            
+            lock_ReleaseMutex(&tsp->mx);
+
+            cm_PutServer(tsp);
+        }
+
+        /* 
+         * All responses are handled by now except the RXGEN_OPCODE ones (servers
+         * lacking RXAFS_GetCapabilities); those are re-probed with RXAFS_GetTime.
+         */
+        for ( i=0, j=0; i<nconns; i++) {
+            if (results[i] == RXGEN_OPCODE && i != j) {
+                conns[j] = conns[i];
+                rxconns[j] = rxconns[i];
+                serversp[j] = serversp[i];
+                j++;
+            }
+        }
+        nconns = j;
+
+        /* Perform the multi call */
+        start = time(NULL);
+        multi_Rx(rxconns,nconns)
+        {
+            secs = usecs = 0;
+            multi_RXAFS_GetTime(&secs, &usecs);
+            end = time(NULL);
+            results[multi_i]=multi_error;
+            if ((start == end) && !multi_error)
+                deltas[multi_i] = end - secs;
+        } multi_End;
+
+
+        /* Process Results of servers that only support RXAFS_GetTime */
+        for (i=0; i<nconns; i++) {
+            /* Restore ConnDeadtimeout on connections whose dead time was shortened */
+            if (conntimer[i])
+                rx_SetConnDeadTime(rxconns[i], ConnDeadtimeout);
+            rx_PutConnection(rxconns[i]);
+            cm_PutConn(conns[i]);
+
+            tsp = serversp[i];
+            cm_GCConnections(tsp);
+
+            lock_ObtainMutex(&tsp->mx);
+            wasDown = tsp->flags & CM_SERVERFLAG_DOWN;
+
+            if (results[i] >= 0)  {
+                /* mark server as up */
+                tsp->flags &= ~CM_SERVERFLAG_DOWN;
+                tsp->downTime = 0;
+                tsp->capabilities = 0;
+
+                osi_Log3(afsd_logp, "cm_MultiPingServer server %s (%s) is up with caps 0x%x",
+                          osi_LogSaveString(afsd_logp, hoststr), 
+                          tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
+                          tsp->capabilities);
+
+                /* Now update the volume status if necessary */
+                if (wasDown) {
+                    cm_server_vols_t * tsrvp;
+                    cm_volume_t * volp;
+                    int i;
+
+                    for (tsrvp = tsp->vols; tsrvp; tsrvp = tsrvp->nextp) {
+                        for (i=0; i<NUM_SERVER_VOLS; i++) {
+                            if (tsrvp->ids[i] != 0) {
+                                cm_InitReq(&req);
+
+                                lock_ReleaseMutex(&tsp->mx);
+                                code = cm_GetVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
+                                                         &req, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp);
+                                lock_ObtainMutex(&tsp->mx);
+                                if (code == 0) {
+                                    cm_UpdateVolumeStatus(volp, tsrvp->ids[i]);
+                                    cm_PutVolume(volp);
+                                }
+                            }
+                        }
+                    }
+                }
+            } else {
+                /* mark server as down */
+                if (!(tsp->flags & CM_SERVERFLAG_DOWN)) {
+                    tsp->flags |= CM_SERVERFLAG_DOWN;
+                    tsp->downTime = time(NULL);
+                }
+                if (code != VRESTARTING)
+                    cm_ForceNewConnections(tsp);
+
+                osi_Log3(afsd_logp, "cm_MultiPingServer server %s (%s) is down with caps 0x%x",
+                          osi_LogSaveString(afsd_logp, hoststr), 
+                          tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
+                          tsp->capabilities);
+
+                /* Now update the volume status if necessary */
+                if (!wasDown) {
+                    cm_server_vols_t * tsrvp;
+                    cm_volume_t * volp;
+                    int i;
+
+                    for (tsrvp = tsp->vols; tsrvp; tsrvp = tsrvp->nextp) {
+                        for (i=0; i<NUM_SERVER_VOLS; i++) {
+                            if (tsrvp->ids[i] != 0) {
+                                cm_InitReq(&req);
+
+                                lock_ReleaseMutex(&tsp->mx);
+                                code = cm_GetVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
+                                                         &req, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp);
+                                lock_ObtainMutex(&tsp->mx);
+                                if (code == 0) {
+                                    cm_UpdateVolumeStatus(volp, tsrvp->ids[i]);
+                                    cm_PutVolume(volp);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            if (tsp->waitCount == 0)
+                tsp->flags &= ~CM_SERVERFLAG_PINGING;
+            else 
+                osi_Wakeup((LONG_PTR)tsp);
+            
+            lock_ReleaseMutex(&tsp->mx);
+
+            cm_PutServer(tsp);
+        }
+    }
+
+    if (!(flags & CM_FLAG_CHECKFILESERVERS)) {
+        lock_ObtainWrite(&cm_serverLock);
+        nconns = 0;
+        for (nconns=0, tsp = cm_allServersp; tsp; tsp = tsp->allNextp) {
+            if (tsp->type != CM_SERVER_VLDB ||
+                tsp->cellp == NULL ||           /* SetPref only */
+                cellp && cellp != tsp->cellp)
+                continue;
+
+            cm_GetServerNoLock(tsp);
+            lock_ReleaseWrite(&cm_serverLock);
+
+            lock_ObtainMutex(&tsp->mx);
+            isDown = tsp->flags & CM_SERVERFLAG_DOWN;
+
+            if ((tsp->flags & CM_SERVERFLAG_PINGING) ||
+                !((isDown && (flags & CM_FLAG_CHECKDOWNSERVERS)) ||
+                   (!isDown && (flags & CM_FLAG_CHECKUPSERVERS)))) {
+                lock_ReleaseMutex(&tsp->mx);
+                lock_ObtainWrite(&cm_serverLock);
+                continue;
+            }
+
+            tsp->flags |= CM_SERVERFLAG_PINGING;
+            lock_ReleaseMutex(&tsp->mx);
+
+            serversp[nconns] = tsp;
+            code = cm_ConnByServer(tsp, cm_rootUserp, &conns[nconns]);
+            if (code) {
+                   lock_ObtainWrite(&cm_serverLock);
+                cm_PutServerNoLock(tsp);
+                continue;
+            }
+            lock_ObtainWrite(&cm_serverLock);
+            rxconns[nconns] = cm_GetRxConn(conns[nconns]);
+            if (conntimer[nconns] = (isDown ? 1 : 0))
+                rx_SetConnDeadTime(rxconns[nconns], 10);
+
+            nconns++;
+        }
+        lock_ReleaseWrite(&cm_serverLock);
+
+        /* Perform the multi call */
+        start = time(NULL);
+        multi_Rx(rxconns,nconns)
+        {
+            multi_VL_ProbeServer();
+            results[multi_i]=multi_error;
+        } multi_End;
+
+
+        /* Process results of the VL_ProbeServer calls */
+        for (i=0; i<nconns; i++) {
+            /* Skip servers whose probe returned RXGEN_OPCODE */
+            if (results[i] == RXGEN_OPCODE)
+                continue;
+
+            if (conntimer[i])
+                rx_SetConnDeadTime(rxconns[i], ConnDeadtimeout);
+            rx_PutConnection(rxconns[i]);
+            cm_PutConn(conns[i]);
+
+            tsp = serversp[i];
+            cm_GCConnections(tsp);
+
+            lock_ObtainMutex(&tsp->mx);
+            wasDown = tsp->flags & CM_SERVERFLAG_DOWN;
+
+            if (results[i] >= 0)  {
+                /* mark server as up */
+                tsp->flags &= ~CM_SERVERFLAG_DOWN;
+                tsp->downTime = 0;
+                tsp->capabilities = 0;
+
+                osi_Log3(afsd_logp, "cm_MultiPingServer server %s (%s) is up with caps 0x%x",
+                          osi_LogSaveString(afsd_logp, hoststr), 
+                          tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
+                          tsp->capabilities);
+
+                /* Now update the volume status if necessary */
+                if (wasDown) {
+                    cm_server_vols_t * tsrvp;
+                    cm_volume_t * volp;
+                    int i;
+
+                    for (tsrvp = tsp->vols; tsrvp; tsrvp = tsrvp->nextp) {
+                        for (i=0; i<NUM_SERVER_VOLS; i++) {
+                            if (tsrvp->ids[i] != 0) {
+                                cm_InitReq(&req);
+
+                                lock_ReleaseMutex(&tsp->mx);
+                                code = cm_GetVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
+                                                         &req, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp);
+                                lock_ObtainMutex(&tsp->mx);
+                                if (code == 0) {
+                                    cm_UpdateVolumeStatus(volp, tsrvp->ids[i]);
+                                    cm_PutVolume(volp);
+                                }
+                            }
+                        }
+                    }
+                }
+            } else {
+                /* mark server as down */
+                if (!(tsp->flags & CM_SERVERFLAG_DOWN)) {
+                    tsp->flags |= CM_SERVERFLAG_DOWN;
+                    tsp->downTime = time(NULL);
+                }
+                if (code != VRESTARTING)
+                    cm_ForceNewConnections(tsp);
+
+                osi_Log3(afsd_logp, "cm_MultiPingServer server %s (%s) is down with caps 0x%x",
+                          osi_LogSaveString(afsd_logp, hoststr), 
+                          tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
+                          tsp->capabilities);
+
+                /* Now update the volume status if necessary */
+                if (!wasDown) {
+                    cm_server_vols_t * tsrvp;
+                    cm_volume_t * volp;
+                    int i;
+
+                    for (tsrvp = tsp->vols; tsrvp; tsrvp = tsrvp->nextp) {
+                        for (i=0; i<NUM_SERVER_VOLS; i++) {
+                            if (tsrvp->ids[i] != 0) {
+                                cm_InitReq(&req);
+
+                                lock_ReleaseMutex(&tsp->mx);
+                                code = cm_GetVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
+                                                         &req, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp);
+                                lock_ObtainMutex(&tsp->mx);
+                                if (code == 0) {
+                                    cm_UpdateVolumeStatus(volp, tsrvp->ids[i]);
+                                    cm_PutVolume(volp);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            if (tsp->waitCount == 0)
+                tsp->flags &= ~CM_SERVERFLAG_PINGING;
+            else 
+                osi_Wakeup((LONG_PTR)tsp);
+            
+            lock_ReleaseMutex(&tsp->mx);
+
+            cm_PutServer(tsp);
+        }
+    }
+
+    free(conns);
+    free(rxconns);
+    free(conntimer);
+    free(deltas);
+    free(results);
+    free(caps);
+}
+#endif /* MULTI_CHECKSERVERS */
 
 void cm_InitServer(void)
 {
@@ -380,6 +874,16 @@ cm_server_t *cm_NewServer(struct sockaddr_in *socketp, int type, cm_cell_t *cell
         lock_ObtainWrite(&cm_serverLock);      /* get server lock */
         tsp->allNextp = cm_allServersp;
         cm_allServersp = tsp;
+
+        switch (type) {
+        case CM_SERVER_VLDB:
+            cm_numVldbServers++;
+            break;      
+        case CM_SERVER_FILE:
+            cm_numFileServers++;
+            break;
+        }
+
         lock_ReleaseWrite(&cm_serverLock);     /* release server lock */
 
         if ( !(flags & CM_FLAG_NOPROBE) ) {
@@ -659,6 +1163,15 @@ void cm_FreeServer(cm_server_t* serverp)
         cm_GCConnections(serverp);  /* connsp */
 
        if (!(serverp->flags & CM_SERVERFLAG_PREF_SET)) {
+            switch (serverp->type) {
+            case CM_SERVER_VLDB:
+                cm_numVldbServers--;
+                break;      
+            case CM_SERVER_FILE:
+                cm_numFileServers--;
+                break;
+            }
+
            lock_FinalizeMutex(&serverp->mx);
            if ( cm_allServersp == serverp )
                cm_allServersp = serverp->allNextp;
index 9a3cd7c..ab57b0d 100644 (file)
@@ -98,7 +98,7 @@ extern osi_rwlock_t cm_serverLock;
 
 extern void cm_InitServer(void);
 
-extern void cm_CheckServers(long flags, struct cm_cell *cellp);
+extern void cm_CheckServers(afs_uint32 flags, struct cm_cell *cellp);
 
 extern cm_server_t *cm_allServersp;