2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include <afs/param.h>
24 #include <WINNT\syscfg.h>
25 #include <WINNT/afsreg.h>
/*
 * Module-wide server state.  cm_serverLock and cm_syscfgLock are the
 * read/write locks initialised in cm_InitServer().  cm_allServersp heads the
 * list of every cm_server_t, linked through allNextp.  The two counters are
 * decremented per server type in cm_FreeServer().
 */
30 osi_rwlock_t cm_serverLock;
31 osi_rwlock_t cm_syscfgLock;
33 cm_server_t *cm_allServersp;
34 afs_uint32 cm_numFileServers = 0;
35 afs_uint32 cm_numVldbServers = 0;
38 cm_ForceNewConnectionsAllServers(void)
42 lock_ObtainRead(&cm_serverLock);
43 for (tsp = cm_allServersp; tsp; tsp = tsp->allNextp) {
44 cm_GetServerNoLock(tsp);
45 lock_ReleaseRead(&cm_serverLock);
46 cm_ForceNewConnections(tsp);
47 lock_ObtainRead(&cm_serverLock);
48 cm_PutServerNoLock(tsp);
50 lock_ReleaseRead(&cm_serverLock);
54 * lock_ObtainMutex must be held prior to calling
/*
 * Recompute the rank of server tsp.
 *
 * Visible steps: choose the probe opcode by server type, refresh the
 * IP-based rank with cm_SetServerIPRank(), gather peer statistics
 * (rx_GetLocalPeers / rx_CopyPeerRPCStats) to derive a round-trip estimate,
 * then combine adminRank, ipRank and the performance term into newRank and
 * add a 4-bit random component.  When the result differs from activeRank by
 * more than 0xf, activeRank is replaced and tsp->mx is released while
 * cm_ChangeRankVolume() / cm_ChangeRankCellVLServer() run, then re-taken.
 *
 * NOTE(review): newRank is logged with %I64u, which suggests an unsigned
 * 64-bit type; if so, abs(newRank - tsp->activeRank) does not measure a
 * signed distance -- confirm the declaration.
 * NOTE(review): the division by numCalls needs a non-zero guard -- confirm
 * one is present.
 */
58 cm_RankServer(cm_server_t * tsp)
60 afs_int32 code = 0; /* start with "success" */
61 struct rx_debugPeer tpeer;
62 struct rx_peer * rxPeer;
65 afs_uint64 perfRank = 0;
69 int isDown = (tsp->flags & CM_SERVERFLAG_DOWN);
70 void *peerRpcStats = NULL;
71 afs_uint64 opcode = 0;
76 opcode = opcode_VL_ProbeServer;
80 opcode = opcode_RXAFS_GetCapabilities;
86 cm_SetServerIPRank(tsp);
92 * There are three potential components to the ranking:
93 * 1. Any administrative set preference whether it be
94 * via "fs setserverprefs", registry or dns.
96 * 2. Network subnet mask comparison.
98 * 3. Performance data.
100 * If there is an administrative rank, that is the
101 * the primary factor. If not the primary factor
102 * is the network ranking.
105 code = rx_GetLocalPeers(tsp->addr.sin_addr.s_addr, port, &tpeer);
107 peerRpcStats = rx_CopyPeerRPCStats(opcode, tsp->addr.sin_addr.s_addr, port);
108 if (peerRpcStats == NULL && tsp->type == CM_SERVER_FILE)
109 peerRpcStats = rx_CopyPeerRPCStats(opcode_RXAFS_GetTime, tsp->addr.sin_addr.s_addr, port);
111 afs_uint64 execTimeSum = _8THMSEC(RPCOpStat_ExecTimeSum(peerRpcStats));
112 afs_uint64 queueTimeSum = _8THMSEC(RPCOpStat_QTimeSum(peerRpcStats));
113 afs_uint64 numCalls = RPCOpStat_NumCalls(peerRpcStats);
116 rtt = (execTimeSum - queueTimeSum) / numCalls;
118 rx_ReleaseRPCStats(peerRpcStats);
121 if (rtt == 0 && tpeer.rtt) {
128 perfRank += (6000 * log_rtt / 5000) * 5000;
130 if (tsp->type == CM_SERVER_FILE) {
131 /* give an edge to servers with high congestion windows */
132 perfRank -= (tpeer.cwind - 1)* 15;
137 if (tsp->adminRank) {
138 newRank = tsp->adminRank * 0.8;
139 newRank += tsp->ipRank * 0.2;
141 newRank = tsp->ipRank;
145 newRank += perfRank * 0.1;
147 newRank += (rand() & 0x000f); /* randomize */
149 if (newRank > 0xFFFF)
150 osi_Log1(afsd_logp, "new server rank %I64u exceeds 0xFFFF", newRank);
153 * If the ranking changes by more than the randomization
154 * factor, update the server reference lists.
156 if (abs(newRank - tsp->activeRank) > 0xf) {
157 tsp->activeRank = newRank;
159 lock_ReleaseMutex(&tsp->mx);
163 * find volumes which might have RO copy
164 * on server and change the ordering of
167 cm_ChangeRankVolume(tsp);
170 /* set preferences for an existing vlserver */
171 cm_ChangeRankCellVLServer(tsp);
174 lock_ObtainMutex(&tsp->mx);
/*
 * Probe a single server and update its up/down state.
 *
 * If CM_SERVERFLAG_PINGING is already set, the caller sleeps on tsp with
 * osi_SleepM(), re-takes tsp->mx, clears PINGING once waitCount is zero and
 * wakes other waiters.  Otherwise it sets PINGING, obtains a connection for
 * cm_rootUserp with cm_ConnByServer() and issues VL_ProbeServer (VLDB
 * servers) or RXAFS_GetCapabilities (other servers) with a dead time of 10,
 * restoring ConnDeadtimeout afterwards.
 *
 * A result >= 0, RXGEN_OPCODE or RX_CALL_BUSY clears CM_SERVERFLAG_DOWN and
 * records the first capabilities word; any other result sets
 * CM_SERVERFLAG_DOWN, stamps downTime and calls cm_ForceNewConnections()
 * when the code is not VRESTARTING.  Volume ids stored in tsp->vols are
 * looked up with cm_FindVolumeByID() (tsp->mx released around the call) and
 * handed to cm_UpdateVolumeStatus().
 */
182 cm_PingServer(cm_server_t *tsp)
187 struct rx_connection * rxconnp;
188 Capabilities caps = {0, 0};
192 lock_ObtainMutex(&tsp->mx);
193 if (tsp->flags & CM_SERVERFLAG_PINGING) {
195 osi_SleepM((LONG_PTR)tsp, &tsp->mx);
196 lock_ObtainMutex(&tsp->mx);
198 if (tsp->waitCount == 0)
199 _InterlockedAnd(&tsp->flags, ~CM_SERVERFLAG_PINGING);
201 osi_Wakeup((LONG_PTR)tsp);
202 lock_ReleaseMutex(&tsp->mx);
205 _InterlockedOr(&tsp->flags, CM_SERVERFLAG_PINGING);
206 wasDown = tsp->flags & CM_SERVERFLAG_DOWN;
207 afs_inet_ntoa_r(tsp->addr.sin_addr.S_un.S_addr, hoststr);
208 lock_ReleaseMutex(&tsp->mx);
210 code = cm_ConnByServer(tsp, cm_rootUserp, FALSE, &connp);
212 /* now call the appropriate ping call. Drop the timeout if
213 * the server is known to be down, so that we don't waste a
214 * lot of time retiming out down servers.
217 osi_Log4(afsd_logp, "cm_PingServer server %s (%s) was %s with caps 0x%x",
218 osi_LogSaveString(afsd_logp, hoststr),
219 tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
220 wasDown ? "down" : "up",
223 rxconnp = cm_GetRxConn(connp);
225 rx_SetConnDeadTime(rxconnp, 10);
226 if (tsp->type == CM_SERVER_VLDB) {
227 code = VL_ProbeServer(rxconnp);
231 code = RXAFS_GetCapabilities(rxconnp, &caps);
234 rx_SetConnDeadTime(rxconnp, ConnDeadtimeout);
235 rx_PutConnection(rxconnp);
237 } /* got an unauthenticated connection to this server */
239 lock_ObtainMutex(&tsp->mx);
240 if (code >= 0 || code == RXGEN_OPCODE || code == RX_CALL_BUSY) {
241 /* mark server as up */
242 _InterlockedAnd(&tsp->flags, ~CM_SERVERFLAG_DOWN);
245 /* we currently handle 32-bits of capabilities */
246 if (code != RXGEN_OPCODE && code != RX_CALL_BUSY &&
247 caps.Capabilities_len > 0) {
248 tsp->capabilities = caps.Capabilities_val[0];
249 xdr_free((xdrproc_t) xdr_Capabilities, &caps);
250 caps.Capabilities_len = 0;
251 caps.Capabilities_val = 0;
253 tsp->capabilities = 0;
256 osi_Log3(afsd_logp, "cm_PingServer server %s (%s) is up with caps 0x%x",
257 osi_LogSaveString(afsd_logp, hoststr),
258 tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
261 /* Now update the volume status if necessary */
263 cm_server_vols_t * tsrvp;
267 for (tsrvp = tsp->vols; tsrvp; tsrvp = tsrvp->nextp) {
268 for (i=0; i<NUM_SERVER_VOLS; i++) {
269 if (tsrvp->ids[i] != 0) {
272 lock_ReleaseMutex(&tsp->mx);
273 code = cm_FindVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
274 &req, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp);
275 lock_ObtainMutex(&tsp->mx);
277 cm_UpdateVolumeStatus(volp, tsrvp->ids[i]);
285 /* mark server as down */
286 if (!(tsp->flags & CM_SERVERFLAG_DOWN)) {
287 _InterlockedOr(&tsp->flags, CM_SERVERFLAG_DOWN);
288 tsp->downTime = time(NULL);
290 if (code != VRESTARTING) {
291 lock_ReleaseMutex(&tsp->mx);
292 cm_ForceNewConnections(tsp);
293 lock_ObtainMutex(&tsp->mx);
295 osi_Log3(afsd_logp, "cm_PingServer server %s (%s) is down with caps 0x%x",
296 osi_LogSaveString(afsd_logp, hoststr),
297 tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
300 /* Now update the volume status if necessary */
302 cm_server_vols_t * tsrvp;
306 for (tsrvp = tsp->vols; tsrvp; tsrvp = tsrvp->nextp) {
307 for (i=0; i<NUM_SERVER_VOLS; i++) {
308 if (tsrvp->ids[i] != 0) {
311 lock_ReleaseMutex(&tsp->mx);
312 code = cm_FindVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
313 &req, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp);
314 lock_ObtainMutex(&tsp->mx);
316 cm_UpdateVolumeStatus(volp, tsrvp->ids[i]);
325 if (tsp->waitCount == 0)
326 _InterlockedAnd(&tsp->flags, ~CM_SERVERFLAG_PINGING);
328 osi_Wakeup((LONG_PTR)tsp);
329 lock_ReleaseMutex(&tsp->mx);
337 lock_ObtainRead(&cm_serverLock);
338 for (tsp = cm_allServersp; tsp; tsp = tsp->allNextp) {
339 cm_GetServerNoLock(tsp);
340 lock_ReleaseRead(&cm_serverLock);
342 lock_ObtainMutex(&tsp->mx);
344 /* if the server is not down, rank the server */
345 if(!(tsp->flags & CM_SERVERFLAG_DOWN))
348 lock_ReleaseMutex(&tsp->mx);
350 lock_ObtainRead(&cm_serverLock);
351 cm_PutServerNoLock(tsp);
353 lock_ReleaseRead(&cm_serverLock);
/*
 * Check servers one at a time.
 *
 * Iterates cm_allServersp, holding a reference on each server while
 * cm_serverLock is released.  A server is selected when its cell matches
 * cellp (or cellp is NULL), its up/down state is requested through
 * CM_FLAG_CHECKUPSERVERS / CM_FLAG_CHECKDOWNSERVERS, and its type passes the
 * CM_FLAG_CHECKVLDBSERVERS / CM_FLAG_CHECKFILESERVERS filters.
 * cm_GCConnections() is run for every server visited.
 */
356 static void cm_CheckServersSingular(afs_uint32 flags, cm_cell_t *cellp)
358 /* ping all file servers, up or down, with unauthenticated connection,
359 * to find out whether we have all our callbacks from the server still.
360 * Also, ping down VLDBs.
368 lock_ObtainRead(&cm_serverLock);
369 for (tsp = cm_allServersp; tsp; tsp = tsp->allNextp) {
370 cm_GetServerNoLock(tsp);
371 lock_ReleaseRead(&cm_serverLock);
373 /* now process the server */
374 lock_ObtainMutex(&tsp->mx);
377 isDown = tsp->flags & CM_SERVERFLAG_DOWN;
378 isFS = tsp->type == CM_SERVER_FILE;
379 isVLDB = tsp->type == CM_SERVER_VLDB;
381 /* only do the ping if the cell matches the requested cell, or we're
382 * matching all cells (cellp == NULL), and if we've requested to ping
383 * this type of {up, down} servers.
385 if ((cellp == NULL || cellp == tsp->cellp) &&
386 ((isDown && (flags & CM_FLAG_CHECKDOWNSERVERS)) ||
387 (!isDown && (flags & CM_FLAG_CHECKUPSERVERS))) &&
388 ((!(flags & CM_FLAG_CHECKVLDBSERVERS) ||
389 isVLDB && (flags & CM_FLAG_CHECKVLDBSERVERS)) &&
390 (!(flags & CM_FLAG_CHECKFILESERVERS) ||
391 isFS && (flags & CM_FLAG_CHECKFILESERVERS)))) {
393 } /* we're supposed to check this up/down server */
394 lock_ReleaseMutex(&tsp->mx);
396 /* at this point, we've adjusted the server state, so do the ping and
402 /* also, run the GC function for connections on all of the
403 * server's connections.
405 cm_GCConnections(tsp);
407 lock_ObtainRead(&cm_serverLock);
408 cm_PutServerNoLock(tsp);
410 lock_ReleaseRead(&cm_serverLock);
/*
 * Probe many servers at once using rx multi calls.
 *
 * Scratch arrays sized by max(cm_numFileServers, cm_numVldbServers) hold the
 * connections, timers, results and capabilities.  File servers are gathered
 * from cm_allServersp and probed with multi_RXAFS_GetCapabilities; VLDB
 * servers are gathered and probed with multi_VL_ProbeServer.  Servers that
 * already carry CM_SERVERFLAG_PINGING, or whose up/down state is not
 * requested by flags, are skipped.  Result handling follows the same shape
 * as cm_PingServer(): update CM_SERVERFLAG_DOWN, capabilities and downTime,
 * then clear PINGING and wake waiters.
 *
 * NOTE(review): none of the malloc() results is checked in the visible
 * lines and maxconns can be zero -- confirm a guard exists.
 * NOTE(review): the down path compares code, not results[i], against
 * VRESTARTING -- confirm this is intended.
 * NOTE(review): the volume-status loops index with i inside the outer
 * i < nconns loop -- confirm an inner declaration shadows it.
 */
413 static void cm_CheckServersMulti(afs_uint32 flags, cm_cell_t *cellp)
416 * The goal of this function is to probe simultaneously
417 * probe all of the up/down servers (vldb/file) as
418 * specified by flags in the minimum number of RPCs.
419 * Effectively that means use one multi_RXAFS_GetCapabilities()
420 * followed by possibly one multi_RXAFS_GetTime() and
421 * one multi_VL_ProbeServer().
423 * To make this work we must construct the list of vldb
424 * and file servers that are to be probed as well as the
425 * associated data structures.
428 int srvAddrCount = 0;
429 struct srvAddr **addrs = NULL;
430 cm_conn_t **conns = NULL;
431 struct rx_connection **rxconns = NULL;
433 afs_int32 i, nconns = 0, maxconns;
434 afs_int32 *conntimer, *results;
435 Capabilities *caps = NULL;
436 cm_server_t ** serversp, *tsp;
437 afs_uint32 isDown, wasDown;
443 maxconns = max(cm_numFileServers,cm_numVldbServers);
447 conns = (cm_conn_t **)malloc(maxconns * sizeof(cm_conn_t *));
448 rxconns = (struct rx_connection **)malloc(maxconns * sizeof(struct rx_connection *));
449 conntimer = (afs_int32 *)malloc(maxconns * sizeof (afs_int32));
450 results = (afs_int32 *)malloc(maxconns * sizeof (afs_int32));
451 serversp = (cm_server_t **)malloc(maxconns * sizeof(cm_server_t *));
452 caps = (Capabilities *)malloc(maxconns * sizeof(Capabilities));
454 memset(caps, 0, maxconns * sizeof(Capabilities));
456 if ((flags & CM_FLAG_CHECKFILESERVERS) ||
457 !(flags & (CM_FLAG_CHECKFILESERVERS|CM_FLAG_CHECKVLDBSERVERS)))
459 lock_ObtainRead(&cm_serverLock);
460 for (nconns=0, tsp = cm_allServersp; tsp && nconns < maxconns; tsp = tsp->allNextp) {
461 if (tsp->type != CM_SERVER_FILE ||
462 tsp->cellp == NULL || /* SetPref only */
463 cellp && cellp != tsp->cellp)
466 cm_GetServerNoLock(tsp);
467 lock_ReleaseRead(&cm_serverLock);
469 lock_ObtainMutex(&tsp->mx);
470 isDown = tsp->flags & CM_SERVERFLAG_DOWN;
472 if ((tsp->flags & CM_SERVERFLAG_PINGING) ||
473 !((isDown && (flags & CM_FLAG_CHECKDOWNSERVERS)) ||
474 (!isDown && (flags & CM_FLAG_CHECKUPSERVERS)))) {
475 lock_ReleaseMutex(&tsp->mx);
476 lock_ObtainRead(&cm_serverLock);
477 cm_PutServerNoLock(tsp);
481 _InterlockedOr(&tsp->flags, CM_SERVERFLAG_PINGING);
482 lock_ReleaseMutex(&tsp->mx);
484 serversp[nconns] = tsp;
485 code = cm_ConnByServer(tsp, cm_rootUserp, FALSE, &conns[nconns]);
487 lock_ObtainRead(&cm_serverLock);
488 cm_PutServerNoLock(tsp);
491 lock_ObtainRead(&cm_serverLock);
492 rxconns[nconns] = cm_GetRxConn(conns[nconns]);
493 if (conntimer[nconns] = (isDown ? 1 : 0))
494 rx_SetConnDeadTime(rxconns[nconns], 10);
498 lock_ReleaseRead(&cm_serverLock);
501 /* Perform the multi call */
503 multi_Rx(rxconns,nconns)
505 multi_RXAFS_GetCapabilities(&caps[multi_i]);
506 results[multi_i]=multi_error;
510 /* Process results of servers that support RXAFS_GetCapabilities */
511 for (i=0; i<nconns; i++) {
513 rx_SetConnDeadTime(rxconns[i], ConnDeadtimeout);
514 rx_PutConnection(rxconns[i]);
515 cm_PutConn(conns[i]);
518 cm_GCConnections(tsp);
520 lock_ObtainMutex(&tsp->mx);
521 wasDown = tsp->flags & CM_SERVERFLAG_DOWN;
523 if (results[i] >= 0 || results[i] == RXGEN_OPCODE ||
524 results[i] == RX_CALL_BUSY) {
525 /* mark server as up */
526 _InterlockedAnd(&tsp->flags, ~CM_SERVERFLAG_DOWN);
529 /* we currently handle 32-bits of capabilities */
530 if (results[i] != RXGEN_OPCODE && results[i] != RX_CALL_BUSY &&
531 caps[i].Capabilities_len > 0) {
532 tsp->capabilities = caps[i].Capabilities_val[0];
533 xdr_free((xdrproc_t) xdr_Capabilities, &caps[i]);
534 caps[i].Capabilities_len = 0;
535 caps[i].Capabilities_val = 0;
537 tsp->capabilities = 0;
540 afs_inet_ntoa_r(tsp->addr.sin_addr.S_un.S_addr, hoststr);
541 osi_Log3(afsd_logp, "cm_MultiPingServer server %s (%s) is up with caps 0x%x",
542 osi_LogSaveString(afsd_logp, hoststr),
543 tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
546 /* Now update the volume status if necessary */
548 cm_server_vols_t * tsrvp;
552 for (tsrvp = tsp->vols; tsrvp; tsrvp = tsrvp->nextp) {
553 for (i=0; i<NUM_SERVER_VOLS; i++) {
554 if (tsrvp->ids[i] != 0) {
557 lock_ReleaseMutex(&tsp->mx);
558 code = cm_FindVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
559 &req, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp);
560 lock_ObtainMutex(&tsp->mx);
562 cm_UpdateVolumeStatus(volp, tsrvp->ids[i]);
570 /* mark server as down */
571 if (!(tsp->flags & CM_SERVERFLAG_DOWN)) {
572 _InterlockedOr(&tsp->flags, CM_SERVERFLAG_DOWN);
573 tsp->downTime = time(NULL);
575 if (code != VRESTARTING) {
576 lock_ReleaseMutex(&tsp->mx);
577 cm_ForceNewConnections(tsp);
578 lock_ObtainMutex(&tsp->mx);
580 afs_inet_ntoa_r(tsp->addr.sin_addr.S_un.S_addr, hoststr);
581 osi_Log3(afsd_logp, "cm_MultiPingServer server %s (%s) is down with caps 0x%x",
582 osi_LogSaveString(afsd_logp, hoststr),
583 tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
586 /* Now update the volume status if necessary */
588 cm_server_vols_t * tsrvp;
592 for (tsrvp = tsp->vols; tsrvp; tsrvp = tsrvp->nextp) {
593 for (i=0; i<NUM_SERVER_VOLS; i++) {
594 if (tsrvp->ids[i] != 0) {
597 lock_ReleaseMutex(&tsp->mx);
598 code = cm_FindVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
599 &req, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp);
600 lock_ObtainMutex(&tsp->mx);
602 cm_UpdateVolumeStatus(volp, tsrvp->ids[i]);
611 if (tsp->waitCount == 0)
612 _InterlockedAnd(&tsp->flags, ~CM_SERVERFLAG_PINGING);
614 osi_Wakeup((LONG_PTR)tsp);
616 lock_ReleaseMutex(&tsp->mx);
622 if ((flags & CM_FLAG_CHECKVLDBSERVERS) ||
623 !(flags & (CM_FLAG_CHECKFILESERVERS|CM_FLAG_CHECKVLDBSERVERS)))
625 lock_ObtainRead(&cm_serverLock);
626 for (nconns=0, tsp = cm_allServersp; tsp && nconns < maxconns; tsp = tsp->allNextp) {
627 if (tsp->type != CM_SERVER_VLDB ||
628 tsp->cellp == NULL || /* SetPref only */
629 cellp && cellp != tsp->cellp)
632 cm_GetServerNoLock(tsp);
633 lock_ReleaseRead(&cm_serverLock);
635 lock_ObtainMutex(&tsp->mx);
636 isDown = tsp->flags & CM_SERVERFLAG_DOWN;
638 if ((tsp->flags & CM_SERVERFLAG_PINGING) ||
639 !((isDown && (flags & CM_FLAG_CHECKDOWNSERVERS)) ||
640 (!isDown && (flags & CM_FLAG_CHECKUPSERVERS)))) {
641 lock_ReleaseMutex(&tsp->mx);
642 lock_ObtainRead(&cm_serverLock);
643 cm_PutServerNoLock(tsp);
647 _InterlockedOr(&tsp->flags, CM_SERVERFLAG_PINGING);
648 lock_ReleaseMutex(&tsp->mx);
650 serversp[nconns] = tsp;
651 code = cm_ConnByServer(tsp, cm_rootUserp, FALSE, &conns[nconns]);
653 lock_ObtainRead(&cm_serverLock);
654 cm_PutServerNoLock(tsp);
657 lock_ObtainRead(&cm_serverLock);
658 rxconns[nconns] = cm_GetRxConn(conns[nconns]);
659 conntimer[nconns] = (isDown ? 1 : 0);
661 rx_SetConnDeadTime(rxconns[nconns], 10);
665 lock_ReleaseRead(&cm_serverLock);
668 /* Perform the multi call */
670 multi_Rx(rxconns,nconns)
672 multi_VL_ProbeServer();
673 results[multi_i]=multi_error;
677 /* Process results of servers that support VL_ProbeServer */
678 for (i=0; i<nconns; i++) {
680 rx_SetConnDeadTime(rxconns[i], ConnDeadtimeout);
681 rx_PutConnection(rxconns[i]);
682 cm_PutConn(conns[i]);
685 cm_GCConnections(tsp);
687 lock_ObtainMutex(&tsp->mx);
688 wasDown = tsp->flags & CM_SERVERFLAG_DOWN;
690 if (results[i] >= 0 || results[i] == RX_CALL_BUSY) {
691 /* mark server as up */
692 _InterlockedAnd(&tsp->flags, ~CM_SERVERFLAG_DOWN);
694 tsp->capabilities = 0;
696 afs_inet_ntoa_r(tsp->addr.sin_addr.S_un.S_addr, hoststr);
697 osi_Log3(afsd_logp, "cm_MultiPingServer server %s (%s) is up with caps 0x%x",
698 osi_LogSaveString(afsd_logp, hoststr),
699 tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
702 /* mark server as down */
703 if (!(tsp->flags & CM_SERVERFLAG_DOWN)) {
704 _InterlockedOr(&tsp->flags, CM_SERVERFLAG_DOWN);
705 tsp->downTime = time(NULL);
707 if (code != VRESTARTING) {
708 lock_ReleaseMutex(&tsp->mx);
709 cm_ForceNewConnections(tsp);
710 lock_ObtainMutex(&tsp->mx);
712 afs_inet_ntoa_r(tsp->addr.sin_addr.S_un.S_addr, hoststr);
713 osi_Log3(afsd_logp, "cm_MultiPingServer server %s (%s) is down with caps 0x%x",
714 osi_LogSaveString(afsd_logp, hoststr),
715 tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
719 if (tsp->waitCount == 0)
720 _InterlockedAnd(&tsp->flags, ~CM_SERVERFLAG_PINGING);
722 osi_Wakeup((LONG_PTR)tsp);
724 lock_ReleaseMutex(&tsp->mx);
/*
 * Entry point for server probing.
 *
 * Opens AFSREG_CLT_SVC_PARAM_SUBKEY under HKEY_LOCAL_MACHINE, reads the
 * "MultiCheckServers" value into multi and closes the key, then runs
 * cm_CheckServersMulti() or cm_CheckServersSingular() with the same flags
 * and cellp (presumably selected by multi -- confirm).
 */
738 void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
745 code = RegOpenKeyEx(HKEY_LOCAL_MACHINE, AFSREG_CLT_SVC_PARAM_SUBKEY,
746 0, KEY_QUERY_VALUE, &parmKey);
747 if (code == ERROR_SUCCESS) {
748 dummyLen = sizeof(multi);
749 code = RegQueryValueEx(parmKey, "MultiCheckServers", NULL, NULL,
750 (BYTE *) &multi, &dummyLen);
751 RegCloseKey (parmKey);
755 cm_CheckServersMulti(flags, cellp);
757 cm_CheckServersSingular(flags, cellp);
/*
 * One-time module initialisation: inside the osi_Once() guard, initialise
 * the cm_serverLock and cm_syscfgLock read/write locks with their lock
 * hierarchy levels.
 */
760 void cm_InitServer(void)
762 static osi_once_t once;
764 if (osi_Once(&once)) {
765 lock_InitializeRWLock(&cm_serverLock, "cm_serverLock", LOCK_HIERARCHY_SERVER_GLOBAL);
766 lock_InitializeRWLock(&cm_syscfgLock, "cm_syscfgLock", LOCK_HIERARCHY_SYSCFG_GLOBAL);
/* Client interface table, filled by syscfg_GetIFInfo() in cm_SetServerIPRank(). */
771 /* Protected by cm_syscfgLock (rw) */
772 int cm_noIPAddr; /* number of client network interfaces */
773 int cm_IPAddr[CM_MAXINTERFACE_ADDR]; /* client's IP address in host order */
774 int cm_SubnetMask[CM_MAXINTERFACE_ADDR];/* client's subnet mask in host order*/
775 int cm_NetMtu[CM_MAXINTERFACE_ADDR]; /* client's MTU sizes */
776 int cm_NetFlags[CM_MAXINTERFACE_ADDR]; /* network flags */
/* Starts at 1, so the first cm_SetServerIPRank() call loads the table. */
777 int cm_LanAdapterChangeDetected = 1;
/*
 * Record that the network adapters changed by setting
 * cm_LanAdapterChangeDetected under the cm_syscfgLock write lock.
 * cm_SetServerIPRank() tests this flag before reloading the interface table.
 */
779 void cm_SetLanAdapterChangeDetected(void)
781 lock_ObtainWrite(&cm_syscfgLock);
782 cm_LanAdapterChangeDetected = 1;
783 lock_ReleaseWrite(&cm_syscfgLock);
/*
 * Take a reference on serverp: refCount is incremented with
 * InterlockedIncrement() while cm_serverLock is held for read.
 */
786 void cm_GetServer(cm_server_t *serverp)
788 lock_ObtainRead(&cm_serverLock);
789 InterlockedIncrement(&serverp->refCount);
790 lock_ReleaseRead(&cm_serverLock);
/*
 * Take a reference on serverp without touching cm_serverLock.  Callers in
 * this file invoke it while already holding cm_serverLock.
 */
793 void cm_GetServerNoLock(cm_server_t *serverp)
795 InterlockedIncrement(&serverp->refCount);
/*
 * Release a reference on serverp: refCount is decremented with
 * InterlockedDecrement() under the cm_serverLock read lock, and the result
 * is asserted to be non-negative.  The server is not freed here.
 */
798 void cm_PutServer(cm_server_t *serverp)
801 lock_ObtainRead(&cm_serverLock);
802 refCount = InterlockedDecrement(&serverp->refCount);
803 osi_assertx(refCount >= 0, "cm_server_t refCount underflow");
804 lock_ReleaseRead(&cm_serverLock);
/*
 * Release a reference on serverp without touching cm_serverLock; asserts
 * that the decremented refCount is non-negative.
 */
807 void cm_PutServerNoLock(cm_server_t *serverp)
809 afs_int32 refCount = InterlockedDecrement(&serverp->refCount);
810 osi_assertx(refCount >= 0, "cm_server_t refCount underflow");
/*
 * Set or clear CM_SERVERFLAG_NO64BIT in serverp->flags while holding
 * serverp->mx (presumably set when no64bit is non-zero -- confirm).
 */
813 void cm_SetServerNo64Bit(cm_server_t * serverp, int no64bit)
815 lock_ObtainMutex(&serverp->mx);
817 _InterlockedOr(&serverp->flags, CM_SERVERFLAG_NO64BIT);
819 _InterlockedAnd(&serverp->flags, ~CM_SERVERFLAG_NO64BIT);
820 lock_ReleaseMutex(&serverp->mx);
/*
 * Set or clear CM_SERVERFLAG_NOINLINEBULK in serverp->flags while holding
 * serverp->mx (presumably set when no is non-zero -- confirm).
 */
823 void cm_SetServerNoInlineBulk(cm_server_t * serverp, int no)
825 lock_ObtainMutex(&serverp->mx);
827 _InterlockedOr(&serverp->flags, CM_SERVERFLAG_NOINLINEBULK);
829 _InterlockedAnd(&serverp->flags, ~CM_SERVERFLAG_NOINLINEBULK);
830 lock_ReleaseMutex(&serverp->mx);
/*
 * Compute serverp->ipRank from the client network interfaces.
 *
 * When cm_LanAdapterChangeDetected is set, the cm_syscfgLock read lock is
 * converted to a write lock, the flag is re-tested, the interface table is
 * reloaded with syscfg_GetIFInfo() and the flag is cleared.  The server
 * address is converted with ntohl() and compared against every interface:
 * the rank starts at CM_IPRANK_LOW and is lowered with min() to
 * CM_IPRANK_MED (same classful network), CM_IPRANK_HI (same subnet) or
 * CM_IPRANK_TOP (same address).
 *
 * NOTE(review): the value returned by syscfg_GetIFInfo() is stored in code
 * but not examined in the visible lines -- confirm failure handling.
 */
833 void cm_SetServerIPRank(cm_server_t * serverp)
835 unsigned long serverAddr; /* in host byte order */
836 unsigned long myAddr, myNet, mySubnet;/* in host byte order */
837 unsigned long netMask;
841 lock_ObtainRead(&cm_syscfgLock);
842 if (cm_LanAdapterChangeDetected) {
843 lock_ConvertRToW(&cm_syscfgLock);
844 if (cm_LanAdapterChangeDetected) {
845 /* get network related info */
846 cm_noIPAddr = CM_MAXINTERFACE_ADDR;
847 code = syscfg_GetIFInfo(&cm_noIPAddr,
848 cm_IPAddr, cm_SubnetMask,
849 cm_NetMtu, cm_NetFlags);
850 cm_LanAdapterChangeDetected = 0;
852 lock_ConvertWToR(&cm_syscfgLock);
855 serverAddr = ntohl(serverp->addr.sin_addr.s_addr);
856 serverp->ipRank = CM_IPRANK_LOW; /* default settings */
858 for ( i=0; i < cm_noIPAddr; i++)
860 /* loop through all the client's IP address and compare
861 ** each of them against the server's IP address */
863 myAddr = cm_IPAddr[i];
864 if ( IN_CLASSA(myAddr) )
865 netMask = IN_CLASSA_NET;
866 else if ( IN_CLASSB(myAddr) )
867 netMask = IN_CLASSB_NET;
868 else if ( IN_CLASSC(myAddr) )
869 netMask = IN_CLASSC_NET;
873 myNet = myAddr & netMask;
874 mySubnet = myAddr & cm_SubnetMask[i];
876 if ( (serverAddr & netMask) == myNet )
878 if ( (serverAddr & cm_SubnetMask[i]) == mySubnet)
880 if ( serverAddr == myAddr ) {
881 serverp->ipRank = min(serverp->ipRank,
882 CM_IPRANK_TOP);/* same machine */
884 serverp->ipRank = min(serverp->ipRank,
885 CM_IPRANK_HI); /* same subnet */
888 serverp->ipRank = min(serverp->ipRank, CM_IPRANK_MED); /* same net */
891 } /* end of for loop */
892 lock_ReleaseRead(&cm_syscfgLock);
/*
 * Find or create the cm_server_t for socketp and type.
 *
 * Under the cm_serverLock write lock an existing entry is looked up with
 * cm_FindServer(); an entry lacking CM_SERVERFLAG_UUID gains the flag when a
 * non-nil uuidp is supplied.  Otherwise a zeroed structure is allocated, its
 * mutex and address are initialised, cm_SetServerIPRank() is applied and it
 * is pushed on the head of cm_allServersp.  After the lock is released, and
 * unless flags contains CM_FLAG_NOPROBE, the server is marked down and
 * probed with cm_PingServer().
 */
895 cm_server_t *cm_NewServer(struct sockaddr_in *socketp, int type, cm_cell_t *cellp, afsUUID *uuidp, afs_uint32 flags) {
898 osi_assertx(socketp->sin_family == AF_INET, "unexpected socket family");
900 lock_ObtainWrite(&cm_serverLock); /* get server lock */
901 tsp = cm_FindServer(socketp, type, TRUE);
903 /* we might have found a server created by set server prefs */
904 if (uuidp && !afs_uuid_is_nil(uuidp) &&
905 !(tsp->flags & CM_SERVERFLAG_UUID))
908 _InterlockedOr(&tsp->flags, CM_SERVERFLAG_UUID);
910 lock_ReleaseWrite(&cm_serverLock);
914 tsp = malloc(sizeof(*tsp));
916 memset(tsp, 0, sizeof(*tsp));
919 if (uuidp && !afs_uuid_is_nil(uuidp)) {
921 _InterlockedOr(&tsp->flags, CM_SERVERFLAG_UUID);
924 lock_InitializeMutex(&tsp->mx, "cm_server_t mutex", LOCK_HIERARCHY_SERVER);
925 tsp->addr = *socketp;
927 cm_SetServerIPRank(tsp);
929 tsp->allNextp = cm_allServersp;
930 cm_allServersp = tsp;
941 lock_ReleaseWrite(&cm_serverLock); /* release server lock */
943 if (!(flags & CM_FLAG_NOPROBE) && tsp) {
944 _InterlockedOr(&tsp->flags, CM_SERVERFLAG_DOWN); /* assume down; ping will mark up if available */
945 cm_PingServer(tsp); /* Obtain Capabilities and check up/down state */
952 cm_FindServerByIP(afs_uint32 ipaddr, unsigned short port, int type, int locked)
957 lock_ObtainRead(&cm_serverLock);
959 for (tsp = cm_allServersp; tsp; tsp = tsp->allNextp) {
960 if (tsp->type == type &&
961 tsp->addr.sin_addr.S_un.S_addr == ipaddr &&
962 (tsp->addr.sin_port == port || tsp->addr.sin_port == 0))
966 /* bump ref count if we found the server */
968 cm_GetServerNoLock(tsp);
971 lock_ReleaseRead(&cm_serverLock);
977 cm_FindServerByUuid(afsUUID *serverUuid, int type, int locked)
982 lock_ObtainRead(&cm_serverLock);
984 for (tsp = cm_allServersp; tsp; tsp = tsp->allNextp) {
985 if (tsp->type == type && !afs_uuid_equal(&tsp->uuid, serverUuid))
989 /* bump ref count if we found the server */
991 cm_GetServerNoLock(tsp);
994 lock_ReleaseRead(&cm_serverLock);
999 /* find a server based on its properties: asserts that addrp is an AF_INET
 * address, then delegates to cm_FindServerByIP() with the address, port,
 * type and locked flag */
1000 cm_server_t *cm_FindServer(struct sockaddr_in *addrp, int type, int locked)
1002 osi_assertx(addrp->sin_family == AF_INET, "unexpected socket value");
1004 return cm_FindServerByIP(addrp->sin_addr.s_addr, addrp->sin_port, type, locked);
/*
 * Allocate one page of the per-server volume-id list with malloc() and
 * zero it with memset(), so that every ids[] slot starts out empty (0).
 */
1007 cm_server_vols_t *cm_NewServerVols(void) {
1008 cm_server_vols_t *tsvp;
1010 tsvp = malloc(sizeof(*tsvp));
1012 memset(tsvp, 0, sizeof(*tsvp));
1018 * cm_NewServerRef() returns with the allocated cm_serverRef_t
1019 * with a refCount of 1.
/*
 * Allocate a server reference for serverp tagged with volID.
 *
 * Takes a reference on serverp through cm_GetServer().  The volume id is
 * also recorded in serverp->vols under serverp->mx: the pages are scanned
 * for an existing entry or the first empty slot, and a new page from
 * cm_NewServerVols() is appended when no slot is free.
 *
 * NOTE(review): the malloc() result is dereferenced immediately in the
 * visible lines -- confirm allocation failure is handled.
 */
1021 cm_serverRef_t *cm_NewServerRef(cm_server_t *serverp, afs_uint32 volID)
1023 cm_serverRef_t *tsrp;
1024 cm_server_vols_t **tsrvpp = NULL;
1025 afs_uint32 *slotp = NULL;
1028 cm_GetServer(serverp);
1029 tsrp = malloc(sizeof(*tsrp));
1030 tsrp->server = serverp;
1031 tsrp->status = srv_not_busy;
1033 tsrp->volID = volID;
1036 /* if we have a non-zero volID, we need to add it to the list
1037 * of volumes maintained by the server. There are two phases:
1038 * (1) see if the volID is already in the list and (2) insert
1039 * it into the first empty slot if it is not.
1042 lock_ObtainMutex(&serverp->mx);
1044 tsrvpp = &serverp->vols;
1048 for (i=0; i<NUM_SERVER_VOLS; i++) {
1049 if ((*tsrvpp)->ids[i] == volID) {
1052 } else if (!slotp && (*tsrvpp)->ids[i] == 0) {
1053 slotp = &(*tsrvpp)->ids[i];
1060 tsrvpp = &(*tsrvpp)->nextp;
1067 /* if we didn't find an empty slot in a current
1068 * page we must need a new page */
1069 *tsrvpp = cm_NewServerVols();
1071 (*tsrvpp)->ids[0] = volID;
1075 lock_ReleaseMutex(&serverp->mx);
/*
 * Take a reference on the server reference tsrp with InterlockedIncrement().
 * The cm_serverLock read lock is obtained and released around the increment
 * (presumably only when locked is zero -- confirm).
 */
1081 void cm_GetServerRef(cm_serverRef_t *tsrp, int locked)
1086 lock_ObtainRead(&cm_serverLock);
1087 refCount = InterlockedIncrement(&tsrp->refCount);
1089 lock_ReleaseRead(&cm_serverLock);
/*
 * Release a reference on the server reference tsrp with
 * InterlockedDecrement() and assert that the result is non-negative.  The
 * cm_serverLock read lock is obtained and released around the decrement
 * (presumably only when locked is zero -- confirm).  cm_FreeServerList()
 * uses the afs_int32 return value as the remaining reference count.
 */
1092 afs_int32 cm_PutServerRef(cm_serverRef_t *tsrp, int locked)
1097 lock_ObtainRead(&cm_serverLock);
1098 refCount = InterlockedDecrement(&tsrp->refCount);
1099 osi_assertx(refCount >= 0, "cm_serverRef_t refCount underflow");
1102 lock_ReleaseRead(&cm_serverLock);
/*
 * Count the entries of the serversp list under the cm_serverLock read lock.
 * Each entry is tested for status srv_deleted (presumably such entries are
 * left out of the count -- confirm).
 */
1108 cm_ServerListSize(cm_serverRef_t* serversp)
1110 afs_uint32 count = 0;
1111 cm_serverRef_t *tsrp;
1113 lock_ObtainRead(&cm_serverLock);
1114 for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
1115 if (tsrp->status == srv_deleted)
1119 lock_ReleaseRead(&cm_serverLock);
/*
 * Fold the server pointers of the serversp list into a LONG_PTR value by
 * XOR-ing each tsrp->server into sum, under the cm_serverLock read lock.
 * Each entry is first tested for status srv_deleted.
 */
1123 LONG_PTR cm_ChecksumServerList(cm_serverRef_t *serversp)
1127 cm_serverRef_t *tsrp;
1129 lock_ObtainRead(&cm_serverLock);
1130 for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
1131 if (tsrp->status == srv_deleted)
1137 sum ^= (LONG_PTR) tsrp->server;
1140 lock_ReleaseRead(&cm_serverLock);
1145 ** Insert a server into the server list keeping the list sorted in
1146 ** ascending order of the server's activeRank.
1148 ** The refCount of the cm_serverRef_t is not altered.
/*
 * Insert element into *list ordered by server->activeRank.
 *
 * Runs under the cm_serverLock write lock.  Entries that are both
 * srv_deleted and unreferenced are handled first: their volume id is
 * detached with cm_RemoveVolumeFromServer() and cm_FreeServer() is called.
 * The list is then scanned for a live entry with the same type, address and
 * UUID state as element, which marks element as a duplicate.  Otherwise
 * element is linked in front of the first entry whose activeRank is greater
 * than its own.
 */
1150 void cm_InsertServerList(cm_serverRef_t** list, cm_serverRef_t* element)
1152 cm_serverRef_t *current;
1153 unsigned short rank;
1155 lock_ObtainWrite(&cm_serverLock);
1157 * Since we are grabbing the serverLock exclusively remove any
1158 * deleted serverRef objects with a zero refcount before
1159 * inserting the new item.
1162 cm_serverRef_t **currentp = list;
1163 cm_serverRef_t **nextp = NULL;
1164 cm_serverRef_t * next = NULL;
1166 for (currentp = list; *currentp; currentp = nextp)
1168 nextp = &(*currentp)->next;
1169 if ((*currentp)->refCount == 0 &&
1170 (*currentp)->status == srv_deleted) {
1173 if ((*currentp)->volID)
1174 cm_RemoveVolumeFromServer((*currentp)->server, (*currentp)->volID);
1175 cm_FreeServer((*currentp)->server);
1182 /* insertion into empty list or at the beginning of the list */
1185 element->next = NULL;
1191 * Now that deleted entries have been removed and we know that the
1192 * list was not empty, look for duplicates. If the element we are
1193 * inserting already exists, discard it.
1195 for ( current = *list; current; current = current->next)
1197 cm_server_t * server1 = current->server;
1198 cm_server_t * server2 = element->server;
1200 if (current->status == srv_deleted)
1203 if (server1->type != server2->type)
1206 if (server1->addr.sin_addr.s_addr != server2->addr.sin_addr.s_addr)
1209 if ((server1->flags & CM_SERVERFLAG_UUID) != (server2->flags & CM_SERVERFLAG_UUID))
1212 if ((server1->flags & CM_SERVERFLAG_UUID) &&
1213 !afs_uuid_equal(&server1->uuid, &server2->uuid))
1216 /* we must have a match, discard the new element */
1221 rank = element->server->activeRank;
1223 /* insertion at the beginning of the list */
1224 if ((*list)->server->activeRank > rank)
1226 element->next = *list;
1231 /* find appropriate place to insert */
1232 for ( current = *list; current->next; current = current->next)
1234 if ( current->next->server->activeRank > rank )
1237 element->next = current->next;
1238 current->next = element;
1241 lock_ReleaseWrite(&cm_serverLock);
1244 ** Re-sort the server list with the modified rank
1245 ** returns 0 if element was changed successfully.
1246 ** returns 1 if list remained unchanged.
/*
 * Re-position the reference to server inside *list after its rank changed.
 *
 * Under the cm_serverLock write lock the entry whose server pointer equals
 * server is unlinked.  The lock is then released and the entry is handed to
 * cm_InsertServerList(), which takes the lock itself.  Returns 1 when the
 * list holds fewer than two entries or server is not on it.
 */
1248 long cm_ChangeRankServer(cm_serverRef_t** list, cm_server_t* server)
1250 cm_serverRef_t **current;
1251 cm_serverRef_t *element;
1253 lock_ObtainWrite(&cm_serverLock);
1257 /* if there is max of one element in the list, nothing to sort */
1258 if ( (!*current) || !((*current)->next) ) {
1259 lock_ReleaseWrite(&cm_serverLock);
1260 return 1; /* list unchanged: return success */
1263 /* if the server is on the list, delete it from list */
1266 if ( (*current)->server == server)
1268 element = (*current);
1269 *current = element->next; /* delete it */
1272 current = & ( (*current)->next);
1274 lock_ReleaseWrite(&cm_serverLock);
1276 /* if this volume is not replicated on this server */
1278 return 1; /* server is not on list */
1280 /* re-insert deleted element into the list with modified rank*/
1281 cm_InsertServerList(list, element);
1286 ** If there is more than one server on the list and the first n servers on
1287 ** the list have the same rank (n > 1), then randomize among the first n servers.
/*
 * Shuffle the head of *list among equally ranked servers.
 *
 * Under the cm_serverLock write lock, entries are compared against the
 * activeRank of the first entry to count the servers sharing the lowest
 * rank.  One of them is chosen with rand() % count, unlinked and re-inserted
 * at the front of the list.  Lists with fewer than two entries are left
 * untouched.
 */
1289 void cm_RandomizeServer(cm_serverRef_t** list)
1292 cm_serverRef_t* tsrp, *lastTsrp;
1293 unsigned short lowestRank;
1295 lock_ObtainWrite(&cm_serverLock);
1298 /* an empty list or a list with only one element */
1299 if ( !tsrp || ! tsrp->next ) {
1300 lock_ReleaseWrite(&cm_serverLock);
1304 /* count the number of servers with the lowest rank */
1305 lowestRank = tsrp->server->activeRank;
1306 for ( count=1, tsrp=tsrp->next; tsrp; tsrp=tsrp->next)
1308 if ( tsrp->server->activeRank != lowestRank)
1314 /* if there is only one server with the lowest rank, we are done */
1316 lock_ReleaseWrite(&cm_serverLock);
1320 picked = rand() % count;
1322 lock_ReleaseWrite(&cm_serverLock);
1327 while (--picked >= 0)
1332 lastTsrp->next = tsrp->next; /* delete random element from list*/
1333 tsrp->next = *list; /* insert element at the beginning of list */
1335 lock_ReleaseWrite(&cm_serverLock);
/*
 * Drop one reference on serverp and tear it down once unreferenced.
 *
 * cm_serverLock is released around cm_GCConnections() and refCount is
 * re-checked after the lock is re-taken.  Teardown begins with a test that
 * CM_SERVERFLAG_PREF_SET is clear, then decrements the per-type server
 * counter, finalises the mutex, unlinks the server from cm_allServersp and
 * walks the volume-id pages.
 */
1338 /* call cm_FreeServer while holding a write lock on cm_serverLock */
1339 void cm_FreeServer(cm_server_t* serverp)
1341 cm_server_vols_t * tsrvp, *nextp;
1344 cm_PutServerNoLock(serverp);
1345 if (serverp->refCount == 0)
1348 * we need to check to ensure that all of the connections
1349 * for this server have a 0 refCount; otherwise, they will
1350 * not be garbage collected
1352 * must drop the cm_serverLock because cm_GCConnections
1353 * obtains the cm_connLock and that comes first in the
1356 lock_ReleaseWrite(&cm_serverLock);
1357 cm_GCConnections(serverp); /* connsp */
1358 lock_ObtainWrite(&cm_serverLock);
1363 * Once we have the cm_serverLock locked check to make
1364 * sure the refCount is still zero before removing the
1367 if (serverp->refCount == 0) {
1368 if (!(serverp->flags & CM_SERVERFLAG_PREF_SET)) {
1369 switch (serverp->type) {
1370 case CM_SERVER_VLDB:
1371 cm_numVldbServers--;
1373 case CM_SERVER_FILE:
1374 cm_numFileServers--;
1378 lock_FinalizeMutex(&serverp->mx);
1379 if ( cm_allServersp == serverp )
1380 cm_allServersp = serverp->allNextp;
1384 for(tsp = cm_allServersp; tsp->allNextp; tsp=tsp->allNextp) {
1385 if ( tsp->allNextp == serverp ) {
1386 tsp->allNextp = serverp->allNextp;
1392 /* free the volid list */
1393 for ( tsrvp = serverp->vols; tsrvp; tsrvp = nextp) {
1394 nextp = tsrvp->nextp;
/*
 * Search every page of serverp->vols for a slot holding volID (presumably
 * to clear that slot -- confirm).
 */
1403 /* Called with cm_serverLock write locked */
1404 void cm_RemoveVolumeFromServer(cm_server_t * serverp, afs_uint32 volID)
1406 cm_server_vols_t * tsrvp;
1412 for (tsrvp = serverp->vols; tsrvp; tsrvp = tsrvp->nextp) {
1413 for (i=0; i<NUM_SERVER_VOLS; i++) {
1414 if (tsrvp->ids[i] == volID) {
/*
 * Report whether a server list has no usable entries.
 *
 * Returns CM_ERROR_EMPTY for a NULL list.  Otherwise the list is scanned
 * under the cm_serverLock read lock, testing each entry for status
 * srv_deleted, and the result is CM_ERROR_EMPTY when allDeleted is true and
 * 0 when it is not.
 */
1422 int cm_IsServerListEmpty(cm_serverRef_t *serversp)
1424 cm_serverRef_t *tsrp;
1427 if (serversp == NULL)
1428 return CM_ERROR_EMPTY;
1430 lock_ObtainRead(&cm_serverLock);
1431 for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
1432 if (tsrp->status == srv_deleted)
1437 lock_ReleaseRead(&cm_serverLock);
1439 return ( allDeleted ? CM_ERROR_EMPTY : 0 );
/*
 * Release the caller's reference on the entries of *list.
 *
 * Under the cm_serverLock write lock, cm_PutServerRef() is called on the
 * current entry.  When the count reaches zero the volume id is detached
 * with cm_RemoveVolumeFromServer() and cm_FreeServer() is called.  When
 * flags contains CM_FREESERVERLIST_DELETE, the entry is marked srv_deleted
 * and its volume id is detached.
 */
1442 void cm_FreeServerList(cm_serverRef_t** list, afs_uint32 flags)
1444 cm_serverRef_t **current;
1445 cm_serverRef_t **nextp;
1446 cm_serverRef_t * next;
1449 lock_ObtainWrite(&cm_serverLock);
1459 nextp = &(*current)->next;
1460 refCount = cm_PutServerRef(*current, TRUE);
1461 if (refCount == 0) {
1464 if ((*current)->volID)
1465 cm_RemoveVolumeFromServer((*current)->server, (*current)->volID);
1466 cm_FreeServer((*current)->server);
1470 if (flags & CM_FREESERVERLIST_DELETE) {
1471 (*current)->status = srv_deleted;
1472 if ((*current)->volID)
1473 cm_RemoveVolumeFromServer((*current)->server, (*current)->volID);
1481 lock_ReleaseWrite(&cm_serverLock);
1484 /* dump all servers to a file.
1485 * cookie is used to identify this batch for easy parsing,
1486 * and is a string provided by the caller
/*
 * Write a textual dump of every server on cm_allServersp to outputFile,
 * one line per server, each prefixed with cookie.  A header line reports
 * cm_numFileServers and cm_numVldbServers and a trailer line marks the end.
 *
 * NOTE(review): outputFile is declared FILE * yet is passed to WriteFile(),
 * which takes a Win32 HANDLE -- confirm what callers actually pass.
 * NOTE(review): the ctime() result is indexed without a NULL check --
 * confirm downTime is always a value ctime() accepts.
 */
1488 int cm_DumpServers(FILE *outputFile, char *cookie, int lock)
1497 lock_ObtainRead(&cm_serverLock);
1500 "%s - dumping servers - cm_numFileServers=%d, cm_numVldbServers=%d\r\n",
1501 cookie, cm_numFileServers, cm_numVldbServers);
1502 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1504 for (tsp = cm_allServersp; tsp; tsp=tsp->allNextp)
1509 switch (tsp->type) {
1510 case CM_SERVER_VLDB:
1513 case CM_SERVER_FILE:
1520 afsUUID_to_string(&tsp->uuid, uuidstr, sizeof(uuidstr));
1521 afs_inet_ntoa_r(tsp->addr.sin_addr.s_addr, hoststr);
1522 down = ctime(&tsp->downTime);
1523 down[strlen(down)-1] = '\0';
1526 "%s - tsp=0x%p cell=%s addr=%-15s port=%u uuid=%s type=%s caps=0x%x "
1527 "flags=0x%x waitCount=%u rank=%u downTime=\"%s\" refCount=%u\r\n",
1528 cookie, tsp, tsp->cellp ? tsp->cellp->name : "", hoststr,
1529 ntohs(tsp->addr.sin_port), uuidstr, type,
1530 tsp->capabilities, tsp->flags, tsp->waitCount, tsp->activeRank,
1531 (tsp->flags & CM_SERVERFLAG_DOWN) ? down : "up",
1533 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1535 sprintf(output, "%s - Done dumping servers.\r\n", cookie);
1536 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1539 lock_ReleaseRead(&cm_serverLock);
1545 * Determine if two servers are in fact the same.
1547 * Returns 1 if they match, 0 if they do not
1549 int cm_ServerEqual(cm_server_t *srv1, cm_server_t *srv2)
1553 if (srv1 == NULL || srv2 == NULL)
1559 if (srv1->flags & CM_SERVERFLAG_UUID) {
1560 if (!(srv2->flags & CM_SERVERFLAG_UUID))
1563 /* Both support UUID */
1564 if (UuidEqual((UUID *)&srv1->uuid, (UUID *)&srv2->uuid, &status))
1567 if (srv1->flags & CM_SERVERFLAG_UUID)
1570 /* Neither support UUID so perform an addr/port comparison */
1571 if ( srv1->addr.sin_family == srv2->addr.sin_family &&
1572 srv1->addr.sin_addr.s_addr == srv2->addr.sin_addr.s_addr &&
1573 srv1->addr.sin_port == srv2->addr.sin_port )