2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include <afs/param.h>
24 #include <WINNT\syscfg.h>
25 #include <WINNT/afsreg.h>
/* Global reader/writer locks.  cm_serverLock is taken around every walk of
 * the all-servers queue in this file; cm_syscfgLock is taken around the
 * client network-interface data. */
30 osi_rwlock_t cm_serverLock;
31 osi_rwlock_t cm_syscfgLock;
/* Head and tail of the queue of all known servers (entries are linked
 * through their allq member). */
33 cm_server_t *cm_serversAllFirstp = NULL;
34 cm_server_t *cm_serversAllLastp = NULL;
/* Counts of file and VLDB servers.  They size the probe arrays in
 * cm_CheckServersMulti and are decremented in cm_FreeServer. */
36 afs_uint32 cm_numFileServers = 0;
37 afs_uint32 cm_numVldbServers = 0;
/*
 * cm_ForceNewConnectionsAllServers: call cm_ForceNewConnections() on every
 * entry of the all-servers queue.
 *
 * The queue is walked with cm_serverLock read-held.  For each server a
 * reference is taken, the lock is dropped for the duration of the call, and
 * the lock is re-obtained before the reference is released and the walk
 * advances.
 *
 * NOTE(review): the return type, the local declaration and the loop
 * condition are not present in this listing.
 */
40 cm_ForceNewConnectionsAllServers(void)
44 lock_ObtainRead(&cm_serverLock);
45 for (tsp = cm_serversAllFirstp;
47 tsp = (cm_server_t *)osi_QNext(&tsp->allq)) {
/* Hold a reference so the entry is still referenced while the lock is dropped. */
48 cm_GetServerNoLock(tsp);
49 lock_ReleaseRead(&cm_serverLock);
50 cm_ForceNewConnections(tsp);
51 lock_ObtainRead(&cm_serverLock);
52 cm_PutServerNoLock(tsp);
54 lock_ReleaseRead(&cm_serverLock);
/*
 * cm_ServerClearRPCStats: clear the rx per-peer RPC statistics kept for the
 * probe RPCs of every server on the all-servers queue.
 *
 * The walk is done with cm_serverLock read-held throughout.  Each call passes
 * the opcode constant shifted right by 32 bits, the server address and a port.
 *
 * NOTE(review): the lines that compute `port` and select which calls apply
 * to which server type are not present in this listing.
 */
58 cm_ServerClearRPCStats(void) {
62 lock_ObtainRead(&cm_serverLock);
63 for (tsp = cm_serversAllFirstp;
65 tsp = (cm_server_t *)osi_QNext(&tsp->allq)) {
69 rx_ClearPeerRPCStats(opcode_VL_ProbeServer>>32, tsp->addr.sin_addr.s_addr, port);
73 rx_ClearPeerRPCStats(opcode_RXAFS_GetCapabilities>>32, tsp->addr.sin_addr.s_addr, port);
74 rx_ClearPeerRPCStats(opcode_RXAFS_GetTime>>32, tsp->addr.sin_addr.s_addr, port);
78 lock_ReleaseRead(&cm_serverLock);
/*
 * The server's mutex (tsp->mx, taken with lock_ObtainMutex) must be held
 * prior to calling cm_RankServer().
 */
/*
 * cm_RankServer: recompute the active rank of one server.
 *
 * Visible steps: refresh the IP-based rank (cm_SetServerIPRank), fetch rx
 * peer data and the per-peer RPC statistics for the probe opcode, derive a
 * performance term, blend it with the administrative and IP ranks, add a
 * small random component and, when the result differs from the current
 * activeRank by more than 0xf, store it and re-sort the dependent lists.
 *
 * Locking: the body releases tsp->mx around the re-sorting calls and
 * re-obtains it afterwards, so the caller must enter with tsp->mx held.
 *
 * NOTE(review): several lines of this function (some declarations, the
 * conditions that select the opcode, and the return statement) are not
 * present in this listing; the notes below cover only what is visible.
 */
86 cm_RankServer(cm_server_t * tsp)
88 afs_int32 code = 0; /* start with "success" */
89 struct rx_debugPeer tpeer;
90 struct rx_peer * rxPeer;
93 afs_uint64 perfRank = 0;
97 int isDown = (tsp->flags & CM_SERVERFLAG_DOWN);
98 void *peerRpcStats = NULL;
99 afs_uint64 opcode = 0;
/* Probe opcode whose statistics are consulted; the selecting condition is not visible here. */
104 opcode = opcode_VL_ProbeServer;
108 opcode = opcode_RXAFS_GetCapabilities;
114 cm_SetServerIPRank(tsp);
120 * There are three potential components to the ranking:
121 * 1. Any administrative set preference whether it be
122 * via "fs setserverprefs", registry or dns.
124 * 2. Network subnet mask comparison.
126 * 3. Performance data.
128 * If there is an administrative rank, that is the
129 * the primary factor. If not the primary factor
130 * is the network ranking.
/* Gather the rx peer data and the per-peer RPC statistics for the chosen opcode. */
133 code = rx_GetLocalPeers(tsp->addr.sin_addr.s_addr, port, &tpeer);
135 peerRpcStats = rx_CopyPeerRPCStats(opcode, tsp->addr.sin_addr.s_addr, port);
/* File servers fall back to the RXAFS_GetTime statistics when none were returned. */
136 if (peerRpcStats == NULL && tsp->type == CM_SERVER_FILE)
137 peerRpcStats = rx_CopyPeerRPCStats(opcode_RXAFS_GetTime, tsp->addr.sin_addr.s_addr, port);
139 afs_uint64 execTimeSum = _8THMSEC(RPCOpStat_ExecTimeSum(peerRpcStats));
140 afs_uint64 queueTimeSum = _8THMSEC(RPCOpStat_QTimeSum(peerRpcStats));
141 afs_uint64 numCalls = RPCOpStat_NumCalls(peerRpcStats);
/* Average per-call execution time with the queue time removed.
 * NOTE(review): a guard against numCalls == 0 is not visible in this listing -- confirm. */
144 rtt = (execTimeSum - queueTimeSum) / numCalls;
146 rx_ReleaseRPCStats(peerRpcStats);
149 if (rtt == 0 && tpeer.rtt) {
156 perfRank += (6000 * log_rtt / 5000) * 5000;
158 if (tsp->type == CM_SERVER_FILE) {
159 /* give an edge to servers with high congestion windows */
160 perfRank -= (tpeer.cwind - 1)* 15;
/* With an administrative rank: 80% admin rank plus 20% IP rank; the other
 * visible assignment uses the IP rank alone. */
165 if (tsp->adminRank) {
166 newRank = tsp->adminRank * 0.8;
167 newRank += tsp->ipRank * 0.2;
169 newRank = tsp->ipRank;
/* The performance term is weighted at 10%. */
173 newRank += perfRank * 0.1;
175 newRank += (rand() & 0x000f); /* randomize */
177 if (newRank > 0xFFFF)
178 osi_Log1(afsd_logp, "new server rank %I64u exceeds 0xFFFF", newRank);
181 * If the ranking changes by more than the randomization
182 * factor, update the server reference lists.
/* NOTE(review): abs() takes an int, while newRank is logged with %I64u above;
 * the difference is presumably 64-bit and may be truncated -- confirm the
 * declared types. */
184 if (abs(newRank - tsp->activeRank) > 0xf) {
185 tsp->activeRank = newRank;
/* Drop the server mutex while the volume / cell server lists are re-sorted. */
187 lock_ReleaseMutex(&tsp->mx);
191 * find volumes which might have RO copy
192 * on server and change the ordering of
195 cm_ChangeRankVolume(tsp);
198 /* set preferences for an existing vlserver */
199 cm_ChangeRankCellVLServer(tsp);
202 lock_ObtainMutex(&tsp->mx);
/*
 * cm_MarkServerDown: record that a server is down and propagate the change.
 *
 * tsp      server to mark; the body releases and re-obtains tsp->mx, so the
 *          caller must hold tsp->mx on entry.
 * code     error that triggered the call.  Unless it is VRESTARTING, the
 *          server's connections are forced to be recreated (with tsp->mx
 *          dropped around that call).
 * wasDown  down state observed by the caller before the probe.
 *          NOTE(review): the line that uses wasDown is not present in this
 *          listing.
 *
 * When the DOWN flag was not already set it is set and downTime is stamped.
 * For file servers, each non-zero volume id recorded on the server is looked
 * up (tsp->mx dropped around the lookup) and its status is refreshed via
 * cm_UpdateVolumeStatus.
 */
210 cm_MarkServerDown(cm_server_t *tsp, afs_int32 code, int wasDown)
213 /* mark server as down */
214 if (!(tsp->flags & CM_SERVERFLAG_DOWN)) {
215 _InterlockedOr(&tsp->flags, CM_SERVERFLAG_DOWN);
216 tsp->downTime = time(NULL);
218 if (code != VRESTARTING) {
219 lock_ReleaseMutex(&tsp->mx);
220 cm_ForceNewConnections(tsp);
221 lock_ObtainMutex(&tsp->mx);
223 /* Now update the volume status if necessary */
225 if (tsp->type == CM_SERVER_FILE) {
226 cm_server_vols_t * tsrvp;
231 for (tsrvp = tsp->vols; tsrvp; tsrvp = tsrvp->nextp) {
232 for (i=0; i<NUM_SERVER_VOLS; i++) {
233 if (tsrvp->ids[i] != 0) {
236 lock_ReleaseMutex(&tsp->mx);
237 code = cm_FindVolumeByID(tsp->cellp, tsrvp->ids[i],
239 CM_GETVOL_FLAG_NO_LRU_UPDATE,
241 lock_ObtainMutex(&tsp->mx);
243 cm_UpdateVolumeStatus(volp, tsrvp->ids[i]);
/*
 * cm_PingServer: probe one server and update its up/down state.
 *
 * Concurrent probes of the same server are serialized with pingCount: a
 * caller that finds another probe in progress sleeps on the server
 * (osi_SleepM), then undoes its pingCount increment, passes the wakeup on to
 * any remaining waiter and releases the mutex.
 *
 * The probing caller obtains a connection for cm_rootUserp, issues
 * VL_ProbeServer to VLDB servers and RXAFS_GetCapabilities on the other
 * visible path, and then, with tsp->mx held:
 *   - on code >= 0 or RXGEN_OPCODE clears CM_SERVERFLAG_DOWN, stores the first
 *     capabilities word (or 0) and refreshes the status of the volumes
 *     recorded on the server;
 *   - otherwise calls cm_MarkServerDown().
 * Finally pingCount is decremented and waiters are woken.
 *
 * This function is also handed to pthread_create() in cm_NewServer.
 *
 * NOTE(review): the return type, several declarations and some branch
 * conditions are not present in this listing.
 */
255 cm_PingServer(cm_server_t *tsp)
260 struct rx_connection * rxconnp;
261 Capabilities caps = {0, 0};
265 lock_ObtainMutex(&tsp->mx);
/* More than one pinger: wait for the probe already in progress. */
266 if (InterlockedIncrement(&tsp->pingCount) > 1) {
268 osi_SleepM((LONG_PTR)tsp, &tsp->mx);
269 lock_ObtainMutex(&tsp->mx);
270 InterlockedDecrement(&tsp->pingCount);
271 if (--tsp->waitCount > 0)
272 osi_Wakeup((LONG_PTR)tsp);
273 lock_ReleaseMutex(&tsp->mx);
/* Snapshot the prior state and the printable address before dropping the mutex. */
276 wasDown = tsp->flags & CM_SERVERFLAG_DOWN;
277 afs_inet_ntoa_r(tsp->addr.sin_addr.S_un.S_addr, hoststr);
278 lock_ReleaseMutex(&tsp->mx);
281 code = cm_ConnByServer(tsp, cm_rootUserp, FALSE, &connp);
283 code = RX_CALL_DEAD; /* No network */
285 /* now call the appropriate ping call. Drop the timeout if
286 * the server is known to be down, so that we don't waste a
287 * lot of time retiming out down servers.
290 osi_Log4(afsd_logp, "cm_PingServer server %s (%s) was %s with caps 0x%x",
291 osi_LogSaveString(afsd_logp, hoststr),
292 tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
293 wasDown ? "down" : "up",
296 rxconnp = cm_GetRxConn(connp);
298 rx_SetConnHardDeadTime(rxconnp, 10);
299 if (tsp->type == CM_SERVER_VLDB) {
300 code = VL_ProbeServer(rxconnp);
304 code = RXAFS_GetCapabilities(rxconnp, &caps);
307 rx_SetConnHardDeadTime(rxconnp, HardDeadtimeout);
308 rx_PutConnection(rxconnp);
310 } /* got an unauthenticated connection to this server */
312 lock_ObtainMutex(&tsp->mx);
313 if (code >= 0 || code == RXGEN_OPCODE) {
314 /* mark server as up */
315 _InterlockedAnd(&tsp->flags, ~CM_SERVERFLAG_DOWN);
318 /* we currently handle 32-bits of capabilities */
319 if (code != RXGEN_OPCODE && caps.Capabilities_len > 0) {
320 tsp->capabilities = caps.Capabilities_val[0];
321 xdr_free((xdrproc_t) xdr_Capabilities, &caps);
322 caps.Capabilities_len = 0;
323 caps.Capabilities_val = 0;
325 tsp->capabilities = 0;
328 osi_Log3(afsd_logp, "cm_PingServer server %s (%s) is up with caps 0x%x",
329 osi_LogSaveString(afsd_logp, hoststr),
330 tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
333 /* Now update the volume status if necessary */
335 cm_server_vols_t * tsrvp;
339 for (tsrvp = tsp->vols; tsrvp; tsrvp = tsrvp->nextp) {
340 for (i=0; i<NUM_SERVER_VOLS; i++) {
341 if (tsrvp->ids[i] != 0) {
344 lock_ReleaseMutex(&tsp->mx);
345 code = cm_FindVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
346 &req, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp);
347 lock_ObtainMutex(&tsp->mx);
349 cm_UpdateVolumeStatus(volp, tsrvp->ids[i]);
358 cm_MarkServerDown(tsp, code, wasDown);
360 osi_Log3(afsd_logp, "cm_PingServer server %s (%s) is down with caps 0x%x",
361 osi_LogSaveString(afsd_logp, hoststr),
362 tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
/* Probe finished: drop our ping count and wake any thread sleeping on this server. */
366 InterlockedDecrement(&tsp->pingCount);
367 if (tsp->waitCount > 0)
368 osi_Wakeup((LONG_PTR)tsp);
369 lock_ReleaseMutex(&tsp->mx);
/*
 * Ranking pass over the all-servers queue.
 *
 * NOTE(review): the function header, the loop condition and the statement
 * guarded by the "not down" test are not present in this listing
 * (presumably this is cm_RankUpServers calling cm_RankServer -- confirm).
 *
 * For each server a reference is taken and cm_serverLock is dropped; the
 * server mutex is then held while the server's DOWN flag is tested.
 * cm_serverLock is re-obtained before the reference is released.
 */
377 lock_ObtainRead(&cm_serverLock);
378 for (tsp = cm_serversAllFirstp;
380 tsp = (cm_server_t *)osi_QNext(&tsp->allq)) {
381 cm_GetServerNoLock(tsp);
382 lock_ReleaseRead(&cm_serverLock);
384 lock_ObtainMutex(&tsp->mx);
386 /* if the server is not down, rank the server */
387 if(!(tsp->flags & CM_SERVERFLAG_DOWN))
390 lock_ReleaseMutex(&tsp->mx);
392 lock_ObtainRead(&cm_serverLock);
393 cm_PutServerNoLock(tsp);
395 lock_ReleaseRead(&cm_serverLock);
/*
 * cm_CheckServersSingular: probe servers one at a time.
 *
 * flags  combination of CM_FLAG_CHECKDOWNSERVERS / CM_FLAG_CHECKUPSERVERS
 *        (which states to probe) and CM_FLAG_CHECKVLDBSERVERS /
 *        CM_FLAG_CHECKFILESERVERS (which server types to probe; when a type
 *        flag is set only servers of that type match).
 * cellp  restrict the check to this cell, or NULL for all cells.
 *
 * The all-servers queue is walked with a reference held on the current entry
 * and cm_serverLock dropped while the entry is processed.  The selection is
 * made under the server mutex; afterwards cm_GCConnections() is run on the
 * server.
 *
 * NOTE(review): the statements executed when a server is selected, and the
 * ping call itself, are not present in this listing.
 */
398 static void cm_CheckServersSingular(afs_uint32 flags, cm_cell_t *cellp)
400 /* ping all file servers, up or down, with unauthenticated connection,
401 * to find out whether we have all our callbacks from the server still.
402 * Also, ping down VLDBs.
410 lock_ObtainRead(&cm_serverLock);
411 for (tsp = cm_serversAllFirstp;
413 tsp = (cm_server_t *)osi_QNext(&tsp->allq)) {
414 cm_GetServerNoLock(tsp);
415 lock_ReleaseRead(&cm_serverLock);
417 /* now process the server */
418 lock_ObtainMutex(&tsp->mx);
421 isDown = tsp->flags & CM_SERVERFLAG_DOWN;
422 isFS = tsp->type == CM_SERVER_FILE;
423 isVLDB = tsp->type == CM_SERVER_VLDB;
425 /* only do the ping if the cell matches the requested cell, or we're
426 * matching all cells (cellp == NULL), and if we've requested to ping
427 * this type of {up, down} servers.
429 if ((cellp == NULL || cellp == tsp->cellp) &&
430 ((isDown && (flags & CM_FLAG_CHECKDOWNSERVERS)) ||
431 (!isDown && (flags & CM_FLAG_CHECKUPSERVERS))) &&
432 ((!(flags & CM_FLAG_CHECKVLDBSERVERS) ||
433 isVLDB && (flags & CM_FLAG_CHECKVLDBSERVERS)) &&
434 (!(flags & CM_FLAG_CHECKFILESERVERS) ||
435 isFS && (flags & CM_FLAG_CHECKFILESERVERS)))) {
437 } /* we're supposed to check this up/down server */
438 lock_ReleaseMutex(&tsp->mx);
440 /* at this point, we've adjusted the server state, so do the ping and
446 /* also, run the GC function for connections on all of the
447 * server's connections.
449 cm_GCConnections(tsp);
451 lock_ObtainRead(&cm_serverLock);
452 cm_PutServerNoLock(tsp);
454 lock_ReleaseRead(&cm_serverLock);
/*
 * cm_CheckServersMulti: probe many servers at once with rx multi calls.
 *
 * flags  same meaning as for cm_CheckServersSingular.  The file-server pass
 *        runs when CM_FLAG_CHECKFILESERVERS is set or when neither type flag
 *        is set; likewise the VLDB pass for CM_FLAG_CHECKVLDBSERVERS.
 * cellp  restrict the check to this cell, or NULL for all cells.
 *
 * Each pass has three phases:
 *   1. Collect: walk the all-servers queue and pick servers of the right
 *      type and cell that are not already being pinged and whose up/down
 *      state matches flags.  pingCount is bumped, a connection is obtained,
 *      and a server whose connection attempt returns RX_CALL_DEAD is marked
 *      down.
 *   2. Call: issue one multi_Rx call over the collected connections
 *      (multi_RXAFS_GetCapabilities or multi_VL_ProbeServer), recording
 *      multi_error per connection in results[].
 *   3. Process: release the connections, run cm_GCConnections, then mark each
 *      server up or down under its mutex, decrement pingCount and wake
 *      waiters.
 *
 * NOTE(review): a number of lines (declarations, `continue` statements,
 * closing braces, the frees of the work arrays) are not present in this
 * listing.
 */
457 static void cm_CheckServersMulti(afs_uint32 flags, cm_cell_t *cellp)
460 * The goal of this function is to probe simultaneously
461 * probe all of the up/down servers (vldb/file) as
462 * specified by flags in the minimum number of RPCs.
463 * Effectively that means use one multi_RXAFS_GetCapabilities()
464 * followed by possibly one multi_RXAFS_GetTime() and
465 * one multi_VL_ProbeServer().
467 * To make this work we must construct the list of vldb
468 * and file servers that are to be probed as well as the
469 * associated data structures.
472 int srvAddrCount = 0;
473 struct srvAddr **addrs = NULL;
474 cm_conn_t **conns = NULL;
475 struct rx_connection **rxconns = NULL;
477 afs_int32 i, nconns = 0, maxconns;
478 afs_int32 *conntimer, *results;
479 Capabilities *caps = NULL;
480 cm_server_t ** serversp, *tsp;
481 afs_uint32 isDown, wasDown;
/* The work arrays are sized for the larger of the two server populations,
 * because the same arrays are reused by both passes. */
487 maxconns = max(cm_numFileServers,cm_numVldbServers);
/* NOTE(review): no NULL checks on these allocations are visible before the
 * arrays are used -- confirm against the full source. */
491 conns = (cm_conn_t **)malloc(maxconns * sizeof(cm_conn_t *));
492 rxconns = (struct rx_connection **)malloc(maxconns * sizeof(struct rx_connection *));
493 conntimer = (afs_int32 *)malloc(maxconns * sizeof (afs_int32));
494 results = (afs_int32 *)malloc(maxconns * sizeof (afs_int32));
495 serversp = (cm_server_t **)malloc(maxconns * sizeof(cm_server_t *));
496 caps = (Capabilities *)malloc(maxconns * sizeof(Capabilities));
498 memset(caps, 0, maxconns * sizeof(Capabilities));
/* ---- Pass 1: file servers ---- */
500 if ((flags & CM_FLAG_CHECKFILESERVERS) ||
501 !(flags & (CM_FLAG_CHECKFILESERVERS|CM_FLAG_CHECKVLDBSERVERS)))
503 lock_ObtainRead(&cm_serverLock);
504 for (nconns=0, tsp = cm_serversAllFirstp;
505 tsp != NULL && nconns < maxconns;
506 tsp = (cm_server_t *)osi_QNext(&tsp->allq)) {
507 if (tsp->type != CM_SERVER_FILE ||
508 tsp->cellp == NULL || /* SetPref only */
509 cellp && cellp != tsp->cellp)
512 cm_GetServerNoLock(tsp);
513 lock_ReleaseRead(&cm_serverLock);
515 lock_ObtainMutex(&tsp->mx);
516 isDown = tsp->flags & CM_SERVERFLAG_DOWN;
/* Skip servers already being pinged or whose state was not requested. */
518 if (tsp->pingCount > 0 ||
519 !((isDown && (flags & CM_FLAG_CHECKDOWNSERVERS)) ||
520 (!isDown && (flags & CM_FLAG_CHECKUPSERVERS)))) {
521 lock_ReleaseMutex(&tsp->mx);
522 lock_ObtainRead(&cm_serverLock);
523 cm_PutServerNoLock(tsp);
527 InterlockedIncrement(&tsp->pingCount);
528 lock_ReleaseMutex(&tsp->mx);
530 serversp[nconns] = tsp;
532 code = cm_ConnByServer(tsp, cm_rootUserp, FALSE, &conns[nconns]);
536 if (code == RX_CALL_DEAD) {
537 lock_ObtainMutex(&tsp->mx);
538 cm_MarkServerDown(tsp, code, isDown);
539 lock_ReleaseMutex(&tsp->mx);
541 lock_ObtainRead(&cm_serverLock);
542 cm_PutServerNoLock(tsp);
545 lock_ObtainRead(&cm_serverLock);
546 rxconns[nconns] = cm_GetRxConn(conns[nconns]);
/* Intentional assignment: remember whether the short dead time was applied. */
547 if (conntimer[nconns] = (isDown ? 1 : 0))
548 rx_SetConnHardDeadTime(rxconns[nconns], 10);
552 lock_ReleaseRead(&cm_serverLock);
555 /* Perform the multi call */
557 multi_Rx(rxconns,nconns)
559 multi_RXAFS_GetCapabilities(&caps[multi_i]);
560 results[multi_i]=multi_error;
564 /* Process results of servers that support RXAFS_GetCapabilities */
565 for (i=0; i<nconns; i++) {
567 rx_SetConnHardDeadTime(rxconns[i], HardDeadtimeout);
568 rx_PutConnection(rxconns[i]);
569 cm_PutConn(conns[i]);
572 cm_GCConnections(tsp);
574 lock_ObtainMutex(&tsp->mx);
575 wasDown = tsp->flags & CM_SERVERFLAG_DOWN;
577 if (results[i] >= 0 || results[i] == RXGEN_OPCODE) {
578 /* mark server as up */
579 _InterlockedAnd(&tsp->flags, ~CM_SERVERFLAG_DOWN);
582 /* we currently handle 32-bits of capabilities */
583 if (results[i] != RXGEN_OPCODE && caps[i].Capabilities_len > 0) {
584 tsp->capabilities = caps[i].Capabilities_val[0];
585 xdr_free((xdrproc_t) xdr_Capabilities, &caps[i]);
586 caps[i].Capabilities_len = 0;
587 caps[i].Capabilities_val = 0;
589 tsp->capabilities = 0;
592 afs_inet_ntoa_r(tsp->addr.sin_addr.S_un.S_addr, hoststr);
593 osi_Log3(afsd_logp, "cm_MultiPingServer server %s (%s) is up with caps 0x%x",
594 osi_LogSaveString(afsd_logp, hoststr),
595 tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
598 /* Now update the volume status if necessary */
600 cm_server_vols_t * tsrvp;
604 for (tsrvp = tsp->vols; tsrvp; tsrvp = tsrvp->nextp) {
/* NOTE(review): this inner loop indexes with `i`, the same name as the
 * enclosing per-connection loop index; no inner declaration is visible in
 * this listing -- confirm it is a separate block-scoped variable. */
605 for (i=0; i<NUM_SERVER_VOLS; i++) {
606 if (tsrvp->ids[i] != 0) {
609 lock_ReleaseMutex(&tsp->mx);
610 code = cm_FindVolumeByID(tsp->cellp, tsrvp->ids[i], cm_rootUserp,
611 &req, CM_GETVOL_FLAG_NO_LRU_UPDATE, &volp);
612 lock_ObtainMutex(&tsp->mx);
614 cm_UpdateVolumeStatus(volp, tsrvp->ids[i]);
623 cm_MarkServerDown(tsp, results[i], wasDown);
625 afs_inet_ntoa_r(tsp->addr.sin_addr.S_un.S_addr, hoststr);
626 osi_Log3(afsd_logp, "cm_MultiPingServer server %s (%s) is down with caps 0x%x",
627 osi_LogSaveString(afsd_logp, hoststr),
628 tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
632 InterlockedDecrement(&tsp->pingCount);
633 if (tsp->waitCount > 0)
634 osi_Wakeup((LONG_PTR)tsp);
636 lock_ReleaseMutex(&tsp->mx);
/* ---- Pass 2: VLDB servers (same structure as pass 1) ---- */
642 if ((flags & CM_FLAG_CHECKVLDBSERVERS) ||
643 !(flags & (CM_FLAG_CHECKFILESERVERS|CM_FLAG_CHECKVLDBSERVERS)))
645 lock_ObtainRead(&cm_serverLock);
646 for (nconns=0, tsp = cm_serversAllFirstp;
647 tsp != NULL && nconns < maxconns;
648 tsp = (cm_server_t *)osi_QNext(&tsp->allq)) {
649 if (tsp->type != CM_SERVER_VLDB ||
650 tsp->cellp == NULL || /* SetPref only */
651 cellp && cellp != tsp->cellp)
654 cm_GetServerNoLock(tsp);
655 lock_ReleaseRead(&cm_serverLock);
657 lock_ObtainMutex(&tsp->mx);
658 isDown = tsp->flags & CM_SERVERFLAG_DOWN;
660 if (tsp->pingCount > 0 ||
661 !((isDown && (flags & CM_FLAG_CHECKDOWNSERVERS)) ||
662 (!isDown && (flags & CM_FLAG_CHECKUPSERVERS)))) {
663 lock_ReleaseMutex(&tsp->mx);
664 lock_ObtainRead(&cm_serverLock);
665 cm_PutServerNoLock(tsp);
669 InterlockedIncrement(&tsp->pingCount);
670 lock_ReleaseMutex(&tsp->mx);
672 serversp[nconns] = tsp;
674 code = cm_ConnByServer(tsp, cm_rootUserp, FALSE, &conns[nconns]);
678 if (code == RX_CALL_DEAD) {
679 lock_ObtainMutex(&tsp->mx);
680 cm_MarkServerDown(tsp, code, isDown);
681 lock_ReleaseMutex(&tsp->mx);
683 lock_ObtainRead(&cm_serverLock);
684 cm_PutServerNoLock(tsp);
687 lock_ObtainRead(&cm_serverLock);
688 rxconns[nconns] = cm_GetRxConn(conns[nconns]);
689 conntimer[nconns] = (isDown ? 1 : 0);
691 rx_SetConnHardDeadTime(rxconns[nconns], 10);
695 lock_ReleaseRead(&cm_serverLock);
698 /* Perform the multi call */
700 multi_Rx(rxconns,nconns)
702 multi_VL_ProbeServer();
703 results[multi_i]=multi_error;
707 /* Process results of servers that support VL_ProbeServer */
708 for (i=0; i<nconns; i++) {
710 rx_SetConnHardDeadTime(rxconns[i], HardDeadtimeout);
711 rx_PutConnection(rxconns[i]);
712 cm_PutConn(conns[i]);
715 cm_GCConnections(tsp);
717 lock_ObtainMutex(&tsp->mx);
718 wasDown = tsp->flags & CM_SERVERFLAG_DOWN;
720 if (results[i] >= 0) {
721 /* mark server as up */
722 _InterlockedAnd(&tsp->flags, ~CM_SERVERFLAG_DOWN);
724 tsp->capabilities = 0;
726 afs_inet_ntoa_r(tsp->addr.sin_addr.S_un.S_addr, hoststr);
727 osi_Log3(afsd_logp, "cm_MultiPingServer server %s (%s) is up with caps 0x%x",
728 osi_LogSaveString(afsd_logp, hoststr),
729 tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
734 cm_MarkServerDown(tsp, results[i], wasDown);
736 afs_inet_ntoa_r(tsp->addr.sin_addr.S_un.S_addr, hoststr);
737 osi_Log3(afsd_logp, "cm_MultiPingServer server %s (%s) is down with caps 0x%x",
738 osi_LogSaveString(afsd_logp, hoststr),
739 tsp->type == CM_SERVER_VLDB ? "vldb" : "file",
743 InterlockedDecrement(&tsp->pingCount);
744 if (tsp->waitCount > 0)
745 osi_Wakeup((LONG_PTR)tsp);
747 lock_ReleaseMutex(&tsp->mx);
/*
 * cm_CheckServers: public entry point for server probing.
 *
 * Reads the "MultiCheckServers" value from the client service parameters
 * registry key (AFSREG_CLT_SVC_PARAM_SUBKEY under HKEY_LOCAL_MACHINE) into
 * `multi`, then dispatches to cm_CheckServersMulti() or
 * cm_CheckServersSingular() with the caller's flags and cell.
 *
 * NOTE(review): the declarations, the default value of `multi` and the
 * condition that chooses between the two implementations are not present in
 * this listing.
 */
761 void cm_CheckServers(afs_uint32 flags, cm_cell_t *cellp)
768 code = RegOpenKeyEx(HKEY_LOCAL_MACHINE, AFSREG_CLT_SVC_PARAM_SUBKEY,
769 0, KEY_QUERY_VALUE, &parmKey);
770 if (code == ERROR_SUCCESS) {
771 dummyLen = sizeof(multi);
772 code = RegQueryValueEx(parmKey, "MultiCheckServers", NULL, NULL,
773 (BYTE *) &multi, &dummyLen);
774 RegCloseKey (parmKey);
778 cm_CheckServersMulti(flags, cellp);
780 cm_CheckServersSingular(flags, cellp);
/*
 * cm_InitServer: one-time initialization of this module's global locks.
 *
 * The body is guarded by osi_Once() on a function-local static, and
 * initializes cm_serverLock and cm_syscfgLock with their lock-hierarchy
 * levels.
 *
 * NOTE(review): the lines that close the once-block are not present in this
 * listing.
 */
783 void cm_InitServer(void)
785 static osi_once_t once;
787 if (osi_Once(&once)) {
788 lock_InitializeRWLock(&cm_serverLock, "cm_serverLock", LOCK_HIERARCHY_SERVER_GLOBAL);
789 lock_InitializeRWLock(&cm_syscfgLock, "cm_syscfgLock", LOCK_HIERARCHY_SYSCFG_GLOBAL);
/* Cached description of the client's network interfaces, filled in by
 * cm_UpdateIFInfo() and read by cm_SetServerIPRank().
 * cm_LanAdapterChangeDetected starts at 1, so the first cm_SetServerIPRank()
 * call refreshes the cache. */
794 /* Protected by cm_syscfgLock (rw) */
795 int cm_noIPAddr; /* number of client network interfaces */
796 int cm_IPAddr[CM_MAXINTERFACE_ADDR]; /* client's IP address in host order */
797 int cm_SubnetMask[CM_MAXINTERFACE_ADDR];/* client's subnet mask in host order*/
798 int cm_NetMtu[CM_MAXINTERFACE_ADDR]; /* client's MTU sizes */
799 int cm_NetFlags[CM_MAXINTERFACE_ADDR]; /* network flags */
800 int cm_LanAdapterChangeDetected = 1;
/*
 * cm_SetLanAdapterChangeDetected: flag the cached interface data as stale.
 *
 * Sets cm_LanAdapterChangeDetected to 1 under the cm_syscfgLock write lock;
 * cm_SetServerIPRank() then refreshes the cache on its next call.
 */
802 void cm_SetLanAdapterChangeDetected(void)
804 lock_ObtainWrite(&cm_syscfgLock);
805 cm_LanAdapterChangeDetected = 1;
806 lock_ReleaseWrite(&cm_syscfgLock);
/*
 * cm_GetServer: take a reference on a server.
 *
 * Increments serverp->refCount with cm_serverLock read-held.  Callers that
 * already hold cm_serverLock use cm_GetServerNoLock() instead.
 */
809 void cm_GetServer(cm_server_t *serverp)
811 lock_ObtainRead(&cm_serverLock);
812 InterlockedIncrement(&serverp->refCount);
813 lock_ReleaseRead(&cm_serverLock);
/*
 * cm_GetServerNoLock: take a reference on a server without touching
 * cm_serverLock.  In this file it is called while cm_serverLock is already
 * held.
 */
816 void cm_GetServerNoLock(cm_server_t *serverp)
818 InterlockedIncrement(&serverp->refCount);
/*
 * cm_PutServer: release a reference on a server.
 *
 * Decrements serverp->refCount with cm_serverLock read-held and asserts that
 * the count did not go negative.  The server is not freed here.
 */
821 void cm_PutServer(cm_server_t *serverp)
824 lock_ObtainRead(&cm_serverLock);
825 refCount = InterlockedDecrement(&serverp->refCount);
826 osi_assertx(refCount >= 0, "cm_server_t refCount underflow");
827 lock_ReleaseRead(&cm_serverLock);
/*
 * cm_PutServerNoLock: release a reference on a server without touching
 * cm_serverLock.  Asserts that the count did not go negative.  In this file
 * it is called while cm_serverLock is already held.
 */
830 void cm_PutServerNoLock(cm_server_t *serverp)
832 afs_int32 refCount = InterlockedDecrement(&serverp->refCount);
833 osi_assertx(refCount >= 0, "cm_server_t refCount underflow");
/*
 * cm_SetServerNo64Bit: set or clear CM_SERVERFLAG_NO64BIT on a server, under
 * the server mutex.
 *
 * NOTE(review): the condition choosing between the two branches is not
 * present in this listing; presumably the flag is set when no64bit is
 * non-zero and cleared otherwise -- confirm.
 */
836 void cm_SetServerNo64Bit(cm_server_t * serverp, int no64bit)
838 lock_ObtainMutex(&serverp->mx);
840 _InterlockedOr(&serverp->flags, CM_SERVERFLAG_NO64BIT);
842 _InterlockedAnd(&serverp->flags, ~CM_SERVERFLAG_NO64BIT);
843 lock_ReleaseMutex(&serverp->mx);
/*
 * cm_SetServerNoInlineBulk: set or clear CM_SERVERFLAG_NOINLINEBULK on a
 * server, under the server mutex.
 *
 * NOTE(review): the condition choosing between the two branches is not
 * present in this listing; presumably the flag is set when `no` is non-zero
 * and cleared otherwise -- confirm.
 */
846 void cm_SetServerNoInlineBulk(cm_server_t * serverp, int no)
848 lock_ObtainMutex(&serverp->mx);
850 _InterlockedOr(&serverp->flags, CM_SERVERFLAG_NOINLINEBULK);
852 _InterlockedAnd(&serverp->flags, ~CM_SERVERFLAG_NOINLINEBULK);
853 lock_ReleaseMutex(&serverp->mx);
/*
 * cm_UpdateIFInfo: refresh the cached client interface data.
 *
 * Passes CM_MAXINTERFACE_ADDR as the capacity in cm_noIPAddr, lets
 * syscfg_GetIFInfo() fill the address, mask, MTU and flag arrays, and then
 * clears cm_LanAdapterChangeDetected regardless of the call's result.
 *
 * This function takes no lock itself; its caller in this file,
 * cm_SetServerIPRank(), holds cm_syscfgLock for writing around the call.
 *
 * NOTE(review): the declaration of `code` and the return statement are not
 * present in this listing.
 */
856 afs_int32 cm_UpdateIFInfo(void)
859 /* get network related info */
860 cm_noIPAddr = CM_MAXINTERFACE_ADDR;
861 code = syscfg_GetIFInfo(&cm_noIPAddr,
862 cm_IPAddr, cm_SubnetMask,
863 cm_NetMtu, cm_NetFlags);
864 cm_LanAdapterChangeDetected = 0;
/*
 * cm_SetServerIPRank: derive serverp->ipRank from how close the server's
 * address is to the client's own interfaces.
 *
 * The rank starts at CM_IPRANK_LOW and is lowered with min() for each client
 * interface that matches:
 *   same address          -> CM_IPRANK_TOP
 *   same subnet           -> CM_IPRANK_HI
 *   same classful network -> CM_IPRANK_MED
 *
 * Locking: cm_syscfgLock is read-held for the comparison.  If an adapter
 * change has been flagged, the lock is converted to a write lock, the flag is
 * re-checked, the interface cache is refreshed, and the lock is converted
 * back to a read lock.
 *
 * NOTE(review): the handling of addresses that are not class A, B or C, and
 * any use of `code`, are not present in this listing.
 */
868 void cm_SetServerIPRank(cm_server_t * serverp)
870 unsigned long serverAddr; /* in host byte order */
871 unsigned long myAddr, myNet, mySubnet;/* in host byte order */
872 unsigned long netMask;
876 lock_ObtainRead(&cm_syscfgLock);
877 if (cm_LanAdapterChangeDetected) {
878 lock_ConvertRToW(&cm_syscfgLock);
/* Re-test after the conversion: the first test was made under the read lock only. */
879 if (cm_LanAdapterChangeDetected) {
880 code = cm_UpdateIFInfo();
882 lock_ConvertWToR(&cm_syscfgLock);
885 serverAddr = ntohl(serverp->addr.sin_addr.s_addr);
886 serverp->ipRank = CM_IPRANK_LOW; /* default settings */
888 for ( i=0; i < cm_noIPAddr; i++)
890 /* loop through all the client's IP address and compare
891 ** each of them against the server's IP address */
893 myAddr = cm_IPAddr[i];
894 if ( IN_CLASSA(myAddr) )
895 netMask = IN_CLASSA_NET;
896 else if ( IN_CLASSB(myAddr) )
897 netMask = IN_CLASSB_NET;
898 else if ( IN_CLASSC(myAddr) )
899 netMask = IN_CLASSC_NET;
903 myNet = myAddr & netMask;
904 mySubnet = myAddr & cm_SubnetMask[i];
906 if ( (serverAddr & netMask) == myNet )
908 if ( (serverAddr & cm_SubnetMask[i]) == mySubnet)
910 if ( serverAddr == myAddr ) {
911 serverp->ipRank = min(serverp->ipRank,
912 CM_IPRANK_TOP);/* same machine */
914 serverp->ipRank = min(serverp->ipRank,
915 CM_IPRANK_HI); /* same subnet */
918 serverp->ipRank = min(serverp->ipRank, CM_IPRANK_MED); /* same net */
921 } /* and of for loop */
922 lock_ReleaseRead(&cm_syscfgLock);
/*
 * cm_NewServer: find or create the cm_server_t for an address and type.
 *
 * socketp  server address; must be AF_INET (asserted).
 * type     server type, passed to the lookup.
 * cellp    cell the server belongs to; may be NULL per the checks below.
 * uuidp    optional server UUID; a nil UUID is treated like no UUID.
 * flags    CM_FLAG_NOPROBE suppresses the initial probe.
 *
 * With cm_serverLock write-held, an existing entry is looked up via
 * cm_FindServer().  For an existing entry the UUID flag and the cell are
 * filled in if they were missing, and an entry already bound to a different
 * cell is logged.  Otherwise a new zeroed entry is allocated, initialized and
 * added to the head of the all-servers queue.
 *
 * After the lock is released, unless CM_FLAG_NOPROBE is set, the server is
 * assumed down and probed with cm_PingServer(), either directly or from a
 * detached pthread.
 *
 * NOTE(review): the branch conditions (found vs. new, direct vs. background
 * probe), the field assignments between the visible lines and the return
 * statements are not present in this listing.
 */
925 cm_server_t *cm_NewServer(struct sockaddr_in *socketp, int type, cm_cell_t *cellp, afsUUID *uuidp, afs_uint32 flags) {
929 osi_assertx(socketp->sin_family == AF_INET, "unexpected socket family");
931 lock_ObtainWrite(&cm_serverLock); /* get server lock */
932 tsp = cm_FindServer(socketp, type, TRUE);
934 /* we might have found a server created by set server prefs */
935 if (uuidp && !afs_uuid_is_nil(uuidp) &&
936 !(tsp->flags & CM_SERVERFLAG_UUID))
939 _InterlockedOr(&tsp->flags, CM_SERVERFLAG_UUID);
942 if (cellp != NULL && tsp->cellp == NULL) {
944 afs_inet_ntoa_r(tsp->addr.sin_addr.s_addr, hoststr);
945 osi_Log3(afsd_logp, "cm_NewServer assigning server %s to cell (%u) %s",
946 osi_LogSaveString(afsd_logp,hoststr),
948 osi_LogSaveString(afsd_logp,cellp->name));
950 else if (tsp->cellp != cellp) {
951 afs_inet_ntoa_r(tsp->addr.sin_addr.s_addr, hoststr);
953 "cm_NewServer found a server %s associated with two cells (%u) %s and (%u) %s",
954 osi_LogSaveString(afsd_logp,hoststr),
956 osi_LogSaveString(afsd_logp,tsp->cellp->name),
958 osi_LogSaveString(afsd_logp,cellp->name));
960 lock_ReleaseWrite(&cm_serverLock);
964 tsp = malloc(sizeof(*tsp));
966 memset(tsp, 0, sizeof(*tsp));
968 if (uuidp && !afs_uuid_is_nil(uuidp)) {
970 _InterlockedOr(&tsp->flags, CM_SERVERFLAG_UUID);
973 lock_InitializeMutex(&tsp->mx, "cm_server_t mutex", LOCK_HIERARCHY_SERVER);
974 tsp->addr = *socketp;
976 osi_QAddH((osi_queue_t **)&cm_serversAllFirstp,
977 (osi_queue_t **)&cm_serversAllLastp, &tsp->allq);
990 afs_inet_ntoa_r(tsp->addr.sin_addr.s_addr, hoststr);
991 osi_Log3(afsd_logp, "cm_NewServer new server %s in cell (%u) %s",
992 osi_LogSaveString(afsd_logp,hoststr),
994 osi_LogSaveString(afsd_logp,cellp->name));
997 lock_ReleaseWrite(&cm_serverLock); /* release server lock */
1000 if (!(flags & CM_FLAG_NOPROBE)) {
1001 _InterlockedOr(&tsp->flags, CM_SERVERFLAG_DOWN); /* assume down; ping will mark up if available */
1002 lock_ObtainMutex(&tsp->mx);
1004 lock_ReleaseMutex(&tsp->mx);
1005 cm_PingServer(tsp); /* Obtain Capabilities and check up/down state */
1008 pthread_attr_t tattr;
1011 /* Probe the server in the background to determine if it is up or down */
1012 pthread_attr_init(&tattr);
1013 pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_DETACHED);
1015 lock_ObtainMutex(&tsp->mx);
1017 lock_ReleaseMutex(&tsp->mx);
1018 pstatus = pthread_create(&phandle, &tattr, cm_PingServer, tsp);
1020 pthread_attr_destroy(&tattr);
/*
 * cm_FindServerByIP: look up a server by type, IPv4 address and port.
 *
 * ipaddr and port are compared directly against the stored sin_addr and
 * sin_port.  An entry whose stored port is 0 matches any port.  A reference
 * is taken on the entry that is found.
 *
 * NOTE(review): the return type, the loop condition, the `break`, the return
 * statement and the tests on `locked` are not present in this listing;
 * presumably cm_serverLock is only taken here when locked is zero -- confirm.
 */
1027 cm_FindServerByIP(afs_uint32 ipaddr, unsigned short port, int type, int locked)
1032 lock_ObtainRead(&cm_serverLock);
1034 for (tsp = cm_serversAllFirstp;
1036 tsp = (cm_server_t *)osi_QNext(&tsp->allq)) {
1037 if (tsp->type == type &&
1038 tsp->addr.sin_addr.S_un.S_addr == ipaddr &&
1039 (tsp->addr.sin_port == port || tsp->addr.sin_port == 0))
1043 /* bump ref count if we found the server */
1045 cm_GetServerNoLock(tsp);
1048 lock_ReleaseRead(&cm_serverLock);
/*
 * cm_FindServerByUuid: look up a server by type and UUID.
 *
 * Walks the all-servers queue comparing tsp->uuid with serverUuid via
 * afs_uuid_equal().  A reference is taken on the entry that is found.
 *
 * NOTE(review): the return type, the loop condition, the `break`, the return
 * statement and the tests on `locked` are not present in this listing;
 * presumably cm_serverLock is only taken here when locked is zero -- confirm.
 */
1054 cm_FindServerByUuid(afsUUID *serverUuid, int type, int locked)
1059 lock_ObtainRead(&cm_serverLock);
1061 for (tsp = cm_serversAllFirstp;
1063 tsp = (cm_server_t *)osi_QNext(&tsp->allq)) {
1064 if (tsp->type == type && afs_uuid_equal(&tsp->uuid, serverUuid))
1068 /* bump ref count if we found the server */
1070 cm_GetServerNoLock(tsp);
1073 lock_ReleaseRead(&cm_serverLock);
/*
 * cm_FindServer: sockaddr_in front end for cm_FindServerByIP().
 *
 * Asserts that addrp is an AF_INET address and forwards its address and
 * port, together with type and locked, to cm_FindServerByIP(), returning
 * that function's result.
 */
1078 /* find a server based on its properties */
1079 cm_server_t *cm_FindServer(struct sockaddr_in *addrp, int type, int locked)
1081 osi_assertx(addrp->sin_family == AF_INET, "unexpected socket value");
1083 return cm_FindServerByIP(addrp->sin_addr.s_addr, addrp->sin_port, type, locked);
/*
 * cm_NewServerVols: allocate one zero-filled page of volume-id slots
 * (cm_server_vols_t).
 *
 * NOTE(review): the NULL check around the memset and the return statement
 * are not present in this listing.
 */
1086 cm_server_vols_t *cm_NewServerVols(void) {
1087 cm_server_vols_t *tsvp;
1089 tsvp = malloc(sizeof(*tsvp));
1091 memset(tsvp, 0, sizeof(*tsvp));
/*
 * cm_NewServerRef() returns the allocated cm_serverRef_t
 * with a refCount of 1.
 */
/*
 * cm_NewServerRef: allocate a server reference for (serverp, volID).
 *
 * A reference is taken on serverp and stored in the new cm_serverRef_t, whose
 * status starts as srv_not_busy.  Under serverp->mx the server's volume-id
 * pages are scanned for volID, remembering the first empty slot; when no page
 * has room a new page is appended with cm_NewServerVols() and volID is stored
 * in its first slot.
 *
 * NOTE(review): tsrp is dereferenced on the listing line immediately after
 * the malloc(), so no NULL check exists between them -- an allocation failure
 * would be dereferenced here.
 * NOTE(review): the remaining field initializations, the loop and branch
 * headers and the return statement are not present in this listing.
 */
1100 cm_serverRef_t *cm_NewServerRef(cm_server_t *serverp, afs_uint32 volID)
1102 cm_serverRef_t *tsrp;
1103 cm_server_vols_t **tsrvpp = NULL;
1104 afs_uint32 *slotp = NULL;
1107 cm_GetServer(serverp);
1108 tsrp = malloc(sizeof(*tsrp));
1109 tsrp->server = serverp;
1110 tsrp->status = srv_not_busy;
1112 tsrp->volID = volID;
1115 /* if we have a non-zero volID, we need to add it to the list
1116 * of volumes maintained by the server. There are two phases:
1117 * (1) see if the volID is already in the list and (2) insert
1118 * it into the first empty slot if it is not.
1121 lock_ObtainMutex(&serverp->mx);
1123 tsrvpp = &serverp->vols;
1127 for (i=0; i<NUM_SERVER_VOLS; i++) {
1128 if ((*tsrvpp)->ids[i] == volID) {
1131 } else if (!slotp && (*tsrvpp)->ids[i] == 0) {
1132 slotp = &(*tsrvpp)->ids[i];
1139 tsrvpp = &(*tsrvpp)->nextp;
1146 /* if we didn't find an empty slot in a current
1147 * page we must need a new page */
1148 *tsrvpp = cm_NewServerVols();
1150 (*tsrvpp)->ids[0] = volID;
1154 lock_ReleaseMutex(&serverp->mx);
/*
 * cm_GetServerRef: take a reference on a server reference object by
 * incrementing tsrp->refCount.
 *
 * NOTE(review): the tests on `locked` are not present in this listing;
 * presumably cm_serverLock is only taken here when locked is zero -- confirm.
 */
1160 void cm_GetServerRef(cm_serverRef_t *tsrp, int locked)
1165 lock_ObtainRead(&cm_serverLock);
1166 refCount = InterlockedIncrement(&tsrp->refCount);
1168 lock_ReleaseRead(&cm_serverLock);
/*
 * cm_PutServerRef: release a reference on a server reference object by
 * decrementing tsrp->refCount, asserting that it did not go negative.
 *
 * NOTE(review): the tests on `locked` and the return statement are not
 * present in this listing; presumably cm_serverLock is only taken here when
 * locked is zero and the new count is returned -- confirm.
 */
1171 afs_int32 cm_PutServerRef(cm_serverRef_t *tsrp, int locked)
1176 lock_ObtainRead(&cm_serverLock);
1177 refCount = InterlockedDecrement(&tsrp->refCount);
1178 osi_assertx(refCount >= 0, "cm_serverRef_t refCount underflow");
1181 lock_ReleaseRead(&cm_serverLock);
/*
 * cm_ServerListSize: count the entries of a server reference list, walking
 * it with cm_serverLock read-held and testing each entry for srv_deleted.
 *
 * NOTE(review): the return type, the statements following the srv_deleted
 * test and the return statement are not present in this listing; presumably
 * deleted entries are skipped -- confirm.
 */
1187 cm_ServerListSize(cm_serverRef_t* serversp)
1189 afs_uint32 count = 0;
1190 cm_serverRef_t *tsrp;
1192 lock_ObtainRead(&cm_serverLock);
1193 for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
1194 if (tsrp->status == srv_deleted)
1198 lock_ReleaseRead(&cm_serverLock);
/*
 * cm_ChecksumServerList: fold a server reference list into one LONG_PTR by
 * XOR-ing the server pointers of its entries, with cm_serverLock read-held.
 *
 * NOTE(review): the declaration of `sum`, the statements following the
 * srv_deleted test and the return statement are not present in this listing;
 * presumably deleted entries are skipped -- confirm.
 */
1202 LONG_PTR cm_ChecksumServerList(cm_serverRef_t *serversp)
1206 cm_serverRef_t *tsrp;
1208 lock_ObtainRead(&cm_serverLock);
1209 for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
1210 if (tsrp->status == srv_deleted)
1216 sum ^= (LONG_PTR) tsrp->server;
1219 lock_ReleaseRead(&cm_serverLock);
/*
** Insert a server into the server list keeping the list sorted in
** ascending order of activeRank.
**
** The refCount of the cm_serverRef_t is not altered.
*/
/*
 * cm_InsertServerList: insert `element` into the rank-ordered list `*list`.
 *
 * All work is done with cm_serverLock write-held:
 *   1. Purge: entries with refCount 0 and status srv_deleted are removed;
 *      their volume id is dropped from the server and the server reference is
 *      released via cm_FreeServer().  A reference is held on the next entry
 *      while this happens because cm_FreeServer() may drop the lock.
 *   2. Empty list: the element becomes the list.
 *   3. Duplicate check: an element with the same type, address, UUID-flag
 *      state and (if flagged) UUID as a live entry is discarded.
 *   4. Insert: before the first entry whose server activeRank is greater than
 *      the element's, or at the tail.
 *
 * NOTE(review): `rank` is an unsigned short holding server->activeRank;
 * confirm that the field cannot exceed that range.
 * NOTE(review): many control-flow lines (conditions, `continue`, `goto` or
 * `break`, frees) are not present in this listing.
 */
1229 void cm_InsertServerList(cm_serverRef_t** list, cm_serverRef_t* element)
1231 cm_serverRef_t *current;
1232 unsigned short rank;
1234 lock_ObtainWrite(&cm_serverLock);
1236 * Since we are grabbing the serverLock exclusively remove any
1237 * deleted serverRef objects with a zero refcount before
1238 * inserting the new item.
1241 cm_serverRef_t **currentp = list;
1242 cm_serverRef_t **nextp = NULL;
1243 cm_serverRef_t * next = NULL;
1244 cm_server_t * serverp = NULL;
1246 for (currentp = list; *currentp; currentp = nextp)
1248 nextp = &(*currentp)->next;
1249 /* obtain a refcnt on next in case cm_serverLock is dropped */
1251 cm_GetServerRef(*nextp, TRUE);
1252 if ((*currentp)->refCount == 0 &&
1253 (*currentp)->status == srv_deleted) {
1256 if ((*currentp)->volID)
1257 cm_RemoveVolumeFromServer((*currentp)->server, (*currentp)->volID);
1258 serverp = (*currentp)->server;
1261 /* cm_FreeServer will drop cm_serverLock if serverp->refCount == 0 */
1262 cm_FreeServer(serverp);
1264 /* drop the next refcnt obtained above. */
1266 cm_PutServerRef(*nextp, TRUE);
1270 /* insertion into empty list or at the beginning of the list */
1273 element->next = NULL;
1279 * Now that deleted entries have been removed and we know that the
1280 * list was not empty, look for duplicates. If the element we are
1281 * inserting already exists, discard it.
1283 for ( current = *list; current; current = current->next)
1285 cm_server_t * server1 = current->server;
1286 cm_server_t * server2 = element->server;
1288 if (current->status == srv_deleted)
1291 if (server1->type != server2->type)
1294 if (server1->addr.sin_addr.s_addr != server2->addr.sin_addr.s_addr)
1297 if ((server1->flags & CM_SERVERFLAG_UUID) != (server2->flags & CM_SERVERFLAG_UUID))
1300 if ((server1->flags & CM_SERVERFLAG_UUID) &&
1301 !afs_uuid_equal(&server1->uuid, &server2->uuid))
1304 /* we must have a match, discard the new element */
1309 rank = element->server->activeRank;
1311 /* insertion at the beginning of the list */
1312 if ((*list)->server->activeRank > rank)
1314 element->next = *list;
1319 /* find appropriate place to insert */
1320 for ( current = *list; current->next; current = current->next)
1322 if ( current->next->server->activeRank > rank )
1325 element->next = current->next;
1326 current->next = element;
1329 lock_ReleaseWrite(&cm_serverLock);
/*
** Re-sort the server list with the modified rank.
** Returns 0 if the element was changed successfully.
** Returns 1 if the list remained unchanged.
*/
/*
 * cm_ChangeRankServer: move the reference for `server` to its new position
 * in `*list` after the server's rank has changed.
 *
 * With cm_serverLock write-held, a list of zero or one entries is left alone
 * (returns 1).  Otherwise the entry whose server matches is unlinked, the
 * lock is released, and the entry is re-inserted with cm_InsertServerList(),
 * which takes the lock itself.  Returns 1 when the server is not on the list.
 *
 * NOTE(review): the initialization of `current` and `element`, the loop
 * header, the not-found test and the final return are not present in this
 * listing.
 */
1336 long cm_ChangeRankServer(cm_serverRef_t** list, cm_server_t* server)
1338 cm_serverRef_t **current;
1339 cm_serverRef_t *element;
1341 lock_ObtainWrite(&cm_serverLock);
1345 /* if there is max of one element in the list, nothing to sort */
1346 if ( (!*current) || !((*current)->next) ) {
1347 lock_ReleaseWrite(&cm_serverLock);
1348 return 1; /* list unchanged: return success */
1351 /* if the server is on the list, delete it from list */
1354 if ( (*current)->server == server)
1356 element = (*current);
1357 *current = element->next; /* delete it */
1360 current = & ( (*current)->next);
1362 lock_ReleaseWrite(&cm_serverLock);
1364 /* if this volume is not replicated on this server */
1366 return 1; /* server is not on list */
1368 /* re-insert deleted element into the list with modified rank*/
1369 cm_InsertServerList(list, element);
/*
** If there is more than one server on the list and the first n servers on
** the list have the same rank (n > 1), then randomize among the first n servers.
*/
/*
 * cm_RandomizeServer: pick one of the equally lowest-ranked servers at the
 * front of `*list` at random and move it to the head.
 *
 * With cm_serverLock write-held: lists with fewer than two entries are left
 * alone; the leading entries sharing the first entry's activeRank are
 * counted; if more than one exists, index rand() % count is chosen, that
 * entry is unlinked and re-linked in front of the current head.
 *
 * NOTE(review): the declarations of `count` and `picked`, the early-return
 * tests, the walk to the picked entry and the final head assignment are not
 * present in this listing.
 */
1377 void cm_RandomizeServer(cm_serverRef_t** list)
1380 cm_serverRef_t* tsrp, *lastTsrp;
1381 unsigned short lowestRank;
1383 lock_ObtainWrite(&cm_serverLock);
1386 /* an empty list or a list with only one element */
1387 if ( !tsrp || ! tsrp->next ) {
1388 lock_ReleaseWrite(&cm_serverLock);
1392 /* count the number of servers with the lowest rank */
1393 lowestRank = tsrp->server->activeRank;
1394 for ( count=1, tsrp=tsrp->next; tsrp; tsrp=tsrp->next)
1396 if ( tsrp->server->activeRank != lowestRank)
1402 /* if there is only one server with the lowest rank, we are done */
1404 lock_ReleaseWrite(&cm_serverLock);
1408 picked = rand() % count;
1410 lock_ReleaseWrite(&cm_serverLock);
1415 while (--picked >= 0)
1420 lastTsrp->next = tsrp->next; /* delete random element from list*/
1421 tsrp->next = *list; /* insert element at the beginning of list */
1423 lock_ReleaseWrite(&cm_serverLock);
/*
 * cm_FreeServer: drop one reference on a server and destroy the server when
 * that was the last reference.
 *
 * Precondition: the caller holds cm_serverLock for writing.  The lock is
 * released and re-obtained around cm_GCConnections(), so the reference count
 * is tested again afterwards before anything is torn down.
 *
 * Teardown, visible only for servers without CM_SERVERFLAG_PREF_SET: remove
 * the entry from the all-servers queue, decrement the per-type server count,
 * finalize the mutex and walk the volume-id pages.
 *
 * NOTE(review): the frees, the closing braces and the third argument of
 * osi_QRemoveHT are not present in this listing.
 */
1426 /* call cm_FreeServer while holding a write lock on cm_serverLock */
1427 void cm_FreeServer(cm_server_t* serverp)
1429 cm_server_vols_t * tsrvp, *nextp;
1432 cm_PutServerNoLock(serverp);
1433 if (serverp->refCount == 0)
1436 * we need to check to ensure that all of the connections
1437 * for this server have a 0 refCount; otherwise, they will
1438 * not be garbage collected
1440 * must drop the cm_serverLock because cm_GCConnections
1441 * obtains the cm_connLock and that comes first in the
1444 lock_ReleaseWrite(&cm_serverLock);
1445 cm_GCConnections(serverp); /* connsp */
1446 lock_ObtainWrite(&cm_serverLock);
1451 * Once we have the cm_serverLock locked check to make
1452 * sure the refCount is still zero before removing the
1455 if (serverp->refCount == 0) {
1456 if (!(serverp->flags & CM_SERVERFLAG_PREF_SET)) {
1457 osi_QRemoveHT((osi_queue_t **)&cm_serversAllFirstp,
1458 (osi_queue_t **)&cm_serversAllLastp,
1461 switch (serverp->type) {
1462 case CM_SERVER_VLDB:
1463 cm_numVldbServers--;
1465 case CM_SERVER_FILE:
1466 cm_numFileServers--;
1470 lock_FinalizeMutex(&serverp->mx);
1472 /* free the volid list */
1473 for ( tsrvp = serverp->vols; tsrvp; tsrvp = nextp) {
1474 nextp = tsrvp->nextp;
1483 /* Called with cm_serverLock write locked */
/*
 * cm_RemoveVolumeFromServer: search the volume-id blocks attached to
 * serverp for volID.
 *
 * serverp - server whose vols chain is scanned.
 * volID   - volume id to look for.
 *
 * NOTE(review): the statements executed when a match is found are not
 * visible in this excerpt; presumably the slot is cleared - confirm.
 */
1484 void cm_RemoveVolumeFromServer(cm_server_t * serverp, afs_uint32 volID)
1486 cm_server_vols_t * tsrvp;
/* outer loop: each block in the chain; inner loop: each of its NUM_SERVER_VOLS slots */
1492 for (tsrvp = serverp->vols; tsrvp; tsrvp = tsrvp->nextp) {
1493 for (i=0; i<NUM_SERVER_VOLS; i++) {
1494 if (tsrvp->ids[i] == volID) {
/*
 * cm_IsServerListEmpty: report whether a server reference list has no
 * live entries.
 *
 * serversp - head of a cm_serverRef_t chain; may be NULL.
 *
 * Returns CM_ERROR_EMPTY when serversp is NULL or when allDeleted is
 * non-zero after the scan, otherwise 0.
 *
 * Locking: holds cm_serverLock for reading while the chain is walked.
 *
 * NOTE(review): the declaration of allDeleted and the statements that
 * update it inside the loop are not visible in this excerpt - confirm.
 */
1502 int cm_IsServerListEmpty(cm_serverRef_t *serversp)
1504 cm_serverRef_t *tsrp;
/* a NULL list is reported as empty without taking the lock */
1507 if (serversp == NULL)
1508 return CM_ERROR_EMPTY;
1510 lock_ObtainRead(&cm_serverLock);
1511 for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
/* the scan tests each entry for the srv_deleted status */
1512 if (tsrp->status == srv_deleted)
1517 lock_ReleaseRead(&cm_serverLock);
1519 return ( allDeleted ? CM_ERROR_EMPTY : 0 );
/*
 * cm_AppendServerList: locate the tail of dest so that the chain held
 * in *src can be attached to it.
 *
 * dest - head of the destination chain.
 * src  - address of the head pointer of the chain to append.
 *
 * The guard below rejects a NULL dest, a NULL src and an empty *src.
 *
 * NOTE(review): the statements following the tail search (presumably
 * the link itself and any update of *src) are not visible in this
 * excerpt, and no locking is visible either - confirm.
 */
1522 void cm_AppendServerList(cm_serverRef_t *dest, cm_serverRef_t **src)
1524 cm_serverRef_t *ref;
1526 if (dest == NULL || src == NULL || *src == NULL)
/* empty-bodied loop: on exit ref is the last element of dest */
1529 for (ref = dest; ref->next != NULL; ref = ref->next);
/*
 * cm_FreeServerList: release the references held on the entries of a
 * server reference list.
 *
 * list  - address of the head pointer of a cm_serverRef_t chain.
 * flags - when CM_FREESERVERLIST_DELETE is set, an entry handled by the
 *         second branch below is marked srv_deleted and its volume id
 *         is removed from the owning server.
 *
 * Locking: holds cm_serverLock for writing from entry to exit; per the
 * existing comments the lock may be dropped temporarily inside
 * cm_FreeServer().
 *
 * NOTE(review): the loop header, the declaration of refCount, the
 * statements between the two branches and the advance to the next entry
 * are not visible in this excerpt.  The meaning of the TRUE argument to
 * cm_GetServerRef()/cm_PutServerRef() is not shown either; presumably it
 * indicates that cm_serverLock is already held - confirm.
 */
1536 void cm_FreeServerList(cm_serverRef_t** list, afs_uint32 flags)
1538 cm_serverRef_t **current;
1539 cm_serverRef_t **nextp;
1540 cm_serverRef_t * next;
1541 cm_server_t * serverp;
1544 lock_ObtainWrite(&cm_serverLock);
/* remember where the successor link lives before the current entry is touched */
1554 nextp = &(*current)->next;
1555 /* obtain a refcnt on next in case cm_serverLock is dropped */
1557 cm_GetServerRef(*nextp, TRUE);
/* drop this list's reference on the current entry */
1558 refCount = cm_PutServerRef(*current, TRUE);
1559 if (refCount == 0) {
/* last reference: detach the volume id from the server before the server is released */
1562 if ((*current)->volID)
1563 cm_RemoveVolumeFromServer((*current)->server, (*current)->volID);
/* keep the server pointer in a local before calling cm_FreeServer() */
1564 serverp = (*current)->server;
1567 /* cm_FreeServer will drop cm_serverLock if serverp->refCount == 0 */
1568 cm_FreeServer(serverp);
/* NOTE(review): presumably the branch for refCount != 0; the else is not visible - confirm */
1570 if (flags & CM_FREESERVERLIST_DELETE) {
1571 (*current)->status = srv_deleted;
1572 if ((*current)->volID)
1573 cm_RemoveVolumeFromServer((*current)->server, (*current)->volID);
1577 /* drop the next refcnt obtained above. */
1579 cm_PutServerRef(*current, TRUE);
1584 lock_ReleaseWrite(&cm_serverLock);
1587 /* dump all servers to a file.
1588 * cookie is used to identify this batch for easy parsing,
1589 * and is a string provided by the caller
/*
 * cm_DumpServers: write a textual description of every server on the
 * global server queue to outputFile.
 *
 * outputFile - destination passed straight to WriteFile().
 *              NOTE(review): declared FILE * but used where WriteFile()
 *              expects a handle; presumably callers pass a Win32 HANDLE
 *              - confirm.
 * cookie     - caller-supplied string prefixed to every line written.
 * lock       - NOTE(review): presumably selects whether cm_serverLock is
 *              taken here; the guard around the lock calls is not
 *              visible in this excerpt - confirm.
 *
 * Output: a header line with cm_numFileServers and cm_numVldbServers,
 * one line per server, then a "Done dumping servers." trailer.  Lines
 * end in "\r\n".
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
1591 int cm_DumpServers(FILE *outputFile, char *cookie, int lock)
1600 lock_ObtainRead(&cm_serverLock);
1603 "%s - dumping servers - cm_numFileServers=%d, cm_numVldbServers=%d\r\n",
1604 cookie, cm_numFileServers, cm_numVldbServers);
1605 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
/* walk the global queue that starts at cm_serversAllFirstp */
1607 for (tsp = cm_serversAllFirstp;
1609 tsp = (cm_server_t *)osi_QNext(&tsp->allq))
/* NOTE(review): presumably selects the string printed as type=; case bodies not visible - confirm */
1614 switch (tsp->type) {
1615 case CM_SERVER_VLDB:
1618 case CM_SERVER_FILE:
/* render the uuid and the IPv4 address into local text buffers */
1625 afsUUID_to_string(&tsp->uuid, uuidstr, sizeof(uuidstr));
1626 afs_inet_ntoa_r(tsp->addr.sin_addr.s_addr, hoststr);
/*
 * ctime() text ends with a newline; the next line overwrites the last
 * character so the timestamp fits inside the quoted downTime field.
 * NOTE(review): ctime() can return NULL for an unrepresentable time and
 * the result is used unchecked - confirm downTime is always valid.
 */
1627 down = ctime(&tsp->downTime);
1628 down[strlen(down)-1] = '\0';
1631 "%s - tsp=0x%p cell=%s addr=%-15s port=%u uuid=%s type=%s caps=0x%x "
1632 "flags=0x%x waitCount=%u pingCount=%d rank=%u downTime=\"%s\" "
1634 cookie, tsp, tsp->cellp ? tsp->cellp->name : "", hoststr,
1635 ntohs(tsp->addr.sin_port), uuidstr, type,
1636 tsp->capabilities, tsp->flags, tsp->waitCount, tsp->pingCount,
1638 (tsp->flags & CM_SERVERFLAG_DOWN) ? "down" : "up",
1640 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
/* trailer line marks the end of this batch */
1642 sprintf(output, "%s - Done dumping servers.\r\n", cookie);
1643 WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
1646 lock_ReleaseRead(&cm_serverLock);
1652 * Determine if two servers are in fact the same.
1654 * Returns 1 if they match, 0 if they do not
1656 int cm_ServerEqual(cm_server_t *srv1, cm_server_t *srv2)
1660 if (srv1 == NULL || srv2 == NULL)
1666 if (srv1->flags & CM_SERVERFLAG_UUID) {
1667 if (!(srv2->flags & CM_SERVERFLAG_UUID))
1670 /* Both support UUID */
1671 if (UuidEqual((UUID *)&srv1->uuid, (UUID *)&srv2->uuid, &status))
1674 if (srv2->flags & CM_SERVERFLAG_UUID)
1677 /* Neither support UUID so perform an addr/port comparison */
1678 if ( srv1->addr.sin_family == srv2->addr.sin_family &&
1679 srv1->addr.sin_addr.s_addr == srv2->addr.sin_addr.s_addr &&
1680 srv1->addr.sin_port == srv2->addr.sin_port )