2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include <afs/param.h>
16 #ifdef IGNORE_SOME_GCC_WARNINGS
17 # pragma GCC diagnostic warning "-Wstrict-prototypes"
21 #include "afsincludes.h"
24 #include <afs/pthread_glock.h>
28 #include <afs/rxgen_consts.h>
29 #define UBIK_LEGACY_CALLITER
32 short ubik_initializationState; /*!< initial state is zero */
36 * \brief Parse list for clients.
39 ubik_ParseClientList(int argc, char **argv, afs_uint32 * aothers)
48 inServer = 0; /* haven't seen -servers yet */
50 for (i = 1; i < argc; i++) {
51 /* look for -servers argument */
57 /* otherwise this is a new host name */
59 th = gethostbyname(tp);
64 memmove((void *)&temp, (const void *)th->h_addr,
67 if (counter++ >= MAXSERVERS)
71 /* haven't seen a -servers yet */
72 if (!strcmp(tp, "-servers")) {
78 /* never saw a -servers */
81 if (counter < MAXSERVERS)
82 *aothers++ = 0; /* null terminate if room */
86 #ifdef AFS_PTHREAD_ENV
89 static pthread_once_t random_once = PTHREAD_ONCE_INIT;
90 static int called_afs_random_once;
91 static pthread_key_t random_number_key;
96 osi_Assert(pthread_key_create(&random_number_key, NULL) == 0);
97 called_afs_random_once = 1;
102 #if !defined(UKERNEL)
104 * \brief use time and pid to try to get some initial randomness.
106 #define ranstage(x) (x)= (afs_uint32) (3141592621U*((afs_uint32)x)+1)
109 * \brief Random number generator and constants from KnuthV2 2d ed, p170
112 * X = (aX + c) % m \n
113 * m is a power of two \n
115 * a is 0.73m should be 0.01m .. 0.99m \n
116 * c is more or less immaterial. 1 or a is suggested. \n
118 * NB: LOW ORDER BITS are not very random. To get small random numbers,
119 * treat result as <1, with implied binary point, and multiply by
122 * NB: Has to be unsigned, since shifts on signed quantities may preserve
125 * In this case, m == 2^32, the mod operation is implicit. a == pi, which
126 * is used because it has some interesting characteristics (lacks any
127 * interesting bit-patterns).
132 #ifdef AFS_PTHREAD_ENV
135 if (!called_afs_random_once)
136 pthread_once(&random_once, afs_random_once);
138 state = (uintptr_t) pthread_getspecific(random_number_key);
140 static afs_uint32 state = 0;
145 state = time(0) + getpid();
146 for (i = 0; i < 15; i++) {
152 #ifdef AFS_PTHREAD_ENV
153 pthread_setspecific(random_number_key, (const void *)(uintptr_t)state);
160 * \brief Returns int 0..14 using the high bits of a pseudo-random number instead of
161 * the low bits, as the low bits are "less random" than the high ones...
163 * \todo Slight roundoff error exists, an exercise for the reader.
165 * Need to multiply by something with lots of ones in it, so multiply by
166 * 8 or 16 is right out.
169 afs_randomMod15(void)
173 temp = afs_random() >> 4;
174 temp = (temp * 15) >> 28;
178 #endif /* !defined(UKERNEL) */
183 #define abs(a) ((a) < 0 ? -1*(a) : (a))
185 ubik_ClientInit(struct rx_connection **serverconns,
186 struct ubik_client **aclient)
191 struct ubik_client *tc;
193 initialize_U_error_table();
195 if (*aclient) { /* the application is doing a re-initialization */
196 LOCK_UBIK_CLIENT((*aclient));
197 /* this is an important defensive check */
198 if (!((*aclient)->initializationState)) {
199 UNLOCK_UBIK_CLIENT((*aclient));
200 return UREINITIALIZE;
203 /* release all existing connections */
204 for (tc = *aclient, i = 0; i < MAXSERVERS; i++) {
205 struct rx_connection *rxConn = ubik_GetRPCConn(tc, i);
208 #ifdef AFS_PTHREAD_ENV
209 rx_ReleaseCachedConnection(rxConn);
211 rx_DestroyConnection(rxConn);
214 UNLOCK_UBIK_CLIENT((*aclient));
215 #ifdef AFS_PTHREAD_ENV
216 if (pthread_mutex_destroy(&((*aclient)->cm)))
217 return UMUTEXDESTROY;
220 tc = malloc(sizeof(struct ubik_client));
224 memset((void *)tc, 0, sizeof(*tc));
225 #ifdef AFS_PTHREAD_ENV
226 if (pthread_mutex_init(&(tc->cm), (const pthread_mutexattr_t *)0)) {
230 tc->initializationState = ++ubik_initializationState;
232 /* first count the # of server conns so we can randomize properly */
234 for (i = 0; i < MAXSERVERS; i++) {
235 if (serverconns[i] == (struct rx_connection *)0)
240 /* here count is the # of servers we're actually passed in. Compute
241 * offset, a number between 0..count-1, where we'll start copying from the
242 * client-provided array. */
243 for (i = 0; i < count; i++) {
244 offset = afs_randomMod15() % count;
245 for (j = abs(offset); j < 2 * count; j++) {
246 if (!tc->conns[abs(j % count)]) {
247 tc->conns[abs(j % count)] = serverconns[i];
258 * \brief Destroy a ubik connection.
260 * It calls rx to destroy the component rx connections, then frees the ubik
261 * connection structure.
264 ubik_ClientDestroy(struct ubik_client * aclient)
270 LOCK_UBIK_CLIENT(aclient);
271 for (c = 0; c < MAXSERVERS; c++) {
272 struct rx_connection *rxConn = ubik_GetRPCConn(aclient, c);
275 #ifdef AFS_PTHREAD_ENV
276 rx_ReleaseCachedConnection(rxConn);
278 rx_DestroyConnection(rxConn);
281 aclient->initializationState = 0; /* client is not initialized */
282 UNLOCK_UBIK_CLIENT(aclient);
283 #ifdef AFS_PTHREAD_ENV
284 pthread_mutex_destroy(&(aclient->cm)); /* ignore failure */
291 * \brief So that intermittent failures that cause connections to die
292 * don't kill whole ubik connection, refresh them when the connection is in
295 struct rx_connection *
296 ubik_RefreshConn(struct rx_connection *tc)
301 struct rx_securityClass *sc;
303 struct rx_connection *newTc;
305 host = rx_HostOf(rx_PeerOf(tc));
306 port = rx_PortOf(rx_PeerOf(tc));
307 service = rx_ServiceIdOf(tc);
308 sc = rx_SecurityObjectOf(tc);
309 si = rx_SecurityClassOf(tc);
312 * destroy old one after creating new one so that refCount on security
313 * object cannot reach zero.
315 newTc = rx_NewConnection(host, port, service, sc, si);
316 rx_DestroyConnection(tc);
320 #ifdef AFS_PTHREAD_ENV
322 pthread_once_t ubik_client_once = PTHREAD_ONCE_INIT;
323 pthread_mutex_t ubik_client_mutex;
324 #define LOCK_UCLNT_CACHE do { \
325 osi_Assert(pthread_once(&ubik_client_once, ubik_client_init_mutex) == 0); \
326 MUTEX_ENTER(&ubik_client_mutex); \
328 #define UNLOCK_UCLNT_CACHE MUTEX_EXIT(&ubik_client_mutex)
331 ubik_client_init_mutex(void)
333 MUTEX_INIT(&ubik_client_mutex, "client init", MUTEX_DEFAULT, 0);
338 #define LOCK_UCLNT_CACHE
339 #define UNLOCK_UCLNT_CACHE
344 static int *calls_needsync[SYNCCOUNT]; /* proc calls that need the sync site */
345 static int synccount = 0;
350 * \brief Call this after getting back a #UNOTSYNC.
352 * \note Getting a #UNOTSYNC error code back does \b not guarantee
353 * that there is a sync site yet elected. However, if there is a sync
354 * site out there somewhere, and you're trying an operation that
355 * requires a sync site, ubik will return #UNOTSYNC, indicating the
356 * operation won't work until you find a sync site
359 try_GetSyncSite(struct ubik_client *aclient, afs_int32 apos)
364 afs_int32 thisHost, newHost;
365 struct rx_connection *tc;
368 origLevel = aclient->initializationState;
371 tc = aclient->conns[apos];
372 if (tc && rx_ConnError(tc)) {
373 aclient->conns[apos] = (tc = ubik_RefreshConn(tc));
379 /* now see if we can find the sync site host */
380 code = VOTE_GetSyncSite(tc, &newHost);
381 if (aclient->initializationState != origLevel) {
382 return -1; /* somebody did a ubik_ClientInit */
385 if (!code && newHost) {
386 newHost = htonl(newHost); /* convert back to network order */
389 * position count at the appropriate slot in the client
390 * structure and retry. If we can't find a matching slot, we'll
391 * just continue through the whole list
393 for (i = 0; i < MAXSERVERS; i++) {
394 rxp = rx_PeerOf(aclient->conns[i]);
395 thisHost = rx_HostOf(rxp);
398 } else if (thisHost == newHost) {
399 return i; /* we were told to use this one */
410 * \brief Create an internal version of ubik_CallIter that takes an additional
411 * parameter - to indicate whether the ubik client handle has already
415 CallIter(int (*aproc) (), struct ubik_client *aclient,
416 afs_int32 aflags, int *apos, long p1, long p2, long p3, long p4,
417 long p5, long p6, long p7, long p8, long p9, long p10, long p11,
418 long p12, long p13, long p14, long p15, long p16, int needlock)
421 struct rx_connection *tc;
425 LOCK_UBIK_CLIENT(aclient);
427 origLevel = aclient->initializationState;
431 while (*apos < MAXSERVERS) {
432 /* tc is the next conn to try */
433 tc = aclient->conns[*apos];
437 if (rx_ConnError(tc)) {
438 tc = ubik_RefreshConn(tc);
439 aclient->conns[*apos] = tc;
442 if ((aflags & UPUBIKONLY) && (aclient->states[*apos] & CFLastFailed)) {
443 (*apos)++; /* try another one if this server is down */
445 break; /* this is the desired path */
448 if (*apos >= MAXSERVERS)
452 (*aproc) (tc, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13,
454 if (aclient->initializationState != origLevel) {
455 /* somebody did a ubik_ClientInit */
457 code = UINTERNAL; /* no more specific error was returned */
461 /* what should I do in case of UNOQUORUM ? */
463 aclient->states[*apos] |= CFLastFailed; /* network errors */
465 /* either misc ubik code, or misc application code or success. */
466 aclient->states[*apos] &= ~CFLastFailed; /* operation worked */
472 UNLOCK_UBIK_CLIENT(aclient);
478 * \brief This is part of an iterator. It doesn't handle finding sync sites.
481 ubik_CallIter(int (*aproc) (), struct ubik_client *aclient,
482 afs_int32 aflags, int *apos, long p1, long p2,
483 long p3, long p4, long p5, long p6, long p7,
484 long p8, long p9, long p10, long p11, long p12,
485 long p13, long p14, long p15, long p16)
487 return CallIter(aproc, aclient, aflags, apos, p1, p2, p3, p4, p5, p6, p7,
488 p8, p9, p10, p11, p12, p13, p14, p15, p16, NEED_LOCK);
492 * \brief Call this instead of stub and we'll guarantee to find a host that's up.
494 * \todo In the future, we should also put in a protocol to find the sync site.
497 ubik_Call_New(int (*aproc) (), struct ubik_client *aclient,
498 afs_int32 aflags, long p1, long p2, long p3, long p4, long p5,
499 long p6, long p7, long p8, long p9, long p10, long p11,
500 long p12, long p13, long p14, long p15, long p16)
502 afs_int32 code, rcode;
509 LOCK_UBIK_CLIENT(aclient);
512 origLevel = aclient->initializationState;
514 /* Do two passes. First pass only checks servers known running */
515 for (aflags |= UPUBIKONLY, pass = 0; pass < 2;
516 pass++, aflags &= ~UPUBIKONLY) {
521 CallIter(aproc, aclient, aflags, &count, p1, p2, p3, p4, p5,
522 p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16,
524 if (code && (aclient->initializationState != origLevel)) {
527 if (code == UNOSERVERS) {
530 rcode = code; /* remember code from last good call */
532 if (code == UNOTSYNC) { /* means this requires a sync site */
533 if (aclient->conns[3]) { /* don't bother unless 4 or more srv */
534 temp = try_GetSyncSite(aclient, count);
535 if (aclient->initializationState != origLevel) {
536 goto restart; /* somebody did a ubik_ClientInit */
538 if ((temp >= 0) && ((temp > count) || (stepBack++ <= 2))) {
539 count = temp; /* generally try to make progress */
542 } else if ((code >= 0) && (code != UNOQUORUM)) {
543 UNLOCK_UBIK_CLIENT(aclient);
544 return code; /* success or global error condition */
548 UNLOCK_UBIK_CLIENT(aclient);
553 * \brief Call this instead of stub and we'll guarantee to find a host that's up.
555 * \todo In the future, we should also put in a protocol to find the sync site.
558 ubik_Call(int (*aproc) (), struct ubik_client *aclient,
559 afs_int32 aflags, long p1, long p2, long p3, long p4,
560 long p5, long p6, long p7, long p8, long p9, long p10,
561 long p11, long p12, long p13, long p14, long p15, long p16)
563 afs_int32 rcode, code, newHost, thisHost, i, count;
564 int chaseCount, pass, needsync, inlist, j;
565 struct rx_connection *tc;
569 if (aflags & UBIK_CALL_NEW)
570 return ubik_Call_New(aproc, aclient, aflags, p1, p2, p3, p4,
571 p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15,
576 LOCK_UBIK_CLIENT(aclient);
579 origLevel = aclient->initializationState;
581 chaseCount = inlist = needsync = 0;
584 for (j = 0; ((j < SYNCCOUNT) && calls_needsync[j]); j++) {
585 if (calls_needsync[j] == (int *)aproc) {
586 inlist = needsync = 1;
592 * First pass, we try all servers that are up.
593 * Second pass, we try all servers.
595 for (pass = 0; pass < 2; pass++) { /*p */
596 /* For each entry in our servers list */
597 for (count = 0;; count++) { /*s */
600 /* Need a sync site. Let's try to quickly find it */
601 if (aclient->syncSite) {
602 newHost = aclient->syncSite; /* already in network order */
603 aclient->syncSite = 0; /* Will reset if it works */
604 } else if (aclient->conns[3]) {
605 /* If there are fewer than four db servers in a cell,
606 * there's no point in making the GetSyncSite call.
607 * At best, it's a wash. At worst, it results in more
608 * RPCs than you would otherwise make.
610 tc = aclient->conns[count];
611 if (tc && rx_ConnError(tc)) {
612 aclient->conns[count] = tc = ubik_RefreshConn(tc);
616 code = VOTE_GetSyncSite(tc, &newHost);
617 if (aclient->initializationState != origLevel)
618 goto restart; /* somebody did a ubik_ClientInit */
621 newHost = htonl(newHost); /* convert to network order */
626 /* position count at the appropriate slot in the client
627 * structure and retry. If we can't find a matching slot, we'll
628 * just continue through the whole list
630 for (i = 0; i < MAXSERVERS && aclient->conns[i]; i++) {
631 rxp = rx_PeerOf(aclient->conns[i]);
632 thisHost = rx_HostOf(rxp);
635 if (thisHost == newHost) {
636 if (chaseCount++ > 2)
637 break; /* avoid loop asking */
638 count = i; /* this index is the sync site */
645 tc = aclient->conns[count];
646 if (tc && rx_ConnError(tc)) {
647 aclient->conns[count] = tc = ubik_RefreshConn(tc);
652 if ((pass == 0) && (aclient->states[count] & CFLastFailed)) {
653 continue; /* this guy's down */
657 (*aproc) (tc, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11,
658 p12, p13, p14, p15, p16);
659 if (aclient->initializationState != origLevel) {
660 /* somebody did a ubik_ClientInit */
662 goto restart; /* call failed */
664 goto done; /* call succeeded */
666 if (rcode < 0) { /* network errors */
667 aclient->states[count] |= CFLastFailed; /* Mark server down */
668 } else if (rcode == UNOTSYNC) {
670 } else if (rcode != UNOQUORUM) {
671 /* either misc ubik code, or misc appl code, or success. */
672 aclient->states[count] &= ~CFLastFailed; /* mark server up */
673 goto done; /* all done */
680 if (!inlist) { /* Remember proc call that needs sync site */
682 calls_needsync[synccount % SYNCCOUNT] = (int *)aproc;
687 if (!rcode) { /* Remember the sync site - cmd successful */
688 rxp = rx_PeerOf(aclient->conns[count]);
689 aclient->syncSite = rx_HostOf(rxp);
692 UNLOCK_UBIK_CLIENT(aclient);