2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include <afs/param.h>
16 #ifdef IGNORE_SOME_GCC_WARNINGS
17 # pragma GCC diagnostic warning "-Wstrict-prototypes"
21 #include "afsincludes.h"
24 #include <afs/pthread_glock.h>
28 #include <afs/rxgen_consts.h>
29 #define UBIK_LEGACY_CALLITER
32 short ubik_initializationState; /*!< initial state is zero */
36 * \brief Parse list for clients.
39 ubik_ParseClientList(int argc, char **argv, afs_uint32 * aothers)
48 inServer = 0; /* haven't seen -servers yet */
50 for (i = 1; i < argc; i++) {
51 /* look for -servers argument */
57 /* otherwise this is a new host name */
59 th = gethostbyname(tp);
64 memmove((void *)&temp, (const void *)th->h_addr,
67 if (counter++ >= MAXSERVERS)
71 /* haven't seen -servers yet */
72 if (!strcmp(tp, "-servers")) {
78 /* never saw -servers */
81 if (counter < MAXSERVERS)
82 *aothers++ = 0; /* null terminate if room */
86 #ifdef AFS_PTHREAD_ENV
89 static pthread_once_t random_once = PTHREAD_ONCE_INIT;
90 static int called_afs_random_once;
91 static pthread_key_t random_number_key;
96 osi_Assert(pthread_key_create(&random_number_key, NULL) == 0);
97 called_afs_random_once = 1;
102 #if !defined(UKERNEL)
104 * \brief use time and pid to try to get some initial randomness.
/* ranstage(x): stores (afs_uint32)(3141592621U * (afs_uint32)x + 1) back into x.
 * x appears twice in the expansion and is the target of an assignment, so it
 * must be an lvalue. */
106 #define ranstage(x) (x)= (afs_uint32) (3141592621U*((afs_uint32)x)+1)
109 * \brief Random number generator and constants from KnuthV2 2d ed, p170
112 * X = (aX + c) % m \n
113 * m is a power of two \n
115 * a is 0.73m should be 0.01m .. 0.99m \n
116 * c is more or less immaterial. 1 or a is suggested. \n
118 * NB: LOW ORDER BITS are not very random. To get small random numbers,
119 * treat result as <1, with implied binary point, and multiply by
122 * NB: Has to be unsigned, since shifts on signed quantities may preserve
125 * In this case, m == 2^32, the mod operation is implicit. a == pi, which
126 * is used because it has some interesting characteristics (lacks any
127 * interesting bit-patterns).
132 #ifdef AFS_PTHREAD_ENV
135 if (!called_afs_random_once)
136 pthread_once(&random_once, afs_random_once);
138 state = (uintptr_t) pthread_getspecific(random_number_key);
140 static afs_uint32 state = 0;
145 state = time(0) + getpid();
146 for (i = 0; i < 15; i++) {
152 #ifdef AFS_PTHREAD_ENV
153 pthread_setspecific(random_number_key, (const void *)(uintptr_t)state);
160 * \brief Returns int 0..14 using the high bits of a pseudo-random number instead of
161 * the low bits, as the low bits are "less random" than the high ones...
163 * \todo Slight roundoff error exists, an exercise for the reader.
165 * Need to multiply by something with lots of ones in it, so multiply by
166 * 8 or 16 is right out.
169 afs_randomMod15(void)
173 temp = afs_random() >> 4;
174 temp = (temp * 15) >> 28;
178 #endif /* !defined(UKERNEL) */
/* File-local abs(): yields -1*(a) when (a) < 0, otherwise (a).  The argument is
 * expanded more than once, so pass only expressions without side effects. */
183 #define abs(a) ((a) < 0 ? -1*(a) : (a))
185 ubik_ClientInit(struct rx_connection **serverconns,
186 struct ubik_client **aclient)
191 struct ubik_client *tc;
193 initialize_U_error_table();
195 if (*aclient) { /* the application is doing a re-initialization */
196 LOCK_UBIK_CLIENT((*aclient));
197 /* this is an important defensive check */
198 if (!((*aclient)->initializationState)) {
199 UNLOCK_UBIK_CLIENT((*aclient));
200 return UREINITIALIZE;
203 /* release all existing connections */
204 for (tc = *aclient, i = 0; i < MAXSERVERS; i++) {
205 struct rx_connection *rxConn = ubik_GetRPCConn(tc, i);
208 #ifdef AFS_PTHREAD_ENV
209 rx_ReleaseCachedConnection(rxConn);
211 rx_DestroyConnection(rxConn);
214 UNLOCK_UBIK_CLIENT((*aclient));
215 #ifdef AFS_PTHREAD_ENV
216 if (pthread_mutex_destroy(&((*aclient)->cm)))
217 return UMUTEXDESTROY;
220 tc = malloc(sizeof(struct ubik_client));
224 memset((void *)tc, 0, sizeof(*tc));
225 #ifdef AFS_PTHREAD_ENV
226 if (pthread_mutex_init(&(tc->cm), (const pthread_mutexattr_t *)0)) {
230 tc->initializationState = ++ubik_initializationState;
232 /* first count the # of server conns so we can randomize properly */
234 for (i = 0; i < MAXSERVERS; i++) {
235 if (serverconns[i] == (struct rx_connection *)0)
240 /* here count is the # of servers we're actually passed in. Compute
241 * offset, a number between 0..count-1, where we'll start copying from the
242 * client-provided array. */
243 for (i = 0; i < count; i++) {
244 offset = afs_randomMod15() % count;
245 for (j = abs(offset); j < 2 * count; j++) {
246 if (!tc->conns[abs(j % count)]) {
247 tc->conns[abs(j % count)] = serverconns[i];
258 * \brief Destroy an ubik connection.
260 * It calls rx to destroy the component rx connections, then frees the ubik
261 * connection structure.
264 ubik_ClientDestroy(struct ubik_client * aclient)
270 LOCK_UBIK_CLIENT(aclient);
271 for (c = 0; c < MAXSERVERS; c++) {
272 struct rx_connection *rxConn = ubik_GetRPCConn(aclient, c);
275 #ifdef AFS_PTHREAD_ENV
276 rx_ReleaseCachedConnection(rxConn);
278 rx_DestroyConnection(rxConn);
281 aclient->initializationState = 0; /* client is not initialized */
282 UNLOCK_UBIK_CLIENT(aclient);
283 #ifdef AFS_PTHREAD_ENV
284 pthread_mutex_destroy(&(aclient->cm)); /* ignore failure */
291 * \brief So that intermittent failures that cause connections to die
292 * don't kill whole ubik connection, refresh them when the connection is in
295 struct rx_connection *
296 ubik_RefreshConn(struct rx_connection *tc)
301 struct rx_securityClass *sc;
303 struct rx_connection *newTc;
305 host = rx_HostOf(rx_PeerOf(tc));
306 port = rx_PortOf(rx_PeerOf(tc));
307 service = rx_ServiceIdOf(tc);
308 sc = rx_SecurityObjectOf(tc);
309 si = rx_SecurityClassOf(tc);
312 * destroy old one after creating new one so that refCount on security
313 * object cannot reach zero.
315 newTc = rx_NewConnection(host, port, service, sc, si);
316 rx_DestroyConnection(tc);
320 #ifdef AFS_PTHREAD_ENV
/* State behind LOCK_UCLNT_CACHE / UNLOCK_UCLNT_CACHE: ubik_client_once is the
 * pthread_once control that gates ubik_client_init_mutex(), and
 * ubik_client_mutex is the mutex that function initializes and the macros
 * enter and exit. */
322 pthread_once_t ubik_client_once = PTHREAD_ONCE_INIT;
323 pthread_mutex_t ubik_client_mutex;
324 #define LOCK_UCLNT_CACHE do { \
325 osi_Assert(pthread_once(&ubik_client_once, ubik_client_init_mutex) == 0); \
326 MUTEX_ENTER(&ubik_client_mutex); \
328 #define UNLOCK_UCLNT_CACHE MUTEX_EXIT(&ubik_client_mutex)
331 ubik_client_init_mutex(void)
333 MUTEX_INIT(&ubik_client_mutex, "client init", MUTEX_DEFAULT, 0);
338 #define LOCK_UCLNT_CACHE
339 #define UNLOCK_UCLNT_CACHE
/* calls_needsync holds procedure pointers (stored cast to int *) that ubik_Call
 * has recorded as needing the sync site; ubik_Call scans it until the first
 * empty slot.  synccount, taken modulo SYNCCOUNT, selects the slot that
 * ubik_Call writes when it records a procedure. */
344 static int *calls_needsync[SYNCCOUNT]; /* proc calls that need the sync site */
345 static int synccount = 0;
350 * \brief Call this after getting back a #UNOTSYNC.
352 * \note Getting a #UNOTSYNC error code back does \b not guarantee
353 * that there is a sync site yet elected. However, if there is a sync
354 * site out there somewhere, and you're trying an operation that
355 * requires a sync site, ubik will return #UNOTSYNC, indicating the
356 * operation won't work until you find a sync site
359 try_GetSyncSite(struct ubik_client *aclient, afs_int32 apos)
364 afs_int32 thisHost, newHost;
365 struct rx_connection *tc;
368 origLevel = aclient->initializationState;
371 tc = aclient->conns[apos];
372 if (tc && rx_ConnError(tc)) {
373 aclient->conns[apos] = (tc = ubik_RefreshConn(tc));
379 /* now see if we can find the sync site host */
380 code = VOTE_GetSyncSite(tc, &newHost);
381 if (aclient->initializationState != origLevel) {
382 return -1; /* somebody did a ubik_ClientInit */
385 if (!code && newHost) {
386 newHost = htonl(newHost); /* convert back to network order */
389 * position count at the appropriate slot in the client
390 * structure and retry. If we can't find in slot, we'll just
391 * continue through the whole list
393 for (i = 0; i < MAXSERVERS; i++) {
394 rxp = rx_PeerOf(aclient->conns[i]);
395 thisHost = rx_HostOf(rxp);
398 } else if (thisHost == newHost) {
399 return i; /* we were told to use this one */
410 * \brief Create an internal version of ubik_CallIter that takes an additional
411 * parameter - to indicate whether the ubik client handle has already
415 CallIter(int (*aproc) (), struct ubik_client *aclient,
416 afs_int32 aflags, int *apos, long p1, long p2, long p3, long p4,
417 long p5, long p6, long p7, long p8, long p9, long p10, long p11,
418 long p12, long p13, long p14, long p15, long p16, int needlock)
421 struct rx_connection *tc;
425 LOCK_UBIK_CLIENT(aclient);
427 origLevel = aclient->initializationState;
431 while (*apos < MAXSERVERS) {
432 /* tc is the next conn to try */
433 tc = aclient->conns[*apos];
437 if (rx_ConnError(tc)) {
438 tc = ubik_RefreshConn(tc);
439 aclient->conns[*apos] = tc;
442 if ((aflags & UPUBIKONLY) && (aclient->states[*apos] & CFLastFailed)) {
443 (*apos)++; /* try another one if this server is down */
445 break; /* this is the desired path */
448 if (*apos >= MAXSERVERS)
452 (*aproc) (tc, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13,
454 if (aclient->initializationState != origLevel)
455 /* somebody did a ubik_ClientInit */
458 /* what should I do in case of UNOQUORUM ? */
460 aclient->states[*apos] |= CFLastFailed; /* network errors */
462 /* either misc ubik code, or misc application code or success. */
463 aclient->states[*apos] &= ~CFLastFailed; /* operation worked */
469 UNLOCK_UBIK_CLIENT(aclient);
475 * \brief This is part of an iterator. It doesn't handle finding sync sites.
478 ubik_CallIter(int (*aproc) (), struct ubik_client *aclient,
479 afs_int32 aflags, int *apos, long p1, long p2,
480 long p3, long p4, long p5, long p6, long p7,
481 long p8, long p9, long p10, long p11, long p12,
482 long p13, long p14, long p15, long p16)
484 return CallIter(aproc, aclient, aflags, apos, p1, p2, p3, p4, p5, p6, p7,
485 p8, p9, p10, p11, p12, p13, p14, p15, p16, NEED_LOCK);
489 * \brief Call this instead of stub and we'll guarantee to find a host that's up.
491 * \todo In the future, we should also put in a protocol to find the sync site.
494 ubik_Call_New(int (*aproc) (), struct ubik_client *aclient,
495 afs_int32 aflags, long p1, long p2, long p3, long p4, long p5,
496 long p6, long p7, long p8, long p9, long p10, long p11,
497 long p12, long p13, long p14, long p15, long p16)
499 afs_int32 code, rcode;
506 LOCK_UBIK_CLIENT(aclient);
509 origLevel = aclient->initializationState;
511 /* Do two passes. First pass only checks servers known running */
512 for (aflags |= UPUBIKONLY, pass = 0; pass < 2;
513 pass++, aflags &= ~UPUBIKONLY) {
518 CallIter(aproc, aclient, aflags, &count, p1, p2, p3, p4, p5,
519 p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16,
521 if (code && (aclient->initializationState != origLevel)) {
524 if (code == UNOSERVERS) {
527 rcode = code; /* remember code from last good call */
529 if (code == UNOTSYNC) { /* means this requires a sync site */
530 if (aclient->conns[3]) { /* don't bother unless 4 or more srv */
531 temp = try_GetSyncSite(aclient, count);
532 if (aclient->initializationState != origLevel) {
533 goto restart; /* somebody did a ubik_ClientInit */
535 if ((temp >= 0) && ((temp > count) || (stepBack++ <= 2))) {
536 count = temp; /* generally try to make progress */
539 } else if ((code >= 0) && (code != UNOQUORUM)) {
540 UNLOCK_UBIK_CLIENT(aclient);
541 return code; /* success or global error condition */
545 UNLOCK_UBIK_CLIENT(aclient);
550 * call this instead of stub and we'll guarantee to find a host that's up.
552 * \todo In the future, we should also put in a protocol to find the sync site.
555 ubik_Call(int (*aproc) (), struct ubik_client *aclient,
556 afs_int32 aflags, long p1, long p2, long p3, long p4,
557 long p5, long p6, long p7, long p8, long p9, long p10,
558 long p11, long p12, long p13, long p14, long p15, long p16)
560 afs_int32 rcode, code, newHost, thisHost, i, count;
561 int chaseCount, pass, needsync, inlist, j;
562 struct rx_connection *tc;
566 if (aflags & UBIK_CALL_NEW)
567 return ubik_Call_New(aproc, aclient, aflags, p1, p2, p3, p4,
568 p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15,
573 LOCK_UBIK_CLIENT(aclient);
576 origLevel = aclient->initializationState;
578 chaseCount = inlist = needsync = 0;
581 for (j = 0; ((j < SYNCCOUNT) && calls_needsync[j]); j++) {
582 if (calls_needsync[j] == (int *)aproc) {
583 inlist = needsync = 1;
589 * First pass, we try all servers that are up.
590 * Second pass, we try all servers.
592 for (pass = 0; pass < 2; pass++) { /*p */
593 /* For each entry in our servers list */
594 for (count = 0;; count++) { /*s */
597 /* Need a sync site. Let's try to quickly find it */
598 if (aclient->syncSite) {
599 newHost = aclient->syncSite; /* already in network order */
600 aclient->syncSite = 0; /* Will reset if it works */
601 } else if (aclient->conns[3]) {
602 /* If there are fewer than four db servers in a cell,
603 * there's no point in making the GetSyncSite call.
604 * At best, it's a wash. At worst, it results in more
605 * RPCs than you would otherwise make.
607 tc = aclient->conns[count];
608 if (tc && rx_ConnError(tc)) {
609 aclient->conns[count] = tc = ubik_RefreshConn(tc);
613 code = VOTE_GetSyncSite(tc, &newHost);
614 if (aclient->initializationState != origLevel)
615 goto restart; /* somebody did a ubik_ClientInit */
618 newHost = htonl(newHost); /* convert to network order */
623 /* position count at the appropriate slot in the client
624 * structure and retry. If we can't find in slot, we'll
625 * just continue through the whole list
627 for (i = 0; i < MAXSERVERS && aclient->conns[i]; i++) {
628 rxp = rx_PeerOf(aclient->conns[i]);
629 thisHost = rx_HostOf(rxp);
632 if (thisHost == newHost) {
633 if (chaseCount++ > 2)
634 break; /* avoid loop asking */
635 count = i; /* this index is the sync site */
642 tc = aclient->conns[count];
643 if (tc && rx_ConnError(tc)) {
644 aclient->conns[count] = tc = ubik_RefreshConn(tc);
649 if ((pass == 0) && (aclient->states[count] & CFLastFailed)) {
650 continue; /* this guy's down */
654 (*aproc) (tc, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11,
655 p12, p13, p14, p15, p16);
656 if (aclient->initializationState != origLevel) {
657 /* somebody did a ubik_ClientInit */
659 goto restart; /* call failed */
661 goto done; /* call succeeded */
663 if (rcode < 0) { /* network errors */
664 aclient->states[count] |= CFLastFailed; /* Mark server down */
665 } else if (rcode == UNOTSYNC) {
667 } else if (rcode != UNOQUORUM) {
668 /* either misc ubik code, or misc appl code, or success. */
669 aclient->states[count] &= ~CFLastFailed; /* mark server up */
670 goto done; /* all done */
677 if (!inlist) { /* Remember proc call that needs sync site */
679 calls_needsync[synccount % SYNCCOUNT] = (int *)aproc;
684 if (!rcode) { /* Remember the sync site - cmd successful */
685 rxp = rx_PeerOf(aclient->conns[count]);
686 aclient->syncSite = rx_HostOf(rxp);
689 UNLOCK_UBIK_CLIENT(aclient);