2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
12 #include "afs/param.h"
14 #include <afs/param.h>
17 #ifdef IGNORE_SOME_GCC_WARNINGS
18 # pragma GCC diagnostic warning "-Wstrict-prototypes"
22 #include "afs/sysincludes.h"
23 #include "afsincludes.h"
28 #include "afs/rxgen_consts.h"
29 #define UBIK_LEGACY_CALLITER 1
31 #include "afs/pthread_glock.h"
33 #else /* defined(UKERNEL) */
35 #include <afs/pthread_glock.h>
46 #include <netinet/in.h>
48 #include <afs/rxgen_consts.h>
50 #endif /* defined(UKERNEL) */
56 short ubik_initializationState; /*!< initial state is zero */
60 * \brief Parse list for clients.
/* Walks argv[1] .. argv[argc-1] looking for the "-servers" switch and
 * resolves host names with gethostbyname().
 *   argc, argv - command-line style argument vector; argv[0] is not examined.
 *   aothers    - output array; a 0 terminator is written through it only
 *                when fewer than MAXSERVERS entries were counted. */
63 ubik_ParseClientList(int argc, char **argv, afs_int32 * aothers)
67 register struct hostent *th;
68 afs_int32 temp, counter;
71 inServer = 0; /* haven't seen -servers yet */
73 for (i = 1; i < argc; i++) {
74 /* look for -servers argument */
80 /* otherwise this is a new host name */
82 th = gethostbyname(tp);
/* copy the resolved address bytes out of the hostent into temp */
87 memmove((void *)&temp, (const void *)th->h_addr,
/* counter is post-incremented, so it advances even when the limit test fires */
90 if (counter++ >= MAXSERVERS)
94 /* haven't seen -servers yet */
95 if (!strcmp(tp, "-servers")) {
101 /* never saw -servers */
104 if (counter < MAXSERVERS)
105 *aothers++ = 0; /* null terminate if room */
109 #ifdef AFS_PTHREAD_ENV
/* pthread_once control block used to run afs_random_once() */
113 static pthread_once_t random_once = PTHREAD_ONCE_INIT;
/* set to 1 by afs_random_once(); tested before calling pthread_once() */
114 static int called_afs_random_once;
/* thread-specific key whose value holds the caller's generator state */
115 static pthread_key_t random_number_key;
/* One-time initializer: creates random_number_key (no destructor) and
 * records that it ran.
 * NOTE(review): pthread_key_create() is evaluated as the assert() argument.
 * If this assert is the standard <assert.h> macro and NDEBUG is defined,
 * the call disappears and the key is never created - confirm which assert
 * is in effect for this file. */
118 afs_random_once(void)
120 assert(pthread_key_create(&random_number_key, NULL) == 0);
121 called_afs_random_once = 1;
126 #if !defined(UKERNEL)
128 * \brief use time and pid to try to get some initial randomness.
/* Advances x in place by one step of X = a*X + c with a = 3141592621 and
 * c = 1; the result is truncated to afs_uint32 (presumably 32 bits, which
 * makes the modulus implicit - confirm). x is expanded twice and is not
 * parenthesized on the left-hand side, so pass a plain lvalue without side
 * effects. */
130 #define ranstage(x) (x)= (afs_uint32) (3141592621U*((afs_uint32)x)+1)
133 * \brief Random number generator and constants from KnuthV2 2d ed, p170
136 * X = (aX + c) % m \n
137 * m is a power of two \n
139 * a is 0.73m should be 0.01m .. 0.99m \n
140 * c is more or less immaterial. 1 or a is suggested. \n
142 * NB: LOW ORDER BITS are not very random. To get small random numbers,
143 * treat result as <1, with implied binary point, and multiply by
146 * NB: Has to be unsigned, since shifts on signed quantities may preserve
149 * In this case, m == 2^32, the mod operation is implicit. a == pi, which
150 * is used because it has some interesting characteristics (lacks any
151 * interesting bit-patterns).
/* pthread builds: make sure the thread-specific key exists, then load this
 * thread's generator state from it. */
156 #ifdef AFS_PTHREAD_ENV
/* the flag avoids calling pthread_once() again after the initializer ran */
159 if (!called_afs_random_once)
160 pthread_once(&random_once, afs_random_once);
/* the state is stored directly in the key's pointer-sized value */
162 state = (uintptr_t) pthread_getspecific(random_number_key);
164 static afs_uint32 state = 0;
/* seed from the current time plus the process id */
169 state = time(0) + getpid();
170 for (i = 0; i < 15; i++) {
176 #ifdef AFS_PTHREAD_ENV
/* write the updated state back for this thread's next call */
177 pthread_setspecific(random_number_key, (const void *)(uintptr_t)state);
184 * \brief Returns int 0..14 using the high bits of a pseudo-random number instead of
185 * the low bits, as the low bits are "less random" than the high ones...
187 * \todo Slight roundoff error exists, an exercise for the reader.
189 * Need to multiply by something with lots of ones in it, so multiply by
190 * 8 or 16 is right out.
193 afs_randomMod15(void)
/* discard the 4 low-order bits of the random value */
197 temp = afs_random() >> 4;
/* scale by 15 and keep what lies above bit 27; with a 32-bit afs_random()
 * result this yields 0..14 - TODO confirm the width of temp and afs_random() */
198 temp = (temp * 15) >> 28;
202 #endif /* !defined(UKERNEL) */
/* Local absolute-value macro. The argument is evaluated twice, so it must
 * not have side effects. */
207 #define abs(a) ((a) < 0 ? -1*(a) : (a))
/* Builds a ubik client handle from an array of rx connections.
 *   serverconns - array scanned up to MAXSERVERS entries; a null entry is
 *                 tested for while counting the supplied connections.
 *   aclient     - in/out; when *aclient is non-null on entry the call is
 *                 treated as a re-initialization of that handle.
 * Returns UREINITIALIZE when the existing handle's initializationState is
 * zero, and UMUTEXDESTROY when pthread_mutex_destroy() on the old handle
 * fails (AFS_PTHREAD_ENV builds). */
209 ubik_ClientInit(register struct rx_connection **serverconns,
210 struct ubik_client **aclient)
215 register struct ubik_client *tc;
217 initialize_U_error_table();
219 if (*aclient) { /* the application is doing a re-initialization */
220 LOCK_UBIK_CLIENT((*aclient));
221 /* this is an important defensive check */
222 if (!((*aclient)->initializationState)) {
223 UNLOCK_UBIK_CLIENT((*aclient));
224 return UREINITIALIZE;
227 /* release all existing connections */
228 for (tc = *aclient, i = 0; i < MAXSERVERS; i++) {
229 struct rx_connection *rxConn = ubik_GetRPCConn(tc, i);
232 #ifdef AFS_PTHREAD_ENV
233 rx_ReleaseCachedConnection(rxConn);
235 rx_DestroyConnection(rxConn);
238 UNLOCK_UBIK_CLIENT((*aclient));
239 #ifdef AFS_PTHREAD_ENV
240 if (pthread_mutex_destroy(&((*aclient)->cm)))
241 return UMUTEXDESTROY;
244 tc = (struct ubik_client *)malloc(sizeof(struct ubik_client));
248 memset((void *)tc, 0, sizeof(*tc));
249 #ifdef AFS_PTHREAD_ENV
250 if (pthread_mutex_init(&(tc->cm), (const pthread_mutexattr_t *)0)) {
/* stamp the handle with a fresh value of the global counter */
254 tc->initializationState = ++ubik_initializationState;
256 /* first count the # of server conns so we can randomize properly */
258 for (i = 0; i < MAXSERVERS; i++) {
259 if (serverconns[i] == (struct rx_connection *)0)
264 /* here count is the # of servers we're actually passed in. Compute
265 * offset, a number between 0..count-1, where we'll start copying from the
266 * client-provided array. */
267 for (i = 0; i < count; i++) {
268 offset = afs_randomMod15() % count;
/* offset is the first slot of tc->conns to try; j probes forward, wrapping
 * modulo count, and serverconns[i] goes into the first empty slot found */
269 for (j = abs(offset); j < 2 * count; j++) {
270 if (!tc->conns[abs(j % count)]) {
271 tc->conns[abs(j % count)] = serverconns[i];
282 * \brief Destroy an ubik connection.
284 * It calls rx to destroy the component rx connections, then frees the ubik
285 * connection structure.
/* Releases the rx connection in each of the MAXSERVERS slots while holding
 * the client lock, then marks the handle uninitialized. */
288 ubik_ClientDestroy(struct ubik_client * aclient)
294 LOCK_UBIK_CLIENT(aclient);
295 for (c = 0; c < MAXSERVERS; c++) {
296 struct rx_connection *rxConn = ubik_GetRPCConn(aclient, c);
299 #ifdef AFS_PTHREAD_ENV
300 rx_ReleaseCachedConnection(rxConn);
302 rx_DestroyConnection(rxConn);
305 aclient->initializationState = 0; /* client is not initialized */
306 UNLOCK_UBIK_CLIENT(aclient);
307 #ifdef AFS_PTHREAD_ENV
308 pthread_mutex_destroy(&(aclient->cm)); /* ignore failure */
315 * \brief So that intermittent failures that cause connections to die
316 * don't kill whole ubik connection, refresh them when the connection is in
319 struct rx_connection *
320 ubik_RefreshConn(struct rx_connection *tc)
325 struct rx_securityClass *sc;
327 struct rx_connection *newTc;
/* capture everything needed to open an equivalent connection: peer host and
 * port, service id, security object and security class index */
329 host = rx_HostOf(rx_PeerOf(tc));
330 port = rx_PortOf(rx_PeerOf(tc));
331 service = rx_ServiceIdOf(tc);
332 sc = rx_SecurityObjectOf(tc);
333 si = rx_SecurityClassOf(tc);
336 * destroy old one after creating new one so that refCount on security
337 * object cannot reach zero.
339 newTc = rx_NewConnection(host, port, service, sc, si);
340 rx_DestroyConnection(tc);
344 #ifdef AFS_PTHREAD_ENV
/* ubik_client_mutex is initialized on first use through ubik_client_once.
 * NOTE(review): the pthread_once(), pthread_mutex_lock(),
 * pthread_mutex_unlock() and pthread_mutex_init() calls below are all
 * evaluated as assert() arguments. If this is the standard <assert.h>
 * macro and NDEBUG is defined, none of them would execute - confirm which
 * assert is in effect for this file. */
346 pthread_once_t ubik_client_once = PTHREAD_ONCE_INIT;
347 pthread_mutex_t ubik_client_mutex;
348 #define LOCK_UCLNT_CACHE \
349 assert(pthread_once(&ubik_client_once, ubik_client_init_mutex) == 0 && \
350 pthread_mutex_lock(&ubik_client_mutex)==0)
351 #define UNLOCK_UCLNT_CACHE assert(pthread_mutex_unlock(&ubik_client_mutex)==0)
/* pthread_once() callback named in LOCK_UCLNT_CACHE; creates the mutex with
 * default attributes */
354 ubik_client_init_mutex(void)
356 assert(pthread_mutex_init(&ubik_client_mutex, NULL) == 0);
/* empty variants of both macros: locking expands to nothing (presumably the
 * branch taken when AFS_PTHREAD_ENV is not defined - verify) */
361 #define LOCK_UCLNT_CACHE
362 #define UNLOCK_UCLNT_CACHE
/* ubik_Call compares aproc (cast to int *) against these entries, scanning
 * until the first null slot or SYNCCOUNT entries */
367 static int *calls_needsync[SYNCCOUNT]; /* proc calls that need the sync site */
/* used modulo SYNCCOUNT to choose the calls_needsync slot that ubik_Call
 * writes */
368 static int synccount = 0;
373 * \brief Call this after getting back a #UNOTSYNC.
375 * \note Getting a #UNOTSYNC error code back does \b not guarantee
376 * that there is a sync site yet elected. However, if there is a sync
377 * site out there somewhere, and you're trying an operation that
378 * requires a sync site, ubik will return #UNOTSYNC, indicating the
379 * operation won't work until you find a sync site
/* Asks the server in slot apos which host is the sync site and looks that
 * host up among the client's connections.
 *   aclient - client handle whose conns[] array is consulted.
 *   apos    - index of the connection used for the VOTE_GetSyncSite call.
 * Returns -1 when aclient->initializationState changed during the call, or
 * the index i of the connection whose peer host equals the reported one. */
382 try_GetSyncSite(register struct ubik_client *aclient, afs_int32 apos)
387 afs_int32 thisHost, newHost;
388 struct rx_connection *tc;
/* remember the handle's generation so a concurrent re-init can be detected */
391 origLevel = aclient->initializationState;
394 tc = aclient->conns[apos];
/* a connection in error state is replaced before it is used */
395 if (tc && rx_ConnError(tc)) {
396 aclient->conns[apos] = (tc = ubik_RefreshConn(tc));
402 /* now see if we can find the sync site host */
403 code = VOTE_GetSyncSite(tc, &newHost);
404 if (aclient->initializationState != origLevel) {
405 return -1; /* somebody did a ubik_ClientInit */
408 if (!code && newHost) {
409 newHost = htonl(newHost); /* convert back to network order */
412 * position count at the appropriate slot in the client
413 * structure and retry. If we can't find in slot, we'll just
414 * continue through the whole list
416 for (i = 0; i < MAXSERVERS; i++) {
417 rxp = rx_PeerOf(aclient->conns[i]);
418 thisHost = rx_HostOf(rxp);
421 } else if (thisHost == newHost) {
422 return i; /* we were told to use this one */
433 * \brief Create an internal version of ubik_CallIter that takes an additional
434 * parameter - to indicate whether the ubik client handle has already
/* One iterator step: starting at *apos, selects a connection from
 * aclient->conns, invokes aproc on it with p1..p16, and records the outcome
 * in the CFLastFailed bit of aclient->states[*apos].
 *   aflags   - when UPUBIKONLY is set, slots whose CFLastFailed bit is set
 *              are skipped by advancing *apos.
 *   apos     - in/out position in the connection array.
 *   needlock - presumably selects whether this function takes the client
 *              lock itself; verify against the callers. */
438 CallIter(int (*aproc) (), register struct ubik_client *aclient,
439 afs_int32 aflags, int *apos, long p1, long p2, long p3, long p4,
440 long p5, long p6, long p7, long p8, long p9, long p10, long p11,
441 long p12, long p13, long p14, long p15, long p16, int needlock)
443 register afs_int32 code;
444 struct rx_connection *tc;
448 LOCK_UBIK_CLIENT(aclient);
/* remember the handle's generation so a concurrent re-init can be detected */
450 origLevel = aclient->initializationState;
454 while (*apos < MAXSERVERS) {
455 /* tc is the next conn to try */
456 tc = aclient->conns[*apos];
459 UNLOCK_UBIK_CLIENT(aclient);
/* a connection in error state is replaced and stored back in its slot */
464 if (rx_ConnError(tc)) {
465 tc = ubik_RefreshConn(tc);
466 aclient->conns[*apos] = tc;
469 if ((aflags & UPUBIKONLY) && (aclient->states[*apos] & CFLastFailed)) {
470 (*apos)++; /* try another one if this server is down */
472 break; /* this is the desired path */
475 if (*apos >= MAXSERVERS) {
477 UNLOCK_UBIK_CLIENT(aclient);
483 (*aproc) (tc, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13,
485 if (aclient->initializationState != origLevel) {
487 UNLOCK_UBIK_CLIENT(aclient);
489 return code; /* somebody did a ubik_ClientInit */
492 /* what should I do in case of UNOQUORUM ? */
494 aclient->states[*apos] |= CFLastFailed; /* network errors */
496 /* either misc ubik code, or misc application code or success. */
497 aclient->states[*apos] &= ~CFLastFailed; /* operation worked */
502 UNLOCK_UBIK_CLIENT(aclient);
508 * \brief This is part of an iterator. It doesn't handle finding sync sites.
/* Public entry point: forwards every argument unchanged to CallIter() and
 * passes NEED_LOCK as the final needlock argument. */
511 ubik_CallIter(int (*aproc) (), struct ubik_client *aclient,
512 afs_int32 aflags, int *apos, long p1, long p2,
513 long p3, long p4, long p5, long p6, long p7,
514 long p8, long p9, long p10, long p11, long p12,
515 long p13, long p14, long p15, long p16)
517 return CallIter(aproc, aclient, aflags, apos, p1, p2, p3, p4, p5, p6, p7,
518 p8, p9, p10, p11, p12, p13, p14, p15, p16, NEED_LOCK);
522 * \brief Call this instead of stub and we'll guarantee to find a host that's up.
524 * \todo In the future, we should also put in a protocol to find the sync site.
/* Drives CallIter() over the client's connections in two passes. The first
 * pass runs with UPUBIKONLY set in aflags; the flag is cleared for the
 * second pass. A non-negative code other than UNOTSYNC and UNOQUORUM is
 * returned to the caller immediately. */
527 ubik_Call_New(int (*aproc) (), register struct ubik_client *aclient,
528 afs_int32 aflags, long p1, long p2, long p3, long p4, long p5,
529 long p6, long p7, long p8, long p9, long p10, long p11,
530 long p12, long p13, long p14, long p15, long p16)
532 afs_int32 code, rcode;
539 LOCK_UBIK_CLIENT(aclient);
/* remember the handle's generation so a concurrent re-init can be detected */
542 origLevel = aclient->initializationState;
544 /* Do two passes. First pass only checks servers known running */
545 for (aflags |= UPUBIKONLY, pass = 0; pass < 2;
546 pass++, aflags &= ~UPUBIKONLY) {
551 CallIter(aproc, aclient, aflags, &count, p1, p2, p3, p4, p5,
552 p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16,
554 if (code && (aclient->initializationState != origLevel)) {
557 if (code == UNOSERVERS) {
560 rcode = code; /* remember code from last good call */
562 if (code == UNOTSYNC) { /* means this requires a sync site */
563 if (aclient->conns[3]) { /* don't bother unless 4 or more srv */
564 temp = try_GetSyncSite(aclient, count);
565 if (aclient->initializationState != origLevel) {
566 goto restart; /* somebody did a ubik_ClientInit */
/* move to the reported slot when it lies ahead of the current position;
 * a move that is not forward is allowed only while stepBack is still <= 2 */
568 if ((temp >= 0) && ((temp > count) || (stepBack++ <= 2))) {
569 count = temp; /* generally try to make progress */
572 } else if ((code >= 0) && (code != UNOQUORUM)) {
573 UNLOCK_UBIK_CLIENT(aclient);
574 return code; /* success or global error condition */
578 UNLOCK_UBIK_CLIENT(aclient);
583 * call this instead of stub and we'll guarantee to find a host that's up.
585 * \todo In the future, we should also put in a protocol to find the sync site.
/* Invokes aproc with p1..p16 against the client's connections.
 *   aflags - when UBIK_CALL_NEW is set the whole call is delegated to
 *            ubik_Call_New().
 * A procedure found in calls_needsync is treated as needing the sync site.
 * After a call with rcode == 0 the peer host of the connection used is
 * stored in aclient->syncSite. */
588 ubik_Call(int (*aproc) (), register struct ubik_client *aclient,
589 afs_int32 aflags, long p1, long p2, long p3, long p4,
590 long p5, long p6, long p7, long p8, long p9, long p10,
591 long p11, long p12, long p13, long p14, long p15, long p16)
593 afs_int32 rcode, code, newHost, thisHost, i, count;
594 int chaseCount, pass, needsync, inlist, j;
595 struct rx_connection *tc;
599 if (aflags & UBIK_CALL_NEW)
600 return ubik_Call_New(aproc, aclient, aflags, p1, p2, p3, p4,
601 p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15,
606 LOCK_UBIK_CLIENT(aclient);
/* remember the handle's generation so a concurrent re-init can be detected */
609 origLevel = aclient->initializationState;
611 chaseCount = inlist = needsync = 0;
/* was this procedure already recorded as one that needs the sync site? */
614 for (j = 0; ((j < SYNCCOUNT) && calls_needsync[j]); j++) {
615 if (calls_needsync[j] == (int *)aproc) {
616 inlist = needsync = 1;
622 * First pass, we try all servers that are up.
623 * Second pass, we try all servers.
625 for (pass = 0; pass < 2; pass++) { /*p */
626 /* For each entry in our servers list */
627 for (count = 0;; count++) { /*s */
630 /* Need a sync site. Lets try to quickly find it */
631 if (aclient->syncSite) {
632 newHost = aclient->syncSite; /* already in network order */
633 aclient->syncSite = 0; /* Will reset if it works */
634 } else if (aclient->conns[3]) {
635 /* If there are fewer than four db servers in a cell,
636 * there's no point in making the GetSyncSite call.
637 * At best, it's a wash. At worst, it results in more
638 * RPCs than you would otherwise make.
640 tc = aclient->conns[count];
641 if (tc && rx_ConnError(tc)) {
642 aclient->conns[count] = tc = ubik_RefreshConn(tc);
646 code = VOTE_GetSyncSite(tc, &newHost);
647 if (aclient->initializationState != origLevel)
648 goto restart; /* somebody did a ubik_ClientInit */
651 newHost = htonl(newHost); /* convert to network order */
656 /* position count at the appropriate slot in the client
657 * structure and retry. If we can't find in slot, we'll
658 * just continue through the whole list
660 for (i = 0; i < MAXSERVERS && aclient->conns[i]; i++) {
661 rxp = rx_PeerOf(aclient->conns[i]);
662 thisHost = rx_HostOf(rxp);
665 if (thisHost == newHost) {
666 if (chaseCount++ > 2)
667 break; /* avoid loop asking */
668 count = i; /* this index is the sync site */
675 tc = aclient->conns[count];
/* a connection in error state is replaced and stored back in its slot */
676 if (tc && rx_ConnError(tc)) {
677 aclient->conns[count] = tc = ubik_RefreshConn(tc);
682 if ((pass == 0) && (aclient->states[count] & CFLastFailed)) {
683 continue; /* this guy's down */
687 (*aproc) (tc, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11,
688 p12, p13, p14, p15, p16);
689 if (aclient->initializationState != origLevel) {
690 /* somebody did a ubik_ClientInit */
692 goto restart; /* call failed */
694 goto done; /* call succeeded */
696 if (rcode < 0) { /* network errors */
697 aclient->states[count] |= CFLastFailed; /* Mark server down */
698 } else if (rcode == UNOTSYNC) {
700 } else if (rcode != UNOQUORUM) {
701 /* either misc ubik code, or misc appl code, or success. */
702 aclient->states[count] &= ~CFLastFailed; /* mark server up */
703 goto done; /* all done */
710 if (!inlist) { /* Remember proc call that needs sync site */
712 calls_needsync[synccount % SYNCCOUNT] = (int *)aproc;
717 if (!rcode) { /* Remember the sync site - cmd successful */
718 rxp = rx_PeerOf(aclient->conns[count]);
719 aclient->syncSite = rx_HostOf(rxp);
722 UNLOCK_UBIK_CLIENT(aclient);