2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include <afs/param.h>
16 #ifdef AFS_PTHREAD_ENV
17 # include <opr/lock.h>
20 #ifdef IGNORE_SOME_GCC_WARNINGS
21 # pragma GCC diagnostic warning "-Wstrict-prototypes"
25 #include "afsincludes.h"
28 #include <afs/pthread_glock.h>
32 #include <afs/rxgen_consts.h>
33 #define UBIK_LEGACY_CALLITER
36 short ubik_initializationState; /*!< initial state is zero */
40 * \brief Parse list for clients.
43 ubik_ParseClientList(int argc, char **argv, afs_uint32 * aothers)
52 inServer = 0; /* haven't seen -servers yet */
54 for (i = 1; i < argc; i++) {
55 /* look for -servers argument */
61 /* otherwise this is a new host name */
63 th = gethostbyname(tp);
68 memmove((void *)&temp, (const void *)th->h_addr,
71 if (counter++ >= MAXSERVERS)
75 /* haven't seen -servers yet */
76 if (!strcmp(tp, "-servers")) {
82 /* never saw -servers */
85 if (counter < MAXSERVERS)
86 *aothers++ = 0; /* null terminate if room */
90 #ifdef AFS_PTHREAD_ENV
93 static pthread_once_t random_once = PTHREAD_ONCE_INIT;
94 static int called_afs_random_once;
95 static pthread_key_t random_number_key;
100 opr_Verify(pthread_key_create(&random_number_key, NULL) == 0);
101 called_afs_random_once = 1;
106 #if !defined(UKERNEL)
108 * \brief use time and pid to try to get some initial randomness.
110 #define ranstage(x) (x)= (afs_uint32) (3141592621U*((afs_uint32)x)+1)
113 * \brief Random number generator and constants from KnuthV2 2d ed, p170
116 * X = (aX + c) % m \n
117 * m is a power of two \n
119 * a is 0.73m should be 0.01m .. 0.99m \n
120 * c is more or less immaterial. 1 or a is suggested. \n
122 * NB: LOW ORDER BITS are not very random. To get small random numbers,
123 * treat result as <1, with implied binary point, and multiply by
126 * NB: Has to be unsigned, since shifts on signed quantities may preserve
129 * In this case, m == 2^32, the mod operation is implicit. a == pi, which
130 * is used because it has some interesting characteristics (lacks any
131 * interesting bit-patterns).
136 #ifdef AFS_PTHREAD_ENV
139 if (!called_afs_random_once)
140 pthread_once(&random_once, afs_random_once);
142 state = (uintptr_t) pthread_getspecific(random_number_key);
144 static afs_uint32 state = 0;
149 state = time(0) + getpid();
150 for (i = 0; i < 15; i++) {
156 #ifdef AFS_PTHREAD_ENV
157 pthread_setspecific(random_number_key, (const void *)(uintptr_t)state);
164 * \brief Returns int 0..14 using the high bits of a pseudo-random number instead of
165 * the low bits, as the low bits are "less random" than the high ones...
167 * \todo Slight roundoff error exists, an exercise for the reader.
169 * Need to multiply by something with lots of ones in it, so multiply by
170 * 8 or 16 is right out.
173 afs_randomMod15(void)
177 temp = afs_random() >> 4;
178 temp = (temp * 15) >> 28;
182 #endif /* !defined(UKERNEL) */
187 #define abs(a) ((a) < 0 ? -1*(a) : (a))
189 ubik_ClientInit(struct rx_connection **serverconns,
190 struct ubik_client **aclient)
195 struct ubik_client *tc;
197 initialize_U_error_table();
199 if (*aclient) { /* the application is doing a re-initialization */
200 LOCK_UBIK_CLIENT((*aclient));
201 /* this is an important defensive check */
202 if (!((*aclient)->initializationState)) {
203 UNLOCK_UBIK_CLIENT((*aclient));
204 return UREINITIALIZE;
207 /* release all existing connections */
208 for (tc = *aclient, i = 0; i < MAXSERVERS; i++) {
209 struct rx_connection *rxConn = ubik_GetRPCConn(tc, i);
212 #ifdef AFS_PTHREAD_ENV
213 rx_ReleaseCachedConnection(rxConn);
215 rx_DestroyConnection(rxConn);
218 UNLOCK_UBIK_CLIENT((*aclient));
219 #ifdef AFS_PTHREAD_ENV
220 if (pthread_mutex_destroy(&((*aclient)->cm)))
221 return UMUTEXDESTROY;
224 tc = malloc(sizeof(struct ubik_client));
228 memset((void *)tc, 0, sizeof(*tc));
229 #ifdef AFS_PTHREAD_ENV
230 if (pthread_mutex_init(&(tc->cm), (const pthread_mutexattr_t *)0)) {
234 tc->initializationState = ++ubik_initializationState;
236 /* first count the # of server conns so we can randomize properly */
238 for (i = 0; i < MAXSERVERS; i++) {
239 if (serverconns[i] == (struct rx_connection *)0)
244 /* here count is the # of servers we're actually passed in. Compute
245 * offset, a number between 0..count-1, where we'll start copying from the
246 * client-provided array. */
247 for (i = 0; i < count; i++) {
248 offset = afs_randomMod15() % count;
249 for (j = abs(offset); j < 2 * count; j++) {
250 if (!tc->conns[abs(j % count)]) {
251 tc->conns[abs(j % count)] = serverconns[i];
262 * \brief Destroy a ubik connection.
264 * It calls rx to destroy the component rx connections, then frees the ubik
265 * connection structure.
268 ubik_ClientDestroy(struct ubik_client * aclient)
274 LOCK_UBIK_CLIENT(aclient);
275 for (c = 0; c < MAXSERVERS; c++) {
276 struct rx_connection *rxConn = ubik_GetRPCConn(aclient, c);
279 #ifdef AFS_PTHREAD_ENV
280 rx_ReleaseCachedConnection(rxConn);
282 rx_DestroyConnection(rxConn);
285 aclient->initializationState = 0; /* client is not initialized */
286 UNLOCK_UBIK_CLIENT(aclient);
287 #ifdef AFS_PTHREAD_ENV
288 pthread_mutex_destroy(&(aclient->cm)); /* ignore failure */
295 * \brief So that intermittent failures that cause connections to die
296 * don't kill whole ubik connection, refresh them when the connection is in
299 struct rx_connection *
300 ubik_RefreshConn(struct rx_connection *tc)
305 struct rx_securityClass *sc;
307 struct rx_connection *newTc;
309 host = rx_HostOf(rx_PeerOf(tc));
310 port = rx_PortOf(rx_PeerOf(tc));
311 service = rx_ServiceIdOf(tc);
312 sc = rx_SecurityObjectOf(tc);
313 si = rx_SecurityClassOf(tc);
316 * destroy old one after creating new one so that refCount on security
317 * object cannot reach zero.
319 newTc = rx_NewConnection(host, port, service, sc, si);
320 rx_DestroyConnection(tc);
324 #ifdef AFS_PTHREAD_ENV
326 pthread_once_t ubik_client_once = PTHREAD_ONCE_INIT;
327 pthread_mutex_t ubik_client_mutex;
328 #define LOCK_UCLNT_CACHE do { \
329 opr_Verify(pthread_once(&ubik_client_once, ubik_client_init_mutex) == 0); \
330 MUTEX_ENTER(&ubik_client_mutex); \
332 #define UNLOCK_UCLNT_CACHE MUTEX_EXIT(&ubik_client_mutex)
335 ubik_client_init_mutex(void)
337 MUTEX_INIT(&ubik_client_mutex, "client init", MUTEX_DEFAULT, 0);
342 #define LOCK_UCLNT_CACHE
343 #define UNLOCK_UCLNT_CACHE
348 static int *calls_needsync[SYNCCOUNT]; /* proc calls that need the sync site */
349 static int synccount = 0;
354 * \brief Call this after getting back a #UNOTSYNC.
356 * \note Getting a #UNOTSYNC error code back does \b not guarantee
357 * that there is a sync site yet elected. However, if there is a sync
358 * site out there somewhere, and you're trying an operation that
359 * requires a sync site, ubik will return #UNOTSYNC, indicating the
360 * operation won't work until you find a sync site
363 try_GetSyncSite(struct ubik_client *aclient, afs_int32 apos)
368 afs_int32 thisHost, newHost;
369 struct rx_connection *tc;
372 origLevel = aclient->initializationState;
375 tc = aclient->conns[apos];
376 if (tc && rx_ConnError(tc)) {
377 aclient->conns[apos] = (tc = ubik_RefreshConn(tc));
383 /* now see if we can find the sync site host */
384 code = VOTE_GetSyncSite(tc, &newHost);
385 if (aclient->initializationState != origLevel) {
386 return -1; /* somebody did a ubik_ClientInit */
389 if (!code && newHost) {
390 newHost = htonl(newHost); /* convert back to network order */
393 * position count at the appropriate slot in the client
394 * structure and retry. If we can't find it in any slot, we'll just
395 * continue through the whole list
397 for (i = 0; i < MAXSERVERS; i++) {
398 rxp = rx_PeerOf(aclient->conns[i]);
399 thisHost = rx_HostOf(rxp);
402 } else if (thisHost == newHost) {
403 return i; /* we were told to use this one */
414 * \brief Create an internal version of ubik_CallIter that takes an additional
415 * parameter - to indicate whether the ubik client handle has already
419 CallIter(int (*aproc) (), struct ubik_client *aclient,
420 afs_int32 aflags, int *apos, long p1, long p2, long p3, long p4,
421 long p5, long p6, long p7, long p8, long p9, long p10, long p11,
422 long p12, long p13, long p14, long p15, long p16, int needlock)
425 struct rx_connection *tc;
429 LOCK_UBIK_CLIENT(aclient);
431 origLevel = aclient->initializationState;
435 while (*apos < MAXSERVERS) {
436 /* tc is the next conn to try */
437 tc = aclient->conns[*apos];
441 if (rx_ConnError(tc)) {
442 tc = ubik_RefreshConn(tc);
443 aclient->conns[*apos] = tc;
446 if ((aflags & UPUBIKONLY) && (aclient->states[*apos] & CFLastFailed)) {
447 (*apos)++; /* try another one if this server is down */
449 break; /* this is the desired path */
452 if (*apos >= MAXSERVERS)
456 (*aproc) (tc, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13,
458 if (aclient->initializationState != origLevel)
459 /* somebody did a ubik_ClientInit */
462 /* what should I do in case of UNOQUORUM ? */
464 aclient->states[*apos] |= CFLastFailed; /* network errors */
466 /* either misc ubik code, or misc application code or success. */
467 aclient->states[*apos] &= ~CFLastFailed; /* operation worked */
473 UNLOCK_UBIK_CLIENT(aclient);
479 * \brief This is part of an iterator. It doesn't handle finding sync sites.
482 ubik_CallIter(int (*aproc) (), struct ubik_client *aclient,
483 afs_int32 aflags, int *apos, long p1, long p2,
484 long p3, long p4, long p5, long p6, long p7,
485 long p8, long p9, long p10, long p11, long p12,
486 long p13, long p14, long p15, long p16)
488 return CallIter(aproc, aclient, aflags, apos, p1, p2, p3, p4, p5, p6, p7,
489 p8, p9, p10, p11, p12, p13, p14, p15, p16, NEED_LOCK);
493 * \brief Call this instead of stub and we'll guarantee to find a host that's up.
495 * \todo In the future, we should also put in a protocol to find the sync site.
498 ubik_Call_New(int (*aproc) (), struct ubik_client *aclient,
499 afs_int32 aflags, long p1, long p2, long p3, long p4, long p5,
500 long p6, long p7, long p8, long p9, long p10, long p11,
501 long p12, long p13, long p14, long p15, long p16)
503 afs_int32 code, rcode;
510 LOCK_UBIK_CLIENT(aclient);
513 origLevel = aclient->initializationState;
515 /* Do two passes. First pass only checks servers known running */
516 for (aflags |= UPUBIKONLY, pass = 0; pass < 2;
517 pass++, aflags &= ~UPUBIKONLY) {
522 CallIter(aproc, aclient, aflags, &count, p1, p2, p3, p4, p5,
523 p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16,
525 if (code && (aclient->initializationState != origLevel)) {
528 if (code == UNOSERVERS) {
531 rcode = code; /* remember code from last good call */
533 if (code == UNOTSYNC) { /* means this requires a sync site */
534 if (aclient->conns[3]) { /* don't bother unless 4 or more srv */
535 temp = try_GetSyncSite(aclient, count);
536 if (aclient->initializationState != origLevel) {
537 goto restart; /* somebody did a ubik_ClientInit */
539 if ((temp >= 0) && ((temp > count) || (stepBack++ <= 2))) {
540 count = temp; /* generally try to make progress */
543 } else if ((code >= 0) && (code != UNOQUORUM)) {
544 UNLOCK_UBIK_CLIENT(aclient);
545 return code; /* success or global error condition */
549 UNLOCK_UBIK_CLIENT(aclient);
554 * \brief Call this instead of stub and we'll guarantee to find a host that's up.
556 * \todo In the future, we should also put in a protocol to find the sync site.
559 ubik_Call(int (*aproc) (), struct ubik_client *aclient,
560 afs_int32 aflags, long p1, long p2, long p3, long p4,
561 long p5, long p6, long p7, long p8, long p9, long p10,
562 long p11, long p12, long p13, long p14, long p15, long p16)
564 afs_int32 rcode, code, newHost, thisHost, i, count;
565 int chaseCount, pass, needsync, inlist, j;
566 struct rx_connection *tc;
570 if (aflags & UBIK_CALL_NEW)
571 return ubik_Call_New(aproc, aclient, aflags, p1, p2, p3, p4,
572 p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15,
577 LOCK_UBIK_CLIENT(aclient);
580 origLevel = aclient->initializationState;
582 chaseCount = inlist = needsync = 0;
585 for (j = 0; ((j < SYNCCOUNT) && calls_needsync[j]); j++) {
586 if (calls_needsync[j] == (int *)aproc) {
587 inlist = needsync = 1;
593 * First pass, we try all servers that are up.
594 * Second pass, we try all servers.
596 for (pass = 0; pass < 2; pass++) { /*p */
597 /* For each entry in our servers list */
598 for (count = 0;; count++) { /*s */
601 /* Need a sync site. Let's try to quickly find it */
602 if (aclient->syncSite) {
603 newHost = aclient->syncSite; /* already in network order */
604 aclient->syncSite = 0; /* Will reset if it works */
605 } else if (aclient->conns[3]) {
606 /* If there are fewer than four db servers in a cell,
607 * there's no point in making the GetSyncSite call.
608 * At best, it's a wash. At worst, it results in more
609 * RPCs than you would otherwise make.
611 tc = aclient->conns[count];
612 if (tc && rx_ConnError(tc)) {
613 aclient->conns[count] = tc = ubik_RefreshConn(tc);
617 code = VOTE_GetSyncSite(tc, &newHost);
618 if (aclient->initializationState != origLevel)
619 goto restart; /* somebody did a ubik_ClientInit */
622 newHost = htonl(newHost); /* convert to network order */
627 /* position count at the appropriate slot in the client
628 * structure and retry. If we can't find it in any slot, we'll
629 * just continue through the whole list
631 for (i = 0; i < MAXSERVERS && aclient->conns[i]; i++) {
632 rxp = rx_PeerOf(aclient->conns[i]);
633 thisHost = rx_HostOf(rxp);
636 if (thisHost == newHost) {
637 if (chaseCount++ > 2)
638 break; /* avoid loop asking */
639 count = i; /* this index is the sync site */
646 tc = aclient->conns[count];
647 if (tc && rx_ConnError(tc)) {
648 aclient->conns[count] = tc = ubik_RefreshConn(tc);
653 if ((pass == 0) && (aclient->states[count] & CFLastFailed)) {
654 continue; /* this guy's down */
658 (*aproc) (tc, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11,
659 p12, p13, p14, p15, p16);
660 if (aclient->initializationState != origLevel) {
661 /* somebody did a ubik_ClientInit */
663 goto restart; /* call failed */
665 goto done; /* call succeeded */
667 if (rcode < 0) { /* network errors */
668 aclient->states[count] |= CFLastFailed; /* Mark server down */
669 } else if (rcode == UNOTSYNC) {
671 } else if (rcode != UNOQUORUM) {
672 /* either misc ubik code, or misc appl code, or success. */
673 aclient->states[count] &= ~CFLastFailed; /* mark server up */
674 goto done; /* all done */
681 if (!inlist) { /* Remember proc call that needs sync site */
683 calls_needsync[synccount % SYNCCOUNT] = (int *)aproc;
688 if (!rcode) { /* Remember the sync site - cmd successful */
689 rxp = rx_PeerOf(aclient->conns[count]);
690 aclient->syncSite = rx_HostOf(rxp);
693 UNLOCK_UBIK_CLIENT(aclient);