2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include <afs/param.h>
13 #ifdef IGNORE_SOME_GCC_WARNINGS
14 # pragma GCC diagnostic warning "-Wstrict-prototypes"
18 #include "afsincludes.h"
22 #include <afs/pthread_glock.h>
33 #include <netinet/in.h>
35 #include <afs/rxgen_consts.h>
36 #define UBIK_LEGACY_CALLITER
43 short ubik_initializationState; /*!< initial state is zero */
47 * \brief Parse list for clients.
/*
 * Scans arguments 1..argc-1 for the "-servers" switch and resolves host
 * names with gethostbyname(). The list written through aothers gets a
 * terminating zero entry when fewer than MAXSERVERS hosts were counted.
 */
50 ubik_ParseClientList(int argc, char **argv, afs_uint32 * aothers)
54 register struct hostent *th;
59 inServer = 0; /* haven't seen -servers yet */
/* index 0 is skipped, so the scan starts at the argument after it */
61 for (i = 1; i < argc; i++) {
62 /* look for -servers argument */
68 /* otherwise this is a new host name */
70 th = gethostbyname(tp);
/* copy the resolved address out of the hostent into temp */
75 memmove((void *)&temp, (const void *)th->h_addr,
78 if (counter++ >= MAXSERVERS)
82 /* haven't seen -servers yet */
83 if (!strcmp(tp, "-servers")) {
89 /* never saw -servers */
92 if (counter < MAXSERVERS)
93 *aothers++ = 0; /* null terminate if room */
97 #ifdef AFS_PTHREAD_ENV
/* one-time control, "already ran" flag and thread-specific key used for the random state */
101 static pthread_once_t random_once = PTHREAD_ONCE_INIT;
102 static int called_afs_random_once;
103 static pthread_key_t random_number_key;
/* pthread_once() callback: creates random_number_key and records that it has run */
106 afs_random_once(void)
/* NOTE(review): the key is created inside assert(); if this is the standard
 * <assert.h> macro, a build with NDEBUG drops the pthread_key_create() call */
108 assert(pthread_key_create(&random_number_key, NULL) == 0);
109 called_afs_random_once = 1;
114 #if !defined(UKERNEL)
116 * \brief use time and pid to try to get some initial randomness.
/*
 * One step of the recurrence x = 3141592621 * x + 1, truncated to
 * afs_uint32; the result is assigned back to x.
 * NOTE(review): x is expanded twice and is not parenthesized inside the cast.
 */
118 #define ranstage(x) (x)= (afs_uint32) (3141592621U*((afs_uint32)x)+1)
121 * \brief Random number generator and constants from KnuthV2 2d ed, p170
124 * X = (aX + c) % m \n
125 * m is a power of two \n
127 * a is 0.73m should be 0.01m .. 0.99m \n
128 * c is more or less immaterial. 1 or a is suggested. \n
130 * NB: LOW ORDER BITS are not very random. To get small random numbers,
131 * treat result as <1, with implied binary point, and multiply by
134 * NB: Has to be unsigned, since shifts on signed quantities may preserve
137 * In this case, m == 2^32, the mod operation is implicit. a == pi, which
138 * is used because it has some interesting characteristics (lacks any
139 * interesting bit-patterns).
/* State handling for the generator: pthread builds keep the state in a thread-specific slot. */
144 #ifdef AFS_PTHREAD_ENV
/* make sure afs_random_once() has created random_number_key before it is read */
147 if (!called_afs_random_once)
148 pthread_once(&random_once, afs_random_once);
/* the thread-specific value itself holds the state, converted through uintptr_t */
150 state = (uintptr_t) pthread_getspecific(random_number_key);
/* NOTE(review): presumably the non-pthread alternative, a single static state -- confirm */
152 static afs_uint32 state = 0;
/* seed from the current time plus the process id */
157 state = time(0) + getpid();
158 for (i = 0; i < 15; i++) {
164 #ifdef AFS_PTHREAD_ENV
/* write the state back into the thread-specific slot */
165 pthread_setspecific(random_number_key, (const void *)(uintptr_t)state);
172 * \brief Returns int 0..14 using the high bits of a pseudo-random number instead of
173 * the low bits, as the low bits are "less random" than the high ones...
175 * \todo Slight roundoff error exists, an exercise for the reader.
177 * Need to multiply by something with lots of ones in it, so multiply by
178 * 8 or 16 is right out.
/* Maps a value from afs_random() onto a small range using its high-order bits. */
181 afs_randomMod15(void)
/* discard the four low-order bits of the generator output */
185 temp = afs_random() >> 4;
/* multiply by 15, then keep only what lies above the low 28 bits */
186 temp = (temp * 15) >> 28;
190 #endif /* !defined(UKERNEL) */
/*
 * Absolute-value macro.
 * NOTE(review): the argument is evaluated more than once, and the name
 * clashes with the standard library abs() if <stdlib.h> is in scope.
 */
195 #define abs(a) ((a) < 0 ? -1*(a) : (a))
/*
 * Sets up a ubik client from the rx connections in serverconns. When
 * *aclient is already non-NULL this is treated as a re-initialization: its
 * connections are released and, in pthread builds, its mutex is destroyed.
 * The passed-in connections are then placed into tc->conns starting from a
 * randomly chosen slot.
 *
 * Returns UREINITIALIZE when an existing client's initializationState is
 * zero, and UMUTEXDESTROY when destroying its mutex fails.
 */
197 ubik_ClientInit(register struct rx_connection **serverconns,
198 struct ubik_client **aclient)
203 register struct ubik_client *tc;
205 initialize_U_error_table();
207 if (*aclient) { /* the application is doing a re-initialization */
208 LOCK_UBIK_CLIENT((*aclient));
209 /* this is an important defensive check */
210 if (!((*aclient)->initializationState)) {
211 UNLOCK_UBIK_CLIENT((*aclient));
212 return UREINITIALIZE;
215 /* release all existing connections */
216 for (tc = *aclient, i = 0; i < MAXSERVERS; i++) {
217 struct rx_connection *rxConn = ubik_GetRPCConn(tc, i);
220 #ifdef AFS_PTHREAD_ENV
221 rx_ReleaseCachedConnection(rxConn);
223 rx_DestroyConnection(rxConn);
226 UNLOCK_UBIK_CLIENT((*aclient));
227 #ifdef AFS_PTHREAD_ENV
/* the client lock was dropped just above, so its mutex can now be torn down */
228 if (pthread_mutex_destroy(&((*aclient)->cm)))
229 return UMUTEXDESTROY;
232 tc = (struct ubik_client *)malloc(sizeof(struct ubik_client));
/* zero the whole structure so every conns[] slot starts out empty */
236 memset((void *)tc, 0, sizeof(*tc));
237 #ifdef AFS_PTHREAD_ENV
238 if (pthread_mutex_init(&(tc->cm), (const pthread_mutexattr_t *)0)) {
/* stamp the client with the next value of the global initialization counter */
242 tc->initializationState = ++ubik_initializationState;
244 /* first count the # of server conns so we can randomize properly */
246 for (i = 0; i < MAXSERVERS; i++) {
247 if (serverconns[i] == (struct rx_connection *)0)
252 /* here count is the # of servers we're actually passed in. Compute
253 * offset, a number between 0..count-1, where we'll start copying from the
254 * client-provided array. */
255 for (i = 0; i < count; i++) {
/* the random value is reduced modulo count to pick the starting slot */
256 offset = afs_randomMod15() % count;
/* probe slots j % count from offset onward, looking for one that is still empty */
257 for (j = abs(offset); j < 2 * count; j++) {
258 if (!tc->conns[abs(j % count)]) {
259 tc->conns[abs(j % count)] = serverconns[i];
270 * \brief Destroy a ubik connection.
272 * It calls rx to destroy the component rx connections, then frees the ubik
273 * connection structure.
/*
 * Tears down aclient: visits all MAXSERVERS connection slots under the
 * client lock, marks the client uninitialized, then (in pthread builds)
 * destroys its mutex.
 */
276 ubik_ClientDestroy(struct ubik_client * aclient)
282 LOCK_UBIK_CLIENT(aclient);
283 for (c = 0; c < MAXSERVERS; c++) {
284 struct rx_connection *rxConn = ubik_GetRPCConn(aclient, c);
287 #ifdef AFS_PTHREAD_ENV
288 rx_ReleaseCachedConnection(rxConn);
290 rx_DestroyConnection(rxConn);
/* a zero initializationState is what ubik_ClientInit() rejects with UREINITIALIZE */
293 aclient->initializationState = 0; /* client is not initialized */
294 UNLOCK_UBIK_CLIENT(aclient);
295 #ifdef AFS_PTHREAD_ENV
296 pthread_mutex_destroy(&(aclient->cm)); /* ignore failure */
303 * \brief So that intermittent failures that cause connections to die
304 * don't kill whole ubik connection, refresh them when the connection is in
/*
 * Creates a new rx connection with the same host, port, service id and
 * security settings as tc, then destroys tc.
 */
307 struct rx_connection *
308 ubik_RefreshConn(struct rx_connection *tc)
313 struct rx_securityClass *sc;
315 struct rx_connection *newTc;
/* capture everything needed to recreate the connection before tc is destroyed */
317 host = rx_HostOf(rx_PeerOf(tc));
318 port = rx_PortOf(rx_PeerOf(tc));
319 service = rx_ServiceIdOf(tc);
320 sc = rx_SecurityObjectOf(tc);
321 si = rx_SecurityClassOf(tc);
324 * destroy old one after creating new one so that refCount on security
325 * object cannot reach zero.
327 newTc = rx_NewConnection(host, port, service, sc, si);
328 rx_DestroyConnection(tc);
332 #ifdef AFS_PTHREAD_ENV
/* one-time control and the mutex used by the two cache-lock macros below */
334 pthread_once_t ubik_client_once = PTHREAD_ONCE_INIT;
335 pthread_mutex_t ubik_client_mutex;
/*
 * LOCK_UCLNT_CACHE initializes ubik_client_mutex on first use (through
 * pthread_once) and then locks it; UNLOCK_UCLNT_CACHE unlocks it.
 * NOTE(review): the pthread calls sit inside assert(); if that is the
 * standard <assert.h> macro, building with NDEBUG removes the calls too.
 */
336 #define LOCK_UCLNT_CACHE \
337 assert(pthread_once(&ubik_client_once, ubik_client_init_mutex) == 0 && \
338 pthread_mutex_lock(&ubik_client_mutex)==0)
339 #define UNLOCK_UCLNT_CACHE assert(pthread_mutex_unlock(&ubik_client_mutex)==0)
/* pthread_once() callback: initializes ubik_client_mutex with a NULL attribute object */
342 ubik_client_init_mutex(void)
344 assert(pthread_mutex_init(&ubik_client_mutex, NULL) == 0);
/* empty variants of the cache-lock macros (presumably the non-pthread branch -- confirm) */
349 #define LOCK_UCLNT_CACHE
350 #define UNLOCK_UCLNT_CACHE
/* table of up to SYNCCOUNT procedure addresses, stored as int pointers, that ubik_Call() compares aproc against */
355 static int *calls_needsync[SYNCCOUNT]; /* proc calls that need the sync site */
/* ubik_Call() stores new entries into slot synccount % SYNCCOUNT */
356 static int synccount = 0;
361 * \brief Call this after getting back a #UNOTSYNC.
363 * \note Getting a #UNOTSYNC error code back does \b not guarantee
364 * that there is a sync site yet elected. However, if there is a sync
365 * site out there somewhere, and you're trying an operation that
366 * requires a sync site, ubik will return #UNOTSYNC, indicating the
367 * operation won't work until you find a sync site
/*
 * Asks the server in slot apos which host is the sync site and returns the
 * index of the connection in aclient->conns[] whose peer host matches it.
 * Returns -1 when the client was re-initialized while the call was in
 * progress.
 */
370 try_GetSyncSite(register struct ubik_client *aclient, afs_int32 apos)
375 afs_int32 thisHost, newHost;
376 struct rx_connection *tc;
/* remember the initialization state so a concurrent ubik_ClientInit can be detected */
379 origLevel = aclient->initializationState;
382 tc = aclient->conns[apos];
/* a connection already in error is replaced by a fresh one to the same peer */
383 if (tc && rx_ConnError(tc)) {
384 aclient->conns[apos] = (tc = ubik_RefreshConn(tc));
390 /* now see if we can find the sync site host */
391 code = VOTE_GetSyncSite(tc, &newHost);
392 if (aclient->initializationState != origLevel) {
393 return -1; /* somebody did a ubik_ClientInit */
396 if (!code && newHost) {
397 newHost = htonl(newHost); /* convert back to network order */
400 * position count at the appropriate slot in the client
401 * structure and retry. If we can't find in slot, we'll just
402 * continue through the whole list
/* NOTE(review): unlike the matching loop in ubik_Call(), conns[i] is not
 * tested here before being handed to rx_PeerOf() -- confirm an empty slot
 * is safe */
404 for (i = 0; i < MAXSERVERS; i++) {
405 rxp = rx_PeerOf(aclient->conns[i]);
406 thisHost = rx_HostOf(rxp);
409 } else if (thisHost == newHost) {
410 return i; /* we were told to use this one */
421 * \brief Create an internal version of ubik_CallIter that takes an additional
422 * parameter - to indicate whether the ubik client handle has already
/*
 * Makes one call of aproc on the connection at slot *apos. When UPUBIKONLY
 * is set in aflags, *apos is first advanced past servers flagged
 * CFLastFailed. The outcome is recorded in aclient->states[*apos]; p1..p16
 * are forwarded to aproc unchanged.
 * NOTE(review): needlock presumably selects whether this function takes the
 * client lock itself -- confirm against the full body.
 */
426 CallIter(int (*aproc) (), register struct ubik_client *aclient,
427 afs_int32 aflags, int *apos, long p1, long p2, long p3, long p4,
428 long p5, long p6, long p7, long p8, long p9, long p10, long p11,
429 long p12, long p13, long p14, long p15, long p16, int needlock)
431 register afs_int32 code;
432 struct rx_connection *tc;
436 LOCK_UBIK_CLIENT(aclient);
/* remember the initialization state so a concurrent ubik_ClientInit can be detected */
438 origLevel = aclient->initializationState;
442 while (*apos < MAXSERVERS) {
443 /* tc is the next conn to try */
444 tc = aclient->conns[*apos];
447 UNLOCK_UBIK_CLIENT(aclient);
/* a connection already in error is replaced by a fresh one and stored back in its slot */
452 if (rx_ConnError(tc)) {
453 tc = ubik_RefreshConn(tc);
454 aclient->conns[*apos] = tc;
/* with UPUBIKONLY set, servers whose last call failed are skipped */
457 if ((aflags & UPUBIKONLY) && (aclient->states[*apos] & CFLastFailed)) {
458 (*apos)++; /* try another one if this server is down */
460 break; /* this is the desired path */
463 if (*apos >= MAXSERVERS) {
465 UNLOCK_UBIK_CLIENT(aclient);
/* invoke the stub on the chosen connection */
471 (*aproc) (tc, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13,
473 if (aclient->initializationState != origLevel) {
475 UNLOCK_UBIK_CLIENT(aclient);
477 return code; /* somebody did a ubik_ClientInit */
480 /* what should I do in case of UNOQUORUM ? */
482 aclient->states[*apos] |= CFLastFailed; /* network errors */
484 /* either misc ubik code, or misc application code or success. */
485 aclient->states[*apos] &= ~CFLastFailed; /* operation worked */
490 UNLOCK_UBIK_CLIENT(aclient);
496 * \brief This is part of an iterator. It doesn't handle finding sync sites.
/* Wrapper around CallIter() that forwards every argument and always passes NEED_LOCK. */
499 ubik_CallIter(int (*aproc) (), struct ubik_client *aclient,
500 afs_int32 aflags, int *apos, long p1, long p2,
501 long p3, long p4, long p5, long p6, long p7,
502 long p8, long p9, long p10, long p11, long p12,
503 long p13, long p14, long p15, long p16)
505 return CallIter(aproc, aclient, aflags, apos, p1, p2, p3, p4, p5, p6, p7,
506 p8, p9, p10, p11, p12, p13, p14, p15, p16, NEED_LOCK);
510 * \brief Call this instead of stub and we'll guarantee to find a host that's up.
512 * \todo In the future, we should also put in a protocol to find the sync site.
/*
 * Calls aproc through CallIter() in up to two passes over the servers.
 * On UNOTSYNC, and only when a fourth connection slot is populated, it asks
 * try_GetSyncSite() which slot to try next. Returns the call's code as soon
 * as it is non-negative and not UNOQUORUM.
 */
515 ubik_Call_New(int (*aproc) (), register struct ubik_client *aclient,
516 afs_int32 aflags, long p1, long p2, long p3, long p4, long p5,
517 long p6, long p7, long p8, long p9, long p10, long p11,
518 long p12, long p13, long p14, long p15, long p16)
520 afs_int32 code, rcode;
527 LOCK_UBIK_CLIENT(aclient);
/* remember the initialization state so a concurrent ubik_ClientInit can be detected */
530 origLevel = aclient->initializationState;
532 /* Do two passes. First pass only checks servers known running */
/* pass 0 runs with UPUBIKONLY set; the flag is cleared again before pass 1 */
533 for (aflags |= UPUBIKONLY, pass = 0; pass < 2;
534 pass++, aflags &= ~UPUBIKONLY) {
539 CallIter(aproc, aclient, aflags, &count, p1, p2, p3, p4, p5,
540 p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16,
542 if (code && (aclient->initializationState != origLevel)) {
545 if (code == UNOSERVERS) {
548 rcode = code; /* remember code from last good call */
550 if (code == UNOTSYNC) { /* means this requires a sync site */
551 if (aclient->conns[3]) { /* don't bother unless 4 or more srv */
552 temp = try_GetSyncSite(aclient, count);
553 if (aclient->initializationState != origLevel) {
554 goto restart; /* somebody did a ubik_ClientInit */
/* jump to the reported slot when it lies ahead of count; other jumps are limited by the stepBack counter */
556 if ((temp >= 0) && ((temp > count) || (stepBack++ <= 2))) {
557 count = temp; /* generally try to make progress */
560 } else if ((code >= 0) && (code != UNOQUORUM)) {
561 UNLOCK_UBIK_CLIENT(aclient);
562 return code; /* success or global error condition */
566 UNLOCK_UBIK_CLIENT(aclient);
571 * \brief Call this instead of stub and we'll guarantee to find a host that's up.
573 * \todo In the future, we should also put in a protocol to find the sync site.
576 ubik_Call(int (*aproc) (), register struct ubik_client *aclient,
577 afs_int32 aflags, long p1, long p2, long p3, long p4,
578 long p5, long p6, long p7, long p8, long p9, long p10,
579 long p11, long p12, long p13, long p14, long p15, long p16)
581 afs_int32 rcode, code, newHost, thisHost, i, count;
582 int chaseCount, pass, needsync, inlist, j;
583 struct rx_connection *tc;
587 if (aflags & UBIK_CALL_NEW)
588 return ubik_Call_New(aproc, aclient, aflags, p1, p2, p3, p4,
589 p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15,
594 LOCK_UBIK_CLIENT(aclient);
597 origLevel = aclient->initializationState;
599 chaseCount = inlist = needsync = 0;
602 for (j = 0; ((j < SYNCCOUNT) && calls_needsync[j]); j++) {
603 if (calls_needsync[j] == (int *)aproc) {
604 inlist = needsync = 1;
610 * First pass, we try all servers that are up.
611 * Second pass, we try all servers.
613 for (pass = 0; pass < 2; pass++) { /*p */
614 /* For each entry in our servers list */
615 for (count = 0;; count++) { /*s */
618 /* Need a sync site. Let's try to quickly find it */
619 if (aclient->syncSite) {
620 newHost = aclient->syncSite; /* already in network order */
621 aclient->syncSite = 0; /* Will reset if it works */
622 } else if (aclient->conns[3]) {
623 /* If there are fewer than four db servers in a cell,
624 * there's no point in making the GetSyncSite call.
625 * At best, it's a wash. At worst, it results in more
626 * RPCs than you would otherwise make.
628 tc = aclient->conns[count];
629 if (tc && rx_ConnError(tc)) {
630 aclient->conns[count] = tc = ubik_RefreshConn(tc);
634 code = VOTE_GetSyncSite(tc, &newHost);
635 if (aclient->initializationState != origLevel)
636 goto restart; /* somebody did a ubik_ClientInit */
639 newHost = htonl(newHost); /* convert to network order */
644 /* position count at the appropriate slot in the client
645 * structure and retry. If we can't find in slot, we'll
646 * just continue through the whole list
648 for (i = 0; i < MAXSERVERS && aclient->conns[i]; i++) {
649 rxp = rx_PeerOf(aclient->conns[i]);
650 thisHost = rx_HostOf(rxp);
653 if (thisHost == newHost) {
654 if (chaseCount++ > 2)
655 break; /* avoid loop asking */
656 count = i; /* this index is the sync site */
663 tc = aclient->conns[count];
664 if (tc && rx_ConnError(tc)) {
665 aclient->conns[count] = tc = ubik_RefreshConn(tc);
670 if ((pass == 0) && (aclient->states[count] & CFLastFailed)) {
671 continue; /* this guy's down */
675 (*aproc) (tc, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11,
676 p12, p13, p14, p15, p16);
677 if (aclient->initializationState != origLevel) {
678 /* somebody did a ubik_ClientInit */
680 goto restart; /* call failed */
682 goto done; /* call succeeded */
684 if (rcode < 0) { /* network errors */
685 aclient->states[count] |= CFLastFailed; /* Mark server down */
686 } else if (rcode == UNOTSYNC) {
688 } else if (rcode != UNOQUORUM) {
689 /* either misc ubik code, or misc appl code, or success. */
690 aclient->states[count] &= ~CFLastFailed; /* mark server up */
691 goto done; /* all done */
698 if (!inlist) { /* Remember proc call that needs sync site */
700 calls_needsync[synccount % SYNCCOUNT] = (int *)aproc;
705 if (!rcode) { /* Remember the sync site - cmd successful */
706 rxp = rx_PeerOf(aclient->conns[count]);
707 aclient->syncSite = rx_HostOf(rxp);
710 UNLOCK_UBIK_CLIENT(aclient);