2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
12 #include "afs/param.h"
14 #include <afs/param.h>
21 #include "afs/sysincludes.h"
22 #include "afsincludes.h"
27 #include "afs/rxgen_consts.h"
29 #include "afs/pthread_glock.h"
30 #else /* defined(UKERNEL) */
32 #include <afs/pthread_glock.h>
42 #include <netinet/in.h>
44 #include <afs/rxgen_consts.h>
46 #endif /* defined(UKERNEL) */
/*
 * ubik_CallIter is declared here without a parameter list; its definition
 * appears later in this file.
 * ubik_initializationState is the file-wide counter that ubik_ClientInit
 * pre-increments and copies into each new client initializationState field.
 */
49 afs_int32 ubik_CallIter();
50 short ubik_initializationState; /* initial state is zero */
54 * parse list for clients
/*
 * ubik_ParseClientList
 *
 * Scans argv[1] .. argv[argc-1] for the literal -servers switch.  Once the
 * switch has been seen (tracked in inServer), each following argument is
 * treated as a host name: it is looked up with gethostbyname() while the
 * global mutex is held, and the resolved address is copied out of
 * th->h_addr with memmove().  UBADHOST is returned after the mutex is
 * released -- presumably when the lookup fails; the guarding condition is
 * not visible here.  counter is incremented per host and compared with
 * MAXSERVERS; if fewer than MAXSERVERS hosts were seen, a zero entry is
 * appended to aothers as a terminator.
 *
 * NOTE(review): local declarations, several conditions and the return
 * statements are absent from this listing, so the exact control flow must
 * be confirmed against the complete source.
 */
57 ubik_ParseClientList(int argc, char **argv, afs_int32 * aothers)
61 register struct hostent *th;
62 afs_int32 temp, counter;
65 inServer = 0; /* haven't seen -servers yet */
67 for (i = 1; i < argc; i++) {
68 /* look for -servers argument */
74 /* otherwise this is a new host name */
75 LOCK_GLOBAL_MUTEX th = gethostbyname(tp);
77 UNLOCK_GLOBAL_MUTEX return UBADHOST;
79 memmove((void *)&temp, (const void *)th->h_addr,
81 UNLOCK_GLOBAL_MUTEX if (counter++ >= MAXSERVERS)
85 /* haven't seen a -server yet */
86 if (!strcmp(tp, "-servers")) {
92 /* never saw a -server */
95 if (counter < MAXSERVERS)
96 *aothers++ = 0; /* null terminate if room */
100 #ifdef AFS_PTHREAD_ENV
/*
 * Per-thread random state support (pthread builds).
 *
 * random_once is the pthread_once control and called_afs_random_once is a
 * flag set by afs_random_once(); the random number routine checks the flag
 * before falling back to pthread_once().  afs_random_once() creates the
 * thread-specific-data key random_number_key (with no destructor) and then
 * sets the flag.
 *
 * NOTE(review): pthread_key_create() is evaluated inside assert().  If this
 * resolves to the standard macro and NDEBUG is defined, the key would never
 * be created -- confirm which assert is in use.
 */
104 static pthread_once_t random_once = PTHREAD_ONCE_INIT;
105 static int called_afs_random_once;
106 static pthread_key_t random_number_key;
109 afs_random_once(void)
111 assert(pthread_key_create(&random_number_key, NULL) == 0);
112 called_afs_random_once = 1;
117 * Random number generator and constants from KnuthV2 2d ed, p170
121 * m is a power of two
123 * a is 0.73m should be 0.01m .. 0.99m
124 * c is more or less immaterial. 1 or a is suggested.
126 * NB: LOW ORDER BITS are not very random. To get small random numbers,
127 * treat result as <1, with implied binary point, and multiply by
129 * NB: Has to be unsigned, since shifts on signed quantities may preserve
132 * In this case, m == 2^32, the mod operation is implicit. a == pi, which
133 * is used because it has some interesting characteristics (lacks any
134 * interesting bit-patterns).
139 * use time and pid to try to get some initial randomness.
141 #if !defined(UKERNEL)
/*
 * ranstage(x) advances x by one step of the generator:
 * x = 3141592621 * x + 1, truncated to afs_uint32.
 *
 * The lines below are the visible part of the random number routine that
 * afs_randomMod15() calls as afs_random(); its signature line is not
 * present in this listing.
 *
 *  - In pthread builds the generator state is kept per thread: the once
 *    initializer is run if it has not run yet, then the state is read from
 *    the thread-specific slot random_number_key and written back to the
 *    same slot at the end.
 *  - The other visible declaration is a function-local static state that
 *    starts at zero (presumably the non-pthread branch; the #else line is
 *    not visible).
 *  - The state is seeded from time(0) + getpid(), followed by a loop of 15
 *    iterations.  The condition guarding the seeding and the loop body are
 *    not visible here (presumably: seed only when state is zero, then run
 *    ranstage to mix the seed) -- confirm.
 */
142 #define ranstage(x) (x)= (afs_uint32) (3141592621U*((afs_uint32)x)+1)
147 #ifdef AFS_PTHREAD_ENV
150 (called_afs_random_once || pthread_once(&random_once, afs_random_once));
151 state = (afs_uint32) pthread_getspecific(random_number_key);
153 static afs_uint32 state = 0;
158 state = time(0) + getpid();
159 for (i = 0; i < 15; i++) {
165 #ifdef AFS_PTHREAD_ENV
166 pthread_setspecific(random_number_key, (const void *)state);
173 * returns int 0..14 using the high bits of a pseudo-random number instead of
174 * the low bits, as the low bits are "less random" than the high ones...
175 * slight roundoff error exists, an exercise for the reader.
176 * need to multiply by something with lots of ones in it, so multiply by
177 * 8 or 16 is right out.
/*
 * afs_randomMod15
 *
 * Takes one value from afs_random(), drops its low four bits, multiplies
 * the remainder by 15 and keeps only what is left after shifting right by
 * 28.  Assuming temp is a 32-bit unsigned quantity (its declaration is not
 * visible here), the result lies in 0..14.  The return statement is not
 * present in this listing.
 */
181 afs_randomMod15(void)
185 temp = afs_random() >> 4;
186 temp = (temp * 15) >> 28;
190 #endif /* !defined(UKERNEL) */
/*
 * ubik_ClientInit
 *
 * Builds a ubik client from the NULL-terminated array serverconns and
 * handles the case where *aclient already refers to a client
 * (re-initialization).
 *
 * Visible steps:
 *  - initialize_U_error_table() is called first.
 *  - If *aclient is non-NULL, the client is locked.  A zero
 *    initializationState means the handle is not a live client, so the
 *    lock is dropped and UREINITIALIZE is returned.  Otherwise every one of
 *    the MAXSERVERS slots is fetched with ubik_GetRPCConn() and released:
 *    rx_ReleaseCachedConnection() under AFS_PTHREAD_ENV, and
 *    rx_DestroyConnection() in what is presumably the non-pthread branch
 *    (the #else line is not visible).  The lock is then dropped and, in
 *    pthread builds, the client mutex is destroyed; failure there returns
 *    UMUTEXDESTROY.
 *  - A ubik_client is obtained with malloc() and zeroed with memset().
 *    Whether the allocation happens only when *aclient was NULL cannot be
 *    told from the lines shown.
 *  - In pthread builds the client mutex is initialized with default
 *    attributes.
 *  - initializationState is set from the pre-incremented global
 *    ubik_initializationState.
 *  - The entries of serverconns are examined up to the first NULL or
 *    MAXSERVERS, for the count used below (the counting statement itself
 *    is not visible).
 *  - Each server connection is placed in tc->conns[] starting from a
 *    random offset (afs_randomMod15() % count) and probing forward, modulo
 *    count, for the first empty slot.
 *
 * The abs() macro defined just above is applied to the probe indices.
 *
 * NOTE(review): local declarations, error-path bodies, the store into
 * *aclient and the final return are absent from this listing.
 */
195 #define abs(a) ((a) < 0 ? -1*(a) : (a))
197 ubik_ClientInit(register struct rx_connection **serverconns,
198 struct ubik_client **aclient)
203 register struct ubik_client *tc;
205 initialize_U_error_table();
207 if (*aclient) { /* the application is doing a re-initialization */
208 LOCK_UBIK_CLIENT((*aclient))
209 /* this is an important defensive check */
210 if (!((*aclient)->initializationState)) {
211 UNLOCK_UBIK_CLIENT((*aclient))
212 return UREINITIALIZE;
215 /* release all existing connections */
216 for (tc = *aclient, i = 0; i < MAXSERVERS; i++) {
217 struct rx_connection *rxConn = ubik_GetRPCConn(tc, i);
220 #ifdef AFS_PTHREAD_ENV
221 rx_ReleaseCachedConnection(rxConn);
223 rx_DestroyConnection(rxConn);
226 UNLOCK_UBIK_CLIENT((*aclient))
227 #ifdef AFS_PTHREAD_ENV
228 if (pthread_mutex_destroy(&((*aclient)->cm)))
229 return UMUTEXDESTROY;
232 tc = (struct ubik_client *)malloc(sizeof(struct ubik_client));
236 memset((void *)tc, 0, sizeof(*tc));
237 #ifdef AFS_PTHREAD_ENV
238 if (pthread_mutex_init(&(tc->cm), (const pthread_mutexattr_t *)0)) {
242 tc->initializationState = ++ubik_initializationState;
244 /* first count the # of server conns so we can randomize properly */
246 for (i = 0; i < MAXSERVERS; i++) {
247 if (serverconns[i] == (struct rx_connection *)0)
252 /* here count is the # of servers we're actually passed in. Compute
253 * offset, a number between 0..count-1, where we'll start copying from the
254 * client-provided array. */
255 for (i = 0; i < count; i++) {
256 offset = afs_randomMod15() % count;
257 for (j = abs(offset); j < 2 * count; j++) {
258 if (!tc->conns[abs(j % count)]) {
259 tc->conns[abs(j % count)] = serverconns[i];
270 * ubik_ClientDestroy - destroys a ubik connection. It calls rx to destroy the
271 * component rx connections, then frees the ubik connection structure.
/*
 * ubik_ClientDestroy
 *
 * Tears down aclient.  With the client lock held, each of the MAXSERVERS
 * slots is fetched through ubik_GetRPCConn() and released:
 * rx_ReleaseCachedConnection() under AFS_PTHREAD_ENV, and
 * rx_DestroyConnection() in what is presumably the non-pthread branch (the
 * #else line is not visible).  initializationState is then cleared so the
 * handle is recognizably dead, the lock is dropped and, in pthread builds,
 * the client mutex is destroyed with any failure ignored.
 *
 * NOTE(review): the NULL checks, the release of the structure itself and
 * the return statement are on lines not present in this listing.
 */
275 ubik_ClientDestroy(struct ubik_client * aclient)
281 LOCK_UBIK_CLIENT(aclient);
282 for (c = 0; c < MAXSERVERS; c++) {
283 struct rx_connection *rxConn = ubik_GetRPCConn(aclient, c);
286 #ifdef AFS_PTHREAD_ENV
287 rx_ReleaseCachedConnection(rxConn);
289 rx_DestroyConnection(rxConn);
292 aclient->initializationState = 0; /* client is not initialized */
293 UNLOCK_UBIK_CLIENT(aclient);
294 #ifdef AFS_PTHREAD_ENV
295 pthread_mutex_destroy(&(aclient->cm)); /* ignore failure */
302 * RefreshConn -- So that intermittent failures that cause connections to die
303 * don't kill whole ubik connection, refresh them when the connection is in
/*
 * RefreshConn
 *
 * Replaces the rx connection tc with a fresh one to the same endpoint.
 * Host, port, service id, security object and security class index are
 * read from tc, a new connection is created from those values, and only
 * then is tc destroyed.  The ordering matters: creating the new connection
 * first keeps the reference count of the shared security object from
 * dropping to zero in between.
 *
 * The return statement is not present in this listing; callers assign the
 * result back into their connection slot, so it presumably returns newTc.
 */
307 static struct rx_connection *
308 RefreshConn(struct rx_connection *tc)
313 struct rx_securityClass *sc;
315 struct rx_connection *newTc;
317 host = rx_HostOf(rx_PeerOf(tc));
318 port = rx_PortOf(rx_PeerOf(tc));
319 service = rx_ServiceIdOf(tc);
320 sc = rx_SecurityObjectOf(tc);
321 si = rx_SecurityClassOf(tc);
324 * destroy old one after creating new one so that refCount on security
325 * object cannot reach zero.
327 newTc = rx_NewConnection(host, port, service, sc, si);
328 rx_DestroyConnection(tc);
332 #ifdef AFS_PTHREAD_ENV
/*
 * Locking for the calls_needsync cache.
 *
 * In pthread builds LOCK_UCLNT_CACHE first makes sure, via pthread_once,
 * that ubik_client_init_mutex() has initialized ubik_client_mutex and then
 * locks it; UNLOCK_UCLNT_CACHE unlocks it.  Both macros already end in a
 * semicolon, which is why call sites use them without one.  The second
 * pair of definitions expands to nothing (presumably the non-pthread
 * branch; the #else line is not visible).
 *
 * NOTE(review): the once/lock/unlock calls sit inside assert().  If this
 * resolves to the standard macro and NDEBUG is defined, no locking would
 * take place -- confirm which assert is in use.
 */
334 pthread_once_t ubik_client_once = PTHREAD_ONCE_INIT;
335 pthread_mutex_t ubik_client_mutex;
336 #define LOCK_UCLNT_CACHE \
337 assert(pthread_once(&ubik_client_once, ubik_client_init_mutex) == 0 && \
338 pthread_mutex_lock(&ubik_client_mutex)==0);
339 #define UNLOCK_UCLNT_CACHE assert(pthread_mutex_unlock(&ubik_client_mutex)==0);
342 ubik_client_init_mutex()
344 assert(pthread_mutex_init(&ubik_client_mutex, NULL) == 0);
349 #define LOCK_UCLNT_CACHE
350 #define UNLOCK_UCLNT_CACHE
/*
 * Cache of procedures known to require the sync site.  ubik_Call compares
 * the address of the procedure being called against these entries, and
 * records new ones at index synccount % SYNCCOUNT, so the table is reused
 * in a circular fashion.
 */
355 static int *calls_needsync[SYNCCOUNT]; /* proc calls that need the sync site */
356 static int synccount = 0;
359 * call this instead of stub and we'll guarantee to find a host that's up.
360 * in the future, we should also put in a protocol to find the sync site
/*
 * ubik_Call
 *
 * Invokes aproc against the servers of aclient until one of them gives a
 * usable answer.  The arguments p1..p16 are forwarded to aproc unchanged.
 *
 * Outline of what is visible here:
 *  - The client lock is taken and initializationState is snapshotted in
 *    origLevel.  Whenever the state differs after an RPC, somebody has
 *    re-run ubik_ClientInit and control jumps to the restart label.
 *  - calls_needsync[] is searched under the cache lock to see whether
 *    aproc is already known to require the sync site.
 *  - Two passes are made over the server list: pass 0 skips servers whose
 *    state has CFLastFailed set, pass 1 tries every server.
 *  - When the sync site is wanted, a remembered aclient->syncSite is used
 *    first.  Otherwise, if conns[3] is populated (four or more servers),
 *    VOTE_GetSyncSite is asked, and count is repositioned on the connection
 *    whose peer host matches.  chaseCount bounds how often that
 *    repositioning may happen.
 *  - Connections reporting rx_ConnError are replaced through RefreshConn.
 *  - A negative rcode marks the server with CFLastFailed.  Any code other
 *    than UNOTSYNC or UNOQUORUM clears the flag and ends the search.
 *  - A procedure not yet in calls_needsync[] is recorded there, and when
 *    rcode is zero the host of the connection used is remembered in
 *    aclient->syncSite.
 *  - The client lock is released before leaving.
 *
 * NOTE(review): the parameter declarations, several conditions, the
 * restart and done labels and the return statement are absent from this
 * listing; confirm details against the complete source.
 */
363 ubik_Call(aproc, aclient, aflags, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10,
364 p11, p12, p13, p14, p15, p16)
366 register struct ubik_client *aclient;
385 afs_int32 rcode, code, newHost, thisHost, i, count;
386 int chaseCount, pass, needsync, inlist, j;
387 struct rx_connection *tc;
393 LOCK_UBIK_CLIENT(aclient);
396 origLevel = aclient->initializationState;
398 chaseCount = inlist = needsync = 0;
400 LOCK_UCLNT_CACHE for (j = 0; ((j < SYNCCOUNT) && calls_needsync[j]); j++) {
401 if (calls_needsync[j] == (int *)aproc) {
402 inlist = needsync = 1;
408 * First pass, we try all servers that are up.
409 * Second pass, we try all servers.
411 for (pass = 0; pass < 2; pass++) { /*p */
412 /* For each entry in our servers list */
413 for (count = 0;; count++) { /*s */
416 /* Need a sync site. Lets try to quickly find it */
417 if (aclient->syncSite) {
418 newHost = aclient->syncSite; /* already in network order */
419 aclient->syncSite = 0; /* Will reset if it works */
420 } else if (aclient->conns[3]) {
421 /* If there are fewer than four db servers in a cell,
422 * there's no point in making the GetSyncSite call.
423 * At best, it's a wash. At worst, it results in more
424 * RPCs than you would otherwise make.
426 tc = aclient->conns[count];
427 if (tc && rx_ConnError(tc)) {
428 aclient->conns[count] = tc = RefreshConn(tc);
432 code = VOTE_GetSyncSite(tc, &newHost);
433 if (aclient->initializationState != origLevel)
434 goto restart; /* somebody did a ubik_ClientInit */
437 newHost = htonl(newHost); /* convert to network order */
442 /* position count at the appropriate slot in the client
443 * structure and retry. If we can't find in slot, we'll
444 * just continue through the whole list
446 for (i = 0; i < MAXSERVERS && aclient->conns[i]; i++) {
447 rxp = rx_PeerOf(aclient->conns[i]);
448 thisHost = rx_HostOf(rxp);
451 if (thisHost == newHost) {
452 if (chaseCount++ > 2)
453 break; /* avoid loop asking */
454 count = i; /* this index is the sync site */
461 tc = aclient->conns[count];
462 if (tc && rx_ConnError(tc)) {
463 aclient->conns[count] = tc = RefreshConn(tc);
468 if ((pass == 0) && (aclient->states[count] & CFLastFailed)) {
469 continue; /* this guy's down */
473 (*aproc) (tc, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11,
474 p12, p13, p14, p15, p16);
475 if (aclient->initializationState != origLevel) {
476 /* somebody did a ubik_ClientInit */
478 goto restart; /* call failed */
480 goto done; /* call succeeded */
482 if (rcode < 0) { /* network errors */
483 aclient->states[count] |= CFLastFailed; /* Mark server down */
484 } else if (rcode == UNOTSYNC) {
486 } else if (rcode != UNOQUORUM) {
487 /* either misc ubik code, or misc appl code, or success. */
488 aclient->states[count] &= ~CFLastFailed; /* mark server up */
489 goto done; /* all done */
496 if (!inlist) { /* Remember proc call that needs sync site */
497 LOCK_UCLNT_CACHE calls_needsync[synccount % SYNCCOUNT] =
500 UNLOCK_UCLNT_CACHE inlist = 1;
502 if (!rcode) { /* Remember the sync site - cmd successful */
503 rxp = rx_PeerOf(aclient->conns[count]);
504 aclient->syncSite = rx_HostOf(rxp);
507 UNLOCK_UBIK_CLIENT(aclient);
514 * call this after getting back a UNOTSYNC
515 * note that getting a UNOTSYNC error code back does *not* guarantee
516 * that there is a sync site yet elected. However, if there is a sync
517 * site out there somewhere, and you're trying an operation that
518 * requires a sync site, ubik will return UNOTSYNC, indicating the
519 * operation won't work until you find a sync site
/*
 * try_GetSyncSite
 *
 * Asks the server at position apos which host is the sync site and maps
 * the answer back to an index in aclient->conns[].
 *
 *  - A connection reporting rx_ConnError is first replaced through
 *    RefreshConn and stored back in the slot.
 *  - VOTE_GetSyncSite is called on that connection.  If the client was
 *    re-initialized meanwhile (initializationState no longer equals the
 *    snapshot in origLevel), -1 is returned.
 *  - When the call succeeds with a non-zero host, the host is passed
 *    through htonl() and compared with the peer host of each connection
 *    slot; the index of the first match is returned.
 *
 * NOTE(review): the handling of empty slots inside the loop and the
 * fall-through return value are on lines not present in this listing.
 */
522 try_GetSyncSite(register struct ubik_client *aclient, afs_int32 apos)
527 afs_int32 thisHost, newHost;
528 struct rx_connection *tc;
531 origLevel = aclient->initializationState;
534 tc = aclient->conns[apos];
535 if (tc && rx_ConnError(tc)) {
536 aclient->conns[apos] = (tc = RefreshConn(tc));
542 /* now see if we can find the sync site host */
543 code = VOTE_GetSyncSite(tc, &newHost);
544 if (aclient->initializationState != origLevel) {
545 return -1; /* somebody did a ubik_ClientInit */
548 if (!code && newHost) {
549 newHost = htonl(newHost); /* convert back to network order */
552 * position count at the appropriate slot in the client
553 * structure and retry. If we can't find in slot, we'll just
554 * continue through the whole list
556 for (i = 0; i < MAXSERVERS; i++) {
557 rxp = rx_PeerOf(aclient->conns[i]);
558 thisHost = rx_HostOf(rxp);
561 } else if (thisHost == newHost) {
562 return i; /* we were told to use this one */
570 * Create an internal version of ubik_CallIter that takes an additional
571 * parameter - to indicate whether the ubik client handle has already
/*
 * CallIter
 *
 * Internal worker behind ubik_CallIter.  It takes one extra trailing
 * argument, needlock; ubik_CallIter passes NEED_LOCK for it.  The
 * statements that test needlock are not visible in this listing, so which
 * of the lock and unlock calls below are conditional must be confirmed.
 *
 * Visible steps:
 *  - The client lock is taken and initializationState is snapshotted.
 *  - Starting at *apos, the connection slots are walked while *apos is
 *    below MAXSERVERS.  A connection reporting rx_ConnError is replaced
 *    through RefreshConn and stored back.  When UPUBIKONLY is set in
 *    aflags, servers flagged CFLastFailed are skipped by advancing *apos.
 *  - If *apos reaches MAXSERVERS the lock is released and the function
 *    leaves (the value returned there is not visible; ubik_Call_New tests
 *    the result against UNOSERVERS).
 *  - aproc is invoked with tc and p1..p16.  If the client was
 *    re-initialized during the call, the code is returned immediately.
 *  - CFLastFailed is set for the server on network errors and cleared
 *    otherwise.
 *  - The client lock is released before returning.
 */
579 CallIter(aproc, aclient, aflags, apos, p1, p2, p3, p4, p5, p6, p7, p8, p9,
580 p10, p11, p12, p13, p14, p15, p16, needlock)
582 register struct ubik_client *aclient;
603 register afs_int32 code;
604 struct rx_connection *tc;
608 LOCK_UBIK_CLIENT(aclient)
610 origLevel = aclient->initializationState;
614 while (*apos < MAXSERVERS) {
615 /* tc is the next conn to try */
616 tc = aclient->conns[*apos];
619 UNLOCK_UBIK_CLIENT(aclient)
624 if (rx_ConnError(tc)) {
625 tc = RefreshConn(tc);
626 aclient->conns[*apos] = tc;
629 if ((aflags & UPUBIKONLY) && (aclient->states[*apos] & CFLastFailed)) {
630 (*apos)++; /* try another one if this server is down */
632 break; /* this is the desired path */
635 if (*apos >= MAXSERVERS) {
637 UNLOCK_UBIK_CLIENT(aclient)
643 (*aproc) (tc, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13,
645 if (aclient->initializationState != origLevel) {
647 UNLOCK_UBIK_CLIENT(aclient)
649 return code; /* somebody did a ubik_ClientInit */
652 /* what should I do in case of UNOQUORUM ? */
654 aclient->states[*apos] |= CFLastFailed; /* network errors */
656 /* either misc ubik code, or misc application code or success. */
657 aclient->states[*apos] &= ~CFLastFailed; /* operation worked */
662 UNLOCK_UBIK_CLIENT(aclient)
668 * call this instead of stub and we'll guarantee to find a host that's up.
669 * in the future, we should also put in a protocol to find the sync site
/*
 * ubik_Call_New
 *
 * Variant of ubik_Call built on top of CallIter.
 *
 *  - The client lock is taken and initializationState is snapshotted in
 *    origLevel.  If the state changes underneath, control goes back to the
 *    restart label.
 *  - Two passes are made: the first with UPUBIKONLY set in aflags, so that
 *    CallIter skips servers flagged as failed; the second with the flag
 *    cleared.
 *  - Each CallIter result is examined.  UNOSERVERS is tested for
 *    separately (the body of that branch is not visible), and other codes
 *    are remembered in rcode.
 *  - On UNOTSYNC, and only when conns[3] is populated (four or more
 *    servers), try_GetSyncSite is asked for the index of the sync site.
 *    The iteration moves to that index when it lies ahead of count; moves
 *    that do not advance are limited by stepBack.
 *  - A non-negative code other than UNOQUORUM is returned at once, after
 *    the lock is released.
 *  - The lock is released before the final return.
 *
 * NOTE(review): parameter and local declarations, the inner loop header
 * and the final return value are on lines absent from this listing.
 */
672 ubik_Call_New(aproc, aclient, aflags, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10,
673 p11, p12, p13, p14, p15, p16)
675 register struct ubik_client *aclient;
694 afs_int32 code, rcode;
701 LOCK_UBIK_CLIENT(aclient)
704 origLevel = aclient->initializationState;
706 /* Do two passes. First pass only checks servers known running */
707 for (aflags |= UPUBIKONLY, pass = 0; pass < 2;
708 pass++, aflags &= ~UPUBIKONLY) {
713 CallIter(aproc, aclient, aflags, &count, p1, p2, p3, p4, p5,
714 p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16,
716 if (code && (aclient->initializationState != origLevel)) {
719 if (code == UNOSERVERS) {
722 rcode = code; /* remember code from last good call */
724 if (code == UNOTSYNC) { /* means this requires a sync site */
725 if (aclient->conns[3]) { /* don't bother unless 4 or more srv */
726 temp = try_GetSyncSite(aclient, count);
727 if (aclient->initializationState != origLevel) {
728 goto restart; /* somebody did a ubik_ClientInit */
730 if ((temp >= 0) && ((temp > count) || (stepBack++ <= 2))) {
731 count = temp; /* generally try to make progress */
734 } else if ((code >= 0) && (code != UNOQUORUM)) {
735 UNLOCK_UBIK_CLIENT(aclient)
736 return code; /* success or global error condition */
740 UNLOCK_UBIK_CLIENT(aclient)
745 * This is part of an iterator. It doesn't handle finding sync sites
/*
 * ubik_CallIter
 *
 * Public iterator entry point.  It forwards aproc, aclient, aflags, apos
 * and p1..p16 unchanged to CallIter, passing NEED_LOCK as the trailing
 * needlock argument, and returns whatever CallIter returns.
 */
748 ubik_CallIter(aproc, aclient, aflags, apos, p1, p2, p3, p4, p5, p6, p7, p8,
749 p9, p10, p11, p12, p13, p14, p15, p16)
751 register struct ubik_client *aclient;
771 return CallIter(aproc, aclient, aflags, apos, p1, p2, p3, p4, p5, p6, p7,
772 p8, p9, p10, p11, p12, p13, p14, p15, p16, NEED_LOCK);