2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include <afs/param.h>
17 #ifdef IGNORE_SOME_GCC_WARNINGS
18 # pragma GCC diagnostic warning "-Wstrict-prototypes"
22 #include "afsincludes.h"
25 #include <afs/pthread_glock.h>
29 #include <afs/rxgen_consts.h>
30 #define UBIK_LEGACY_CALLITER
33 short ubik_initializationState; /*!< initial state is zero */
37 * \brief Parse list for clients.
40 ubik_ParseClientList(int argc, char **argv, afs_uint32 * aothers)
49 inServer = 0; /* haven't seen -servers yet */
51 for (i = 1; i < argc; i++) {
52 /* look for -servers argument */
58 /* otherwise this is a new host name */
60 th = gethostbyname(tp);
65 memmove((void *)&temp, (const void *)th->h_addr,
68 if (counter++ >= MAXSERVERS)
72 /* haven't seen -servers yet */
73 if (!strcmp(tp, "-servers")) {
79 /* never saw -servers */
82 if (counter < MAXSERVERS)
83 *aothers++ = 0; /* null terminate if room */
87 #ifdef AFS_PTHREAD_ENV
90 static pthread_once_t random_once = PTHREAD_ONCE_INIT;
91 static int called_afs_random_once;
92 static pthread_key_t random_number_key;
97 opr_Verify(pthread_key_create(&random_number_key, NULL) == 0);
98 called_afs_random_once = 1;
103 #if !defined(UKERNEL)
105 * \brief use time and pid to try to get some initial randomness.
107 #define ranstage(x) (x)= (afs_uint32) (3141592621U*((afs_uint32)x)+1)
110 * \brief Random number generator and constants from KnuthV2 2d ed, p170
113 * X = (aX + c) % m \n
114 * m is a power of two \n
116 * a is 0.73m should be 0.01m .. 0.99m \n
117 * c is more or less immaterial. 1 or a is suggested. \n
119 * NB: LOW ORDER BITS are not very random. To get small random numbers,
120 * treat result as <1, with implied binary point, and multiply by
123 * NB: Has to be unsigned, since shifts on signed quantities may preserve
126 * In this case, m == 2^32, the mod operation is implicit. a == pi, which
127 * is used because it has some interesting characteristics (lacks any
128 * interesting bit-patterns).
133 #ifdef AFS_PTHREAD_ENV
136 if (!called_afs_random_once)
137 pthread_once(&random_once, afs_random_once);
139 state = (uintptr_t) pthread_getspecific(random_number_key);
141 static afs_uint32 state = 0;
146 state = time(0) + getpid();
147 for (i = 0; i < 15; i++) {
153 #ifdef AFS_PTHREAD_ENV
154 pthread_setspecific(random_number_key, (const void *)(uintptr_t)state);
161 * \brief Returns int 0..14 using the high bits of a pseudo-random number instead of
162 * the low bits, as the low bits are "less random" than the high ones...
164 * \todo Slight roundoff error exists, an exercise for the reader.
166 * Need to multiply by something with lots of ones in it, so multiply by
167 * 8 or 16 is right out.
170 afs_randomMod15(void)
174 temp = afs_random() >> 4;
175 temp = (temp * 15) >> 28;
179 #endif /* !defined(UKERNEL) */
184 #define abs(a) ((a) < 0 ? -1*(a) : (a))
186 ubik_ClientInit(struct rx_connection **serverconns,
187 struct ubik_client **aclient)
192 struct ubik_client *tc;
194 initialize_U_error_table();
196 if (*aclient) { /* the application is doing a re-initialization */
197 LOCK_UBIK_CLIENT((*aclient));
198 /* this is an important defensive check */
199 if (!((*aclient)->initializationState)) {
200 UNLOCK_UBIK_CLIENT((*aclient));
201 return UREINITIALIZE;
204 /* release all existing connections */
205 for (tc = *aclient, i = 0; i < MAXSERVERS; i++) {
206 struct rx_connection *rxConn = ubik_GetRPCConn(tc, i);
209 #ifdef AFS_PTHREAD_ENV
210 rx_ReleaseCachedConnection(rxConn);
212 rx_DestroyConnection(rxConn);
215 UNLOCK_UBIK_CLIENT((*aclient));
216 #ifdef AFS_PTHREAD_ENV
217 if (pthread_mutex_destroy(&((*aclient)->cm)))
218 return UMUTEXDESTROY;
221 tc = malloc(sizeof(struct ubik_client));
225 memset((void *)tc, 0, sizeof(*tc));
226 #ifdef AFS_PTHREAD_ENV
227 if (pthread_mutex_init(&(tc->cm), (const pthread_mutexattr_t *)0)) {
231 tc->initializationState = ++ubik_initializationState;
233 /* first count the # of server conns so we can randomize properly */
235 for (i = 0; i < MAXSERVERS; i++) {
236 if (serverconns[i] == (struct rx_connection *)0)
241 /* here count is the # of servers we're actually passed in. Compute
242 * offset, a number between 0..count-1, where we'll start copying from the
243 * client-provided array. */
244 for (i = 0; i < count; i++) {
245 offset = afs_randomMod15() % count;
246 for (j = abs(offset); j < 2 * count; j++) {
247 if (!tc->conns[abs(j % count)]) {
248 tc->conns[abs(j % count)] = serverconns[i];
259 * \brief Destroy a ubik connection.
261 * It calls rx to destroy the component rx connections, then frees the ubik
262 * connection structure.
265 ubik_ClientDestroy(struct ubik_client * aclient)
271 LOCK_UBIK_CLIENT(aclient);
272 for (c = 0; c < MAXSERVERS; c++) {
273 struct rx_connection *rxConn = ubik_GetRPCConn(aclient, c);
276 #ifdef AFS_PTHREAD_ENV
277 rx_ReleaseCachedConnection(rxConn);
279 rx_DestroyConnection(rxConn);
282 aclient->initializationState = 0; /* client is not initialized */
283 UNLOCK_UBIK_CLIENT(aclient);
284 #ifdef AFS_PTHREAD_ENV
285 pthread_mutex_destroy(&(aclient->cm)); /* ignore failure */
292 * \brief So that intermittent failures that cause connections to die
293 * don't kill whole ubik connection, refresh them when the connection is in
296 struct rx_connection *
297 ubik_RefreshConn(struct rx_connection *tc)
302 struct rx_securityClass *sc;
304 struct rx_connection *newTc;
306 host = rx_HostOf(rx_PeerOf(tc));
307 port = rx_PortOf(rx_PeerOf(tc));
308 service = rx_ServiceIdOf(tc);
309 sc = rx_SecurityObjectOf(tc);
310 si = rx_SecurityClassOf(tc);
313 * destroy old one after creating new one so that refCount on security
314 * object cannot reach zero.
316 newTc = rx_NewConnection(host, port, service, sc, si);
317 rx_DestroyConnection(tc);
321 #ifdef AFS_PTHREAD_ENV
323 pthread_once_t ubik_client_once = PTHREAD_ONCE_INIT;
324 pthread_mutex_t ubik_client_mutex;
325 #define LOCK_UCLNT_CACHE do { \
326 opr_Verify(pthread_once(&ubik_client_once, ubik_client_init_mutex) == 0); \
327 MUTEX_ENTER(&ubik_client_mutex); \
329 #define UNLOCK_UCLNT_CACHE MUTEX_EXIT(&ubik_client_mutex)
332 ubik_client_init_mutex(void)
334 MUTEX_INIT(&ubik_client_mutex, "client init", MUTEX_DEFAULT, 0);
339 #define LOCK_UCLNT_CACHE
340 #define UNLOCK_UCLNT_CACHE
345 static int *calls_needsync[SYNCCOUNT]; /* proc calls that need the sync site */
346 static int synccount = 0;
351 * \brief Call this after getting back a #UNOTSYNC.
353 * \note Getting a #UNOTSYNC error code back does \b not guarantee
354 * that there is a sync site yet elected. However, if there is a sync
355 * site out there somewhere, and you're trying an operation that
356 * requires a sync site, ubik will return #UNOTSYNC, indicating the
357 * operation won't work until you find a sync site
360 try_GetSyncSite(struct ubik_client *aclient, afs_int32 apos)
365 afs_int32 thisHost, newHost;
366 struct rx_connection *tc;
369 origLevel = aclient->initializationState;
372 tc = aclient->conns[apos];
373 if (tc && rx_ConnError(tc)) {
374 aclient->conns[apos] = (tc = ubik_RefreshConn(tc));
380 /* now see if we can find the sync site host */
381 code = VOTE_GetSyncSite(tc, &newHost);
382 if (aclient->initializationState != origLevel) {
383 return -1; /* somebody did a ubik_ClientInit */
386 if (!code && newHost) {
387 newHost = htonl(newHost); /* convert back to network order */
390 * position count at the appropriate slot in the client
391 * structure and retry. If we can't find in slot, we'll just
392 * continue through the whole list
394 for (i = 0; i < MAXSERVERS; i++) {
395 rxp = rx_PeerOf(aclient->conns[i]);
396 thisHost = rx_HostOf(rxp);
399 } else if (thisHost == newHost) {
400 return i; /* we were told to use this one */
411 * \brief Create an internal version of ubik_CallIter that takes an additional
412 * parameter - to indicate whether the ubik client handle has already
416 CallIter(int (*aproc) (), struct ubik_client *aclient,
417 afs_int32 aflags, int *apos, long p1, long p2, long p3, long p4,
418 long p5, long p6, long p7, long p8, long p9, long p10, long p11,
419 long p12, long p13, long p14, long p15, long p16, int needlock)
422 struct rx_connection *tc;
426 LOCK_UBIK_CLIENT(aclient);
428 origLevel = aclient->initializationState;
432 while (*apos < MAXSERVERS) {
433 /* tc is the next conn to try */
434 tc = aclient->conns[*apos];
438 if (rx_ConnError(tc)) {
439 tc = ubik_RefreshConn(tc);
440 aclient->conns[*apos] = tc;
443 if ((aflags & UPUBIKONLY) && (aclient->states[*apos] & CFLastFailed)) {
444 (*apos)++; /* try another one if this server is down */
446 break; /* this is the desired path */
449 if (*apos >= MAXSERVERS)
453 (*aproc) (tc, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13,
455 if (aclient->initializationState != origLevel)
456 /* somebody did a ubik_ClientInit */
459 /* what should I do in case of UNOQUORUM ? */
461 aclient->states[*apos] |= CFLastFailed; /* network errors */
463 /* either misc ubik code, or misc application code or success. */
464 aclient->states[*apos] &= ~CFLastFailed; /* operation worked */
470 UNLOCK_UBIK_CLIENT(aclient);
476 * \brief This is part of an iterator. It doesn't handle finding sync sites.
479 ubik_CallIter(int (*aproc) (), struct ubik_client *aclient,
480 afs_int32 aflags, int *apos, long p1, long p2,
481 long p3, long p4, long p5, long p6, long p7,
482 long p8, long p9, long p10, long p11, long p12,
483 long p13, long p14, long p15, long p16)
485 return CallIter(aproc, aclient, aflags, apos, p1, p2, p3, p4, p5, p6, p7,
486 p8, p9, p10, p11, p12, p13, p14, p15, p16, NEED_LOCK);
490 * \brief Call this instead of stub and we'll guarantee to find a host that's up.
492 * \todo In the future, we should also put in a protocol to find the sync site.
495 ubik_Call_New(int (*aproc) (), struct ubik_client *aclient,
496 afs_int32 aflags, long p1, long p2, long p3, long p4, long p5,
497 long p6, long p7, long p8, long p9, long p10, long p11,
498 long p12, long p13, long p14, long p15, long p16)
500 afs_int32 code, rcode;
507 LOCK_UBIK_CLIENT(aclient);
510 origLevel = aclient->initializationState;
512 /* Do two passes. First pass only checks servers known running */
513 for (aflags |= UPUBIKONLY, pass = 0; pass < 2;
514 pass++, aflags &= ~UPUBIKONLY) {
519 CallIter(aproc, aclient, aflags, &count, p1, p2, p3, p4, p5,
520 p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16,
522 if (code && (aclient->initializationState != origLevel)) {
525 if (code == UNOSERVERS) {
528 rcode = code; /* remember code from last good call */
530 if (code == UNOTSYNC) { /* means this requires a sync site */
531 if (aclient->conns[3]) { /* don't bother unless 4 or more srv */
532 temp = try_GetSyncSite(aclient, count);
533 if (aclient->initializationState != origLevel) {
534 goto restart; /* somebody did a ubik_ClientInit */
536 if ((temp >= 0) && ((temp > count) || (stepBack++ <= 2))) {
537 count = temp; /* generally try to make progress */
540 } else if ((code >= 0) && (code != UNOQUORUM)) {
541 UNLOCK_UBIK_CLIENT(aclient);
542 return code; /* success or global error condition */
546 UNLOCK_UBIK_CLIENT(aclient);
551 * \brief Call this instead of stub and we'll guarantee to find a host that's up.
553 * \todo In the future, we should also put in a protocol to find the sync site.
556 ubik_Call(int (*aproc) (), struct ubik_client *aclient,
557 afs_int32 aflags, long p1, long p2, long p3, long p4,
558 long p5, long p6, long p7, long p8, long p9, long p10,
559 long p11, long p12, long p13, long p14, long p15, long p16)
561 afs_int32 rcode, code, newHost, thisHost, i, count;
562 int chaseCount, pass, needsync, inlist, j;
563 struct rx_connection *tc;
567 if (aflags & UBIK_CALL_NEW)
568 return ubik_Call_New(aproc, aclient, aflags, p1, p2, p3, p4,
569 p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15,
574 LOCK_UBIK_CLIENT(aclient);
577 origLevel = aclient->initializationState;
579 chaseCount = inlist = needsync = 0;
582 for (j = 0; ((j < SYNCCOUNT) && calls_needsync[j]); j++) {
583 if (calls_needsync[j] == (int *)aproc) {
584 inlist = needsync = 1;
590 * First pass, we try all servers that are up.
591 * Second pass, we try all servers.
593 for (pass = 0; pass < 2; pass++) { /*p */
594 /* For each entry in our servers list */
595 for (count = 0;; count++) { /*s */
598 /* Need a sync site. Let's try to quickly find it */
599 if (aclient->syncSite) {
600 newHost = aclient->syncSite; /* already in network order */
601 aclient->syncSite = 0; /* Will reset if it works */
602 } else if (aclient->conns[3]) {
603 /* If there are fewer than four db servers in a cell,
604 * there's no point in making the GetSyncSite call.
605 * At best, it's a wash. At worst, it results in more
606 * RPCs than you would otherwise make.
608 tc = aclient->conns[count];
609 if (tc && rx_ConnError(tc)) {
610 aclient->conns[count] = tc = ubik_RefreshConn(tc);
614 code = VOTE_GetSyncSite(tc, &newHost);
615 if (aclient->initializationState != origLevel)
616 goto restart; /* somebody did a ubik_ClientInit */
619 newHost = htonl(newHost); /* convert to network order */
624 /* position count at the appropriate slot in the client
625 * structure and retry. If we can't find in slot, we'll
626 * just continue through the whole list
628 for (i = 0; i < MAXSERVERS && aclient->conns[i]; i++) {
629 rxp = rx_PeerOf(aclient->conns[i]);
630 thisHost = rx_HostOf(rxp);
633 if (thisHost == newHost) {
634 if (chaseCount++ > 2)
635 break; /* avoid loop asking */
636 count = i; /* this index is the sync site */
643 tc = aclient->conns[count];
644 if (tc && rx_ConnError(tc)) {
645 aclient->conns[count] = tc = ubik_RefreshConn(tc);
650 if ((pass == 0) && (aclient->states[count] & CFLastFailed)) {
651 continue; /* this guy's down */
655 (*aproc) (tc, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11,
656 p12, p13, p14, p15, p16);
657 if (aclient->initializationState != origLevel) {
658 /* somebody did a ubik_ClientInit */
660 goto restart; /* call failed */
662 goto done; /* call succeeded */
664 if (rcode < 0) { /* network errors */
665 aclient->states[count] |= CFLastFailed; /* Mark server down */
666 } else if (rcode == UNOTSYNC) {
668 } else if (rcode != UNOQUORUM) {
669 /* either misc ubik code, or misc appl code, or success. */
670 aclient->states[count] &= ~CFLastFailed; /* mark server up */
671 goto done; /* all done */
678 if (!inlist) { /* Remember proc call that needs sync site */
680 calls_needsync[synccount % SYNCCOUNT] = (int *)aproc;
685 if (!rcode) { /* Remember the sync site - cmd successful */
686 rxp = rx_PeerOf(aclient->conns[count]);
687 aclient->syncSite = rx_HostOf(rxp);
690 UNLOCK_UBIK_CLIENT(aclient);