2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
12 #include "afs/param.h"
14 #include <afs/param.h>
21 #include "afs/sysincludes.h"
22 #include "afsincludes.h"
27 #include "afs/rxgen_consts.h"
29 #include "afs/pthread_glock.h"
30 #else /* defined(UKERNEL) */
32 #include <afs/pthread_glock.h>
42 #include <netinet/in.h>
44 #include <afs/rxgen_consts.h>
46 #endif /* defined(UKERNEL) */
/*
 * Process-wide initialization counter.  ubik_ClientInit pre-increments it
 * and stores the result in the client handle (initializationState).  The
 * call paths in this file save that value before an RPC and compare it
 * afterwards to notice that the handle was re-initialized underneath them.
 */
49 short ubik_initializationState; /*!< initial state is zero */
53 * \brief Parse list for clients.
/*
 * ubik_ParseClientList: walk argv[1] .. argv[argc-1] looking for the
 * -servers switch and resolve the host names that follow it.
 *
 * argc, argv  command line to scan; the loop starts at index 1, so argv[0]
 *             is never examined.
 * aothers     output array.  The visible code stores a terminating 0 entry
 *             through it when counter is below MAXSERVERS.  Presumably each
 *             resolved address is stored through it as well - confirm.
 *
 * NOTE(review): the return statements are not part of this excerpt; confirm
 * the codes returned for an unknown host, for too many hosts and for a
 * missing -servers switch against the full file.
 */
56 ubik_ParseClientList(int argc, char **argv, afs_int32 * aothers)
60 register struct hostent *th;
61 afs_int32 temp, counter;
64 inServer = 0; /* haven't seen -servers yet */
66 for (i = 1; i < argc; i++) {
67 /* look for -servers argument */
73 /* otherwise this is a new host name */
/* Resolve the name; the address bytes are copied into temp just below. */
75 th = gethostbyname(tp);
80 memmove((void *)&temp, (const void *)th->h_addr,
/* Post-increment: the limit is tested against the count before this host. */
83 if (counter++ >= MAXSERVERS)
87 /* haven't seen -servers yet */
88 if (!strcmp(tp, "-servers")) {
94 /* never saw -servers */
/* The terminator is optional: a completely full list is left unterminated. */
97 if (counter < MAXSERVERS)
98 *aothers++ = 0; /* null terminate if room */
102 #ifdef AFS_PTHREAD_ENV
/*
 * State for the threaded random number generator:
 *   random_once             pthread_once control for afs_random_once
 *   called_afs_random_once  set to 1 by afs_random_once after it has run
 *   random_number_key       thread-specific-data key created by afs_random_once
 */
106 static pthread_once_t random_once = PTHREAD_ONCE_INIT;
107 static int called_afs_random_once;
108 static pthread_key_t random_number_key;
/*
 * afs_random_once: one-time initializer.  Creates random_number_key with no
 * destructor (NULL) and then records that initialization has happened.
 *
 * NOTE(review): the key is created inside assert().  If this is the standard
 * assert macro and the file is ever built with NDEBUG, the call would be
 * compiled out - confirm which assert is in scope.
 */
111 afs_random_once(void)
113 assert(pthread_key_create(&random_number_key, NULL) == 0);
114 called_afs_random_once = 1;
119 #if !defined(UKERNEL)
121 * \brief use time and pid to try to get some initial randomness.
/*
 * ranstage(x): advance x in place by one step of x = 3141592621 * x + 1,
 * with the result converted to afs_uint32.  x is expanded twice and the
 * cast on the right-hand side is applied to the unparenthesized argument,
 * so pass a plain lvalue only.
 */
123 #define ranstage(x) (x)= (afs_uint32) (3141592621U*((afs_uint32)x)+1)
126 * \brief Random number generator and constants from KnuthV2 2d ed, p170
129 * X = (aX + c) % m \n
130 * m is a power of two \n
132 * a is 0.73m should be 0.01m .. 0.99m \n
133 * c is more or less immaterial. 1 or a is suggested. \n
135 * NB: LOW ORDER BITS are not very random. To get small random numbers,
136 * treat result as <1, with implied binary point, and multiply by
139 * NB: Has to be unsigned, since shifts on signed quantities may preserve
142 * In this case, m == 2^32, the mod operation is implicit. a == pi, which
143 * is used because it has some interesting characteristics (lacks any
144 * interesting bit-patterns).
/*
 * NOTE(review): the enclosing function header is not part of this excerpt.
 * afs_randomMod15 calls afs_random(), so this is presumably its body.
 */
149 #ifdef AFS_PTHREAD_ENV
/*
 * Threaded build: the flag short-circuits the pthread_once call once
 * afs_random_once has run.  The generator state for the calling thread is
 * then read from thread-specific storage; the value is carried in the
 * pointer itself, hence the integer cast.
 */
152 (called_afs_random_once || pthread_once(&random_once, afs_random_once));
153 state = (afs_uint32) pthread_getspecific(random_number_key);
/* Presumably the non-threaded arm: a single function-static state word. */
155 static afs_uint32 state = 0;
/* Seed from the current time plus the process id. */
160 state = time(0) + getpid();
/* 15 rounds; the loop body is not visible here - presumably ranstage(state). */
161 for (i = 0; i < 15; i++) {
167 #ifdef AFS_PTHREAD_ENV
/* Store the state back into the slot of the calling thread. */
168 pthread_setspecific(random_number_key, (const void *)state);
175 * \brief Returns int 0..14 using the high bits of a pseudo-random number instead of
176 * the low bits, as the low bits are "less random" than the high ones...
178 * \todo Slight roundoff error exists, an exercise for the reader.
180 * Need to multiply by something with lots of ones in it, so multiply by
181 * 8 or 16 is right out.
/*
 * afs_randomMod15: scale a value from afs_random() into a small integer
 * using its high-order bits.
 *
 * NOTE(review): the declaration of temp and the return statement are not
 * visible here.  The 0..14 range relies on temp being a 32-bit unsigned
 * quantity - confirm.
 */
184 afs_randomMod15(void)
/* Drop the low 4 bits so that the multiply by 15 below fits in 32 bits. */
188 temp = afs_random() >> 4;
/* (28-bit value * 15) >> 28 is the integer part of value/2^28 * 15. */
189 temp = (temp * 15) >> 28;
193 #endif /* !defined(UKERNEL) */
/*
 * Local absolute-value macro.  It takes over the name abs for the rest of
 * this translation unit and evaluates its argument more than once, so the
 * argument must be free of side effects.
 */
198 #define abs(a) ((a) < 0 ? -1*(a) : (a))
/*
 * ubik_ClientInit: build (or rebuild) a ubik client handle from an array of
 * rx connections.
 *
 * serverconns  array of connections; the counting loop below scans at most
 *              MAXSERVERS entries and tests each for a null pointer.
 * aclient      in/out.  A non-null *aclient is treated as a request to
 *              re-initialize an existing handle.
 *
 * Visible error returns: UREINITIALIZE when the existing handle has
 * initializationState == 0, UMUTEXDESTROY when pthread_mutex_destroy fails.
 * NOTE(review): the malloc and mutex-init failure paths and the final
 * return are not part of this excerpt.
 */
200 ubik_ClientInit(register struct rx_connection **serverconns,
201 struct ubik_client **aclient)
206 register struct ubik_client *tc;
208 initialize_U_error_table();
210 if (*aclient) { /* the application is doing a re-initialization */
211 LOCK_UBIK_CLIENT((*aclient));
212 /* this is an important defensive check */
/* A zero state is what ubik_ClientDestroy leaves behind in a handle. */
213 if (!((*aclient)->initializationState)) {
214 UNLOCK_UBIK_CLIENT((*aclient));
215 return UREINITIALIZE;
218 /* release all existing connections */
/* Note that the loop initializer also points tc at the existing handle. */
219 for (tc = *aclient, i = 0; i < MAXSERVERS; i++) {
220 struct rx_connection *rxConn = ubik_GetRPCConn(tc, i);
223 #ifdef AFS_PTHREAD_ENV
224 rx_ReleaseCachedConnection(rxConn);
226 rx_DestroyConnection(rxConn);
229 UNLOCK_UBIK_CLIENT((*aclient));
230 #ifdef AFS_PTHREAD_ENV
/* The old mutex is destroyed here; a new one is initialized further down. */
231 if (pthread_mutex_destroy(&((*aclient)->cm)))
232 return UMUTEXDESTROY;
/*
 * NOTE(review): whether this allocation sits on an else arm (fresh handle
 * only) is not visible in this excerpt - confirm.
 */
235 tc = (struct ubik_client *)malloc(sizeof(struct ubik_client));
/* Zeroing leaves every conns[] slot null, which the placement loop relies on. */
239 memset((void *)tc, 0, sizeof(*tc));
240 #ifdef AFS_PTHREAD_ENV
241 if (pthread_mutex_init(&(tc->cm), (const pthread_mutexattr_t *)0)) {
/* Stamp the handle with a fresh, non-zero generation number. */
245 tc->initializationState = ++ubik_initializationState;
247 /* first count the # of server conns so we can randomize properly */
249 for (i = 0; i < MAXSERVERS; i++) {
250 if (serverconns[i] == (struct rx_connection *)0)
255 /* here count is the # of servers we're actually passed in. Compute
256 * offset, a number between 0..count-1, where we'll start copying from the
257 * client-provided array. */
/* With count == 0 this loop body never runs, so the % count is safe. */
258 for (i = 0; i < count; i++) {
259 offset = afs_randomMod15() % count;
/* Probe forward from offset, wrapping modulo count, for the first free slot. */
260 for (j = abs(offset); j < 2 * count; j++) {
261 if (!tc->conns[abs(j % count)]) {
262 tc->conns[abs(j % count)] = serverconns[i];
273 * \brief Destroy an ubik connection.
275 * It calls rx to destroy the component rx connections, then frees the ubik
276 * connection structure.
/*
 * ubik_ClientDestroy: tear down a ubik client handle.
 *
 * Under the client lock, every one of the MAXSERVERS connection slots is
 * fetched with ubik_GetRPCConn and handed to rx for release, then the
 * handle is marked uninitialized (initializationState = 0).  In the pthread
 * build the client mutex is destroyed afterwards.
 *
 * NOTE(review): the null-handle check, the per-slot null test, the freeing
 * of the structure and the return value are not visible in this excerpt.
 */
279 ubik_ClientDestroy(struct ubik_client * aclient)
285 LOCK_UBIK_CLIENT(aclient);
286 for (c = 0; c < MAXSERVERS; c++) {
287 struct rx_connection *rxConn = ubik_GetRPCConn(aclient, c);
290 #ifdef AFS_PTHREAD_ENV
291 rx_ReleaseCachedConnection(rxConn);
293 rx_DestroyConnection(rxConn);
/* ubik_ClientInit refuses to re-initialize a handle whose state is zero. */
296 aclient->initializationState = 0; /* client is not initialized */
297 UNLOCK_UBIK_CLIENT(aclient);
298 #ifdef AFS_PTHREAD_ENV
299 pthread_mutex_destroy(&(aclient->cm)); /* ignore failure */
306 * \brief So that intermittent failures that cause connections to die
307 * don't kill whole ubik connection, refresh them when the connection is in
/*
 * ubik_RefreshConn: replace an rx connection with a new one to the same
 * peer.
 *
 * tc  connection to replace.  Its host, port, service id, security object
 *     and security class are read and passed to rx_NewConnection.  tc is
 *     then handed to rx_DestroyConnection, so it must not be used after
 *     this call.
 *
 * Returns a struct rx_connection pointer.  NOTE(review): the return
 * statement is not visible here; presumably it returns newTc, since the
 * callers in this file store the result back into the conns[] slot.
 */
310 struct rx_connection *
311 ubik_RefreshConn(struct rx_connection *tc)
316 struct rx_securityClass *sc;
318 struct rx_connection *newTc;
320 host = rx_HostOf(rx_PeerOf(tc));
321 port = rx_PortOf(rx_PeerOf(tc));
322 service = rx_ServiceIdOf(tc);
323 sc = rx_SecurityObjectOf(tc);
324 si = rx_SecurityClassOf(tc);
327 * destroy old one after creating new one so that refCount on security
328 * object cannot reach zero.
330 newTc = rx_NewConnection(host, port, service, sc, si);
331 rx_DestroyConnection(tc);
335 #ifdef AFS_PTHREAD_ENV
/*
 * Global mutex for the client cache, created on first use through
 * pthread_once.
 */
337 pthread_once_t ubik_client_once = PTHREAD_ONCE_INIT;
338 pthread_mutex_t ubik_client_mutex;
/*
 * LOCK_UCLNT_CACHE runs the one-time mutex initializer if needed and then
 * takes the mutex; UNLOCK_UCLNT_CACHE releases it.
 *
 * NOTE(review): both operations are performed inside assert().  If this is
 * the standard assert macro and NDEBUG is defined, the lock and unlock calls
 * would be compiled out - confirm which assert is in scope.
 */
339 #define LOCK_UCLNT_CACHE \
340 assert(pthread_once(&ubik_client_once, ubik_client_init_mutex) == 0 && \
341 pthread_mutex_lock(&ubik_client_mutex)==0)
342 #define UNLOCK_UCLNT_CACHE assert(pthread_mutex_unlock(&ubik_client_mutex)==0)
/* pthread_once callback: initializes ubik_client_mutex with default attributes. */
345 ubik_client_init_mutex()
347 assert(pthread_mutex_init(&ubik_client_mutex, NULL) == 0);
/* Presumably the non-pthread arm: both macros expand to nothing. */
352 #define LOCK_UCLNT_CACHE
353 #define UNLOCK_UCLNT_CACHE
/*
 * Small file-wide memory of procedures that turned out to need the sync
 * site.  ubik_Call scans the table from slot 0 up to the first empty slot
 * and stores new entries at index synccount % SYNCCOUNT.
 *
 * NOTE(review): the only lock visible around these accesses is the
 * per-client LOCK_UBIK_CLIENT - confirm whether calls on different client
 * handles can race on this table.
 */
358 static int *calls_needsync[SYNCCOUNT]; /* proc calls that need the sync site */
359 static int synccount = 0;
362 * call this instead of stub and we'll guarantee to find a host that's up.
364 * \todo In the future, we should also put in a protocol to find the sync site.
/*
 * ubik_Call: invoke the stub aproc on the servers of aclient until one of
 * them gives a usable answer.
 *
 * aproc     stub to call; it receives the chosen rx connection followed by
 *           p1..p16.
 * aclient   client handle; locked on entry and unlocked before leaving.
 * p1..p16   passed through to aproc unchanged.
 *
 * Outline of the visible logic:
 *  - Save initializationState.  After each RPC it is compared again, and a
 *    change means that someone re-initialized the handle meanwhile.
 *  - If aproc is found in calls_needsync, the sync site is located first:
 *    either from the remembered aclient->syncSite, or by asking a server
 *    with VOTE_GetSyncSite when conns[3] is set.
 *  - Pass 0 skips servers flagged CFLastFailed; pass 1 tries every server.
 *  - A negative rcode flags the server CFLastFailed.  Any code other than
 *    UNOTSYNC and UNOQUORUM clears the flag and ends the search.
 *
 * NOTE(review): the restart and done labels, the handling of UNOTSYNC, the
 * loop exit conditions and the return statement are not part of this
 * excerpt.
 */
367 ubik_Call(aproc, aclient, aflags, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10,
368 p11, p12, p13, p14, p15, p16)
370 register struct ubik_client *aclient;
389 afs_int32 rcode, code, newHost, thisHost, i, count;
390 int chaseCount, pass, needsync, inlist, j;
391 struct rx_connection *tc;
397 LOCK_UBIK_CLIENT(aclient);
/* Generation snapshot used to detect a concurrent ubik_ClientInit. */
400 origLevel = aclient->initializationState;
402 chaseCount = inlist = needsync = 0;
/* Has this procedure already been seen to need the sync site? */
405 for (j = 0; ((j < SYNCCOUNT) && calls_needsync[j]); j++) {
406 if (calls_needsync[j] == (int *)aproc) {
407 inlist = needsync = 1;
413 * First pass, we try all servers that are up.
414 * Second pass, we try all servers.
416 for (pass = 0; pass < 2; pass++) { /*p */
417 /* For each entry in our servers list */
418 for (count = 0;; count++) { /*s */
421 /* Need a sync site. Let's try to quickly find it */
422 if (aclient->syncSite) {
423 newHost = aclient->syncSite; /* already in network order */
424 aclient->syncSite = 0; /* Will reset if it works */
425 } else if (aclient->conns[3]) {
426 /* If there are fewer than four db servers in a cell,
427 * there's no point in making the GetSyncSite call.
428 * At best, it's a wash. At worst, it results in more
429 * RPCs than you would otherwise make.
431 tc = aclient->conns[count];
432 if (tc && rx_ConnError(tc)) {
433 aclient->conns[count] = tc = ubik_RefreshConn(tc);
437 code = VOTE_GetSyncSite(tc, &newHost);
438 if (aclient->initializationState != origLevel)
439 goto restart; /* somebody did a ubik_ClientInit */
442 newHost = htonl(newHost); /* convert to network order */
447 /* position count at the appropriate slot in the client
448 * structure and retry. If we can't find in slot, we'll
449 * just continue through the whole list
451 for (i = 0; i < MAXSERVERS && aclient->conns[i]; i++) {
452 rxp = rx_PeerOf(aclient->conns[i]);
453 thisHost = rx_HostOf(rxp);
456 if (thisHost == newHost) {
457 if (chaseCount++ > 2)
458 break; /* avoid loop asking */
459 count = i; /* this index is the sync site */
/* Pick the connection for this slot, replacing it if rx reports an error. */
466 tc = aclient->conns[count];
467 if (tc && rx_ConnError(tc)) {
468 aclient->conns[count] = tc = ubik_RefreshConn(tc);
473 if ((pass == 0) && (aclient->states[count] & CFLastFailed)) {
474 continue; /* this guy's down */
478 (*aproc) (tc, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11,
479 p12, p13, p14, p15, p16);
480 if (aclient->initializationState != origLevel) {
481 /* somebody did a ubik_ClientInit */
483 goto restart; /* call failed */
485 goto done; /* call succeeded */
487 if (rcode < 0) { /* network errors */
488 aclient->states[count] |= CFLastFailed; /* Mark server down */
489 } else if (rcode == UNOTSYNC) {
491 } else if (rcode != UNOQUORUM) {
492 /* either misc ubik code, or misc appl code, or success. */
493 aclient->states[count] &= ~CFLastFailed; /* mark server up */
494 goto done; /* all done */
501 if (!inlist) { /* Remember proc call that needs sync site */
/* The table is used as a ring: old entries are overwritten in turn. */
503 calls_needsync[synccount % SYNCCOUNT] = (int *)aproc;
508 if (!rcode) { /* Remember the sync site - cmd successful */
509 rxp = rx_PeerOf(aclient->conns[count]);
510 aclient->syncSite = rx_HostOf(rxp);
513 UNLOCK_UBIK_CLIENT(aclient);
520 * \brief Call this after getting back a #UNOTSYNC.
522 * \note Getting a #UNOTSYNC error code back does \b not guarantee
523 * that there is a sync site yet elected. However, if there is a sync
524 * site out there somewhere, and you're trying an operation that
525 * requires a sync site, ubik will return #UNOTSYNC, indicating the
526 * operation won't work until you find a sync site
/*
 * try_GetSyncSite: ask the server in slot apos which host is the sync site
 * and map the answer back to a slot index of aclient.
 *
 * aclient  client handle; no lock or unlock call is visible in this
 *          function, so presumably the caller holds the client lock.
 * apos     index into aclient->conns of the server to ask.
 *
 * Visible returns: -1 when the handle was re-initialized during the RPC,
 * or the index i of the connection whose peer host equals the reported
 * sync site.
 *
 * NOTE(review): the handling of a null connection, the branch in front of
 * the else-if inside the search loop, and the fall-through return are not
 * part of this excerpt.  The search loop here has no conns[i] null guard in
 * its condition, unlike the matching loop in ubik_Call - confirm that the
 * missing lines cover an empty slot.
 */
529 try_GetSyncSite(register struct ubik_client *aclient, afs_int32 apos)
534 afs_int32 thisHost, newHost;
535 struct rx_connection *tc;
/* Generation snapshot used to detect a concurrent ubik_ClientInit. */
538 origLevel = aclient->initializationState;
541 tc = aclient->conns[apos];
/* Replace a connection that rx reports as being in error before using it. */
542 if (tc && rx_ConnError(tc)) {
543 aclient->conns[apos] = (tc = ubik_RefreshConn(tc));
549 /* now see if we can find the sync site host */
550 code = VOTE_GetSyncSite(tc, &newHost);
551 if (aclient->initializationState != origLevel) {
552 return -1; /* somebody did a ubik_ClientInit */
/* A zero newHost is not treated as an answer. */
555 if (!code && newHost) {
556 newHost = htonl(newHost); /* convert back to network order */
559 * position count at the appropriate slot in the client
560 * structure and retry. If we can't find in slot, we'll just
561 * continue through the whole list
563 for (i = 0; i < MAXSERVERS; i++) {
564 rxp = rx_PeerOf(aclient->conns[i]);
565 thisHost = rx_HostOf(rxp);
568 } else if (thisHost == newHost) {
569 return i; /* we were told to use this one */
580 * \brief Create an internal version of ubik_CallIter that takes an additional
581 * parameter - to indicate whether the ubik client handle has already
/*
 * CallIter: make one call attempt for the iterator interface.
 *
 * aproc     stub to call; it receives the chosen rx connection followed by
 *           p1..p16.
 * aclient   client handle.
 * aflags    when UPUBIKONLY is set, slots flagged CFLastFailed are skipped.
 * apos      in/out slot index; advanced past skipped slots.
 * needlock  NOTE(review): the tests on this parameter are not visible in
 *           this excerpt; presumably each LOCK/UNLOCK below is conditional
 *           on it - confirm.
 *
 * After the call the slot is flagged CFLastFailed on one branch and has the
 * flag cleared on the other.  If the handle was re-initialized during the
 * call, code is returned without touching the flags.
 *
 * NOTE(review): the assignment of code, the branch conditions around the
 * flag updates, the advance of *apos after the call and the remaining
 * returns are not part of this excerpt.
 */
585 CallIter(aproc, aclient, aflags, apos, p1, p2, p3, p4, p5, p6, p7, p8, p9,
586 p10, p11, p12, p13, p14, p15, p16, needlock)
588 register struct ubik_client *aclient;
609 register afs_int32 code;
610 struct rx_connection *tc;
614 LOCK_UBIK_CLIENT(aclient);
/* Generation snapshot used to detect a concurrent ubik_ClientInit. */
616 origLevel = aclient->initializationState;
620 while (*apos < MAXSERVERS) {
621 /* tc is the next conn to try */
622 tc = aclient->conns[*apos];
625 UNLOCK_UBIK_CLIENT(aclient);
/* Replace a connection that rx reports as being in error before using it. */
630 if (rx_ConnError(tc)) {
631 tc = ubik_RefreshConn(tc);
632 aclient->conns[*apos] = tc;
635 if ((aflags & UPUBIKONLY) && (aclient->states[*apos] & CFLastFailed)) {
636 (*apos)++; /* try another one if this server is down */
638 break; /* this is the desired path */
/* Ran off the end of the slot array without finding a usable server. */
641 if (*apos >= MAXSERVERS) {
643 UNLOCK_UBIK_CLIENT(aclient);
649 (*aproc) (tc, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13,
651 if (aclient->initializationState != origLevel) {
653 UNLOCK_UBIK_CLIENT(aclient);
655 return code; /* somebody did a ubik_ClientInit */
658 /* what should I do in case of UNOQUORUM ? */
660 aclient->states[*apos] |= CFLastFailed; /* network errors */
662 /* either misc ubik code, or misc application code or success. */
663 aclient->states[*apos] &= ~CFLastFailed; /* operation worked */
668 UNLOCK_UBIK_CLIENT(aclient);
674 * \brief Call this instead of stub and we'll guarantee to find a host that's up.
676 * \todo In the future, we should also put in a protocol to find the sync site.
/*
 * ubik_Call_New: variant of ubik_Call built on top of CallIter.
 *
 * aproc     stub to call; forwarded to CallIter together with p1..p16.
 * aclient   client handle; locked on entry and unlocked on each visible
 *           exit.
 * aflags    UPUBIKONLY is forced on for the first pass and cleared for the
 *           second pass.
 *
 * Outline of the visible logic:
 *  - Two passes over the servers, the first restricted by UPUBIKONLY.
 *  - UNOTSYNC from a server triggers try_GetSyncSite when conns[3] is set,
 *    and count may be moved to the slot it reports.
 *  - Any non-negative code other than UNOQUORUM is returned immediately.
 *
 * NOTE(review): the restart label, the inner loop over count, the handling
 * of UNOSERVERS and the final return are not part of this excerpt.
 */
679 ubik_Call_New(aproc, aclient, aflags, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10,
680 p11, p12, p13, p14, p15, p16)
682 register struct ubik_client *aclient;
701 afs_int32 code, rcode;
708 LOCK_UBIK_CLIENT(aclient);
/* Generation snapshot used to detect a concurrent ubik_ClientInit. */
711 origLevel = aclient->initializationState;
713 /* Do two passes. First pass only checks servers known running */
714 for (aflags |= UPUBIKONLY, pass = 0; pass < 2;
715 pass++, aflags &= ~UPUBIKONLY) {
720 CallIter(aproc, aclient, aflags, &count, p1, p2, p3, p4, p5,
721 p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16,
723 if (code && (aclient->initializationState != origLevel)) {
726 if (code == UNOSERVERS) {
729 rcode = code; /* remember code from last good call */
731 if (code == UNOTSYNC) { /* means this requires a sync site */
732 if (aclient->conns[3]) { /* don't bother unless 4 or more srv */
733 temp = try_GetSyncSite(aclient, count);
734 if (aclient->initializationState != origLevel) {
735 goto restart; /* somebody did a ubik_ClientInit */
/* Moving to a lower slot index is allowed only a limited number of times. */
737 if ((temp >= 0) && ((temp > count) || (stepBack++ <= 2))) {
738 count = temp; /* generally try to make progress */
741 } else if ((code >= 0) && (code != UNOQUORUM)) {
742 UNLOCK_UBIK_CLIENT(aclient);
743 return code; /* success or global error condition */
747 UNLOCK_UBIK_CLIENT(aclient);
752 * \brief This is part of an iterator. It doesn't handle finding sync sites.
/*
 * ubik_CallIter: public iterator entry point.  Forwards every argument to
 * CallIter unchanged and passes NEED_LOCK as the final needlock argument.
 *
 * aproc     stub to call.
 * aclient   client handle.
 * aflags    flags forwarded to CallIter.
 * apos      in/out slot index forwarded to CallIter.
 * p1..p16   arguments forwarded to the stub.
 *
 * Returns whatever CallIter returns.
 */
755 ubik_CallIter(int (*aproc) (), struct ubik_client *aclient,
756 afs_int32 aflags, int *apos, long p1, long p2,
757 long p3, long p4, long p5, long p6, long p7,
758 long p8, long p9, long p10, long p11, long p12,
759 long p13, long p14, long p15, long p16)
761 return CallIter(aproc, aclient, aflags, apos, p1, p2, p3, p4, p5, p6, p7,
762 p8, p9, p10, p11, p12, p13, p14, p15, p16, NEED_LOCK);