2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
11 #include "../afs/param.h"
12 #include "../afs/sysincludes.h"
13 #include "../afs/afsincludes.h"
14 #include "../afs/stds.h"
15 #include "../rx/xdr.h"
17 #include "../afs/lock.h"
18 #include "../afs/rxgen_consts.h"
19 #include "../afs/ubik.h"
20 #include "../afs/pthread_glock.h"
21 #else /* defined(UKERNEL) */
22 #include <afs/param.h>
24 #include <afs/pthread_glock.h>
34 #include <netinet/in.h>
36 #include <afs/rxgen_consts.h>
38 #endif /* defined(UKERNEL) */
41 afs_int32 ubik_CallIter();
42 short ubik_initializationState; /* initial state is zero */
46 * parse list for clients
48 int ubik_ParseClientList(
55 register struct hostent *th;
56 afs_int32 temp, counter;
59 inServer = 0; /* haven't seen -servers yet */
61 for(i=1; i<argc; i++) {
62 /* look for -servers argument */
66 if (*tp == '-') break; /* done */
67 /* otherwise this is a new host name */
69 th = gethostbyname(tp);
74 memmove((void *) &temp, (const void *) th->h_addr, sizeof(afs_int32));
76 if (counter++ >= MAXSERVERS) return UNHOSTS;
80 /* haven't seen -servers yet */
81 if (!strcmp(tp, "-servers")) {
87 /* never saw -servers */
90 if (counter < MAXSERVERS) *aothers++ = 0; /* null terminate if room */
94 #ifdef AFS_PTHREAD_ENV
98 static pthread_once_t random_once = PTHREAD_ONCE_INIT;
99 static int called_afs_random_once;
100 static pthread_key_t random_number_key;
102 static void afs_random_once(void)
104 assert(pthread_key_create(&random_number_key, NULL)==0);
105 called_afs_random_once = 1;
110 * Random number generator and constants from KnuthV2 2d ed, p170
114 * m is a power of two
116 * a is 0.73m should be 0.01m .. 0.99m
117 * c is more or less immaterial. 1 or a is suggested.
119 * NB: LOW ORDER BITS are not very random. To get small random numbers,
120 * treat result as <1, with implied binary point, and multiply by
122 * NB: Has to be unsigned, since shifts on signed quantities may preserve
125 * In this case, m == 2^32, the mod operation is implicit. a == pi, which
126 * is used because it has some interesting characteristics (lacks any
127 * interesting bit-patterns).
132 * use time and pid to try to get some initial randomness.
134 #if !defined(UKERNEL)
135 #define ranstage(x) (x)= (afs_uint32) (3141592621U*((afs_uint32)x)+1)
137 unsigned int afs_random(void)
139 #ifdef AFS_PTHREAD_ENV
142 (called_afs_random_once || pthread_once(&random_once, afs_random_once));
143 state = (afs_uint32) pthread_getspecific(random_number_key);
145 static afs_uint32 state = 0;
150 state = time(0) + getpid();
157 #ifdef AFS_PTHREAD_ENV
158 pthread_setspecific(random_number_key, (const void *) state);
165 * returns int 0..14 using the high bits of a pseudo-random number instead of
166 * the low bits, as the low bits are "less random" than the high ones...
167 * slight roundoff error exists, an exercise for the reader.
168 * need to multiply by something with lots of ones in it, so multiply by
169 * 8 or 16 is right out.
172 static unsigned int afs_randomMod15(void)
176 temp = afs_random() >> 4;
177 temp = (temp *15) >> 28;
181 #endif /* !defined(UKERNEL) */
186 #define abs(a) ((a) < 0 ? -1*(a) : (a))
188 register struct rx_connection **serverconns,
189 struct ubik_client **aclient)
194 register struct ubik_client *tc;
196 initialize_u_error_table();
198 if ( *aclient ) { /* the application is doing a re-initialization*/
199 LOCK_UBIK_CLIENT((*aclient))
200 /* this is an important defensive check */
201 if ( ! ((*aclient)->initializationState) ) {
202 UNLOCK_UBIK_CLIENT((*aclient))
203 return UREINITIALIZE;
206 /* release all existing connections */
207 for (tc = *aclient, i=0; i<MAXSERVERS; i++)
209 struct rx_connection *rxConn = ubik_GetRPCConn(tc,i);
210 if (rxConn == 0) break;
211 #ifdef AFS_PTHREAD_ENV
212 rx_ReleaseCachedConnection(rxConn);
214 rx_DestroyConnection (rxConn);
217 UNLOCK_UBIK_CLIENT((*aclient))
218 #ifdef AFS_PTHREAD_ENV
219 if (pthread_mutex_destroy(&((*aclient)->cm))) return UMUTEXDESTROY;
222 tc = (struct ubik_client *) malloc(sizeof(struct ubik_client));
224 if (tc == NULL) return UNOMEM;
225 memset((void *) tc, 0, sizeof(*tc));
226 #ifdef AFS_PTHREAD_ENV
227 if (pthread_mutex_init(&(tc->cm), (const pthread_mutexattr_t*)0)) {
231 tc->initializationState = ++ubik_initializationState;
233 /* first count the # of server conns so we can randomize properly */
235 for(i=0;i<MAXSERVERS;i++) {
236 if (serverconns[i] == (struct rx_connection *) 0) break;
240 /* here count is the # of servers we're actually passed in. Compute
241 * offset, a number between 0..count-1, where we'll start copying from the
242 * client-provided array. */
243 for (i=0; i< count; i++) {
244 offset = afs_randomMod15() % count;
245 for (j=abs(offset); j<2*count; j++) {
246 if (!tc->conns[abs(j%count)]) {
247 tc->conns[abs(j%count)] = serverconns[i];
258 * ubik_ClientDestroy - destroys a ubik connection. It calls rx to destroy the
259 * component rx connections, then frees the ubik connection structure.
262 afs_int32 ubik_ClientDestroy(struct ubik_client *aclient)
266 if (aclient == 0) return 0;
267 LOCK_UBIK_CLIENT(aclient);
268 for (c=0; c<MAXSERVERS; c++) {
269 struct rx_connection *rxConn = ubik_GetRPCConn(aclient,c);
270 if (rxConn == 0) break;
271 #ifdef AFS_PTHREAD_ENV
272 rx_ReleaseCachedConnection(rxConn);
274 rx_DestroyConnection (rxConn);
277 aclient->initializationState = 0; /* client is not initialized */
278 UNLOCK_UBIK_CLIENT(aclient);
279 #ifdef AFS_PTHREAD_ENV
280 pthread_mutex_destroy(&(aclient->cm)); /* ignore failure */
287 * RefreshConn -- So that intermittent failures that cause connections to die
288 * don't kill whole ubik connection, refresh them when the connection is in
292 static struct rx_connection *RefreshConn(struct rx_connection *tc)
297 struct rx_securityClass *sc;
299 struct rx_connection *newTc;
301 host = rx_HostOf(rx_PeerOf(tc));
302 port = rx_PortOf(rx_PeerOf(tc));
303 service = rx_ServiceIdOf(tc);
304 sc = rx_SecurityObjectOf(tc);
305 si = rx_SecurityClassOf(tc);
308 * destroy old one after creating new one so that refCount on security
309 * object cannot reach zero.
311 newTc = rx_NewConnection (host, port, service, sc, si);
312 rx_DestroyConnection (tc);
316 #ifdef AFS_PTHREAD_ENV
318 pthread_once_t ubik_client_once = PTHREAD_ONCE_INIT;
319 pthread_mutex_t ubik_client_mutex;
320 #define LOCK_UCLNT_CACHE \
321 assert(pthread_once(&ubik_client_once, ubik_client_init_mutex) == 0 && \
322 pthread_mutex_lock(&ubik_client_mutex)==0);
323 #define UNLOCK_UCLNT_CACHE assert(pthread_mutex_unlock(&ubik_client_mutex)==0);
325 void ubik_client_init_mutex() {
326 assert(pthread_mutex_init(&ubik_client_mutex, NULL) == 0);
331 #define LOCK_UCLNT_CACHE
332 #define UNLOCK_UCLNT_CACHE
337 static int *calls_needsync[SYNCCOUNT]; /* proc calls that need the sync site */
338 static int synccount=0;
341 * call this instead of stub and we'll guarantee to find a host that's up.
342 * in the future, we should also put in a protocol to find the sync site
344 afs_int32 ubik_Call(aproc, aclient, aflags, p1, p2, p3, p4, p5, p6, p7, p8, p9,
345 p10, p11, p12, p13, p14, p15, p16)
347 register struct ubik_client *aclient;
366 afs_int32 rcode, code, newHost, thisHost, i, count;
367 int chaseCount, pass, needsync, inlist, j;
368 struct rx_connection *tc;
372 if (!aclient) return UNOENT;
373 LOCK_UBIK_CLIENT(aclient);
376 origLevel = aclient->initializationState;
378 chaseCount = inlist = needsync = 0;
381 for (j=0; ((j<SYNCCOUNT) && calls_needsync[j]); j++) {
382 if (calls_needsync[j] == (int *)aproc) {
383 inlist = needsync = 1;
390 * First pass, we try all servers that are up.
391 * Second pass, we try all servers.
393 for (pass=0; pass<2; pass++) { /*p*/
394 /* For each entry in our servers list */
395 for (count=0; ;count++) { /*s*/
398 /* Need a sync site. Let's try to quickly find it */
399 if (aclient->syncSite) {
400 newHost = aclient->syncSite; /* already in network order */
401 aclient->syncSite = 0; /* Will reset if it works */
402 } else if (aclient->conns[3]) {
403 /* If there are fewer than four db servers in a cell,
404 * there's no point in making the GetSyncSite call.
405 * At best, it's a wash. At worst, it results in more
406 * RPCs than you would otherwise make.
408 tc = aclient->conns[count];
409 if (tc && rx_ConnError(tc)) {
410 aclient->conns[count] = tc = RefreshConn(tc);
413 code = VOTE_GetSyncSite(tc, &newHost);
414 if ( aclient->initializationState != origLevel)
415 goto restart; /* somebody did a ubik_ClientInit */
416 if (code) newHost = 0;
417 newHost = htonl(newHost); /* convert to network order */
422 /* position count at the appropriate slot in the client
423 * structure and retry. If we can't find a matching slot, we'll
424 * just continue through the whole list
426 for (i=0; i<MAXSERVERS && aclient->conns[i]; i++) {
427 rxp = rx_PeerOf(aclient->conns[i]);
428 thisHost = rx_HostOf(rxp);
429 if (!thisHost) break;
430 if (thisHost == newHost) {
431 if (chaseCount++ > 2) break; /* avoid loop asking */
432 count = i; /* this index is the sync site */
439 tc = aclient->conns[count];
440 if (tc && rx_ConnError(tc)) {
441 aclient->conns[count] = tc = RefreshConn(tc);
445 if ((pass == 0) && (aclient->states[count] & CFLastFailed)) {
446 continue; /* this guy's down */
449 rcode = (*aproc)(tc, p1, p2, p3, p4, p5, p6, p7, p8, p9,
450 p10, p11, p12, p13, p14, p15, p16);
451 if ( aclient->initializationState != origLevel) {
452 /* somebody did a ubik_ClientInit */
453 if ( rcode ) goto restart; /* call failed */
454 else goto done; /* call succeeded */
456 if (rcode < 0) { /* network errors */
457 aclient->states[count] |= CFLastFailed; /* Mark server down */
459 else if (rcode == UNOTSYNC) {
462 else if (rcode != UNOQUORUM) {
463 /* either misc ubik code, or misc appl code, or success. */
464 aclient->states[count] &= ~CFLastFailed; /* mark server up */
465 goto done; /* all done */
472 if (!inlist) { /* Remember proc call that needs sync site */
474 calls_needsync[synccount % SYNCCOUNT] = (int *)aproc;
479 if (!rcode) { /* Remember the sync site - cmd successful */
480 rxp = rx_PeerOf(aclient->conns[count]);
481 aclient->syncSite = rx_HostOf(rxp);
484 UNLOCK_UBIK_CLIENT(aclient);
491 * call this after getting back a UNOTSYNC
492 * note that getting a UNOTSYNC error code back does *not* guarantee
493 * that there is a sync site yet elected. However, if there is a sync
494 * site out there somewhere, and you're trying an operation that
495 * requires a sync site, ubik will return UNOTSYNC, indicating the
496 * operation won't work until you find a sync site
498 static int try_GetSyncSite(register struct ubik_client *aclient, afs_int32 apos) {
502 afs_int32 thisHost, newHost;
503 struct rx_connection *tc;
506 origLevel = aclient->initializationState;
509 tc = aclient->conns[apos];
510 if (tc && rx_ConnError (tc)) {
511 aclient->conns[apos] = (tc = RefreshConn (tc));
517 /* now see if we can find the sync site host */
518 code = VOTE_GetSyncSite(tc, &newHost);
519 if ( aclient->initializationState != origLevel) {
520 return -1; /* somebody did a ubik_ClientInit */
523 if ( !code && newHost ) {
524 newHost = htonl(newHost); /* convert back to network order */
527 * position count at the appropriate slot in the client
528 * structure and retry. If we can't find a matching slot, we'll just
529 * continue through the whole list
531 for(i=0;i<MAXSERVERS;i++) {
532 rxp = rx_PeerOf(aclient->conns[i]);
533 thisHost = rx_HostOf(rxp);
537 else if (thisHost == newHost) {
538 return i; /* we were told to use this one */
546 * Create an internal version of ubik_CallIter that takes an additional
547 * parameter - to indicate whether the ubik client handle has already
554 static afs_int32 CallIter(aproc, aclient, aflags, apos, p1, p2, p3, p4, p5, p6,
555 p7, p8, p9, p10, p11, p12, p13, p14, p15, p16, needlock)
557 register struct ubik_client *aclient;
578 register afs_int32 code;
579 struct rx_connection *tc;
583 LOCK_UBIK_CLIENT(aclient)
585 origLevel = aclient->initializationState;
589 while (*apos < MAXSERVERS)
591 /* tc is the next conn to try */
592 tc = aclient->conns[*apos];
595 UNLOCK_UBIK_CLIENT(aclient)
600 if (rx_ConnError (tc)) {
601 tc = RefreshConn (tc);
602 aclient->conns[*apos] = tc;
605 if ((aflags & UPUBIKONLY) && (aclient->states[*apos] & CFLastFailed)) {
606 (*apos)++; /* try another one if this server is down */
609 break; /* this is the desired path */
612 if (*apos >= MAXSERVERS) {
614 UNLOCK_UBIK_CLIENT(aclient)
619 code = (*aproc)(tc,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12,p13,p14,p15,p16);
620 if ( aclient->initializationState != origLevel) {
622 UNLOCK_UBIK_CLIENT(aclient)
624 return code; /* somebody did a ubik_ClientInit */
627 /* what should I do in case of UNOQUORUM ? */
629 aclient->states[*apos] |= CFLastFailed; /* network errors */
632 /* either misc ubik code, or misc application code or success. */
633 aclient->states[*apos] &= ~CFLastFailed; /* operation worked */
638 UNLOCK_UBIK_CLIENT(aclient)
644 * call this instead of stub and we'll guarantee to find a host that's up.
645 * in the future, we should also put in a protocol to find the sync site
647 afs_int32 ubik_Call_New(aproc, aclient, aflags, p1, p2, p3, p4, p5, p6, p7, p8,
648 p9, p10, p11, p12, p13, p14, p15, p16)
650 register struct ubik_client *aclient;
669 afs_int32 code, rcode;
676 LOCK_UBIK_CLIENT(aclient)
679 origLevel = aclient->initializationState;
681 /* Do two passes. First pass only checks servers known running */
682 for (aflags |= UPUBIKONLY, pass=0; pass<2; pass++, aflags &= ~UPUBIKONLY) {
686 code = CallIter(aproc, aclient, aflags, &count, p1,p2,p3,p4,
687 p5,p6,p7,p8,p9,p10,p11,p12,p13,p14,p15,p16,NO_LOCK);
688 if ( code && ( aclient->initializationState != origLevel)) {
691 if (code == UNOSERVERS) {
694 rcode = code; /* remember code from last good call */
696 if (code == UNOTSYNC) { /* means this requires a sync site */
697 if (aclient->conns[3]) { /* don't bother unless 4 or more srv */
698 temp = try_GetSyncSite(aclient, count);
699 if ( aclient->initializationState != origLevel) {
700 goto restart; /* somebody did a ubik_ClientInit */
702 if ((temp >= 0) && ((temp > count) || (stepBack++ <= 2))) {
703 count = temp; /* generally try to make progress */
707 else if ((code >= 0) && (code != UNOQUORUM)) {
708 UNLOCK_UBIK_CLIENT(aclient)
709 return code; /* success or global error condition */
713 UNLOCK_UBIK_CLIENT(aclient)
718 * This is part of an iterator. It doesn't handle finding sync sites
720 afs_int32 ubik_CallIter(aproc, aclient, aflags, apos, p1, p2, p3, p4, p5, p6, p7,
721 p8, p9, p10, p11, p12, p13, p14, p15, p16)
723 register struct ubik_client *aclient;
743 return CallIter(aproc, aclient, aflags, apos, p1, p2, p3, p4, p5,
744 p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16,