2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
17 /*! \name ubik_trans types */
18 #define UBIK_READTRANS 0
19 #define UBIK_WRITETRANS 1
22 /*! \name ubik_lock types */
28 /*! \name ubik client flags */
29 #define UPUBIKONLY 1 /*!< only check servers presumed functional */
30 #define UBIK_CALL_NEW 2 /*!< use the semantics of ubik_Call_New */
33 /*! \name RX services types */
34 #define VOTE_SERVICE_ID 50
35 #define DISK_SERVICE_ID 51
36 #define USER_SERVICE_ID 52 /*!< Since most applications use same port! */
39 #define UBIK_MAGIC 0x354545
41 /*! \name global ubik parameters */
42 #define MAXSERVERS 20 /*!< max number of servers */
/*!
 * Version comparison macro.
 *
 * Orders two versions by epoch first; the counter decides only when the
 * epochs are equal.  The value is the raw difference of the deciding field,
 * i.e. negative, zero or positive.  Both arguments are evaluated more than
 * once, so they must be free of side effects.
 */
#define vcmp(a,b) \
    (((a).epoch != (b).epoch) \
     ? ((a).epoch - (b).epoch) \
     : ((a).counter - (b).counter))
48 /*! \name ubik_client state bits */
49 #define CFLastFailed 1 /*!< last call failed to this guy (to detect down hosts) */
52 #ifdef AFS_PTHREAD_ENV
58 /*! Sanity check: This macro represents an arbitrary date in the past
59 * (Tue Jun 20 15:36:43 2017). The database epoch must be greater or
60 * equal to this value. */
61 #define UBIK_MILESTONE 1497987403
64 * \brief per-client structure for ubik
67 short initializationState; /*!< ubik client init state */
68 short states[MAXSERVERS]; /*!< state bits */
69 struct rx_connection *conns[MAXSERVERS];
71 #ifdef AFS_PTHREAD_ENV
76 #ifdef AFS_PTHREAD_ENV
/*!
 * Acquire / release the mutex member \c cm of a ubik client (variants used
 * when AFS_PTHREAD_ENV is defined).  The argument is parenthesized so that
 * any pointer-valued expression (e.g. \c base + 1) expands correctly.
 */
#define LOCK_UBIK_CLIENT(client) opr_mutex_enter(&(client)->cm)
#define UNLOCK_UBIK_CLIENT(client) opr_mutex_exit(&(client)->cm)
80 #define LOCK_UBIK_CLIENT(client)
81 #define UNLOCK_UBIK_CLIENT(client)
/*!
 * Fetch the connection / host entry stored at slot \a aindex of a ubik
 * client.  Both macros evaluate to 0 when \a aindex lies outside
 * [0, MAXSERVERS); the lower bound is checked as well so that a negative
 * index cannot read before the start of the array.
 *
 * \a aindex is evaluated more than once; pass a side-effect-free expression.
 */
#define ubik_GetRPCConn(astr,aindex) \
    (((aindex) < 0 || (aindex) >= MAXSERVERS) ? 0 : (astr)->conns[(aindex)])
#define ubik_GetRPCHost(astr,aindex) \
    (((aindex) < 0 || (aindex) >= MAXSERVERS) ? 0 : (astr)->hosts[(aindex)])
88 * \brief ubik header file structure
91 afs_int32 magic; /*!< magic number */
92 short pad1; /*!< some 0-initd padding */
93 short size; /*!< header allocation size */
94 struct ubik_version version; /*!< the version for this file */
98 * \brief representation of a ubik transaction
101 struct ubik_dbase *dbase; /*!< corresponding database */
102 struct ubik_trans *next; /*!< in the list */
103 afs_int32 locktype; /*!< transaction lock */
104 struct ubik_trunc *activeTruncs; /*!< queued truncates */
105 struct ubik_tid tid; /*!< transaction id of this trans (if write trans.) */
106 afs_int32 minCommitTime; /*!< time before which this trans can't commit */
107 afs_int32 seekFile; /*!< seek ptr: file number */
108 afs_int32 seekPos; /*!< seek ptr: offset therein */
109 short flags; /*!< trans flag bits */
110 char type; /*!< type of trans */
111 iovec_wrt iovec_info;
112 iovec_buf iovec_data;
116 * \brief representation of a truncation operation
119 struct ubik_trunc *next;
120 afs_int32 file; /*!< file to truncate */
121 afs_int32 length; /*!< new size */
128 #include <lock.h> /* just to make sure we've got this */
131 * \brief representation of a ubik database.
133 * Contains info on low-level disk access routines
134 * for use by disk transaction module.
137 char *pathName; /*!< root name for dbase */
138 struct ubik_trans *activeTrans; /*!< active transaction list */
139 struct ubik_version version; /*!< version number */
140 #ifdef AFS_PTHREAD_ENV
141 pthread_mutex_t versionLock; /*!< lock on version number */
143 struct Lock versionLock; /*!< lock on version number */
145 afs_int32 tidCounter; /*!< last RW or RO trans tid counter */
146 afs_int32 writeTidCounter; /*!< last write trans tid counter */
147 afs_int32 flags; /*!< flags */
148 /* physio procedures */
149 int (*read) (struct ubik_dbase * adbase, afs_int32 afile, void *abuffer,
150 afs_int32 apos, afs_int32 alength);
151 int (*write) (struct ubik_dbase * adbase, afs_int32 afile, void *abuffer,
152 afs_int32 apos, afs_int32 alength);
153 int (*truncate) (struct ubik_dbase * adbase, afs_int32 afile,
155 int (*sync) (struct ubik_dbase * adbase, afs_int32 afile);
156 int (*stat) (struct ubik_dbase * adbase, afs_int32 afid,
157 struct ubik_stat * astat);
158 void (*open) (struct ubik_dbase * adbase, afs_int32 afid);
159 int (*setlabel) (struct ubik_dbase * adbase, afs_int32 afile, struct ubik_version * aversion); /*!< set the version label */
160 int (*getlabel) (struct ubik_dbase * adbase, afs_int32 afile, struct ubik_version * aversion); /*!< retrieve the version label */
161 int (*getnfiles) (struct ubik_dbase * adbase); /*!< find out number of files */
162 int (*buffered_append)(struct ubik_dbase *adbase, afs_int32 afid, void *adata, afs_int32 alength);
163 short readers; /*!< number of current read transactions */
164 struct ubik_version cachedVersion; /*!< version of caller's cached data */
165 struct Lock cache_lock; /*!< protects cached application data */
166 #ifdef AFS_PTHREAD_ENV
167 pthread_cond_t flags_cond; /*!< condition variable to manage changes to flags */
172 * ubik_CheckCache callback function.
174 * @param[in] atrans ubik transaction
175 * @param[in] rock rock passed to ubik_CheckCache
177 * @return operation status
178 * @retval 0 cache was read properly
180 typedef int (*ubik_updatecache_func) (struct ubik_trans *atrans, void *rock);
182 /*! \name procedures for automatically authenticating ubik connections */
183 extern int (*ubik_CRXSecurityProc) (void *, struct rx_securityClass **,
185 extern void *ubik_CRXSecurityRock;
186 extern int (*ubik_SRXSecurityProc) (void *, struct rx_securityClass **,
188 extern void *ubik_SRXSecurityRock;
189 extern int (*ubik_CheckRXSecurityProc) (void *, struct rx_call *);
190 extern void *ubik_CheckRXSecurityRock;
192 extern void ubik_SetClientSecurityProcs(int (*scproc)(void *,
193 struct rx_securityClass **,
195 int (*checkproc) (void *),
197 extern void ubik_SetServerSecurityProcs
198 (void (*buildproc) (void *,
199 struct rx_securityClass ***,
201 int (*checkproc) (void *, struct rx_call *),
207 * For applications that make use of ubik_BeginTransReadAnyWrite, writing
208 * processes must not update the application-level cache as they write,
209 * or else readers can read the new cache before the data is committed to
210 * the db. So, when a commit occurs, the cache must be updated right then.
211 * If set, this function will be called during commits of write transactions,
212 * to update the application-level cache after a write. This will be called
213 * immediately after the local disk commit succeeds, and it will be called
214 * with a lock held that prevents other threads from reading from the cache
215 * or the db in general.
217 * Note that this function MUST be set in order to make use of
218 * ubik_BeginTransReadAnyWrite.
220 extern int (*ubik_SyncWriterCacheProc) (void);
222 /****************INTERNALS BELOW ****************/
224 #ifdef UBIK_INTERNALS
225 /*! \name some ubik parameters */
226 #define UBIK_PAGESIZE 1024 /*!< fits in current r packet */
227 #define UBIK_LOGPAGESIZE 10 /*!< base 2 log thereof */
228 #define NBUFFERS 20 /*!< number of 1K buffers */
229 #define HDRSIZE 64 /*!< bytes of header per dbfile */
232 /*! \name ubik_dbase flags */
233 #define DBWRITING 1 /*!< are any write trans. in progress */
236 /*!\name ubik trans flags */
#define TRDONE        0x01	/*!< commit or abort done */
#define TRABORT       0x02	/*!< if #TRDONE, tells if aborted */
#define TRREADANY     0x04	/*!< read any data available in trans */
#define TRCACHELOCKED 0x20	/*!< this trans has locked dbase->cache_lock
				 * (meaning, this trans has called
				 * ubik_CheckCache at some point) */
#define TRREADWRITE   0x40	/*!< read even if there's a conflicting
				 * ubik-level write lock */
247 /*! \name ubik_lock flags */
251 /*! \name ubik system database numbers */
255 /*! \name define log opcodes */
256 #define LOGNEW 100 /*!< start transaction */
257 #define LOGEND 101 /*!< commit (good) end transaction */
258 #define LOGABORT 102 /*!< abort (fail) transaction */
259 #define LOGDATA 103 /*!< data */
260 #define LOGTRUNCATE 104 /*!< truncate operation */
264 * \name timer constants
265 * time constant for replication algorithms: the R time period is 20 seconds. Both
266 * #SMALLTIME and #BIGTIME must be larger than #RPCTIMEOUT+max(#RPCTIMEOUT, #POLLTIME),
267 * so that timeouts do not prevent us from getting through to our servers in time.
269 * We use multi-R to time out multiple down hosts concurrently.
270 * The only other restrictions: #BIGTIME > #SMALLTIME and
271 * #BIGTIME-#SMALLTIME > #MAXSKEW (the clock skew).
275 #define RPCTIMEOUT 20
281 * \brief the per-server state, used by the sync site to keep track of its charges
284 struct ubik_server *next; /*!< next ptr */
285 afs_uint32 addr[UBIK_MAX_INTERFACE_ADDR]; /*!< network order, addr[0] is primary */
286 afs_int32 lastVoteTime; /*!< last time yes vote received */
287 afs_int32 lastBeaconSent; /*!< last time beacon attempted */
288 struct ubik_version version; /*!< version, only used during recovery */
289 struct rx_connection *vote_rxcid; /*!< cid to use to contact dude for votes */
290 struct rx_connection *disk_rxcid; /*!< cid to use to contact dude for disk reqs */
291 char lastVote; /*!< true if last vote was yes */
292 char up; /*!< is it up? */
293 char beaconSinceDown; /*!< did beacon get through since last crash? */
294 char currentDB; /*!< is dbase up-to-date */
295 char magic; /*!< the one whose vote counts twice */
296 char isClone; /*!< is only a clone, doesn't vote */
299 /*! \name hold and release functions on a database */
/*
 * DBHOLD(a) / DBRELE(a): take and drop the versionLock member of the
 * database pointed to by 'a'.  Without AFS_PTHREAD_ENV the lock is taken
 * through ObtainWriteLock/ReleaseWriteLock; with it, through
 * opr_mutex_enter/opr_mutex_exit.
 */
#ifndef AFS_PTHREAD_ENV
# define DBHOLD(a)	ObtainWriteLock(&((a)->versionLock))
# define DBRELE(a)	ReleaseWriteLock(&((a)->versionLock))
#else /* AFS_PTHREAD_ENV */
# define DBHOLD(a)	opr_mutex_enter(&((a)->versionLock))
# define DBRELE(a)	opr_mutex_exit(&((a)->versionLock))
#endif /* AFS_PTHREAD_ENV */
/*! \name list of all servers in the system */
312 extern struct ubik_server *ubik_servers;
313 extern char amIClone;
316 /*! \name network port info */
317 extern short ubik_callPortal;
320 /*! \name urecovery state bits for sync site */
#define UBIK_RECSYNCSITE 0x01	/*!< am sync site */
#define UBIK_RECFOUNDDB  0x02	/*!< found acceptable dbase from quorum */
#define UBIK_RECHAVEDB   0x04	/*!< fetched best dbase */
#define UBIK_RECLABELDB  0x08	/*!< relabelled dbase */
#define UBIK_RECSENTDB   0x10	/*!< sent best db to *everyone* */
#define UBIK_RECSBETTER  UBIK_RECLABELDB	/*!< last state */
329 extern afs_int32 ubik_quorum; /* min hosts in quorum */
330 extern struct ubik_dbase *ubik_dbase; /* the database handled by this server */
331 extern afs_uint32 ubik_host[UBIK_MAX_INTERFACE_ADDR]; /* this host addr, in net order */
332 extern int ubik_amSyncSite; /* sleep on this waiting to be sync site */
333 extern struct ubik_stats { /* random stats */
336 extern afs_int32 urecovery_state; /* sync site recovery process state */
337 extern struct ubik_trans *ubik_currentTrans; /* current trans */
338 extern afs_int32 ubik_debugFlag; /* ubik debug flag */
339 extern int ubikPrimaryAddrOnly; /* use only primary address */
344 * Any of the locks may be acquired singly; when acquiring multiple locks, they
345 * should be acquired in the listed order:
346 * application cache lock (dbase->cache_lock)
347 * database lock DBHOLD/DBRELE
348 * beacon lock UBIK_BEACON_LOCK/UNLOCK
349 * vote lock UBIK_VOTE_LOCK/UNLOCK
350 * version lock UBIK_VERSION_LOCK/UNLOCK
351 * server address lock UBIK_ADDR_LOCK/UNLOCK
355 * \brief Global beacon data. All values are protected by beacon_lock
356 * This lock also protects some values in the ubik_server structures:
364 #ifdef AFS_PTHREAD_ENV
365 pthread_mutex_t beacon_lock;
367 int ubik_amSyncSite; /*!< flag telling if I'm sync site */
368 afs_int32 syncSiteUntil; /*!< valid only if amSyncSite */
369 int ubik_syncSiteAdvertised; /*!< flag telling if remotes are aware we have quorum */
372 #define UBIK_BEACON_LOCK opr_mutex_enter(&beacon_globals.beacon_lock)
373 #define UBIK_BEACON_UNLOCK opr_mutex_exit(&beacon_globals.beacon_lock)
376 * \brief Global vote data. All values are protected by vote_lock
379 #ifdef AFS_PTHREAD_ENV
380 pthread_mutex_t vote_lock;
382 struct ubik_version ubik_dbVersion; /* sync site's dbase version */
383 struct ubik_tid ubik_dbTid; /* sync site's tid, or 0 if none */
384 /* Used by all sites in nominating new sync sites */
385 afs_int32 ubik_lastYesTime; /* time we sent the last yes vote */
386 afs_uint32 lastYesHost; /* host to which we sent yes vote */
387 /* Next is time sync site began this vote: guarantees sync site until this + SMALLTIME */
388 afs_int32 lastYesClaim;
389 int lastYesState; /* did last site we voted for claim to be sync site? */
390 /* Used to guarantee that nomination process doesn't loop */
391 afs_int32 lowestTime;
392 afs_uint32 lowestHost;
397 #define UBIK_VOTE_LOCK opr_mutex_enter(&vote_globals.vote_lock)
398 #define UBIK_VOTE_UNLOCK opr_mutex_exit(&vote_globals.vote_lock)
401 * \brief Server address data. All values are protected by addr_lock
403 * This lock also protects:
404 * ubik_server: addr[], vote_rxcid, disk_rxcid
408 #ifdef AFS_PTHREAD_ENV
409 pthread_mutex_t addr_lock;
411 afs_int32 ubikSecIndex;
412 struct rx_securityClass *ubikSecClass;
415 #define UBIK_ADDR_LOCK opr_mutex_enter(&addr_globals.addr_lock)
416 #define UBIK_ADDR_UNLOCK opr_mutex_exit(&addr_globals.addr_lock)
419 * \brief The version lock protects the structure member, as well as
420 * the database version, flags, tidCounter, writeTidCounter
422 struct version_data {
423 #ifdef AFS_PTHREAD_ENV
424 pthread_mutex_t version_lock;
426 afs_int32 ubik_epochTime; /* time when this site started */
429 #define UBIK_VERSION_LOCK opr_mutex_enter(&version_globals.version_lock)
430 #define UBIK_VERSION_UNLOCK opr_mutex_exit(&version_globals.version_lock)
433 extern int uphys_stat(struct ubik_dbase *adbase, afs_int32 afid,
434 struct ubik_stat *astat);
435 extern int uphys_read(struct ubik_dbase *adbase, afs_int32 afile,
436 void *abuffer, afs_int32 apos,
438 extern int uphys_write(struct ubik_dbase *adbase, afs_int32 afile,
439 void *abuffer, afs_int32 apos,
441 extern int uphys_truncate(struct ubik_dbase *adbase, afs_int32 afile,
443 extern int uphys_getnfiles(struct ubik_dbase *adbase);
444 extern int uphys_getlabel(struct ubik_dbase *adbase, afs_int32 afile,
445 struct ubik_version *aversion);
446 extern int uphys_setlabel(struct ubik_dbase *adbase, afs_int32 afile,
447 struct ubik_version *aversion);
448 extern int uphys_sync(struct ubik_dbase *adbase, afs_int32 afile);
449 extern void uphys_invalidate(struct ubik_dbase *adbase,
451 extern int uphys_buf_append(struct ubik_dbase *adbase, afs_int32 afid,
452 void *buf, afs_int32 alength);
454 /*! \name recovery.c */
455 extern int urecovery_ResetState(void);
456 extern int urecovery_LostServer(struct ubik_server *server);
457 extern int urecovery_AllBetter(struct ubik_dbase *adbase,
459 extern int urecovery_AbortAll(struct ubik_dbase *adbase);
460 extern int urecovery_CheckTid(struct ubik_tid *atid, int abortalways);
461 extern int urecovery_Initialize(struct ubik_dbase *adbase);
462 extern void *urecovery_Interact(void *);
463 extern int DoProbe(struct ubik_server *server);
467 extern afs_int32 ContactQuorum_DISK_SetVersion(struct ubik_trans *atrans,
469 ubik_version *OldVersion,
470 ubik_version *NewVersion);
472 extern void panic(char *format, ...)
473 AFS_ATTRIBUTE_FORMAT(__printf__, 1, 2);
475 extern afs_uint32 ubikGetPrimaryInterfaceAddr(afs_uint32 addr);
477 extern int ubik_CheckAuth(struct rx_call *);
481 /*! \name beacon.c */
483 extern void ubeacon_InitSecurityClass(void);
484 extern void ubeacon_ReinitServer(struct ubik_server *ts);
485 extern void ubeacon_Debug(struct ubik_debug *aparm);
486 extern int ubeacon_AmSyncSite(void);
487 extern int ubeacon_SyncSiteAdvertised(void);
488 extern int ubeacon_InitServerListByInfo(afs_uint32 ame,
489 struct afsconf_cell *info,
491 extern int ubeacon_InitServerList(afs_uint32 ame, afs_uint32 aservers[]);
492 extern void *ubeacon_Interact(void *);
493 extern int ubeacon_updateUbikNetworkAddress(afs_uint32 ubik_host[]);
494 extern struct beacon_data beacon_globals;
495 extern struct addr_data addr_globals;
500 extern int udisk_Init(int nBUffers);
501 extern void udisk_Debug(struct ubik_debug *aparm);
502 extern int udisk_Invalidate(struct ubik_dbase *adbase, afs_int32 afid);
503 extern int udisk_read(struct ubik_trans *atrans, afs_int32 afile,
504 void *abuffer, afs_int32 apos, afs_int32 alen);
505 extern int udisk_truncate(struct ubik_trans *atrans, afs_int32 afile,
507 extern int udisk_write(struct ubik_trans *atrans, afs_int32 afile,
508 void *abuffer, afs_int32 apos, afs_int32 alen);
509 extern int udisk_begin(struct ubik_dbase *adbase, int atype,
510 struct ubik_trans **atrans);
511 extern int udisk_commit(struct ubik_trans *atrans);
512 extern int udisk_abort(struct ubik_trans *atrans);
513 extern int udisk_end(struct ubik_trans *atrans);
517 extern void ulock_Init(void);
518 extern int ulock_getLock(struct ubik_trans *atrans, int atype, int await);
519 extern void ulock_relLock(struct ubik_trans *atrans);
520 extern void ulock_Debug(struct ubik_debug *aparm);
524 extern int uvote_ShouldIRun(void);
525 extern afs_int32 uvote_GetSyncSite(void);
526 extern int uvote_Init(void);
527 extern struct vote_data vote_globals;
528 extern void uvote_set_dbVersion(struct ubik_version);
529 extern int uvote_eq_dbVersion(struct ubik_version);
530 extern int uvote_HaveSyncAndVersion(struct ubik_version);
533 #endif /* UBIK_INTERNALS */
535 extern afs_int32 ubik_nBuffers;
538 * \name Public function prototypes
543 extern int ubik_ServerInitByInfo(afs_uint32 myHost, short myPort,
544 struct afsconf_cell *info, char clones[],
545 const char *pathName,
546 struct ubik_dbase **dbase);
547 extern int ubik_ServerInit(afs_uint32 myHost, short myPort,
548 afs_uint32 serverList[],
549 const char *pathName, struct ubik_dbase **dbase);
550 extern int ubik_BeginTrans(struct ubik_dbase *dbase,
551 afs_int32 transMode, struct ubik_trans **transPtr);
552 extern int ubik_BeginTransReadAny(struct ubik_dbase *dbase,
554 struct ubik_trans **transPtr);
555 extern int ubik_BeginTransReadAnyWrite(struct ubik_dbase *dbase,
557 struct ubik_trans **transPtr);
558 extern int ubik_AbortTrans(struct ubik_trans *transPtr);
560 extern int ubik_EndTrans(struct ubik_trans *transPtr);
561 extern int ubik_Read(struct ubik_trans *transPtr, void *buffer,
563 extern int ubik_Flush(struct ubik_trans *transPtr);
564 extern int ubik_Write(struct ubik_trans *transPtr, void *buffer,
566 extern int ubik_Seek(struct ubik_trans *transPtr, afs_int32 fileid,
568 extern int ubik_Tell(struct ubik_trans *transPtr, afs_int32 * fileid,
569 afs_int32 * position);
570 extern int ubik_Truncate(struct ubik_trans *transPtr,
572 extern int ubik_SetLock(struct ubik_trans *atrans, afs_int32 apos,
573 afs_int32 alen, int atype);
574 extern int ubik_CheckCache(struct ubik_trans *atrans,
575 ubik_updatecache_func check,
577 extern struct version_data version_globals;
580 /*! \name ubikclient.c */
582 extern int ubik_ParseClientList(int argc, char **argv, afs_uint32 * aothers);
583 extern unsigned int afs_random(void);
584 extern int ubik_ClientInit(struct rx_connection **serverconns,
585 struct ubik_client **aclient);
586 extern afs_int32 ubik_ClientDestroy(struct ubik_client *aclient);
587 extern struct rx_connection *ubik_RefreshConn(struct rx_connection *tc);
588 #ifdef UBIK_LEGACY_CALLITER
589 extern afs_int32 ubik_CallIter(int (*aproc) (), struct ubik_client *aclient,
590 afs_int32 aflags, int *apos, long p1, long p2,
591 long p3, long p4, long p5, long p6, long p7,
592 long p8, long p9, long p10, long p11, long p12,
593 long p13, long p14, long p15, long p16);
594 extern afs_int32 ubik_Call_New(int (*aproc) (), struct ubik_client
595 *aclient, afs_int32 aflags, long p1, long p2,
596 long p3, long p4, long p5, long p6, long p7,
597 long p8, long p9, long p10, long p11, long p12,
598 long p13, long p14, long p15, long p16);
/*! \name ubikcmd.c */
603 extern int ubik_ParseServerList(int argc, char **argv, afs_uint32 *ahost,
604 afs_uint32 *aothers);
609 struct rx_securityClass;
611 typedef int (*ugen_secproc_func)(struct rx_securityClass *, afs_int32);
612 extern int ugen_ClientInitCell(struct afsconf_dir *dir,
613 struct afsconf_cell *info,
615 struct ubik_client **uclientp,
616 int maxservers, const char *serviceid,
618 extern int ugen_ClientInitServer(const char *confDir, char *cellName,
619 int secFlags, struct ubik_client **uclientp,
620 int maxservers, char *serviceid,
621 int deadtime, afs_uint32 server,
623 extern int ugen_ClientInitFlags(const char *confDir, char *cellName,
624 int secFlags, struct ubik_client **uclientp,
625 ugen_secproc_func secproc,
626 int maxservers, char *serviceid,
628 extern afs_int32 ugen_ClientInit(int noAuthFlag, const char *confDir,
629 char *cellName, afs_int32 sauth,
630 struct ubik_client **uclientp,
631 ugen_secproc_func secproc,
633 afs_int32 gen_rxkad_level,
634 afs_int32 maxservers, char *serviceid,
635 afs_int32 deadtime, afs_uint32 server,
636 afs_uint32 port, afs_int32 usrvid);