2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
13 /* these are now appended by the error table compiler */
15 /* ubik error codes */
16 #define UMINCODE 100000 /* lowest ubik error code (same value as UNOQUORUM) */
17 #define UNOQUORUM 100000 /* no quorum elected */
18 #define UNOTSYNC 100001 /* not synchronization site (should work on sync site) */
19 #define UNHOSTS 100002 /* too many hosts */
20 #define UIOERROR 100003 /* I/O error writing dbase or log */
21 #define UINTERNAL 100004 /* mysterious internal error */
22 #define USYNC 100005 /* major synchronization error */
23 #define UNOENT 100006 /* file not found when processing dbase */
24 #define UBADLOCK 100007 /* bad lock range size (must be 1) */
25 #define UBADLOG 100008 /* read error reprocessing log */
26 #define UBADHOST 100009 /* problems with host name */
27 #define UBADTYPE 100010 /* bad operation for this transaction type */
28 #define UTWOENDS 100011 /* two commits or aborts done to transaction */
29 #define UDONE 100012 /* operation done after abort (or commit) */
30 #define UNOSERVERS 100013 /* no servers appear to be up */
31 #define UEOF 100014 /* premature EOF */
32 #define ULOGIO 100015 /* error writing log file */
33 #define UMAXCODE 100100 /* largest ubik error code; values above ULOGIO are not assigned in this list */
39 #else /* defined(UKERNEL) */
41 #endif /* defined(UKERNEL) */
43 /* ubik_trans types */
44 #define UBIK_READTRANS 0
45 #define UBIK_WRITETRANS 1
50 #if !defined(UBIK_PAUSE)
52 #endif /* UBIK_PAUSE */
54 /* ubik client flags */
55 #define UPUBIKONLY 1 /* only check servers presumed functional */
57 /* RX services types */
58 #define VOTE_SERVICE_ID 50
59 #define DISK_SERVICE_ID 51
60 #define USER_SERVICE_ID 52 /* Since most applications use same port! */
62 #define UBIK_MAGIC 0x354545
64 /* global ubik parameters */
65 #define MAXSERVERS 20 /* max number of servers */
67 /* version comparison macro */
/*
 * vcmp(a, b): three-way comparison of two version values.
 *
 * Each argument must have integer members `epoch` and `counter`.  The epochs
 * are compared first; the counters are consulted only when the epochs are
 * equal.
 *
 * Yields an int that is negative, zero or positive when a compares less
 * than, equal to or greater than b.  Only the sign is meaningful; the value
 * is always -1, 0 or 1.  The fields are compared with relational operators
 * instead of being subtracted, because a signed difference overflows
 * (undefined behavior) when the two values are far enough apart.
 *
 * Both arguments are evaluated more than once, so do not pass expressions
 * with side effects.
 */
#define vcmp(a,b) \
    ((a).epoch == (b).epoch \
     ? (((a).counter > (b).counter) - ((a).counter < (b).counter)) \
     : (((a).epoch > (b).epoch) - ((a).epoch < (b).epoch)))
70 /* ubik_client state bits */
71 #define CFLastFailed 1 /* last call failed to this guy (to detect down hosts) */
73 #ifdef AFS_PTHREAD_ENV
78 /* per-client structure for ubik */
80 short initializationState; /* ubik client init state */
81 short states[MAXSERVERS]; /* state bits */
82 struct rx_connection *conns[MAXSERVERS];
84 #ifdef AFS_PTHREAD_ENV
89 #ifdef AFS_PTHREAD_ENV
/*
 * Lock / unlock the pthread mutex `cm` of a ubik client.
 *
 *   client - pointer to a structure that has a pthread mutex member `cm`
 *
 * The pthread call is made outside assert() so that it still runs when
 * assertions are compiled out with NDEBUG; only the check of its return
 * value is controlled by assert().  Each macro expands to one complete
 * braced statement, so a trailing ';' at the call site remains optional,
 * as it was before.
 */
#define LOCK_UBIK_CLIENT(client) \
    { \
	int ubik_lock_rc_ = pthread_mutex_lock(&(client)->cm); \
	assert(ubik_lock_rc_ == 0); \
	(void)ubik_lock_rc_;	/* silence "unused" when NDEBUG is set */ \
    }
#define UNLOCK_UBIK_CLIENT(client) \
    { \
	int ubik_unlock_rc_ = pthread_mutex_unlock(&(client)->cm); \
	assert(ubik_unlock_rc_ == 0); \
	(void)ubik_unlock_rc_;	/* silence "unused" when NDEBUG is set */ \
    }
93 #define LOCK_UBIK_CLIENT(client)
94 #define UNLOCK_UBIK_CLIENT(client)
/*
 * Bounds-checked accessors for the per-server arrays of a ubik client.
 *
 *   astr   - pointer to a structure with `conns[]` (ubik_GetRPCConn) or
 *            `hosts[]` (ubik_GetRPCHost) members
 *   aindex - server slot to read
 *
 * Each yields the selected array element, or 0 when aindex lies outside
 * [0, MAXSERVERS).  The lower bound is checked as well as the upper one so
 * that a negative index cannot read memory in front of the array.
 *
 * aindex is evaluated more than once; do not pass an expression with side
 * effects.
 */
#define ubik_GetRPCConn(astr,aindex) \
    (((aindex) < 0 || (aindex) >= MAXSERVERS) ? 0 : (astr)->conns[aindex])
#define ubik_GetRPCHost(astr,aindex) \
    (((aindex) < 0 || (aindex) >= MAXSERVERS) ? 0 : (astr)->hosts[aindex])
100 /* ubik header file structure */
102 afs_int32 magic; /* magic number */
103 short pad1; /* some 0-initd padding */
104 short size; /* header allocation size */
105 struct ubik_version version; /* the version for this file */
108 /* representation of a ubik transaction */
110 struct ubik_dbase *dbase; /* corresponding database */
111 struct ubik_trans *next; /* in the list */
112 afs_int32 locktype; /* transaction lock */
113 struct ubik_trunc *activeTruncs;/* queued truncates */
114 struct ubik_tid tid; /* transaction id of this trans (if write trans.) */
115 afs_int32 minCommitTime; /* time before which this trans can't commit */
116 afs_int32 seekFile; /* seek ptr: file number */
117 afs_int32 seekPos; /* seek ptr: offset therein */
118 short flags; /* trans flag bits */
119 char type; /* type of trans */
120 iovec_wrt iovec_info;
121 iovec_buf iovec_data;
124 /* representation of a truncation operation */
126 struct ubik_trunc *next;
127 afs_int32 file; /* file to truncate */
128 afs_int32 length; /* new size */
137 #include "afs/lock.h"
138 #else /* defined(UKERNEL) */
139 #include <lock.h> /* just to make sure we've got this */
140 #endif /* defined(UKERNEL) */
142 /* representation of a ubik database. Contains info on low-level disk access routines
143 for use by disk transaction module.
146 char *pathName; /* root name for dbase */
147 struct ubik_trans *activeTrans; /* active transaction list */
148 struct ubik_version version; /* version number */
150 struct afs_lock versionLock; /* lock on version number */
151 #else /* defined(UKERNEL) */
152 struct Lock versionLock; /* lock on version number */
153 #endif /* defined(UKERNEL) */
154 afs_int32 tidCounter; /* last RW or RO trans tid counter */
155 afs_int32 writeTidCounter; /* last write trans tid counter */
156 afs_int32 flags; /* flags */
157 int (*read)(); /* physio procedures */
163 int (*setlabel)(); /* set the version label */
164 int (*getlabel)(); /* retrieve the version label */
165 int (*getnfiles)(); /* find out number of files */
166 short readers; /* number of current read transactions */
167 struct ubik_version cachedVersion; /* version of caller's cached data */
170 /* procedures for automatically authenticating ubik connections */
171 extern int (*ubik_CRXSecurityProc)();
172 extern char *ubik_CRXSecurityRock;
173 extern int (*ubik_SRXSecurityProc)();
174 extern char *ubik_SRXSecurityRock;
175 extern int (*ubik_CheckRXSecurityProc)();
176 extern char *ubik_CheckRXSecurityRock;
178 /****************INTERNALS BELOW ****************/
180 #ifdef UBIK_INTERNALS
181 /* some ubik parameters */
185 #define PAGESIZE 1024 /* fits in current r packet -- NOTE(review): "r" presumably means "rx"; confirm */
186 #define LOGPAGESIZE 10 /* base 2 log of PAGESIZE (2^10 == 1024); keep the two in step */
187 #define NBUFFERS 20 /* number of 1K buffers */
188 #define HDRSIZE 64 /* bytes of header per dbfile */
190 /* ubik_dbase flags */
191 #define DBWRITING 1 /* are any write trans. in progress */
192 #if defined(UBIK_PAUSE)
193 #define DBVOTING 2 /* the beacon task is polling */
194 #endif /* UBIK_PAUSE */
196 /* ubik trans flags */
197 #define TRDONE 1 /* commit or abort done */
198 #define TRABORT 2 /* if TRDONE, tells if aborted */
199 #define TRREADANY 4 /* read any data available in trans */
200 #if defined(UBIK_PAUSE)
201 #define TRSETLOCK 8 /* SetLock is using trans */
202 #define TRSTALE 16 /* udisk_end during getLock */
203 #endif /* UBIK_PAUSE */
205 /* ubik_lock flags */
208 /* ubik system database numbers */
211 /* define log opcodes */
212 #define LOGNEW 100 /* start transaction */
213 #define LOGEND 101 /* commit (good) end transaction */
214 #define LOGABORT 102 /* abort (fail) transaction */
215 #define LOGDATA 103 /* data */
216 #define LOGTRUNCATE 104 /* truncate operation */
218 /* time constant for replication algorithms: the R time period is 20 seconds. Both SMALLTIME
219 and BIGTIME must be larger than RPCTIMEOUT+max(RPCTIMEOUT,POLLTIME),
220 so that timeouts do not prevent us from getting through to our servers in time.
222 We use multi-R to time out multiple down hosts concurrently.
223 The only other restrictions: BIGTIME > SMALLTIME and
224 BIGTIME-SMALLTIME > MAXSKEW (the clock skew).
228 #define RPCTIMEOUT 20
232 /* the per-server state, used by the sync site to keep track of its charges */
234 struct ubik_server *next; /* next ptr */
235 afs_uint32 addr[UBIK_MAX_INTERFACE_ADDR];/* network order, addr[0] is primary*/
236 afs_int32 lastVoteTime; /* last time yes vote received */
237 afs_int32 lastBeaconSent; /* last time beacon attempted */
238 struct ubik_version version; /* version, only used during recovery */
239 struct rx_connection *vote_rxcid; /* cid to use to contact dude for votes */
240 struct rx_connection *disk_rxcid; /* cid to use to contact dude for disk reqs */
241 char lastVote; /* true if last vote was yes */
242 char up; /* is it up? */
243 char beaconSinceDown; /* did beacon get through since last crash? */
244 char currentDB; /* is dbase up-to-date */
245 char magic; /* the one whose vote counts twice */
246 char isClone; /* is only a clone, doesn't vote */
249 /* hold and release functions on a database */
250 #define DBHOLD(a) ObtainWriteLock(&((a)->versionLock)) /* a: pointer to a structure with a versionLock member; takes that lock for writing */
251 #define DBRELE(a) ReleaseWriteLock(&((a)->versionLock)) /* releases the write lock on a->versionLock taken by DBHOLD */
255 /* list of all servers in the system */
256 extern struct ubik_server *ubik_servers;
257 extern char amIClone;
259 /* network port info */
260 extern short ubik_callPortal;
262 /* urecovery state bits for sync site */
263 #define UBIK_RECSYNCSITE 1 /* am sync site */
264 #define UBIK_RECFOUNDDB 2 /* found acceptable dbase from quorum */
265 #define UBIK_RECHAVEDB 4 /* fetched best dbase */
266 #define UBIK_RECLABELDB 8 /* relabelled dbase */
267 #define UBIK_RECSENTDB 0x10 /* sent best db to *everyone* */
268 #define UBIK_RECSBETTER UBIK_RECLABELDB /* last state */
270 extern afs_int32 ubik_quorum; /* min hosts in quorum */
271 extern struct ubik_dbase *ubik_dbase; /* the database handled by this server */
272 extern afs_uint32 ubik_host[UBIK_MAX_INTERFACE_ADDR];/* this host addr, in net order */
273 extern int ubik_amSyncSite; /* sleep on this waiting to be sync site */
274 extern struct ubik_stats { /* random stats */
277 extern afs_int32 ubik_epochTime; /* time when this site started */
278 extern afs_int32 urecovery_state; /* sync site recovery process state */
279 extern struct ubik_trans *ubik_currentTrans; /* current trans */
280 extern struct ubik_version ubik_dbVersion; /* sync site's dbase version */
281 extern afs_int32 ubik_debugFlag; /* ubik debug flag */
282 extern int ubikPrimaryAddrOnly; /* use only primary address */
284 /* this extern gives the sync site's db version, with epoch of 0 if none yet */
286 extern int uphys_read();
287 extern int uphys_write();
288 extern int uphys_truncate();
289 extern int uphys_sync();
292 extern int uphys_open();
294 extern int uphys_stat();
295 extern int uphys_getlabel();
296 extern int uphys_setlabel();
297 extern int uphys_getnfiles();
298 extern int ubeacon_Interact();
299 extern int urecovery_Interact();
300 extern int sdisk_Interact();
301 extern int uvote_Interact();
302 extern int DISK_Abort();
303 extern int DISK_Begin();
304 extern int DISK_ReleaseLocks();
305 extern int DISK_Commit();
306 extern int DISK_Lock();
307 extern int DISK_Write();
308 extern int DISK_WriteV();
309 extern int DISK_Truncate();
310 extern int DISK_SetVersion();
311 #endif /* UBIK_INTERNALS */
313 extern afs_int32 ubik_nBuffers;
316 * Public function prototypes
319 extern int ubik_ParseClientList(
325 extern unsigned int afs_random(
329 extern int ubik_ClientInit(
330 register struct rx_connection **serverconns,
331 struct ubik_client **aclient
334 extern afs_int32 ubik_ClientDestroy(
335 struct ubik_client *aclient