2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include <afs/param.h>
15 #include <sys/types.h>
20 #include <netinet/in.h>
26 #define UBIK_INTERNALS
/* Optional security hook. When ubik_CheckRXSecurityProc is non-null, the
 * authorization check in this file invokes it as
 * (*ubik_CheckRXSecurityProc)(ubik_CheckRXSecurityRock, acall), i.e. the rock
 * is handed back as the hook's first argument and the call as its second.
 * Who installs the hook is not visible in this file section. */
29 int (*ubik_CheckRXSecurityProc)();
30 char *ubik_CheckRXSecurityRock;
32 /* Routines for handling requests remotely submitted by the sync site. These are
33 only write transactions (we don't propagate read transactions), and there is at most
34 one write transaction extant at any one time. */
/* The write transaction currently being executed on behalf of the sync site,
 * or 0 when there is none. SDISK_Begin fills it in through udisk_begin();
 * SDISK_Begin, SDISK_ReleaseLocks and SDISK_Abort reset it to 0. Every other
 * SDISK_* write handler in this file tests it before doing any work. */
37 struct ubik_trans *ubik_currentTrans = 0;
/* Fragment of the authorization check applied to an incoming call. The line
 * naming the routine is not visible in this listing; presumably it is
 * ubik_CheckAuth, which the SDISK_* handlers call with their rx_call --
 * confirm against the full file.
 *
 * acall - the incoming call being checked.
 *
 * When a hook has been stored in ubik_CheckRXSecurityProc it is called with
 * ubik_CheckRXSecurityRock and acall, and its result is kept in code. What
 * happens when no hook is set, and what is returned, is not visible here. */
41 register struct rx_call *acall; {
42 register afs_int32 code;
43 if (ubik_CheckRXSecurityProc) {
44 code = (*ubik_CheckRXSecurityProc)(ubik_CheckRXSecurityRock, acall);
50 /* The rest of these routines handle remote execution of write
51 * transactions: this is the code executed on the other servers when a
52 * sync site is executing a write transaction. */
/* SDISK_Begin: start a remotely requested write transaction on this server.
 *
 * rxcall - the incoming call; passed to ubik_CheckAuth() first, and a non-zero
 *          result is tested (the body of that branch is not visible here).
 * atid   - transaction id supplied by the caller; passed to
 *          urecovery_CheckTid() and, once udisk_begin() succeeds, its epoch
 *          and counter are copied into the new transaction's tid.
 *
 * A transaction still recorded in ubik_currentTrans is ended with udisk_end()
 * unless its locktype is LOCKWAIT, and the pointer is reset to 0 before
 * udisk_begin(ubik_dbase, UBIK_WRITETRANS, &ubik_currentTrans) creates the
 * replacement.
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the udisk_begin() result held in code is returned -- confirm. */
54 SDISK_Begin(rxcall, atid)
55 register struct rx_call *rxcall;
56 struct ubik_tid *atid;
58 register afs_int32 code;
60 if (code = ubik_CheckAuth(rxcall)) {
64 urecovery_CheckTid(atid);
65 if (ubik_currentTrans) {
66 /* If the thread is not waiting for lock - ok to end it */
67 if (ubik_currentTrans->locktype != LOCKWAIT) {
68 udisk_end(ubik_currentTrans);
70 ubik_currentTrans = (struct ubik_trans *) 0;
72 code = udisk_begin(ubik_dbase, UBIK_WRITETRANS, &ubik_currentTrans);
73 if (!code && ubik_currentTrans) {
74 /* label this trans with the right trans id */
75 ubik_currentTrans->tid.epoch = atid->epoch;
76 ubik_currentTrans->tid.counter = atid->counter;
/* SDISK_Commit: commit the current remote write transaction.
 *
 * rxcall - the incoming call; checked with ubik_CheckAuth().
 * atid   - caller's transaction id; passed to urecovery_CheckTid().
 *
 * Visible sequence: require a non-null ubik_currentTrans whose type is
 * UBIK_WRITETRANS, run urecovery_CheckTid(atid), test ubik_currentTrans a
 * second time, then call udisk_commit(). Following the commit, ubik_dbVersion
 * is set from ubik_dbase->version so that it matches the sync site (any
 * condition guarding that assignment is on a line not visible here).
 * The bodies of the failure branches and the return statement are not visible
 * in this listing; dbase is assigned but no use of it is visible.
 * NOTE(review): the second null test presumably exists because
 * urecovery_CheckTid() can drop the transaction -- confirm. */
82 SDISK_Commit(rxcall, atid)
83 register struct rx_call *rxcall;
84 struct ubik_tid *atid;
86 register afs_int32 code;
87 register struct ubik_dbase *dbase;
89 if (code = ubik_CheckAuth(rxcall)) {
93 if (!ubik_currentTrans) {
97 * sanity check to make sure only write trans appear here
99 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
103 dbase = ubik_currentTrans->dbase;
105 urecovery_CheckTid(atid);
106 if (!ubik_currentTrans) {
111 code = udisk_commit(ubik_currentTrans);
113 /* sync site should now match */
114 ubik_dbVersion = ubik_dbase->version;
/* SDISK_ReleaseLocks: end the current remote write transaction and forget it.
 *
 * rxcall - the incoming call; checked with ubik_CheckAuth().
 * atid   - caller's transaction id; passed to urecovery_CheckTid().
 *
 * Visible sequence: require a non-null ubik_currentTrans whose type is
 * UBIK_WRITETRANS, run urecovery_CheckTid(atid), test ubik_currentTrans a
 * second time, then end the transaction with udisk_end() unless its locktype
 * is LOCKWAIT, and reset ubik_currentTrans to 0.
 * The bodies of the failure branches and the return statement are not visible
 * in this listing; dbase is assigned but no use of it is visible. */
120 SDISK_ReleaseLocks(rxcall, atid)
121 register struct rx_call *rxcall;
122 struct ubik_tid *atid;
124 register struct ubik_dbase *dbase;
125 register afs_int32 code;
127 if (code = ubik_CheckAuth(rxcall)) {
131 if (!ubik_currentTrans) {
134 /* sanity check to make sure only write trans appear here */
135 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
139 dbase = ubik_currentTrans->dbase;
141 urecovery_CheckTid(atid);
142 if (!ubik_currentTrans) {
147 /* If the thread is not waiting for lock - ok to end it */
148 if (ubik_currentTrans->locktype != LOCKWAIT) {
149 udisk_end(ubik_currentTrans);
151 ubik_currentTrans = (struct ubik_trans *) 0;
/* SDISK_Abort: abort the current remote write transaction and forget it.
 *
 * rxcall - the incoming call; checked with ubik_CheckAuth().
 * atid   - caller's transaction id; passed to urecovery_CheckTid().
 *
 * Visible sequence: require a non-null ubik_currentTrans whose type is
 * UBIK_WRITETRANS, run urecovery_CheckTid(atid), test ubik_currentTrans a
 * second time, call udisk_abort() (result kept in code), then end the
 * transaction with udisk_end() unless its locktype is LOCKWAIT, and reset
 * ubik_currentTrans to 0.
 * The bodies of the failure branches and the return statement are not visible
 * in this listing; dbase is assigned but no use of it is visible. */
156 SDISK_Abort(rxcall, atid)
157 register struct rx_call *rxcall;
158 struct ubik_tid *atid;
160 register afs_int32 code;
161 register struct ubik_dbase *dbase;
163 if (code = ubik_CheckAuth(rxcall)) {
167 if (!ubik_currentTrans) {
170 /* sanity check to make sure only write trans appear here */
171 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
175 dbase = ubik_currentTrans->dbase;
177 urecovery_CheckTid(atid);
178 if (!ubik_currentTrans) {
183 code = udisk_abort(ubik_currentTrans);
184 /* If the thread is not waiting for lock - ok to end it */
185 if (ubik_currentTrans->locktype != LOCKWAIT) {
186 udisk_end(ubik_currentTrans);
188 ubik_currentTrans = (struct ubik_trans *) 0;
/* SDISK_Lock: obtain a lock of type atype for the current remote write
 * transaction.
 *
 * rxcall      - the incoming call; checked with ubik_CheckAuth().
 * atid        - caller's transaction id; passed to urecovery_CheckTid().
 * afile       - declared but no use is visible in this listing.
 * apos, alen  - not used (per the declaration's own comment).
 * atype       - lock type, forwarded to ulock_getLock().
 *
 * Visible sequence: require a non-null ubik_currentTrans whose type is
 * UBIK_WRITETRANS, run urecovery_CheckTid(atid), test ubik_currentTrans a
 * second time, remember the pointer in ubik_thisTrans, then call
 * ulock_getLock(ubik_currentTrans, atype, 1). If that succeeds but
 * ubik_currentTrans no longer equals the remembered pointer, the remembered
 * transaction is ended with udisk_end(); the original comment attributes this
 * to the transaction being ended or aborted while the lock was awaited.
 * The bodies of the failure branches and the return statement are not visible
 * in this listing; dbase is assigned but no use of it is visible. */
193 SDISK_Lock(rxcall, atid, afile, apos, alen, atype)
194 register struct rx_call *rxcall;
195 struct ubik_tid *atid;
196 afs_int32 afile, apos, alen, atype; /* apos and alen are not used */
198 register afs_int32 code;
199 register struct ubik_dbase *dbase;
200 struct ubik_trans *ubik_thisTrans;
202 if (code = ubik_CheckAuth(rxcall)) {
205 if (!ubik_currentTrans) {
208 /* sanity check to make sure only write trans appear here */
209 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
215 dbase = ubik_currentTrans->dbase;
217 urecovery_CheckTid(atid);
218 if (!ubik_currentTrans) {
223 ubik_thisTrans = ubik_currentTrans;
224 code = ulock_getLock(ubik_currentTrans, atype, 1);
226 /* While waiting, the transaction may have been ended/
227 * aborted from under us (urecovery_CheckTid). In that
228 * case, end the transaction here.
230 if (!code && (ubik_currentTrans != ubik_thisTrans)) {
231 udisk_end(ubik_thisTrans);
239 /* Write a vector of data */
/* SDISK_WriteV: apply a batch of writes to the current remote write
 * transaction.
 *
 * rxcall    - the incoming call; checked with ubik_CheckAuth().
 * atid      - caller's transaction id; passed to urecovery_CheckTid().
 * io_vector - iovec_wrt_len entries at iovec_wrt_val, each read here as a
 *             struct ubik_iovec giving file, position and length.
 * io_buffer - iovec_buf_len bytes at iovec_buf_val holding the data for all
 *             entries back to back, in entry order.
 *
 * After the usual checks (non-null ubik_currentTrans of type UBIK_WRITETRANS,
 * urecovery_CheckTid, second null test) each entry is written with
 * udisk_write() from &iobuf[offset], and offset then advances by the entry's
 * length. Before each write, offset + length is compared against
 * iovec_buf_len so an entry cannot read past the end of the buffer.
 * The declaration of iobuf, the bodies of the failure branches and the return
 * statement are not visible in this listing.
 * NOTE(review): offset is an afs_int32; if the length field is also signed, a
 * negative length would satisfy the bounds test and move offset backwards --
 * confirm the field type and whether lengths are validated elsewhere. */
240 SDISK_WriteV(rxcall, atid, io_vector, io_buffer)
241 register struct rx_call *rxcall;
242 struct ubik_tid *atid;
243 iovec_wrt *io_vector;
244 iovec_buf *io_buffer;
246 afs_int32 code, i, offset;
247 struct ubik_dbase *dbase;
248 struct ubik_iovec *iovec;
251 if (code = ubik_CheckAuth(rxcall)) {
254 if (!ubik_currentTrans) {
257 /* sanity check to make sure only write trans appear here */
258 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
262 dbase = ubik_currentTrans->dbase;
264 urecovery_CheckTid(atid);
265 if (!ubik_currentTrans) {
270 iovec = (struct ubik_iovec *)io_vector->iovec_wrt_val;
271 iobuf = (char *)io_buffer->iovec_buf_val;
272 for (i=0, offset=0; i<io_vector->iovec_wrt_len; i++) {
273 /* Sanity check for going off end of buffer */
274 if ((offset + iovec[i].length) > io_buffer->iovec_buf_len) {
277 code = udisk_write(ubik_currentTrans, iovec[i].file, &iobuf[offset],
278 iovec[i].position, iovec[i].length);
282 offset += iovec[i].length;
/* SDISK_Write: apply a single write to the current remote write transaction.
 *
 * rxcall - the incoming call; checked with ubik_CheckAuth().
 * atid   - caller's transaction id; passed to urecovery_CheckTid().
 * afile  - file argument forwarded to udisk_write().
 * apos   - position argument forwarded to udisk_write().
 * adata  - bulkdata_len bytes at bulkdata_val, forwarded to udisk_write().
 *
 * Visible sequence: require a non-null ubik_currentTrans whose type is
 * UBIK_WRITETRANS, run urecovery_CheckTid(atid), test ubik_currentTrans a
 * second time, then call udisk_write() and keep its result in code.
 * The bodies of the failure branches and the return statement are not visible
 * in this listing; dbase is assigned but no use of it is visible. */
289 SDISK_Write(rxcall, atid, afile, apos, adata)
290 register struct rx_call *rxcall;
291 struct ubik_tid *atid;
292 afs_int32 afile, apos;
293 register bulkdata *adata;
295 register afs_int32 code;
296 register struct ubik_dbase *dbase;
298 if (code = ubik_CheckAuth(rxcall)) {
301 if (!ubik_currentTrans) {
304 /* sanity check to make sure only write trans appear here */
305 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
309 dbase = ubik_currentTrans->dbase;
311 urecovery_CheckTid(atid);
312 if (!ubik_currentTrans) {
316 code = udisk_write(ubik_currentTrans, afile, adata->bulkdata_val, apos, adata->bulkdata_len);
/* SDISK_Truncate: truncate a file within the current remote write
 * transaction.
 *
 * rxcall - the incoming call; checked with ubik_CheckAuth().
 * atid   - caller's transaction id; passed to urecovery_CheckTid().
 * afile  - file argument forwarded to udisk_truncate().
 * alen   - length argument forwarded to udisk_truncate().
 * (The declarations of afile and alen are not visible in this listing.)
 *
 * Visible sequence: require a non-null ubik_currentTrans whose type is
 * UBIK_WRITETRANS, run urecovery_CheckTid(atid), test ubik_currentTrans a
 * second time, then call udisk_truncate() and keep its result in code.
 * The bodies of the failure branches and the return statement are not visible
 * in this listing; dbase is assigned but no use of it is visible. */
321 SDISK_Truncate(rxcall, atid, afile, alen)
322 register struct rx_call *rxcall;
323 struct ubik_tid *atid;
327 register afs_int32 code;
328 register struct ubik_dbase *dbase;
330 if (code = ubik_CheckAuth(rxcall)) {
333 if (!ubik_currentTrans) {
336 /* sanity check to make sure only write trans appear here */
337 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
341 dbase = ubik_currentTrans->dbase;
343 urecovery_CheckTid(atid);
344 if (!ubik_currentTrans) {
348 code = udisk_truncate(ubik_currentTrans, afile, alen);
/* SDISK_GetVersion: report this server's database version label.
 *
 * rxcall   - the incoming call; checked with ubik_CheckAuth().
 * aversion - output; filled in by the database's getlabel operation.
 *
 * ubeacon_AmSyncSite() is tested before the label is read; according to the
 * original comment below, the request is meant to be rejected while this
 * server is the sync site (the body of that branch is not visible here).
 * The label is then fetched with (*ubik_dbase->getlabel)(ubik_dbase, 0,
 * aversion). aversion->counter is set to 0 to tell the caller there is no
 * database.
 * NOTE(review): the condition guarding that final assignment is not visible;
 * presumably it applies only when getlabel fails -- confirm. */
353 SDISK_GetVersion(rxcall, aversion)
354 register struct rx_call *rxcall;
355 register struct ubik_version *aversion;
357 register afs_int32 code;
359 if (code = ubik_CheckAuth(rxcall)) {
364 * If we are the sync site, recovery shouldn't be running on any
365 * other site. We shouldn't be getting this RPC as long as we are
366 * the sync site. To prevent any unforseen activity, we should
367 * reject this RPC until we have recognized that we are not the
368 * sync site anymore, and/or if we have any pending WRITE
369 * transactions that have to complete. This way we can be assured
370 * that this RPC would not block any pending transactions that
371 * should either fail or pass. If we have recognized the fact that
372 * we are not the sync site any more, all write transactions would
373 * fail with UNOQUORUM anyway.
375 if (ubeacon_AmSyncSite()) {
380 code = (*ubik_dbase->getlabel) (ubik_dbase, 0, aversion);
383 /* tell other side there's no dbase */
385 aversion->counter = 0;
/* SDISK_GetFile: stream one database file to the caller over the call.
 *
 * rxcall  - the incoming call; checked with ubik_CheckAuth() and used as the
 *           stream for rx_Write().
 * file    - file argument passed to the database's stat, read and getlabel
 *           operations.
 * version - output; filled in by getlabel once the data has been sent.
 *
 * Visible sequence: stat the file, take its size as length, send that length
 * first as a 4-byte value converted with htonl() (a short write is tested
 * for), then read pieces of at most sizeof(tbuffer) bytes from the database
 * at offset and send each with rx_Write(). Finally the file's label is
 * fetched into version.
 * The declarations of length, tlen and tbuffer, the assignment of dbase, the
 * loop header, the failure branches and the return statement are not visible
 * in this listing. */
390 SDISK_GetFile(rxcall, file, version)
391 register struct rx_call *rxcall;
392 register afs_int32 file;
393 struct ubik_version *version;
395 register afs_int32 code;
396 register struct ubik_dbase *dbase;
397 register afs_int32 offset;
398 struct ubik_stat ubikstat;
403 if (code = ubik_CheckAuth(rxcall)) {
406 /* temporarily disabled because it causes problems for migration tool. Hey, it's just
407 * a sanity check, anyway.
408 if (ubeacon_AmSyncSite()) {
414 code = (*dbase->stat) (dbase, file, &ubikstat);
419 length = ubikstat.size;
420 tlen = htonl(length);
421 code = rx_Write(rxcall, &tlen, sizeof(afs_int32));
422 if (code != sizeof(afs_int32)) {
428 tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length);
429 code = (*dbase->read)(dbase, file, tbuffer, offset, tlen);
434 code = rx_Write(rxcall, tbuffer, tlen);
442 code = (*dbase->getlabel)(dbase, file, version); /* return the dbase, too */
/* SDISK_SendFile: receive a complete database file from the caller and
 * install it locally, replacing the existing contents.
 *
 * rxcall - the incoming call; checked with ubik_CheckAuth(), used to identify
 *          the sending host, and read from with rx_Read().
 * file   - file argument passed to the database operations.
 * length - number of bytes the caller will send.
 * avers  - version label to stamp on the file once the data is in place.
 * (The declarations of file and length are not visible in this listing.)
 *
 * Visible sequence:
 *  1. Sender check: uvote_GetSyncSite() is compared with the caller's primary
 *     interface address; a known sync site that differs from the caller is
 *     treated as the wrong sender. Note that offset temporarily holds the
 *     sync-site address here and is reused as the file offset later.
 *  2. urecovery_AbortAll(dbase) aborts active transactions, the file is
 *     truncated to 0 and labelled with version 0.0 so that a partially
 *     transferred database is marked invalid.
 *  3. Data is read from the call in pieces of at most sizeof(tbuffer) bytes
 *     and written to the database at offset.
 *  4. The file is synced, labelled with avers, ubik_dbase->version is
 *     overwritten with avers, disk buffers are invalidated and
 *     LWP_NoYieldSignal(&dbase->version) is issued.
 *  5. Either a failure message carrying the error code or a completion
 *     message is printed.
 * The loop header, the failure branches, the assignment of dbase and the
 * return statement are not visible in this listing. */
447 SDISK_SendFile(rxcall, file, length, avers)
448 register struct rx_call *rxcall;
451 struct ubik_version *avers;
453 register afs_int32 code;
454 register struct ubik_dbase *dbase;
457 struct ubik_version tversion;
459 struct rx_peer *tpeer;
460 struct rx_connection *tconn;
461 afs_uint32 otherHost;
463 /* send the file back to the requester */
465 if (code = ubik_CheckAuth(rxcall)) {
469 /* next, we do a sanity check to see if the guy sending us the database is
470 * the guy we think is the sync site. It turns out that we might not have
471 * decided yet that someone's the sync site, but they could have enough
472 * votes from others to be sync site anyway, and could send us the database
473 * in advance of getting our votes. This is fine, what we're really trying
474 * to check is that some authenticated bogon isn't sending a random database
475 * into another configuration. This could happen on a bad configuration
476 * screwup. Thus, we only object if we're sure we know who the sync site
477 * is, and it ain't the guy talking to us.
479 offset = uvote_GetSyncSite();
480 tconn = rx_ConnectionOf(rxcall);
481 tpeer = rx_PeerOf(tconn);
482 otherHost = ubikGetPrimaryInterfaceAddr(rx_HostOf(tpeer));
483 if (offset && offset != otherHost ) {
484 /* we *know* this is the wrong guy */
492 /* abort any active trans that may scribble over the database */
493 urecovery_AbortAll(dbase);
495 ubik_print("Ubik: Synchronize database with server %s\n",
496 afs_inet_ntoa(otherHost));
499 (*dbase->truncate) (dbase, file, 0); /* truncate first */
500 tversion.epoch = 0; /* start off by labelling in-transit db as invalid */
501 tversion.counter = 0;
502 (*dbase->setlabel) (dbase, file, &tversion); /* setlabel does sync */
504 tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length);
505 code = rx_Read(rxcall, tbuffer, tlen);
511 code = (*dbase->write)(dbase, file, tbuffer, offset, tlen);
521 /* sync data first, then write label and resync (resync done by setlabel call).
522 This way, good label is only on good database. */
523 (*ubik_dbase->sync)(dbase, file);
524 code = (*ubik_dbase->setlabel)(dbase, file, avers);
525 memcpy(&ubik_dbase->version, avers, sizeof(struct ubik_version));
526 udisk_Invalidate(dbase, file); /* new dbase, flush disk buffers */
527 LWP_NoYieldSignal(&dbase->version);
531 ubik_print("Ubik: Synchronize database with server %s failed (error = %d)\n",
532 afs_inet_ntoa(otherHost), code);
534 ubik_print("Ubik: Synchronize database completed\n");
541 register struct rx_call *rxcall;
547 /* Update the remote machine's addresses in my server list.
548 * Send back my addresses to the caller of this RPC.
549 * Returns zero on success, else 1. */
/* SDISK_UpdateInterfaceAddr: exchange interface address lists with a peer.
 *
 * rxcall  - the incoming call (no use of it is visible in this listing).
 * inAddr  - the caller's addresses; each entry is converted with htonl()
 *           before being compared or stored. A zero entry ends the list.
 * outAddr - output; receives this server's ubik_host[] entries, each
 *           converted with ntohl().
 *
 * Visible sequence:
 *  1. Copy all UBIK_MAX_INTERFACE_ADDR entries of ubik_host[] to outAddr.
 *  2. Search ubik_servers for the entry whose addr[0] equals the caller's
 *     first address; ts is left pointing at it (how probableMatch is set is
 *     on lines not visible here).
 *  3. Check every incoming address against the addresses of all other server
 *     entries (the entry equal to ts is singled out as the caller's own);
 *     the search stops as soon as found becomes true.
 *  4. With no probable match, or with a clash found, print the
 *     "Inconsistent Cell Info" diagnostic listing the incoming addresses.
 *  5. Otherwise overwrite ts->addr[1..] with the incoming addresses (addr[0]
 *     is left as is) and print the resulting address list.
 * The statements that set found and probableMatch and the return statements
 * are not visible in this listing. */
551 SDISK_UpdateInterfaceAddr(rxcall, inAddr, outAddr)
552 register struct rx_call *rxcall;
553 UbikInterfaceAddr *inAddr, *outAddr;
555 struct ubik_server *ts, *tmp;
556 afs_uint32 remoteAddr; /* in net byte order */
557 int i, j, found=0, probableMatch=0;
559 /* copy the output parameters */
560 for ( i=0; i < UBIK_MAX_INTERFACE_ADDR; i++)
561 outAddr->hostAddr[i] = ntohl(ubik_host[i]);
563 remoteAddr = htonl(inAddr->hostAddr[0]);
564 for(ts = ubik_servers; ts; ts=ts->next)
565 if ( ts->addr[0] == remoteAddr ) /* both in net byte order */
573 /* verify that all addresses in the incoming RPC are
574 ** not part of other server entries in my CellServDB
576 for ( i=0; !found && (i<UBIK_MAX_INTERFACE_ADDR)
577 && inAddr->hostAddr[i]; i++)
579 remoteAddr = htonl(inAddr->hostAddr[i]);
580 for(tmp = ubik_servers; (!found && tmp); tmp=tmp->next)
582 if ( ts == tmp ) /* this is my server */
584 for ( j=0; (j<UBIK_MAX_INTERFACE_ADDR) && tmp->addr[j]; j++)
585 if ( remoteAddr == tmp->addr[j] )
592 } /* if (probableMatch) */
594 /* inconsistent addresses in CellServDB */
595 if ( !probableMatch || found )
597 ubik_print("Inconsistent Cell Info from server: ");
598 for ( i=0; i < UBIK_MAX_INTERFACE_ADDR && inAddr->hostAddr[i]; i++)
599 ubik_print("%s ", afs_inet_ntoa(htonl(inAddr->hostAddr[i])));
605 /* update our data structures */
606 for ( i=1; i < UBIK_MAX_INTERFACE_ADDR; i++)
607 ts->addr[i] = htonl(inAddr->hostAddr[i]);
609 ubik_print("ubik: A Remote Server has addresses: ");
610 for ( i=0; i < UBIK_MAX_INTERFACE_ADDR && ts->addr[i]; i++)
611 ubik_print("%s ", afs_inet_ntoa(ts->addr[i]));
/* Fragment of a routine that dumps the local server list; its header line is
 * not visible in this listing.
 *
 * Prints "Local CellServDB:" and then, for every entry on the ubik_servers
 * list, a "Server <j>: " label followed by each of that entry's addresses up
 * to the first zero entry (at most UBIK_MAX_INTERFACE_ADDR of them),
 * formatted with afs_inet_ntoa(). j is advanced once per server.
 * The declarations and starting values of i and j are not visible here. */
619 struct ubik_server *ts;
622 ubik_print("Local CellServDB:");
623 for ( ts=ubik_servers; ts; ts= ts->next, j++)
625 ubik_print("Server %d: ", j);
626 for ( i=0; (i<UBIK_MAX_INTERFACE_ADDR) && ts->addr[i]; i++)
627 ubik_print("%s ", afs_inet_ntoa(ts->addr[i]));
/* SDISK_SetVersion: relabel the local database with a new version, provided
 * the local version still matches the one the caller expects.
 *
 * rxcall      - the incoming call; checked with ubik_CheckAuth().
 * atid        - caller's transaction id; passed to urecovery_CheckTid().
 * oldversionp - version the caller expects this server to hold; compared
 *               (epoch and counter) with ubik_dbVersion.
 * newversionp - version to install when the comparison succeeds.
 *
 * Visible sequence: require a non-null ubik_currentTrans whose type is
 * UBIK_WRITETRANS, test ubeacon_AmSyncSite() (per the original comment this
 * request should not arrive at the sync site), run urecovery_CheckTid(atid)
 * and test ubik_currentTrans a second time. When the versions match, the
 * label is written with (*dbase->setlabel)(ubik_dbase, 0, newversionp) and
 * both ubik_dbase->version and ubik_dbVersion are set to *newversionp.
 * NOTE(review): any condition on the setlabel result guarding those two
 * assignments, the handling of a version mismatch, the declaration of code,
 * the failure branches and the return statement are not visible in this
 * listing -- confirm against the full file. */
632 SDISK_SetVersion(rxcall, atid, oldversionp, newversionp)
633 struct rx_call *rxcall;
634 struct ubik_tid *atid;
635 struct ubik_version *oldversionp;
636 struct ubik_version *newversionp;
639 struct ubik_dbase *dbase;
641 if (code = ubik_CheckAuth(rxcall)) {
645 if (!ubik_currentTrans) {
648 /* sanity check to make sure only write trans appear here */
649 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
653 /* Should not get this for the sync site */
654 if (ubeacon_AmSyncSite()) {
658 dbase = ubik_currentTrans->dbase;
660 urecovery_CheckTid(atid);
661 if (!ubik_currentTrans) {
666 /* Set the label if its version matches the sync-site's */
667 if ((oldversionp->epoch == ubik_dbVersion.epoch) &&
668 (oldversionp->counter == ubik_dbVersion.counter)) {
669 code = (*dbase->setlabel) (ubik_dbase, 0, newversionp);
671 ubik_dbase->version = *newversionp;
672 ubik_dbVersion = *newversionp;