2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afs/param.h>
11 #include <sys/types.h>
16 #include <netinet/in.h>
22 #define UBIK_INTERNALS
/* Optional security-check hook and the opaque argument ("rock") passed to it.
 * When the function pointer is non-null, the auth-check code in this file
 * invokes it as (*ubik_CheckRXSecurityProc)(ubik_CheckRXSecurityRock, acall).
 * Nothing in the visible part of this file assigns either variable;
 * presumably the application installs them -- TODO confirm. */
25 int (*ubik_CheckRXSecurityProc)();
26 char *ubik_CheckRXSecurityRock;
28 /* routines for handling requests remotely-submitted by the sync site. These are
29 only write transactions (we don't propagate read trans), and there is at most one
30 write transaction extant at any one time.
/* The remotely driven write transaction currently open on this server, or
 * null when there is none.  The SDISK_ handlers in this file test it before
 * acting; it is filled in by udisk_begin in SDISK_Begin and reset to null
 * in SDISK_Begin, SDISK_ReleaseLocks and SDISK_Abort. */
33 struct ubik_trans *ubik_currentTrans = 0;
/* Authorization check for an incoming Rx call.
 * NOTE(review): the line carrying the function name and the lines that
 * return a value are not present in this extract.  The SDISK_ handlers call
 * ubik_CheckAuth(rxcall) and test the result for non-zero, so this is
 * presumably that routine -- confirm against the full file.
 *   acall - the Rx call being checked; handed unchanged to the hook.
 * When a security hook is installed it is called with the registered rock
 * and the call, and its result is stored in code. */
37 register struct rx_call *acall; {
38 register afs_int32 code;
39 if (ubik_CheckRXSecurityProc) {
40 code = (*ubik_CheckRXSecurityProc)(ubik_CheckRXSecurityRock, acall);
46 /* the rest of these guys handle remote execution of write
47 * transactions: this is the code executed on the other servers when a
48 * sync site is executing a write transaction.
/* SDISK_Begin: open a write transaction on this server for the caller.
 *   rxcall - incoming Rx call; checked with ubik_CheckAuth first.
 *   atid   - transaction id supplied by the caller; its epoch and counter
 *            are copied onto the newly begun transaction.
 * A transaction still recorded in ubik_currentTrans is dropped before the
 * new one is started with udisk_begin(ubik_dbase, UBIK_WRITETRANS, ...).
 * NOTE(review): the return statements and closing braces are not present
 * in this extract, so the returned value is not described here. */
50 SDISK_Begin(rxcall, atid)
51 register struct rx_call *rxcall;
52 struct ubik_tid *atid;
54 register afs_int32 code;
56 if (code = ubik_CheckAuth(rxcall)) {
60 urecovery_CheckTid(atid);
61 if (ubik_currentTrans) {
62 /* If the thread is not waiting for lock - ok to end it */
63 if (ubik_currentTrans->locktype != LOCKWAIT) {
64 udisk_end(ubik_currentTrans);
/* forget the old transaction before beginning a new one */
66 ubik_currentTrans = (struct ubik_trans *) 0;
68 code = udisk_begin(ubik_dbase, UBIK_WRITETRANS, &ubik_currentTrans);
/* only label the transaction when udisk_begin succeeded and produced one */
69 if (!code && ubik_currentTrans) {
70 /* label this trans with the right trans id */
71 ubik_currentTrans->tid.epoch = atid->epoch;
72 ubik_currentTrans->tid.counter = atid->counter;
/* SDISK_Commit: commit the current remote write transaction.
 *   rxcall - incoming Rx call; checked with ubik_CheckAuth.
 *   atid   - transaction id from the caller; handed to urecovery_CheckTid.
 * Tests that ubik_currentTrans is set and is of type UBIK_WRITETRANS,
 * calls udisk_commit on it, and copies ubik_dbase->version into
 * ubik_dbVersion.
 * NOTE(review): the bodies of the guard ifs, any condition around the
 * version copy, and the return statements are missing from this extract. */
78 SDISK_Commit(rxcall, atid)
79 register struct rx_call *rxcall;
80 struct ubik_tid *atid;
82 register afs_int32 code;
83 register struct ubik_dbase *dbase;
85 if (code = ubik_CheckAuth(rxcall)) {
89 if (!ubik_currentTrans) {
93 * sanity check to make sure only write trans appear here
95 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
99 dbase = ubik_currentTrans->dbase;
101 urecovery_CheckTid(atid);
/* tested again because the pointer is re-read after urecovery_CheckTid */
102 if (!ubik_currentTrans) {
107 code = udisk_commit(ubik_currentTrans);
109 /* sync site should now match */
110 ubik_dbVersion = ubik_dbase->version;
/* SDISK_ReleaseLocks: finish with the current remote write transaction.
 *   rxcall - incoming Rx call; checked with ubik_CheckAuth.
 *   atid   - transaction id from the caller; handed to urecovery_CheckTid.
 * Tests that ubik_currentTrans is set and is of type UBIK_WRITETRANS.
 * The transaction is ended with udisk_end unless its locktype is LOCKWAIT,
 * and ubik_currentTrans is reset to null.
 * NOTE(review): the bodies of the guard ifs and the return statements are
 * missing from this extract. */
116 SDISK_ReleaseLocks(rxcall, atid)
117 register struct rx_call *rxcall;
118 struct ubik_tid *atid;
120 register struct ubik_dbase *dbase;
121 register afs_int32 code;
123 if (code = ubik_CheckAuth(rxcall)) {
127 if (!ubik_currentTrans) {
130 /* sanity check to make sure only write trans appear here */
131 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
135 dbase = ubik_currentTrans->dbase;
137 urecovery_CheckTid(atid);
/* tested again because the pointer is re-read after urecovery_CheckTid */
138 if (!ubik_currentTrans) {
143 /* If the thread is not waiting for lock - ok to end it */
144 if (ubik_currentTrans->locktype != LOCKWAIT) {
145 udisk_end(ubik_currentTrans);
147 ubik_currentTrans = (struct ubik_trans *) 0;
/* SDISK_Abort: abort the current remote write transaction.
 *   rxcall - incoming Rx call; checked with ubik_CheckAuth.
 *   atid   - transaction id from the caller; handed to urecovery_CheckTid.
 * Tests that ubik_currentTrans is set and is of type UBIK_WRITETRANS,
 * then calls udisk_abort on it and keeps the result in code.  The
 * transaction is ended with udisk_end unless its locktype is LOCKWAIT,
 * and ubik_currentTrans is reset to null.
 * NOTE(review): the bodies of the guard ifs and the return statements are
 * missing from this extract. */
152 SDISK_Abort(rxcall, atid)
153 register struct rx_call *rxcall;
154 struct ubik_tid *atid;
156 register afs_int32 code;
157 register struct ubik_dbase *dbase;
159 if (code = ubik_CheckAuth(rxcall)) {
163 if (!ubik_currentTrans) {
166 /* sanity check to make sure only write trans appear here */
167 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
171 dbase = ubik_currentTrans->dbase;
173 urecovery_CheckTid(atid);
/* tested again because the pointer is re-read after urecovery_CheckTid */
174 if (!ubik_currentTrans) {
179 code = udisk_abort(ubik_currentTrans);
180 /* If the thread is not waiting for lock - ok to end it */
181 if (ubik_currentTrans->locktype != LOCKWAIT) {
182 udisk_end(ubik_currentTrans);
184 ubik_currentTrans = (struct ubik_trans *) 0;
/* SDISK_Lock: take a lock of type atype for the current remote write
 * transaction.
 *   rxcall - incoming Rx call; checked with ubik_CheckAuth.
 *   atid   - transaction id from the caller; handed to urecovery_CheckTid.
 *   afile, apos, alen - apos and alen are documented as unused; afile is
 *            not referenced in the visible lines either.
 *   atype  - lock type passed to ulock_getLock.
 * Tests that ubik_currentTrans is set and is of type UBIK_WRITETRANS.
 * The transaction pointer is saved in ubik_thisTrans before calling
 * ulock_getLock(ubik_currentTrans, atype, 1); if the call succeeds but
 * ubik_currentTrans no longer equals the saved pointer, the saved
 * transaction is ended with udisk_end.
 * NOTE(review): the bodies of the guard ifs, the terminator of the last
 * comment, and the return statements are missing from this extract. */
189 SDISK_Lock(rxcall, atid, afile, apos, alen, atype)
190 register struct rx_call *rxcall;
191 struct ubik_tid *atid;
192 afs_int32 afile, apos, alen, atype; /* apos and alen are not used */
194 register afs_int32 code;
195 register struct ubik_dbase *dbase;
196 struct ubik_trans *ubik_thisTrans;
198 if (code = ubik_CheckAuth(rxcall)) {
201 if (!ubik_currentTrans) {
204 /* sanity check to make sure only write trans appear here */
205 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
211 dbase = ubik_currentTrans->dbase;
213 urecovery_CheckTid(atid);
214 if (!ubik_currentTrans) {
219 ubik_thisTrans = ubik_currentTrans;
220 code = ulock_getLock(ubik_currentTrans, atype, 1);
222 /* While waiting, the transaction may have been ended/
223 * aborted from under us (urecovery_CheckTid). In that
224 * case, end the transaction here.
226 if (!code && (ubik_currentTrans != ubik_thisTrans)) {
227 udisk_end(ubik_thisTrans);
235 /* Write a vector of data.
 *   rxcall    - incoming Rx call; checked with ubik_CheckAuth.
 *   atid      - transaction id from the caller; handed to
 *               urecovery_CheckTid.
 *   io_vector - iovec_wrt_len entries, viewed as struct ubik_iovec; each
 *               supplies file, position and length for one udisk_write.
 *   io_buffer - the bytes for all entries laid end to end; offset tracks
 *               where the next entry's data starts.
 * Tests that ubik_currentTrans is set and is of type UBIK_WRITETRANS.
 * NOTE(review): the declaration of iobuf, the bodies of the guard ifs, the
 * handling of a failed bounds check or failed write, and the return
 * statements are missing from this extract. */
236 SDISK_WriteV(rxcall, atid, io_vector, io_buffer)
237 register struct rx_call *rxcall;
238 struct ubik_tid *atid;
239 iovec_wrt *io_vector;
240 iovec_buf *io_buffer;
242 afs_int32 code, i, offset;
243 struct ubik_dbase *dbase;
244 struct ubik_iovec *iovec;
247 if (code = ubik_CheckAuth(rxcall)) {
250 if (!ubik_currentTrans) {
253 /* sanity check to make sure only write trans appear here */
254 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
258 dbase = ubik_currentTrans->dbase;
260 urecovery_CheckTid(atid);
/* tested again because the pointer is re-read after urecovery_CheckTid */
261 if (!ubik_currentTrans) {
266 iovec = (struct ubik_iovec *)io_vector->iovec_wrt_val;
267 iobuf = (char *)io_buffer->iovec_buf_val;
268 for (i=0, offset=0; i<io_vector->iovec_wrt_len; i++) {
269 /* Sanity check for going off end of buffer */
/* NOTE(review): offset is a signed afs_int32.  If length is also signed,
 * a negative or very large length could defeat this comparison; consider
 * rejecting length < 0 and checking against the remaining space instead
 * of the sum -- confirm the type of length first. */
270 if ((offset + iovec[i].length) > io_buffer->iovec_buf_len) {
273 code = udisk_write(ubik_currentTrans, iovec[i].file, &iobuf[offset],
274 iovec[i].position, iovec[i].length);
/* advance to the start of the next entry's data */
278 offset += iovec[i].length;
/* SDISK_Write: write one buffer into the current remote write transaction.
 *   rxcall - incoming Rx call; checked with ubik_CheckAuth.
 *   atid   - transaction id from the caller; handed to urecovery_CheckTid.
 *   afile  - file argument passed to udisk_write.
 *   apos   - position argument passed to udisk_write.
 *   adata  - bulkdata_val / bulkdata_len give the bytes and their count.
 * Tests that ubik_currentTrans is set and is of type UBIK_WRITETRANS.
 * NOTE(review): the bodies of the guard ifs and the return statements are
 * missing from this extract. */
285 SDISK_Write(rxcall, atid, afile, apos, adata)
286 register struct rx_call *rxcall;
287 struct ubik_tid *atid;
288 afs_int32 afile, apos;
289 register bulkdata *adata;
291 register afs_int32 code;
292 register struct ubik_dbase *dbase;
294 if (code = ubik_CheckAuth(rxcall)) {
297 if (!ubik_currentTrans) {
300 /* sanity check to make sure only write trans appear here */
301 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
305 dbase = ubik_currentTrans->dbase;
307 urecovery_CheckTid(atid);
/* tested again because the pointer is re-read after urecovery_CheckTid */
308 if (!ubik_currentTrans) {
312 code = udisk_write(ubik_currentTrans, afile, adata->bulkdata_val, apos, adata->bulkdata_len);
/* SDISK_Truncate: truncate a file within the current remote write
 * transaction.
 *   rxcall - incoming Rx call; checked with ubik_CheckAuth.
 *   atid   - transaction id from the caller; handed to urecovery_CheckTid.
 *   afile  - file argument passed to udisk_truncate.
 *   alen   - length argument passed to udisk_truncate.
 * Tests that ubik_currentTrans is set and is of type UBIK_WRITETRANS.
 * NOTE(review): the declarations of afile and alen, the bodies of the
 * guard ifs and the return statements are missing from this extract. */
317 SDISK_Truncate(rxcall, atid, afile, alen)
318 register struct rx_call *rxcall;
319 struct ubik_tid *atid;
323 register afs_int32 code;
324 register struct ubik_dbase *dbase;
326 if (code = ubik_CheckAuth(rxcall)) {
329 if (!ubik_currentTrans) {
332 /* sanity check to make sure only write trans appear here */
333 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
337 dbase = ubik_currentTrans->dbase;
339 urecovery_CheckTid(atid);
/* tested again because the pointer is re-read after urecovery_CheckTid */
340 if (!ubik_currentTrans) {
344 code = udisk_truncate(ubik_currentTrans, afile, alen);
/* SDISK_GetVersion: report this server's database version label.
 *   rxcall   - incoming Rx call; checked with ubik_CheckAuth.
 *   aversion - out: filled in through
 *              (*ubik_dbase->getlabel)(ubik_dbase, 0, aversion).
 * The ubeacon_AmSyncSite() test implements the rule spelled out in the
 * text preceding it: the request is refused while this server is the sync
 * site.  The closing lines set aversion->counter to 0 to tell the caller
 * there is no database.
 * NOTE(review): the opener and terminator of the explanatory comment, the
 * bodies of the ifs, the condition selecting the no-database path, and
 * the return statements are missing from this extract. */
349 SDISK_GetVersion(rxcall, aversion)
350 register struct rx_call *rxcall;
351 register struct ubik_version *aversion;
353 register afs_int32 code;
355 if (code = ubik_CheckAuth(rxcall)) {
360 * If we are the sync site, recovery shouldn't be running on any
361 * other site. We shouldn't be getting this RPC as long as we are
362 * the sync site. To prevent any unforseen activity, we should
363 * reject this RPC until we have recognized that we are not the
364 * sync site anymore, and/or if we have any pending WRITE
365 * transactions that have to complete. This way we can be assured
366 * that this RPC would not block any pending transactions that
367 * should either fail or pass. If we have recognized the fact that
368 * we are not the sync site any more, all write transactions would
369 * fail with UNOQUORUM anyway.
371 if (ubeacon_AmSyncSite()) {
376 code = (*ubik_dbase->getlabel) (ubik_dbase, 0, aversion);
379 /* tell other side there's no dbase */
381 aversion->counter = 0;
/* SDISK_GetFile: stream one database file to the caller over the Rx call.
 *   rxcall  - incoming Rx call; checked with ubik_CheckAuth and used as
 *             the output stream.
 *   file    - file argument passed to the dbase stat, read and getlabel
 *             operations.
 *   version - out: filled in by (*dbase->getlabel) after the transfer.
 * Visible sequence: stat the file, take ubikstat.size as length, send the
 * length first as a 32-bit value converted with htonl and check that
 * rx_Write wrote sizeof(afs_int32) bytes, then read pieces of at most
 * sizeof(tbuffer) bytes from the file and write each to the call, and
 * finally fetch the label.
 * The ubeacon_AmSyncSite() test follows a comment that says the check is
 * temporarily disabled; presumably it sits inside that comment in the full
 * file -- TODO confirm.
 * NOTE(review): the declarations of length, tlen and tbuffer, the
 * assignment of dbase, the loop construct, the updates of offset, the
 * error handling and the return statements are missing from this extract. */
386 SDISK_GetFile(rxcall, file, version)
387 register struct rx_call *rxcall;
388 register afs_int32 file;
389 struct ubik_version *version;
391 register afs_int32 code;
392 register struct ubik_dbase *dbase;
393 register afs_int32 offset;
394 struct ubik_stat ubikstat;
399 if (code = ubik_CheckAuth(rxcall)) {
402 /* temporarily disabled because it causes problems for migration tool. Hey, it's just
403 * a sanity check, anyway.
404 if (ubeacon_AmSyncSite()) {
410 code = (*dbase->stat) (dbase, file, &ubikstat);
415 length = ubikstat.size;
416 tlen = htonl(length);
417 code = rx_Write(rxcall, &tlen, sizeof(afs_int32));
418 if (code != sizeof(afs_int32)) {
424 tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length);
425 code = (*dbase->read)(dbase, file, tbuffer, offset, tlen);
430 code = rx_Write(rxcall, tbuffer, tlen);
438 code = (*dbase->getlabel)(dbase, file, version); /* return the dbase, too */
/* SDISK_SendFile: receive a whole database file from the caller and
 * install it locally.
 *   rxcall - incoming Rx call; checked with ubik_CheckAuth and used as
 *            the input stream.
 *   file   - file argument passed to the dbase operations.
 *   length - byte count; bounds each read to at most sizeof(tbuffer).
 *   avers  - version label written with setlabel once the data is synced,
 *            and copied into ubik_dbase->version.
 * Visible sequence: compare the caller's primary interface address with
 * uvote_GetSyncSite() (the result is held in offset for that comparison),
 * abort active transactions with urecovery_AbortAll, truncate the file and
 * label it with epoch 0 / counter 0, copy pieces from the call into the
 * file, sync, set the real label, invalidate disk buffers with
 * udisk_Invalidate, and signal &dbase->version with LWP_NoYieldSignal.
 * NOTE(review): the declarations of file, length, offset, tlen and
 * tbuffer, the assignment of dbase, the loop construct, the error paths
 * and the return statements are missing from this extract. */
443 SDISK_SendFile(rxcall, file, length, avers)
444 register struct rx_call *rxcall;
447 struct ubik_version *avers;
449 register afs_int32 code;
450 register struct ubik_dbase *dbase;
453 struct ubik_version tversion;
455 struct rx_peer *tpeer;
456 struct rx_connection *tconn;
457 afs_uint32 otherHost;
459 /* send the file back to the requester */
461 if (code = ubik_CheckAuth(rxcall)) {
465 /* next, we do a sanity check to see if the guy sending us the database is
466 * the guy we think is the sync site. It turns out that we might not have
467 * decided yet that someone's the sync site, but they could have enough
468 * votes from others to be sync site anyway, and could send us the database
469 * in advance of getting our votes. This is fine, what we're really trying
470 * to check is that some authenticated bogon isn't sending a random database
471 * into another configuration. This could happen on a bad configuration
472 * screwup. Thus, we only object if we're sure we know who the sync site
473 * is, and it ain't the guy talking to us.
475 offset = uvote_GetSyncSite();
476 tconn = rx_ConnectionOf(rxcall);
477 tpeer = rx_PeerOf(tconn);
478 otherHost = ubikGetPrimaryInterfaceAddr(rx_HostOf(tpeer));
479 if (offset && offset != otherHost ) {
480 /* we *know* this is the wrong guy */
488 /* abort any active trans that may scribble over the database */
489 urecovery_AbortAll(dbase);
491 ubik_print("Ubik: Synchronize database with server %s\n",
492 afs_inet_ntoa(otherHost));
/* NOTE(review): the results of truncate and of the first setlabel are not
 * captured on the visible lines. */
495 (*dbase->truncate) (dbase, file, 0); /* truncate first */
496 tversion.epoch = 0; /* start off by labelling in-transit db as invalid */
497 tversion.counter = 0;
498 (*dbase->setlabel) (dbase, file, &tversion); /* setlabel does sync */
500 tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length);
501 code = rx_Read(rxcall, tbuffer, tlen);
507 code = (*dbase->write)(dbase, file, tbuffer, offset, tlen);
517 /* sync data first, then write label and resync (resync done by setlabel call).
518 This way, good label is only on good database. */
519 (*ubik_dbase->sync)(dbase, file);
520 code = (*ubik_dbase->setlabel)(dbase, file, avers);
/* NOTE(review): bcopy is a legacy BSD call; memcpy is the standard
 * equivalent for non-overlapping objects (note the reversed argument
 * order). */
521 bcopy(avers, &ubik_dbase->version, sizeof(struct ubik_version));
522 udisk_Invalidate(dbase, file); /* new dbase, flush disk buffers */
523 LWP_NoYieldSignal(&dbase->version);
527 ubik_print("Ubik: Synchronize database with server %s failed (error = %d)\n",
528 afs_inet_ntoa(otherHost), code);
530 ubik_print("Ubik: Synchronize database completed\n");
537 register struct rx_call *rxcall;
543 * Update remote machines addresses in my server list
544 * Send back my addresses to caller of this RPC
545 * Returns zero on success, else 1.
/* SDISK_UpdateInterfaceAddr: exchange interface address lists with a
 * remote server.
 *   rxcall  - incoming Rx call (not referenced in the visible lines).
 *   inAddr  - the caller's addresses; each entry is passed through htonl
 *             before being compared with or stored in ts->addr[].
 *   outAddr - out: filled with ntohl(ubik_host[i]) for every slot.
 * Visible sequence: copy our own addresses to outAddr, look for the
 * ubik_servers entry whose addr[0] equals the caller's first address,
 * scan the remaining entries for any of the caller's addresses, print
 * "Inconsistent Cell Info" when no entry matched or a clash was found,
 * otherwise overwrite the matched entry's secondary addresses and print
 * them.
 * The text preceding this routine states that it returns zero on success
 * and 1 otherwise.
 * NOTE(review): the statements that set probableMatch and found, the
 * braces and the return statements are missing from this extract. */
547 SDISK_UpdateInterfaceAddr(rxcall, inAddr, outAddr)
548 register struct rx_call *rxcall;
549 UbikInterfaceAddr *inAddr, *outAddr;
551 struct ubik_server *ts, *tmp;
552 afs_uint32 remoteAddr; /* in net byte order */
553 int i, j, found=0, probableMatch=0;
555 /* copy the output parameters */
556 for ( i=0; i < UBIK_MAX_INTERFACE_ADDR; i++)
557 outAddr->hostAddr[i] = ntohl(ubik_host[i]);
559 remoteAddr = htonl(inAddr->hostAddr[0]);
560 for(ts = ubik_servers; ts; ts=ts->next)
561 if ( ts->addr[0] == remoteAddr ) /* both in net byte order */
569 /* verify that all addresses in the incoming RPC are
570 ** not part of other server entries in my CellServDB
572 for ( i=0; !found && (i<UBIK_MAX_INTERFACE_ADDR)
573 && inAddr->hostAddr[i]; i++)
575 remoteAddr = htonl(inAddr->hostAddr[i]);
576 for(tmp = ubik_servers; (!found && tmp); tmp=tmp->next)
578 if ( ts == tmp ) /* this is my server */
580 for ( j=0; (j<UBIK_MAX_INTERFACE_ADDR) && tmp->addr[j]; j++)
581 if ( remoteAddr == tmp->addr[j] )
588 } /* if (probableMatch) */
590 /* inconsistent addresses in CellServDB */
591 if ( !probableMatch || found )
593 ubik_print("Inconsistent Cell Info from server: ");
594 for ( i=0; i < UBIK_MAX_INTERFACE_ADDR && inAddr->hostAddr[i]; i++)
595 ubik_print("%s ", afs_inet_ntoa(htonl(inAddr->hostAddr[i])));
601 /* update our data structures */
/* slot 0 is left alone; only slots 1 and up are overwritten */
602 for ( i=1; i < UBIK_MAX_INTERFACE_ADDR; i++)
603 ts->addr[i] = htonl(inAddr->hostAddr[i]);
605 ubik_print("ubik: A Remote Server has addresses: ");
606 for ( i=0; i < UBIK_MAX_INTERFACE_ADDR && ts->addr[i]; i++)
607 ubik_print("%s ", afs_inet_ntoa(ts->addr[i]));
/* Prints the local server list: the heading "Local CellServDB:", then for
 * each entry of ubik_servers a "Server <j>: " prefix followed by every
 * non-zero address in ts->addr[], formatted with afs_inet_ntoa.
 * NOTE(review): the function header, the declarations and initial values
 * of i and j, the braces and any line-ending output are missing from this
 * extract -- confirm the routine's name and signature in the full file. */
615 struct ubik_server *ts;
618 ubik_print("Local CellServDB:");
619 for ( ts=ubik_servers; ts; ts= ts->next, j++)
621 ubik_print("Server %d: ", j);
622 for ( i=0; (i<UBIK_MAX_INTERFACE_ADDR) && ts->addr[i]; i++)
623 ubik_print("%s ", afs_inet_ntoa(ts->addr[i]));
/* SDISK_SetVersion: relabel the local database with a new version.
 *   rxcall      - incoming Rx call; checked with ubik_CheckAuth.
 *   atid        - transaction id from the caller; handed to
 *                 urecovery_CheckTid.
 *   oldversionp - version the caller expects us to hold; compared field by
 *                 field with ubik_dbVersion.
 *   newversionp - version written with setlabel and then copied into
 *                 ubik_dbase->version and ubik_dbVersion.
 * Tests that ubik_currentTrans is set and is of type UBIK_WRITETRANS, and
 * calls ubeacon_AmSyncSite() because the sync site should not receive
 * this request.
 * NOTE(review): the declaration of code, the bodies of the guard ifs, any
 * condition around the two version assignments, the mismatch path and the
 * return statements are missing from this extract. */
628 SDISK_SetVersion(rxcall, atid, oldversionp, newversionp)
629 struct rx_call *rxcall;
630 struct ubik_tid *atid;
631 struct ubik_version *oldversionp;
632 struct ubik_version *newversionp;
635 struct ubik_dbase *dbase;
637 if (code = ubik_CheckAuth(rxcall)) {
641 if (!ubik_currentTrans) {
644 /* sanity check to make sure only write trans appear here */
645 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
649 /* Should not get this for the sync site */
650 if (ubeacon_AmSyncSite()) {
654 dbase = ubik_currentTrans->dbase;
656 urecovery_CheckTid(atid);
/* tested again because the pointer is re-read after urecovery_CheckTid */
657 if (!ubik_currentTrans) {
662 /* Set the label if its version matches the sync-site's */
663 if ((oldversionp->epoch == ubik_dbVersion.epoch) &&
664 (oldversionp->counter == ubik_dbVersion.counter)) {
/* NOTE(review): the method is taken from dbase but is handed ubik_dbase;
 * presumably both name the same database here -- confirm. */
665 code = (*dbase->setlabel) (ubik_dbase, 0, newversionp);
667 ubik_dbase->version = *newversionp;
668 ubik_dbVersion = *newversionp;