2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include <afs/param.h>
16 #include <sys/types.h>
21 #include <netinet/in.h>
34 #define UBIK_INTERNALS
/*
 * Optional security hook.  When ubik_CheckRXSecurityProc is non-null it is
 * called with ubik_CheckRXSecurityRock and the incoming rx call, and its
 * result is stored as the authorization code (see the call in this file).
 */
37 int (*ubik_CheckRXSecurityProc) ();
38 char *ubik_CheckRXSecurityRock;
/* Forward declaration only; presumably defined later in this file -- TODO confirm. */
39 void printServerInfo();
41 /* routines for handling requests remotely-submitted by the sync site. These are
42 only write transactions (we don't propagate read trans), and there is at most one
43 write transaction extant at any one time.
/*
 * The remotely-driven write transaction currently recorded on this server,
 * or null when there is none.  The SDISK_* handlers in this file test,
 * use and clear this pointer.
 */
46 struct ubik_trans *ubik_currentTrans = 0;
/*
 * Authorization check for an incoming rx call.
 * NOTE(review): the line naming this routine is not visible in this
 * excerpt; it is presumably ubik_CheckAuth(acall), which the SDISK_*
 * handlers call -- confirm.
 *
 * acall: the incoming rx call, passed through to the security hook.
 *
 * When a security hook is installed its result is stored in code.  What is
 * returned when no hook is installed is not visible in this excerpt.
 */
50 register struct rx_call *acall;
52 register afs_int32 code;
53 if (ubik_CheckRXSecurityProc) {
54 code = (*ubik_CheckRXSecurityProc) (ubik_CheckRXSecurityRock, acall);
60 /* the rest of these guys handle remote execution of write
61 * transactions: this is the code executed on the other servers when a
62 * sync site is executing a write transaction.
/*
 * SDISK_Begin -- start the remote write transaction identified by atid.
 *
 * rxcall: incoming rx call; checked with ubik_CheckAuth before anything else.
 * atid:   transaction id supplied by the caller; its epoch and counter are
 *         copied onto the newly created transaction.
 *
 * Any transaction still recorded in ubik_currentTrans is disposed of first,
 * then udisk_begin creates a new UBIK_WRITETRANS transaction on ubik_dbase.
 * The failure paths and the final return are not visible in this excerpt.
 */
65 SDISK_Begin(rxcall, atid)
66 register struct rx_call *rxcall;
67 struct ubik_tid *atid;
69 register afs_int32 code;
71 if ((code = ubik_CheckAuth(rxcall))) {
/* urecovery_CheckTid may change ubik_currentTrans; it is tested right after. */
75 urecovery_CheckTid(atid);
76 if (ubik_currentTrans) {
77 /* If the thread is not waiting for lock - ok to end it */
/*
 * Without UBIK_PAUSE the old transaction is ended only when its locktype
 * is not LOCKWAIT; with UBIK_PAUSE defined the guard is compiled out and
 * udisk_end is always called.
 */
78 #if !defined(UBIK_PAUSE)
79 if (ubik_currentTrans->locktype != LOCKWAIT) {
80 #endif /* UBIK_PAUSE */
81 udisk_end(ubik_currentTrans);
82 #if !defined(UBIK_PAUSE)
84 #endif /* UBIK_PAUSE */
85 ubik_currentTrans = (struct ubik_trans *)0;
/* udisk_begin stores the new transaction in ubik_currentTrans. */
87 code = udisk_begin(ubik_dbase, UBIK_WRITETRANS, &ubik_currentTrans);
88 if (!code && ubik_currentTrans) {
89 /* label this trans with the right trans id */
90 ubik_currentTrans->tid.epoch = atid->epoch;
91 ubik_currentTrans->tid.counter = atid->counter;
/*
 * SDISK_Commit -- commit the current remote write transaction.
 *
 * rxcall: incoming rx call; checked with ubik_CheckAuth first.
 * atid:   transaction id, handed to urecovery_CheckTid.
 *
 * Requires ubik_currentTrans to be set and of type UBIK_WRITETRANS.  The
 * pointer is re-tested after urecovery_CheckTid, then udisk_commit is
 * called and ubik_dbVersion is refreshed from ubik_dbase->version.  The
 * failure paths, any condition guarding the version refresh, and the final
 * return are not visible in this excerpt.
 */
99 SDISK_Commit(rxcall, atid)
100 register struct rx_call *rxcall;
101 struct ubik_tid *atid;
103 register afs_int32 code;
104 register struct ubik_dbase *dbase;
106 if ((code = ubik_CheckAuth(rxcall))) {
110 if (!ubik_currentTrans) {
114 * sanity check to make sure only write trans appear here
116 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
120 dbase = ubik_currentTrans->dbase;
/* The pointer is re-tested below because this call may clear it. */
122 urecovery_CheckTid(atid);
123 if (!ubik_currentTrans) {
128 code = udisk_commit(ubik_currentTrans);
130 /* sync site should now match */
131 ubik_dbVersion = ubik_dbase->version;
/*
 * SDISK_ReleaseLocks -- finish with the current remote write transaction.
 *
 * rxcall: incoming rx call; checked with ubik_CheckAuth first.
 * atid:   transaction id, handed to urecovery_CheckTid.
 *
 * Requires ubik_currentTrans to be set and of type UBIK_WRITETRANS.  After
 * urecovery_CheckTid the pointer is re-tested, the transaction is ended
 * with udisk_end and ubik_currentTrans is cleared.  The failure paths and
 * the final return are not visible in this excerpt.
 */
138 SDISK_ReleaseLocks(rxcall, atid)
139 register struct rx_call *rxcall;
140 struct ubik_tid *atid;
142 register struct ubik_dbase *dbase;
143 register afs_int32 code;
145 if ((code = ubik_CheckAuth(rxcall))) {
149 if (!ubik_currentTrans) {
152 /* sanity check to make sure only write trans appear here */
153 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
157 dbase = ubik_currentTrans->dbase;
/* The pointer is re-tested below because this call may clear it. */
159 urecovery_CheckTid(atid);
160 if (!ubik_currentTrans) {
165 /* If the thread is not waiting for lock - ok to end it */
/*
 * Without UBIK_PAUSE, udisk_end is skipped while the transaction's
 * locktype is LOCKWAIT; with UBIK_PAUSE defined it is always called.
 */
166 #if !defined(UBIK_PAUSE)
167 if (ubik_currentTrans->locktype != LOCKWAIT) {
168 #endif /* UBIK_PAUSE */
169 udisk_end(ubik_currentTrans);
170 #if !defined(UBIK_PAUSE)
172 #endif /* UBIK_PAUSE */
173 ubik_currentTrans = (struct ubik_trans *)0;
/*
 * SDISK_Abort -- abort the current remote write transaction.
 *
 * rxcall: incoming rx call; checked with ubik_CheckAuth first.
 * atid:   transaction id, handed to urecovery_CheckTid.
 *
 * Requires ubik_currentTrans to be set and of type UBIK_WRITETRANS.  After
 * urecovery_CheckTid the pointer is re-tested, udisk_abort is called, the
 * transaction is ended with udisk_end and ubik_currentTrans is cleared.
 * The failure paths and the final return are not visible in this excerpt.
 */
179 SDISK_Abort(rxcall, atid)
180 register struct rx_call *rxcall;
181 struct ubik_tid *atid;
183 register afs_int32 code;
184 register struct ubik_dbase *dbase;
186 if ((code = ubik_CheckAuth(rxcall))) {
190 if (!ubik_currentTrans) {
193 /* sanity check to make sure only write trans appear here */
194 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
198 dbase = ubik_currentTrans->dbase;
/* The pointer is re-tested below because this call may clear it. */
200 urecovery_CheckTid(atid);
201 if (!ubik_currentTrans) {
/* code keeps the result of the abort itself. */
206 code = udisk_abort(ubik_currentTrans);
207 /* If the thread is not waiting for lock - ok to end it */
/*
 * Without UBIK_PAUSE, udisk_end is skipped while the transaction's
 * locktype is LOCKWAIT; with UBIK_PAUSE defined it is always called.
 */
208 #if !defined(UBIK_PAUSE)
209 if (ubik_currentTrans->locktype != LOCKWAIT) {
210 #endif /* UBIK_PAUSE */
211 udisk_end(ubik_currentTrans);
212 #if !defined(UBIK_PAUSE)
214 #endif /* UBIK_PAUSE */
215 ubik_currentTrans = (struct ubik_trans *)0;
/*
 * SDISK_Lock -- obtain a lock of type atype for the current remote write
 * transaction.
 *
 * rxcall: incoming rx call; checked with ubik_CheckAuth first.
 * atid:   transaction id, handed to urecovery_CheckTid.
 * afile:  how this is used is not visible in this excerpt.
 * apos, alen: not used (per the declaration comment).
 * atype:  lock type passed to ulock_getLock.
 *
 * Requires ubik_currentTrans to be set and of type UBIK_WRITETRANS.  The
 * failure paths and the final return are not visible in this excerpt.
 */
221 SDISK_Lock(rxcall, atid, afile, apos, alen, atype)
222 register struct rx_call *rxcall;
223 struct ubik_tid *atid;
224 afs_int32 afile, apos, alen, atype; /* apos and alen are not used */
226 register afs_int32 code;
227 register struct ubik_dbase *dbase;
228 struct ubik_trans *ubik_thisTrans;
230 if ((code = ubik_CheckAuth(rxcall))) {
233 if (!ubik_currentTrans) {
236 /* sanity check to make sure only write trans appear here */
237 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
243 dbase = ubik_currentTrans->dbase;
/* The pointer is re-tested below because this call may clear it. */
245 urecovery_CheckTid(atid);
246 if (!ubik_currentTrans) {
/*
 * Remember which transaction is being locked so that a change of
 * ubik_currentTrans during ulock_getLock can be detected afterwards; in
 * that case the remembered transaction is ended here.
 */
251 ubik_thisTrans = ubik_currentTrans;
252 code = ulock_getLock(ubik_currentTrans, atype, 1);
254 /* While waiting, the transaction may have been ended/
255 * aborted from under us (urecovery_CheckTid). In that
256 * case, end the transaction here.
258 if (!code && (ubik_currentTrans != ubik_thisTrans)) {
259 udisk_end(ubik_thisTrans);
267 /* Write a vector of data */
/*
 * SDISK_WriteV -- apply a vector of writes to the current remote write
 * transaction.
 *
 * rxcall:    incoming rx call; checked with ubik_CheckAuth first.
 * atid:      transaction id, handed to urecovery_CheckTid.
 * io_vector: iovec_wrt_len entries, each giving a file, position and length.
 * io_buffer: the data for all entries, consumed front to back; offset
 *            tracks how much has been used so far.
 *
 * Requires ubik_currentTrans to be set and of type UBIK_WRITETRANS.  The
 * declaration of iobuf, the failure paths and the final return are not
 * visible in this excerpt.
 */
269 SDISK_WriteV(rxcall, atid, io_vector, io_buffer)
270 register struct rx_call *rxcall;
271 struct ubik_tid *atid;
272 iovec_wrt *io_vector;
273 iovec_buf *io_buffer;
275 afs_int32 code, i, offset;
276 struct ubik_dbase *dbase;
277 struct ubik_iovec *iovec;
280 if ((code = ubik_CheckAuth(rxcall))) {
283 if (!ubik_currentTrans) {
286 /* sanity check to make sure only write trans appear here */
287 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
291 dbase = ubik_currentTrans->dbase;
/* The pointer is re-tested below because this call may clear it. */
293 urecovery_CheckTid(atid);
294 if (!ubik_currentTrans) {
299 iovec = (struct ubik_iovec *)io_vector->iovec_wrt_val;
300 iobuf = (char *)io_buffer->iovec_buf_val;
301 for (i = 0, offset = 0; i < io_vector->iovec_wrt_len; i++) {
302 /* Sanity check for going off end of buffer */
/*
 * NOTE(review): offset is a signed afs_int32 and iovec[i].length comes
 * from the caller.  If that field is signed, a negative or very large
 * value could get past this comparison -- confirm the field type and
 * consider rejecting negative lengths explicitly.
 */
303 if ((offset + iovec[i].length) > io_buffer->iovec_buf_len) {
/* Entry i's data starts at iobuf[offset]. */
307 udisk_write(ubik_currentTrans, iovec[i].file, &iobuf[offset],
308 iovec[i].position, iovec[i].length);
313 offset += iovec[i].length;
/*
 * SDISK_Write -- write one block of data in the current remote write
 * transaction.
 *
 * rxcall: incoming rx call; checked with ubik_CheckAuth first.
 * atid:   transaction id, handed to urecovery_CheckTid.
 * afile:  file argument passed to udisk_write.
 * apos:   position argument passed to udisk_write.
 * adata:  bulkdata_val / bulkdata_len give the bytes and their count.
 *
 * Requires ubik_currentTrans to be set and of type UBIK_WRITETRANS.  The
 * failure paths and the final return are not visible in this excerpt.
 */
321 SDISK_Write(rxcall, atid, afile, apos, adata)
322 register struct rx_call *rxcall;
323 struct ubik_tid *atid;
324 afs_int32 afile, apos;
325 register bulkdata *adata;
327 register afs_int32 code;
328 register struct ubik_dbase *dbase;
330 if ((code = ubik_CheckAuth(rxcall))) {
333 if (!ubik_currentTrans) {
336 /* sanity check to make sure only write trans appear here */
337 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
341 dbase = ubik_currentTrans->dbase;
/* The pointer is re-tested below because this call may clear it. */
343 urecovery_CheckTid(atid);
344 if (!ubik_currentTrans) {
349 udisk_write(ubik_currentTrans, afile, adata->bulkdata_val, apos,
350 adata->bulkdata_len);
/*
 * SDISK_Truncate -- truncate a file in the current remote write
 * transaction.
 *
 * rxcall: incoming rx call; checked with ubik_CheckAuth first.
 * atid:   transaction id, handed to urecovery_CheckTid.
 * afile, alen: passed straight to udisk_truncate (their declarations are
 *         not visible in this excerpt).
 *
 * Requires ubik_currentTrans to be set and of type UBIK_WRITETRANS.  The
 * failure paths and the final return are not visible in this excerpt.
 */
356 SDISK_Truncate(rxcall, atid, afile, alen)
357 register struct rx_call *rxcall;
358 struct ubik_tid *atid;
362 register afs_int32 code;
363 register struct ubik_dbase *dbase;
365 if ((code = ubik_CheckAuth(rxcall))) {
368 if (!ubik_currentTrans) {
371 /* sanity check to make sure only write trans appear here */
372 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
376 dbase = ubik_currentTrans->dbase;
/* The pointer is re-tested below because this call may clear it. */
378 urecovery_CheckTid(atid);
379 if (!ubik_currentTrans) {
383 code = udisk_truncate(ubik_currentTrans, afile, alen);
/*
 * SDISK_GetVersion -- report this server's database version label.
 *
 * rxcall:   incoming rx call; checked with ubik_CheckAuth first.
 * aversion: output; filled in by the database's getlabel method.
 *
 * The call is treated specially when ubeacon_AmSyncSite() is true (the
 * existing comment says it should be rejected; the statement itself is not
 * visible in this excerpt).  The counter of aversion is zeroed to signal
 * "no database"; the condition guarding that, and the final return, are
 * not visible in this excerpt.
 */
389 SDISK_GetVersion(rxcall, aversion)
390 register struct rx_call *rxcall;
391 register struct ubik_version *aversion;
393 register afs_int32 code;
395 if ((code = ubik_CheckAuth(rxcall))) {
400 * If we are the sync site, recovery shouldn't be running on any
401 * other site. We shouldn't be getting this RPC as long as we are
402 * the sync site. To prevent any unforseen activity, we should
403 * reject this RPC until we have recognized that we are not the
404 * sync site anymore, and/or if we have any pending WRITE
405 * transactions that have to complete. This way we can be assured
406 * that this RPC would not block any pending transactions that
407 * should either fail or pass. If we have recognized the fact that
408 * we are not the sync site any more, all write transactions would
409 * fail with UNOQUORUM anyway.
411 if (ubeacon_AmSyncSite()) {
/* Read the label of file 0 of ubik_dbase into the caller's structure. */
416 code = (*ubik_dbase->getlabel) (ubik_dbase, 0, aversion);
419 /* tell other side there's no dbase */
421 aversion->counter = 0;
/*
 * SDISK_GetFile -- stream a database file to the caller over rxcall.
 *
 * rxcall:  incoming rx call; checked with ubik_CheckAuth first, then used
 *          as the output stream.
 * file:    file number passed to the database's stat, read and getlabel
 *          methods.
 * version: output; receives the file's label once the data has been sent.
 *
 * Wire format as visible here: the file size (from the stat method) is sent
 * first as a 32-bit value converted with htonl, followed by the file
 * contents in pieces of at most sizeof(tbuffer) bytes.  The declarations
 * of length, tlen and tbuffer, the assignment of dbase, the loop header,
 * the failure paths and the final return are not visible in this excerpt.
 */
427 SDISK_GetFile(rxcall, file, version)
428 register struct rx_call *rxcall;
429 register afs_int32 file;
430 struct ubik_version *version;
432 register afs_int32 code;
433 register struct ubik_dbase *dbase;
434 register afs_int32 offset;
435 struct ubik_stat ubikstat;
440 if ((code = ubik_CheckAuth(rxcall))) {
443 /* temporarily disabled because it causes problems for migration tool. Hey, it's just
444 * a sanity check, anyway.
445 if (ubeacon_AmSyncSite()) {
451 code = (*dbase->stat) (dbase, file, &ubikstat);
456 length = ubikstat.size;
457 tlen = htonl(length);
458 code = rx_Write(rxcall, &tlen, sizeof(afs_int32));
459 if (code != sizeof(afs_int32)) {
465 tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length);
466 code = (*dbase->read) (dbase, file, tbuffer, offset, tlen);
471 code = rx_Write(rxcall, tbuffer, tlen);
479 code = (*dbase->getlabel) (dbase, file, version); /* return the dbase, too */
/*
 * SDISK_SendFile -- receive a complete database file from the caller and
 * install it locally.
 *
 * rxcall: incoming rx call; checked with ubik_CheckAuth first, then used
 *         as the input stream.
 * file:   file number passed to the database methods.
 * length: used to size each read (its declaration, and how it is updated
 *         between reads, are not visible in this excerpt).
 * avers:  version label written to the file once all data has arrived.
 *
 * Sequence as visible here: verify the sender against the known sync site,
 * abort active transactions, truncate the file and label it 0.0 while the
 * transfer is in flight, copy the data in pieces of at most
 * sizeof(tbuffer) bytes, sync, then write the real label.  The
 * declarations of tbuffer, tlen and offset, the assignment of dbase, the
 * loop header, the failure paths and the final return are not visible in
 * this excerpt.
 */
485 SDISK_SendFile(rxcall, file, length, avers)
486 register struct rx_call *rxcall;
489 struct ubik_version *avers;
491 register afs_int32 code;
492 register struct ubik_dbase *dbase;
495 struct ubik_version tversion;
497 struct rx_peer *tpeer;
498 struct rx_connection *tconn;
499 afs_uint32 otherHost;
501 /* send the file back to the requester */
503 if ((code = ubik_CheckAuth(rxcall))) {
507 /* next, we do a sanity check to see if the guy sending us the database is
508 * the guy we think is the sync site. It turns out that we might not have
509 * decided yet that someone's the sync site, but they could have enough
510 * votes from others to be sync site anyway, and could send us the database
511 * in advance of getting our votes. This is fine, what we're really trying
512 * to check is that some authenticated bogon isn't sending a random database
513 * into another configuration. This could happen on a bad configuration
514 * screwup. Thus, we only object if we're sure we know who the sync site
515 * is, and it ain't the guy talking to us.
517 offset = uvote_GetSyncSite();
518 tconn = rx_ConnectionOf(rxcall);
519 tpeer = rx_PeerOf(tconn);
520 otherHost = ubikGetPrimaryInterfaceAddr(rx_HostOf(tpeer));
521 if (offset && offset != otherHost) {
522 /* we *know* this is the wrong guy */
530 /* abort any active trans that may scribble over the database */
531 urecovery_AbortAll(dbase);
533 ubik_print("Ubik: Synchronize database with server %s\n",
534 afs_inet_ntoa(otherHost));
537 (*dbase->truncate) (dbase, file, 0); /* truncate first */
538 tversion.epoch = 0; /* start off by labelling in-transit db as invalid */
539 tversion.counter = 0;
540 (*dbase->setlabel) (dbase, file, &tversion);/* setlabel does sync */
/* Keep the in-memory version in step with the 0.0 label just written. */
541 memcpy(&ubik_dbase->version, &tversion, sizeof(struct ubik_version));
/* Read the next piece from the caller and write it at offset. */
543 tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length);
544 code = rx_Read(rxcall, tbuffer, tlen);
550 code = (*dbase->write) (dbase, file, tbuffer, offset, tlen);
560 /* sync data first, then write label and resync (resync done by setlabel call).
561 * This way, good label is only on good database. */
562 (*ubik_dbase->sync) (dbase, file);
563 code = (*ubik_dbase->setlabel) (dbase, file, avers);
564 memcpy(&ubik_dbase->version, avers, sizeof(struct ubik_version));
565 udisk_Invalidate(dbase, file); /* new dbase, flush disk buffers */
/* Signal on the address of the version field. */
566 LWP_NoYieldSignal(&dbase->version);
/* Report the outcome; the test choosing between the two messages is not visible here. */
571 ("Ubik: Synchronize database with server %s failed (error = %d)\n",
572 afs_inet_ntoa(otherHost), code);
574 ubik_print("Ubik: Synchronize database completed\n");
582 register struct rx_call *rxcall;
588 * Update the remote machine's addresses in my server list
589 * Send back my addresses to caller of this RPC
590 * Returns zero on success, else 1.
/*
 * SDISK_UpdateInterfaceAddr -- exchange interface address lists with a
 * remote server.
 *
 * rxcall:  incoming rx call.
 * inAddr:  the caller's addresses; converted with htonl before being
 *          compared with, or stored into, the local server list.
 * outAddr: output; receives this server's ubik_host[] entries converted
 *          with ntohl.
 *
 * Steps as visible here: fill outAddr; look for the ubik_servers entry
 * whose addr[0] equals the caller's first address; check that none of the
 * caller's addresses belong to a different entry; then either report
 * "Inconsistent Cell Info" or overwrite addr[1..] of the matched entry.
 * The statements that set probableMatch and found, and the return
 * statements, are not visible in this excerpt.
 *
 * NOTE(review): no ubik_CheckAuth call is visible in this handler, unlike
 * the other SDISK_* handlers in this file -- confirm this is intentional.
 */
593 SDISK_UpdateInterfaceAddr(rxcall, inAddr, outAddr)
594 register struct rx_call *rxcall;
595 UbikInterfaceAddr *inAddr, *outAddr;
597 struct ubik_server *ts, *tmp;
598 afs_uint32 remoteAddr; /* in net byte order */
599 int i, j, found = 0, probableMatch = 0;
601 /* copy the output parameters */
602 for (i = 0; i < UBIK_MAX_INTERFACE_ADDR; i++)
603 outAddr->hostAddr[i] = ntohl(ubik_host[i]);
/* Locate the server entry by the caller's first address. */
605 remoteAddr = htonl(inAddr->hostAddr[0]);
606 for (ts = ubik_servers; ts; ts = ts->next)
607 if (ts->addr[0] == remoteAddr) { /* both in net byte order */
613 /* verify that all addresses in the incoming RPC are
614 ** not part of other server entries in my CellServDB
616 for (i = 0; !found && (i < UBIK_MAX_INTERFACE_ADDR)
617 && inAddr->hostAddr[i]; i++) {
618 remoteAddr = htonl(inAddr->hostAddr[i]);
619 for (tmp = ubik_servers; (!found && tmp); tmp = tmp->next) {
620 if (ts == tmp) /* this is my server */
622 for (j = 0; (j < UBIK_MAX_INTERFACE_ADDR) && tmp->addr[j];
624 if (remoteAddr == tmp->addr[j]) {
632 /* if (probableMatch) */
633 /* inconsistent addresses in CellServDB */
634 if (!probableMatch || found) {
635 ubik_print("Inconsistent Cell Info from server: ");
636 for (i = 0; i < UBIK_MAX_INTERFACE_ADDR && inAddr->hostAddr[i]; i++)
637 ubik_print("%s ", afs_inet_ntoa(htonl(inAddr->hostAddr[i])));
645 /* update our data structures */
/* Starts at index 1: addr[0] is the address the entry was matched on. */
646 for (i = 1; i < UBIK_MAX_INTERFACE_ADDR; i++)
647 ts->addr[i] = htonl(inAddr->hostAddr[i]);
649 ubik_print("ubik: A Remote Server has addresses: ");
650 for (i = 0; i < UBIK_MAX_INTERFACE_ADDR && ts->addr[i]; i++)
651 ubik_print("%s ", afs_inet_ntoa(ts->addr[i]));
/*
 * Print the local server list through ubik_print: a "Local CellServDB:"
 * heading, then for each entry of ubik_servers its index and every non-zero
 * address it holds.
 * NOTE(review): the line naming this routine is not visible in this
 * excerpt; it is presumably printServerInfo, which is declared near the top
 * of the file -- confirm.  The declarations of i and j (including j's
 * starting value) are not visible either.
 */
660 struct ubik_server *ts;
663 ubik_print("Local CellServDB:");
664 for (ts = ubik_servers; ts; ts = ts->next, j++) {
665 ubik_print("Server %d: ", j);
666 for (i = 0; (i < UBIK_MAX_INTERFACE_ADDR) && ts->addr[i]; i++)
667 ubik_print("%s ", afs_inet_ntoa(ts->addr[i]));
/*
 * SDISK_SetVersion -- relabel the local database within the current remote
 * write transaction.
 *
 * rxcall:      incoming rx call; checked with ubik_CheckAuth first.
 * atid:        transaction id, handed to urecovery_CheckTid.
 * oldversionp: version the caller expects this server to hold; compared
 *              with ubik_dbVersion.
 * newversionp: label to write when the versions match.
 *
 * Requires ubik_currentTrans to be set and of type UBIK_WRITETRANS, and
 * treats ubeacon_AmSyncSite() being true as a special case (the statement
 * taken is not visible in this excerpt).  On a version match the label is
 * written and both ubik_dbase->version and ubik_dbVersion are updated; any
 * condition on the setlabel result, the declaration of code, the failure
 * paths and the final return are not visible in this excerpt.
 */
673 SDISK_SetVersion(rxcall, atid, oldversionp, newversionp)
674 struct rx_call *rxcall;
675 struct ubik_tid *atid;
676 struct ubik_version *oldversionp;
677 struct ubik_version *newversionp;
680 struct ubik_dbase *dbase;
682 if ((code = ubik_CheckAuth(rxcall))) {
686 if (!ubik_currentTrans) {
689 /* sanity check to make sure only write trans appear here */
690 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
694 /* Should not get this for the sync site */
695 if (ubeacon_AmSyncSite()) {
699 dbase = ubik_currentTrans->dbase;
/* The pointer is re-tested below because this call may clear it. */
701 urecovery_CheckTid(atid);
702 if (!ubik_currentTrans) {
707 /* Set the label if its version matches the sync-site's */
708 if ((oldversionp->epoch == ubik_dbVersion.epoch)
709 && (oldversionp->counter == ubik_dbVersion.counter)) {
/*
 * NOTE(review): the method is looked up on dbase but invoked on ubik_dbase;
 * presumably both refer to the same database -- confirm.
 */
710 code = (*dbase->setlabel) (ubik_dbase, 0, newversionp);
712 ubik_dbase->version = *newversionp;
713 ubik_dbVersion = *newversionp;