2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include <afs/param.h>
16 #include <sys/types.h>
21 #include <netinet/in.h>
34 #define UBIK_INTERNALS
/* Optional security hook.  When this pointer is non-null, the authentication
 * check in this file calls it with ubik_CheckRXSecurityRock and the incoming
 * rx call, and stores its return value as the result code. */
37 int (*ubik_CheckRXSecurityProc) ();
/* Opaque value handed to ubik_CheckRXSecurityProc as its first argument. */
38 char *ubik_CheckRXSecurityRock;
/* Forward declaration (old-style, parameters unspecified). */
39 void printServerInfo();
41 /* routines for handling requests remotely-submitted by the sync site. These are
42 only write transactions (we don't propagate read trans), and there is at most one
43 write transaction extant at any one time.
/* The remotely driven transaction currently in progress on this server, or 0
 * when there is none.  The SDISK_* handlers in this file test this pointer,
 * operate on the transaction it names, and reset it to 0 once that
 * transaction has been ended. */
46 struct ubik_trans *ubik_currentTrans = 0;
/* Authentication gate for an incoming rx call (acall).  When a
 * ubik_CheckRXSecurityProc hook is installed it is invoked with the
 * registered rock and the call, and its result is stored in code.
 * NOTE(review): presumably this is ubik_CheckAuth, which the SDISK_*
 * handlers call before doing anything else -- confirm. */
50 register struct rx_call *acall;
52 register afs_int32 code;
53 if (ubik_CheckRXSecurityProc) {
54 code = (*ubik_CheckRXSecurityProc) (ubik_CheckRXSecurityRock, acall);
60 /* the rest of these guys handle remote execution of write
61 * transactions: this is the code executed on the other servers when a
62 * sync site is executing a write transaction.
/* SDISK_Begin: start the write transaction identified by atid on this server.
 *
 * rxcall - incoming rx call; passed to ubik_CheckAuth before any other work.
 * atid   - transaction id; its epoch and counter are copied onto the new
 *          transaction.
 *
 * A transaction still recorded in ubik_currentTrans is ended first (when
 * UBIK_PAUSE is not defined, only if its locktype is not LOCKWAIT) and the
 * pointer is cleared.  udisk_begin then creates a new UBIK_WRITETRANS
 * transaction on ubik_dbase and stores it in ubik_currentTrans.
 */
65 SDISK_Begin(rxcall, atid)
66 register struct rx_call *rxcall;
67 struct ubik_tid *atid;
69 register afs_int32 code;
71 if ((code = ubik_CheckAuth(rxcall))) {
/* NOTE(review): presumably drops a stale ubik_currentTrans whose id does not
 * match atid -- confirm in urecovery_CheckTid. */
75 urecovery_CheckTid(atid);
76 if (ubik_currentTrans) {
77 /* If the thread is not waiting for lock - ok to end it */
78 #if !defined(UBIK_PAUSE)
79 if (ubik_currentTrans->locktype != LOCKWAIT) {
80 #endif /* UBIK_PAUSE */
81 udisk_end(ubik_currentTrans);
82 #if !defined(UBIK_PAUSE)
84 #endif /* UBIK_PAUSE */
85 ubik_currentTrans = (struct ubik_trans *)0;
87 code = udisk_begin(ubik_dbase, UBIK_WRITETRANS, &ubik_currentTrans);
/* Only label the transaction when udisk_begin returned 0 and produced one. */
88 if (!code && ubik_currentTrans) {
89 /* label this trans with the right trans id */
90 ubik_currentTrans->tid.epoch = atid->epoch;
91 ubik_currentTrans->tid.counter = atid->counter;
/* SDISK_Commit: commit the current remote write transaction.
 *
 * rxcall - incoming rx call; checked with ubik_CheckAuth first.
 * atid   - transaction id handed to urecovery_CheckTid.
 *
 * Requires a current transaction of type UBIK_WRITETRANS.  The transaction
 * pointer is tested again after urecovery_CheckTid, then udisk_commit is
 * called on it; afterwards ubik_dbVersion is refreshed from
 * ubik_dbase->version.
 */
99 SDISK_Commit(rxcall, atid)
100 register struct rx_call *rxcall;
101 struct ubik_tid *atid;
103 register afs_int32 code;
104 register struct ubik_dbase *dbase;
106 if ((code = ubik_CheckAuth(rxcall))) {
110 if (!ubik_currentTrans) {
114 * sanity check to make sure only write trans appear here
116 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
/* Keep the database of the current transaction in a local. */
120 dbase = ubik_currentTrans->dbase;
122 urecovery_CheckTid(atid);
/* Re-check: the pointer is tested again after urecovery_CheckTid. */
123 if (!ubik_currentTrans) {
128 code = udisk_commit(ubik_currentTrans);
130 /* sync site should now match */
131 ubik_dbVersion = ubik_dbase->version;
/* SDISK_ReleaseLocks: end the current remote write transaction.
 *
 * rxcall - incoming rx call; checked with ubik_CheckAuth first.
 * atid   - transaction id handed to urecovery_CheckTid.
 *
 * Requires a current transaction of type UBIK_WRITETRANS.  After the
 * urecovery_CheckTid re-check the transaction is passed to udisk_end (when
 * UBIK_PAUSE is not defined, only if its locktype is not LOCKWAIT) and
 * ubik_currentTrans is reset to 0.
 */
138 SDISK_ReleaseLocks(rxcall, atid)
139 register struct rx_call *rxcall;
140 struct ubik_tid *atid;
142 register struct ubik_dbase *dbase;
143 register afs_int32 code;
145 if ((code = ubik_CheckAuth(rxcall))) {
149 if (!ubik_currentTrans) {
152 /* sanity check to make sure only write trans appear here */
153 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
157 dbase = ubik_currentTrans->dbase;
159 urecovery_CheckTid(atid);
/* Re-check: the pointer is tested again after urecovery_CheckTid. */
160 if (!ubik_currentTrans) {
165 /* If the thread is not waiting for lock - ok to end it */
166 #if !defined(UBIK_PAUSE)
167 if (ubik_currentTrans->locktype != LOCKWAIT) {
168 #endif /* UBIK_PAUSE */
169 udisk_end(ubik_currentTrans);
170 #if !defined(UBIK_PAUSE)
172 #endif /* UBIK_PAUSE */
173 ubik_currentTrans = (struct ubik_trans *)0;
/* SDISK_Abort: abort and then end the current remote write transaction.
 *
 * rxcall - incoming rx call; checked with ubik_CheckAuth first.
 * atid   - transaction id handed to urecovery_CheckTid.
 *
 * Requires a current transaction of type UBIK_WRITETRANS.  After the
 * urecovery_CheckTid re-check, udisk_abort is called and its result kept in
 * code; the transaction is then passed to udisk_end (when UBIK_PAUSE is not
 * defined, only if its locktype is not LOCKWAIT) and ubik_currentTrans is
 * reset to 0.
 */
179 SDISK_Abort(rxcall, atid)
180 register struct rx_call *rxcall;
181 struct ubik_tid *atid;
183 register afs_int32 code;
184 register struct ubik_dbase *dbase;
186 if ((code = ubik_CheckAuth(rxcall))) {
190 if (!ubik_currentTrans) {
193 /* sanity check to make sure only write trans appear here */
194 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
198 dbase = ubik_currentTrans->dbase;
200 urecovery_CheckTid(atid);
/* Re-check: the pointer is tested again after urecovery_CheckTid. */
201 if (!ubik_currentTrans) {
206 code = udisk_abort(ubik_currentTrans);
207 /* If the thread is not waiting for lock - ok to end it */
208 #if !defined(UBIK_PAUSE)
209 if (ubik_currentTrans->locktype != LOCKWAIT) {
210 #endif /* UBIK_PAUSE */
211 udisk_end(ubik_currentTrans);
212 #if !defined(UBIK_PAUSE)
214 #endif /* UBIK_PAUSE */
215 ubik_currentTrans = (struct ubik_trans *)0;
/* SDISK_Lock: obtain a lock of type atype for the current remote write
 * transaction.
 *
 * rxcall - incoming rx call; checked with ubik_CheckAuth first.
 * atid   - transaction id handed to urecovery_CheckTid.
 * afile, apos, alen - not referenced by the code shown in this routine.
 * atype  - lock type passed to ulock_getLock.
 *
 * Requires a current transaction of type UBIK_WRITETRANS.  The transaction
 * pointer is saved in ubik_thisTrans before ulock_getLock is called.  If the
 * lock call returns 0 but ubik_currentTrans no longer equals the saved
 * pointer, the saved transaction is passed to udisk_end.
 */
221 SDISK_Lock(rxcall, atid, afile, apos, alen, atype)
222 register struct rx_call *rxcall;
223 struct ubik_tid *atid;
224 afs_int32 afile, apos, alen, atype; /* apos and alen are not used */
226 register afs_int32 code;
227 register struct ubik_dbase *dbase;
228 struct ubik_trans *ubik_thisTrans;
230 if ((code = ubik_CheckAuth(rxcall))) {
233 if (!ubik_currentTrans) {
236 /* sanity check to make sure only write trans appear here */
237 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
243 dbase = ubik_currentTrans->dbase;
245 urecovery_CheckTid(atid);
246 if (!ubik_currentTrans) {
/* Remember which transaction the lock is requested for, so a change of
 * ubik_currentTrans during the lock call can be detected afterwards. */
251 ubik_thisTrans = ubik_currentTrans;
252 code = ulock_getLock(ubik_currentTrans, atype, 1);
254 /* While waiting, the transaction may have been ended/
255 * aborted from under us (urecovery_CheckTid). In that
256 * case, end the transaction here.
258 if (!code && (ubik_currentTrans != ubik_thisTrans)) {
259 udisk_end(ubik_thisTrans);
267 /* Write a vector of data */
/* SDISK_WriteV: apply a list of writes to the current remote write
 * transaction.
 *
 * rxcall    - incoming rx call; checked with ubik_CheckAuth first.
 * atid      - transaction id handed to urecovery_CheckTid.
 * io_vector - iovec_wrt_len entries (iovec_wrt_val), each read as a
 *             struct ubik_iovec with file, position and length fields.
 * io_buffer - the bytes for all entries, packed back to back
 *             (iovec_buf_val, iovec_buf_len).
 *
 * Requires a current transaction of type UBIK_WRITETRANS.  Entries are
 * processed in order; offset tracks where the data of the current entry
 * starts inside io_buffer and advances by the length of each entry.
 */
269 SDISK_WriteV(rxcall, atid, io_vector, io_buffer)
270 register struct rx_call *rxcall;
271 struct ubik_tid *atid;
272 iovec_wrt *io_vector;
273 iovec_buf *io_buffer;
275 afs_int32 code, i, offset;
276 struct ubik_dbase *dbase;
277 struct ubik_iovec *iovec;
280 if ((code = ubik_CheckAuth(rxcall))) {
283 if (!ubik_currentTrans) {
286 /* sanity check to make sure only write trans appear here */
287 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
291 dbase = ubik_currentTrans->dbase;
293 urecovery_CheckTid(atid);
294 if (!ubik_currentTrans) {
299 iovec = (struct ubik_iovec *)io_vector->iovec_wrt_val;
300 iobuf = (char *)io_buffer->iovec_buf_val;
301 for (i = 0, offset = 0; i < io_vector->iovec_wrt_len; i++) {
302 /* Sanity check for going off end of buffer */
/* NOTE(review): only the upper bound of offset + length is tested; confirm
 * that length cannot be negative or make the sum wrap. */
303 if ((offset + iovec[i].length) > io_buffer->iovec_buf_len) {
307 udisk_write(ubik_currentTrans, iovec[i].file, &iobuf[offset],
308 iovec[i].position, iovec[i].length);
/* Advance to the data belonging to the next entry. */
313 offset += iovec[i].length;
/* SDISK_Write: apply a single write to the current remote write transaction.
 *
 * rxcall - incoming rx call; checked with ubik_CheckAuth first.
 * atid   - transaction id handed to urecovery_CheckTid.
 * afile  - file argument passed to udisk_write.
 * apos   - position argument passed to udisk_write.
 * adata  - bulk data; bulkdata_val and bulkdata_len supply the bytes and
 *          their count.
 *
 * Requires a current transaction of type UBIK_WRITETRANS, tested both before
 * and after urecovery_CheckTid.
 */
321 SDISK_Write(rxcall, atid, afile, apos, adata)
322 register struct rx_call *rxcall;
323 struct ubik_tid *atid;
324 afs_int32 afile, apos;
325 register bulkdata *adata;
327 register afs_int32 code;
328 register struct ubik_dbase *dbase;
330 if ((code = ubik_CheckAuth(rxcall))) {
333 if (!ubik_currentTrans) {
336 /* sanity check to make sure only write trans appear here */
337 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
341 dbase = ubik_currentTrans->dbase;
343 urecovery_CheckTid(atid);
344 if (!ubik_currentTrans) {
349 udisk_write(ubik_currentTrans, afile, adata->bulkdata_val, apos,
350 adata->bulkdata_len);
/* SDISK_Truncate: truncate a file within the current remote write
 * transaction.
 *
 * rxcall - incoming rx call; checked with ubik_CheckAuth first.
 * atid   - transaction id handed to urecovery_CheckTid.
 * afile  - file argument passed to udisk_truncate.
 * alen   - length argument passed to udisk_truncate.
 *
 * Requires a current transaction of type UBIK_WRITETRANS, tested both before
 * and after urecovery_CheckTid.  The result of udisk_truncate is kept in
 * code.
 */
356 SDISK_Truncate(rxcall, atid, afile, alen)
357 register struct rx_call *rxcall;
358 struct ubik_tid *atid;
362 register afs_int32 code;
363 register struct ubik_dbase *dbase;
365 if ((code = ubik_CheckAuth(rxcall))) {
368 if (!ubik_currentTrans) {
371 /* sanity check to make sure only write trans appear here */
372 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
376 dbase = ubik_currentTrans->dbase;
378 urecovery_CheckTid(atid);
379 if (!ubik_currentTrans) {
383 code = udisk_truncate(ubik_currentTrans, afile, alen);
/* SDISK_GetVersion: report the version label of the local database.
 *
 * rxcall   - incoming rx call; checked with ubik_CheckAuth first.
 * aversion - output; filled in by the getlabel operation of ubik_dbase for
 *            file 0.  The fallback path visible here sets its counter to 0
 *            to signal that no database is available.
 *
 * The routine tests ubeacon_AmSyncSite before reading the label; the
 * commentary inside the body explains why the request is not served while
 * this server is the sync site.
 */
389 SDISK_GetVersion(rxcall, aversion)
390 register struct rx_call *rxcall;
391 register struct ubik_version *aversion;
393 register afs_int32 code;
395 if ((code = ubik_CheckAuth(rxcall))) {
400 * If we are the sync site, recovery shouldn't be running on any
401 * other site. We shouldn't be getting this RPC as long as we are
402 * the sync site. To prevent any unforseen activity, we should
403 * reject this RPC until we have recognized that we are not the
404 * sync site anymore, and/or if we have any pending WRITE
405 * transactions that have to complete. This way we can be assured
406 * that this RPC would not block any pending transactions that
407 * should either fail or pass. If we have recognized the fact that
408 * we are not the sync site any more, all write transactions would
409 * fail with UNOQUORUM anyway.
411 if (ubeacon_AmSyncSite()) {
416 code = (*ubik_dbase->getlabel) (ubik_dbase, 0, aversion);
419 /* tell other side there's no dbase */
421 aversion->counter = 0;
/* SDISK_GetFile: stream a database file to the caller over the rx call.
 *
 * rxcall  - incoming rx call; checked with ubik_CheckAuth first and then
 *           used as the output stream.
 * file    - file number passed to the stat, read and getlabel operations.
 * version - output; filled in by the getlabel operation at the end.
 *
 * Wire format as produced by the visible code: first the file size from the
 * stat operation, written as one afs_int32 converted with htonl, then the
 * file contents.  The contents are read through the read operation of the
 * database into tbuffer, at most sizeof(tbuffer) bytes at a time, and each
 * piece is handed to rx_Write.  A short write of the length word is detected
 * by comparing the rx_Write result with sizeof(afs_int32).
 */
427 SDISK_GetFile(rxcall, file, version)
428 register struct rx_call *rxcall;
429 register afs_int32 file;
430 struct ubik_version *version;
432 register afs_int32 code;
433 register struct ubik_dbase *dbase;
434 register afs_int32 offset;
435 struct ubik_stat ubikstat;
440 if ((code = ubik_CheckAuth(rxcall))) {
443 /* temporarily disabled because it causes problems for migration tool. Hey, it's just
444 * a sanity check, anyway.
445 if (ubeacon_AmSyncSite()) {
451 code = (*dbase->stat) (dbase, file, &ubikstat);
456 length = ubikstat.size;
457 tlen = htonl(length);
458 code = rx_Write(rxcall, &tlen, sizeof(afs_int32));
459 if (code != sizeof(afs_int32)) {
465 tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length);
466 code = (*dbase->read) (dbase, file, tbuffer, offset, tlen);
471 code = rx_Write(rxcall, tbuffer, tlen);
479 code = (*dbase->getlabel) (dbase, file, version); /* return the dbase, too */
/* SDISK_SendFile: receive a complete database file from the caller and
 * install it locally.
 *
 * rxcall - incoming rx call; checked with ubik_CheckAuth first and then used
 *          as the input stream.
 * file   - file number passed to the database operations.
 * length - number of bytes to expect; consumed in pieces of at most
 *          sizeof(tbuffer) bytes.
 * avers  - version label installed once the data has been written.
 *
 * Steps visible in the body:
 *   - the address of the peer is compared with uvote_GetSyncSite; a known,
 *     different sync site is treated as the wrong sender;
 *   - urecovery_AbortAll is called, the file is truncated to 0 and labelled
 *     with version 0.0 while the transfer is in progress;
 *   - data is read with rx_Read and stored with the write operation of the
 *     database;
 *   - the data is synced, the label avers is set, ubik_dbase->version is
 *     updated, cached buffers are invalidated with udisk_Invalidate and
 *     LWP_NoYieldSignal is raised on the version field;
 *   - a failure or completion message is printed with ubik_print.
 *
 * Note that the local offset first holds the sync site address returned by
 * uvote_GetSyncSite and is later passed as the file offset of each write.
 */
485 SDISK_SendFile(rxcall, file, length, avers)
486 register struct rx_call *rxcall;
489 struct ubik_version *avers;
491 register afs_int32 code;
492 register struct ubik_dbase *dbase;
495 struct ubik_version tversion;
497 struct rx_peer *tpeer;
498 struct rx_connection *tconn;
499 afs_uint32 otherHost;
501 /* receive the database file from the caller */
503 if ((code = ubik_CheckAuth(rxcall))) {
507 /* next, we do a sanity check to see if the guy sending us the database is
508 * the guy we think is the sync site. It turns out that we might not have
509 * decided yet that someone's the sync site, but they could have enough
510 * votes from others to be sync site anyway, and could send us the database
511 * in advance of getting our votes. This is fine, what we're really trying
512 * to check is that some authenticated bogon isn't sending a random database
513 * into another configuration. This could happen on a bad configuration
514 * screwup. Thus, we only object if we're sure we know who the sync site
515 * is, and it ain't the guy talking to us.
517 offset = uvote_GetSyncSite();
518 tconn = rx_ConnectionOf(rxcall);
519 tpeer = rx_PeerOf(tconn);
520 otherHost = ubikGetPrimaryInterfaceAddr(rx_HostOf(tpeer));
521 if (offset && offset != otherHost) {
522 /* we *know* this is the wrong guy */
530 /* abort any active trans that may scribble over the database */
531 urecovery_AbortAll(dbase);
533 ubik_print("Ubik: Synchronize database with server %s\n",
534 afs_inet_ntoa(otherHost));
537 (*dbase->truncate) (dbase, file, 0); /* truncate first */
538 tversion.epoch = 0; /* start off by labelling in-transit db as invalid */
539 tversion.counter = 0;
540 (*dbase->setlabel) (dbase, file, &tversion); /* setlabel does sync */
/* Pull the next piece from the call, never more than tbuffer can hold. */
542 tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length);
543 code = rx_Read(rxcall, tbuffer, tlen);
549 code = (*dbase->write) (dbase, file, tbuffer, offset, tlen);
559 /* sync data first, then write label and resync (resync done by setlabel call).
560 * This way, good label is only on good database. */
561 (*ubik_dbase->sync) (dbase, file);
562 code = (*ubik_dbase->setlabel) (dbase, file, avers);
/* Keep the in-memory version in step with the label just written. */
563 memcpy(&ubik_dbase->version, avers, sizeof(struct ubik_version));
564 udisk_Invalidate(dbase, file); /* new dbase, flush disk buffers */
565 LWP_NoYieldSignal(&dbase->version);
570 ("Ubik: Synchronize database with server %s failed (error = %d)\n",
571 afs_inet_ntoa(otherHost), code);
573 ubik_print("Ubik: Synchronize database completed\n");
581 register struct rx_call *rxcall;
587 * Update remote machines addresses in my server list
588 * Send back my addresses to caller of this RPC
589 * Returns zero on success, else 1.
/* SDISK_UpdateInterfaceAddr parameters:
 *
 * rxcall  - incoming rx call.
 * inAddr  - addresses of the calling server; each entry is converted with
 *           htonl before being compared with or stored in the server list.
 * outAddr - output; receives ubik_host[] converted with ntohl.
 *
 * The caller is looked up in ubik_servers by its first address.  The request
 * is reported as inconsistent when probableMatch is 0 or found is nonzero;
 * otherwise addr[1..] of the matching entry are overwritten from inAddr.
 * NOTE(review): presumably probableMatch marks that the first address
 * matched an entry and found marks an incoming address that belongs to a
 * different entry -- confirm where the two flags are set.
 */
592 SDISK_UpdateInterfaceAddr(rxcall, inAddr, outAddr)
593 register struct rx_call *rxcall;
594 UbikInterfaceAddr *inAddr, *outAddr;
596 struct ubik_server *ts, *tmp;
597 afs_uint32 remoteAddr; /* in net byte order */
598 int i, j, found = 0, probableMatch = 0;
600 /* copy the output parameters */
601 for (i = 0; i < UBIK_MAX_INTERFACE_ADDR; i++)
602 outAddr->hostAddr[i] = ntohl(ubik_host[i]);
/* Locate the server entry whose first address equals the first address of
 * the caller. */
604 remoteAddr = htonl(inAddr->hostAddr[0]);
605 for (ts = ubik_servers; ts; ts = ts->next)
606 if (ts->addr[0] == remoteAddr) { /* both in net byte order */
612 /* verify that all addresses in the incoming RPC are
613 ** not part of other server entries in my CellServDB
615 for (i = 0; !found && (i < UBIK_MAX_INTERFACE_ADDR)
616 && inAddr->hostAddr[i]; i++) {
617 remoteAddr = htonl(inAddr->hostAddr[i]);
618 for (tmp = ubik_servers; (!found && tmp); tmp = tmp->next) {
619 if (ts == tmp) /* this is my server */
621 for (j = 0; (j < UBIK_MAX_INTERFACE_ADDR) && tmp->addr[j];
623 if (remoteAddr == tmp->addr[j]) {
631 /* if (probableMatch) */
632 /* inconsistent addresses in CellServDB */
633 if (!probableMatch || found) {
634 ubik_print("Inconsistent Cell Info from server: ");
635 for (i = 0; i < UBIK_MAX_INTERFACE_ADDR && inAddr->hostAddr[i]; i++)
636 ubik_print("%s ", afs_inet_ntoa(htonl(inAddr->hostAddr[i])));
644 /* update our data structures */
/* Index 0 is left alone: it is the address the entry was matched on. */
645 for (i = 1; i < UBIK_MAX_INTERFACE_ADDR; i++)
646 ts->addr[i] = htonl(inAddr->hostAddr[i]);
648 ubik_print("ubik: A Remote Server has addresses: ");
649 for (i = 0; i < UBIK_MAX_INTERFACE_ADDR && ts->addr[i]; i++)
650 ubik_print("%s ", afs_inet_ntoa(ts->addr[i]));
/* Print the local server list: a heading, then for every entry of
 * ubik_servers its running index j followed by each of its addresses, up to
 * UBIK_MAX_INTERFACE_ADDR and stopping at the first zero address.
 * NOTE(review): presumably this is the body of printServerInfo, which is
 * declared near the top of the file -- confirm. */
659 struct ubik_server *ts;
662 ubik_print("Local CellServDB:");
663 for (ts = ubik_servers; ts; ts = ts->next, j++) {
664 ubik_print("Server %d: ", j);
665 for (i = 0; (i < UBIK_MAX_INTERFACE_ADDR) && ts->addr[i]; i++)
666 ubik_print("%s ", afs_inet_ntoa(ts->addr[i]));
/* SDISK_SetVersion: relabel the local database within the current remote
 * write transaction.
 *
 * rxcall      - incoming rx call; checked with ubik_CheckAuth first.
 * atid        - transaction id handed to urecovery_CheckTid.
 * oldversionp - version the caller expects this server to hold; compared
 *               field by field with ubik_dbVersion.
 * newversionp - version to install.
 *
 * Requires a current transaction of type UBIK_WRITETRANS, and the routine
 * tests ubeacon_AmSyncSite because the sync site should not receive this
 * call.  Only when oldversionp equals ubik_dbVersion is the label of file 0
 * set to newversionp; the visible code then stores newversionp in both
 * ubik_dbase->version and ubik_dbVersion.
 */
672 SDISK_SetVersion(rxcall, atid, oldversionp, newversionp)
673 struct rx_call *rxcall;
674 struct ubik_tid *atid;
675 struct ubik_version *oldversionp;
676 struct ubik_version *newversionp;
679 struct ubik_dbase *dbase;
681 if ((code = ubik_CheckAuth(rxcall))) {
685 if (!ubik_currentTrans) {
688 /* sanity check to make sure only write trans appear here */
689 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
693 /* Should not get this for the sync site */
694 if (ubeacon_AmSyncSite()) {
698 dbase = ubik_currentTrans->dbase;
700 urecovery_CheckTid(atid);
/* Re-check: the pointer is tested again after urecovery_CheckTid. */
701 if (!ubik_currentTrans) {
706 /* Set the label if its version matches the sync-site's */
707 if ((oldversionp->epoch == ubik_dbVersion.epoch)
708 && (oldversionp->counter == ubik_dbVersion.counter)) {
709 code = (*dbase->setlabel) (ubik_dbase, 0, newversionp);
711 ubik_dbase->version = *newversionp;
712 ubik_dbVersion = *newversionp;