2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include <afs/param.h>
16 #include <sys/types.h>
21 #include <netinet/in.h>
33 #include <afs/afsutil.h>
35 #define UBIK_INTERNALS
/* Optional security hook.  When this pointer is non-null it is called with
 * ubik_CheckRXSecurityRock and the incoming rx call, and its result is used
 * as the authorization code. */
38 int (*ubik_CheckRXSecurityProc) ();
/* Value handed to ubik_CheckRXSecurityProc as its first argument. */
39 char *ubik_CheckRXSecurityRock;
/* Forward declaration (old-style, no parameter list given). */
40 void printServerInfo();
42 /* routines for handling requests remotely-submitted by the sync site. These are
43 only write transactions (we don't propagate read trans), and there is at most one
44 write transaction extant at any one time.
/* The remote write transaction currently open on this server, or 0 when
 * there is none.  The SDISK_* handlers test it, operate on it and reset it
 * to 0 once the transaction has been ended. */
47 struct ubik_trans *ubik_currentTrans = 0;
/* NOTE(review): the line that names this function is not visible here; judging
 * by the callers it is presumably ubik_CheckAuth(acall) -- confirm.
 *
 * acall: the incoming rx call to be authorized.
 *
 * If a security hook has been installed in ubik_CheckRXSecurityProc, it is
 * invoked with ubik_CheckRXSecurityRock and the call, and its result is
 * stored in code. */
51 register struct rx_call *acall;
53 register afs_int32 code;
54 if (ubik_CheckRXSecurityProc) {
55 code = (*ubik_CheckRXSecurityProc) (ubik_CheckRXSecurityRock, acall);
61 /* the rest of these guys handle remote execution of write
62 * transactions: this is the code executed on the other servers when a
63 * sync site is executing a write transaction.
/* SDISK_Begin: start a remote write transaction.
 *
 * rxcall: the incoming rx call; authorized with ubik_CheckAuth() first.
 * atid:   transaction id supplied by the caller; it is passed to
 *         urecovery_CheckTid() and then copied onto the new transaction.
 *
 * Any transaction still recorded in ubik_currentTrans is ended and the
 * pointer cleared before udisk_begin() opens a new UBIK_WRITETRANS
 * transaction on ubik_dbase. */
66 SDISK_Begin(rxcall, atid)
67 register struct rx_call *rxcall;
68 struct ubik_tid *atid;
70 register afs_int32 code;
72 if ((code = ubik_CheckAuth(rxcall))) {
76 urecovery_CheckTid(atid);
77 if (ubik_currentTrans) {
78 /* If the thread is not waiting for lock - ok to end it */
/* The LOCKWAIT test only exists when UBIK_PAUSE is not defined; with
 * UBIK_PAUSE defined, udisk_end() is called unconditionally. */
79 #if !defined(UBIK_PAUSE)
80 if (ubik_currentTrans->locktype != LOCKWAIT) {
81 #endif /* UBIK_PAUSE */
82 udisk_end(ubik_currentTrans);
83 #if !defined(UBIK_PAUSE)
85 #endif /* UBIK_PAUSE */
86 ubik_currentTrans = (struct ubik_trans *)0;
88 code = udisk_begin(ubik_dbase, UBIK_WRITETRANS, &ubik_currentTrans);
89 if (!code && ubik_currentTrans) {
90 /* label this trans with the right trans id */
91 ubik_currentTrans->tid.epoch = atid->epoch;
92 ubik_currentTrans->tid.counter = atid->counter;
/* SDISK_Commit: commit the current remote write transaction.
 *
 * rxcall: the incoming rx call; authorized with ubik_CheckAuth() first.
 * atid:   transaction id supplied by the caller, checked through
 *         urecovery_CheckTid().
 *
 * Requires a current transaction of type UBIK_WRITETRANS.  The transaction
 * is committed with udisk_commit(), and ubik_dbVersion is refreshed from
 * ubik_dbase->version. */
100 SDISK_Commit(rxcall, atid)
101 register struct rx_call *rxcall;
102 struct ubik_tid *atid;
104 register afs_int32 code;
105 register struct ubik_dbase *dbase;
107 if ((code = ubik_CheckAuth(rxcall))) {
111 if (!ubik_currentTrans) {
115 * sanity check to make sure only write trans appear here
117 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
121 dbase = ubik_currentTrans->dbase;
123 urecovery_CheckTid(atid);
/* Tested a second time: presumably urecovery_CheckTid() can discard the
 * current transaction -- confirm against its implementation. */
124 if (!ubik_currentTrans) {
129 code = udisk_commit(ubik_currentTrans);
131 /* sync site should now match */
132 ubik_dbVersion = ubik_dbase->version;
/* SDISK_ReleaseLocks: end the current remote write transaction.
 *
 * rxcall: the incoming rx call; authorized with ubik_CheckAuth() first.
 * atid:   transaction id supplied by the caller, checked through
 *         urecovery_CheckTid().
 *
 * Requires a current transaction of type UBIK_WRITETRANS.  The transaction
 * is ended with udisk_end() and ubik_currentTrans is reset to 0. */
139 SDISK_ReleaseLocks(rxcall, atid)
140 register struct rx_call *rxcall;
141 struct ubik_tid *atid;
143 register struct ubik_dbase *dbase;
144 register afs_int32 code;
146 if ((code = ubik_CheckAuth(rxcall))) {
150 if (!ubik_currentTrans) {
153 /* sanity check to make sure only write trans appear here */
154 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
158 dbase = ubik_currentTrans->dbase;
160 urecovery_CheckTid(atid);
/* Tested a second time: presumably urecovery_CheckTid() can discard the
 * current transaction -- confirm against its implementation. */
161 if (!ubik_currentTrans) {
166 /* If the thread is not waiting for lock - ok to end it */
/* The LOCKWAIT test only exists when UBIK_PAUSE is not defined; with
 * UBIK_PAUSE defined, udisk_end() is called unconditionally. */
167 #if !defined(UBIK_PAUSE)
168 if (ubik_currentTrans->locktype != LOCKWAIT) {
169 #endif /* UBIK_PAUSE */
170 udisk_end(ubik_currentTrans);
171 #if !defined(UBIK_PAUSE)
173 #endif /* UBIK_PAUSE */
174 ubik_currentTrans = (struct ubik_trans *)0;
/* SDISK_Abort: abort and end the current remote write transaction.
 *
 * rxcall: the incoming rx call; authorized with ubik_CheckAuth() first.
 * atid:   transaction id supplied by the caller, checked through
 *         urecovery_CheckTid().
 *
 * Requires a current transaction of type UBIK_WRITETRANS.  The transaction
 * is aborted with udisk_abort() (whose result is kept in code), then ended
 * with udisk_end(), and ubik_currentTrans is reset to 0. */
180 SDISK_Abort(rxcall, atid)
181 register struct rx_call *rxcall;
182 struct ubik_tid *atid;
184 register afs_int32 code;
185 register struct ubik_dbase *dbase;
187 if ((code = ubik_CheckAuth(rxcall))) {
191 if (!ubik_currentTrans) {
194 /* sanity check to make sure only write trans appear here */
195 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
199 dbase = ubik_currentTrans->dbase;
201 urecovery_CheckTid(atid);
/* Tested a second time: presumably urecovery_CheckTid() can discard the
 * current transaction -- confirm against its implementation. */
202 if (!ubik_currentTrans) {
207 code = udisk_abort(ubik_currentTrans);
208 /* If the thread is not waiting for lock - ok to end it */
/* The LOCKWAIT test only exists when UBIK_PAUSE is not defined; with
 * UBIK_PAUSE defined, udisk_end() is called unconditionally. */
209 #if !defined(UBIK_PAUSE)
210 if (ubik_currentTrans->locktype != LOCKWAIT) {
211 #endif /* UBIK_PAUSE */
212 udisk_end(ubik_currentTrans);
213 #if !defined(UBIK_PAUSE)
215 #endif /* UBIK_PAUSE */
216 ubik_currentTrans = (struct ubik_trans *)0;
/* SDISK_Lock: obtain a lock for the current remote write transaction.
 *
 * rxcall: the incoming rx call; authorized with ubik_CheckAuth() first.
 * atid:   transaction id supplied by the caller, checked through
 *         urecovery_CheckTid().
 * atype:  lock type, passed straight to ulock_getLock().
 * afile, apos, alen: apos and alen are documented as unused; afile is not
 *         referenced by the statements shown here either.
 *
 * Requires a current transaction of type UBIK_WRITETRANS. */
222 SDISK_Lock(rxcall, atid, afile, apos, alen, atype)
223 register struct rx_call *rxcall;
224 struct ubik_tid *atid;
225 afs_int32 afile, apos, alen, atype; /* apos and alen are not used */
227 register afs_int32 code;
228 register struct ubik_dbase *dbase;
229 struct ubik_trans *ubik_thisTrans;
231 if ((code = ubik_CheckAuth(rxcall))) {
234 if (!ubik_currentTrans) {
237 /* sanity check to make sure only write trans appear here */
238 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
244 dbase = ubik_currentTrans->dbase;
246 urecovery_CheckTid(atid);
247 if (!ubik_currentTrans) {
/* Remember which transaction the lock is being taken for, so that a change
 * of ubik_currentTrans during the lock call can be detected afterwards. */
252 ubik_thisTrans = ubik_currentTrans;
253 code = ulock_getLock(ubik_currentTrans, atype, 1);
255 /* While waiting, the transaction may have been ended/
256 * aborted from under us (urecovery_CheckTid). In that
257 * case, end the transaction here.
259 if (!code && (ubik_currentTrans != ubik_thisTrans)) {
260 udisk_end(ubik_thisTrans);
268 /* Write a vector of data */
/* SDISK_WriteV: apply a vector of writes to the current remote write
 * transaction.
 *
 * rxcall:    the incoming rx call; authorized with ubik_CheckAuth() first.
 * atid:      transaction id supplied by the caller, checked through
 *            urecovery_CheckTid().
 * io_vector: iovec_wrt_len descriptors, each read as a struct ubik_iovec
 *            giving a file, a position and a length.
 * io_buffer: the data for all descriptors, packed back to back; offset
 *            tracks where the data of the current descriptor starts.
 *
 * Requires a current transaction of type UBIK_WRITETRANS.  Each descriptor
 * is written with udisk_write(). */
270 SDISK_WriteV(rxcall, atid, io_vector, io_buffer)
271 register struct rx_call *rxcall;
272 struct ubik_tid *atid;
273 iovec_wrt *io_vector;
274 iovec_buf *io_buffer;
276 afs_int32 code, i, offset;
277 struct ubik_dbase *dbase;
278 struct ubik_iovec *iovec;
281 if ((code = ubik_CheckAuth(rxcall))) {
284 if (!ubik_currentTrans) {
287 /* sanity check to make sure only write trans appear here */
288 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
292 dbase = ubik_currentTrans->dbase;
294 urecovery_CheckTid(atid);
295 if (!ubik_currentTrans) {
300 iovec = (struct ubik_iovec *)io_vector->iovec_wrt_val;
301 iobuf = (char *)io_buffer->iovec_buf_val;
302 for (i = 0, offset = 0; i < io_vector->iovec_wrt_len; i++) {
303 /* Sanity check for going off end of buffer */
304 if ((offset + iovec[i].length) > io_buffer->iovec_buf_len) {
308 udisk_write(ubik_currentTrans, iovec[i].file, &iobuf[offset],
309 iovec[i].position, iovec[i].length);
/* Advance to the data belonging to the next descriptor. */
314 offset += iovec[i].length;
/* SDISK_Write: write one buffer into the current remote write transaction.
 *
 * rxcall: the incoming rx call; authorized with ubik_CheckAuth() first.
 * atid:   transaction id supplied by the caller, checked through
 *         urecovery_CheckTid().
 * afile:  file argument passed to udisk_write().
 * apos:   position argument passed to udisk_write().
 * adata:  the data; bulkdata_val and bulkdata_len are passed to udisk_write().
 *
 * Requires a current transaction of type UBIK_WRITETRANS. */
322 SDISK_Write(rxcall, atid, afile, apos, adata)
323 register struct rx_call *rxcall;
324 struct ubik_tid *atid;
325 afs_int32 afile, apos;
326 register bulkdata *adata;
328 register afs_int32 code;
329 register struct ubik_dbase *dbase;
331 if ((code = ubik_CheckAuth(rxcall))) {
334 if (!ubik_currentTrans) {
337 /* sanity check to make sure only write trans appear here */
338 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
342 dbase = ubik_currentTrans->dbase;
344 urecovery_CheckTid(atid);
345 if (!ubik_currentTrans) {
350 udisk_write(ubik_currentTrans, afile, adata->bulkdata_val, apos,
351 adata->bulkdata_len);
/* SDISK_Truncate: truncate a file within the current remote write
 * transaction.
 *
 * rxcall: the incoming rx call; authorized with ubik_CheckAuth() first.
 * atid:   transaction id supplied by the caller, checked through
 *         urecovery_CheckTid().
 * afile, alen: passed unchanged to udisk_truncate().
 *
 * Requires a current transaction of type UBIK_WRITETRANS. */
357 SDISK_Truncate(rxcall, atid, afile, alen)
358 register struct rx_call *rxcall;
359 struct ubik_tid *atid;
363 register afs_int32 code;
364 register struct ubik_dbase *dbase;
366 if ((code = ubik_CheckAuth(rxcall))) {
369 if (!ubik_currentTrans) {
372 /* sanity check to make sure only write trans appear here */
373 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
377 dbase = ubik_currentTrans->dbase;
379 urecovery_CheckTid(atid);
380 if (!ubik_currentTrans) {
384 code = udisk_truncate(ubik_currentTrans, afile, alen);
/* SDISK_GetVersion: report this server's database version label.
 *
 * rxcall:   the incoming rx call; authorized with ubik_CheckAuth() first.
 * aversion: output; filled in by the database's getlabel operation for
 *           file 0.
 *
 * The request is treated specially while ubeacon_AmSyncSite() is true, for
 * the reasons given in the comment inside the function. */
390 SDISK_GetVersion(rxcall, aversion)
391 register struct rx_call *rxcall;
392 register struct ubik_version *aversion;
394 register afs_int32 code;
396 if ((code = ubik_CheckAuth(rxcall))) {
401 * If we are the sync site, recovery shouldn't be running on any
402 * other site. We shouldn't be getting this RPC as long as we are
403 * the sync site. To prevent any unforseen activity, we should
404 * reject this RPC until we have recognized that we are not the
405 * sync site anymore, and/or if we have any pending WRITE
406 * transactions that have to complete. This way we can be assured
407 * that this RPC would not block any pending transactions that
408 * should either fail or pass. If we have recognized the fact that
409 * we are not the sync site any more, all write transactions would
410 * fail with UNOQUORUM anyway.
412 if (ubeacon_AmSyncSite()) {
417 code = (*ubik_dbase->getlabel) (ubik_dbase, 0, aversion);
420 /* tell other side there's no dbase */
422 aversion->counter = 0;
/* SDISK_GetFile: stream a database file to the caller over the rx call.
 *
 * rxcall:  the incoming rx call; authorized with ubik_CheckAuth() first, and
 *          used as the output stream.
 * file:    the file to send.
 * version: output; filled in by the database's getlabel operation once the
 *          data has been sent.
 *
 * The file size is obtained with the database's stat operation and sent
 * first as a 32-bit value in network byte order.  The contents are then
 * read with the database's read operation and written with rx_Write() in
 * pieces of at most sizeof(tbuffer) bytes. */
428 SDISK_GetFile(rxcall, file, version)
429 register struct rx_call *rxcall;
430 register afs_int32 file;
431 struct ubik_version *version;
433 register afs_int32 code;
434 register struct ubik_dbase *dbase;
435 register afs_int32 offset;
436 struct ubik_stat ubikstat;
441 if ((code = ubik_CheckAuth(rxcall))) {
444 /* temporarily disabled because it causes problems for migration tool. Hey, it's just
445 * a sanity check, anyway.
446 if (ubeacon_AmSyncSite()) {
452 code = (*dbase->stat) (dbase, file, &ubikstat);
457 length = ubikstat.size;
458 tlen = htonl(length);
459 code = rx_Write(rxcall, (char *)&tlen, sizeof(afs_int32));
/* rx_Write() reports the number of bytes written; anything other than the
 * full length word is treated as an error. */
460 if (code != sizeof(afs_int32)) {
462 ubik_dprint("Rx-write length error=%d\n", code);
/* Size of the next piece: whatever is left, capped at the buffer size. */
467 tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length);
468 code = (*dbase->read) (dbase, file, tbuffer, offset, tlen);
471 ubik_dprint("read failed error=%d\n", code);
474 code = rx_Write(rxcall, tbuffer, tlen);
477 ubik_dprint("Rx-write length error=%d\n", code);
483 code = (*dbase->getlabel) (dbase, file, version); /* return the dbase, too */
/* SDISK_SendFile: receive a database file from the caller and install it.
 *
 * rxcall: the incoming rx call; authorized with ubik_CheckAuth() first, and
 *         used as the input stream.
 * file:   the file to replace.
 * length: number of bytes the caller will send.
 * avers:  version label to put on the file once the data is in place.
 *
 * Sequence, as far as the statements shown establish it:
 *   1. Compare the caller's primary address with uvote_GetSyncSite(); a
 *      known sync site that differs from the caller is treated as an error.
 *   2. urecovery_AbortAll() on the database.
 *   3. Truncate the file and label it 0.0, so that a partly transferred
 *      database is marked invalid.
 *   4. Read the data with rx_Read() in pieces of at most sizeof(tbuffer)
 *      bytes and store each piece with the database's write operation.
 *   5. Sync, set the label to *avers, copy it into ubik_dbase->version,
 *      call udisk_Invalidate() and signal &dbase->version.
 *
 * NOTE(review): the "send the file back to the requester" comment in the
 * body does not match the rx_Read()/write flow of this function. */
489 SDISK_SendFile(rxcall, file, length, avers)
490 register struct rx_call *rxcall;
493 struct ubik_version *avers;
495 register afs_int32 code;
496 register struct ubik_dbase *dbase;
499 struct ubik_version tversion;
501 struct rx_peer *tpeer;
502 struct rx_connection *tconn;
503 afs_uint32 otherHost;
505 /* send the file back to the requester */
507 if ((code = ubik_CheckAuth(rxcall))) {
511 /* next, we do a sanity check to see if the guy sending us the database is
512 * the guy we think is the sync site. It turns out that we might not have
513 * decided yet that someone's the sync site, but they could have enough
514 * votes from others to be sync site anyway, and could send us the database
515 * in advance of getting our votes. This is fine, what we're really trying
516 * to check is that some authenticated bogon isn't sending a random database
517 * into another configuration. This could happen on a bad configuration
518 * screwup. Thus, we only object if we're sure we know who the sync site
519 * is, and it ain't the guy talking to us.
/* offset is used here as scratch storage for the sync site's address. */
521 offset = uvote_GetSyncSite();
522 tconn = rx_ConnectionOf(rxcall);
523 tpeer = rx_PeerOf(tconn);
524 otherHost = ubikGetPrimaryInterfaceAddr(rx_HostOf(tpeer));
525 if (offset && offset != otherHost) {
526 /* we *know* this is the wrong guy */
534 /* abort any active trans that may scribble over the database */
535 urecovery_AbortAll(dbase);
537 ubik_print("Ubik: Synchronize database with server %s\n",
538 afs_inet_ntoa(otherHost));
541 (*dbase->truncate) (dbase, file, 0); /* truncate first */
542 tversion.epoch = 0; /* start off by labelling in-transit db as invalid */
543 tversion.counter = 0;
544 (*dbase->setlabel) (dbase, file, &tversion); /* setlabel does sync */
545 memcpy(&ubik_dbase->version, &tversion, sizeof(struct ubik_version));
/* Size of the next piece: whatever is left, capped at the buffer size. */
547 tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length);
548 code = rx_Read(rxcall, tbuffer, tlen);
551 ubik_dprint("Rx-read length error=%d\n", code);
555 code = (*dbase->write) (dbase, file, tbuffer, offset, tlen);
558 ubik_dprint("write failed error=%d\n", code);
566 /* sync data first, then write label and resync (resync done by setlabel call).
567 * This way, good label is only on good database. */
568 (*ubik_dbase->sync) (dbase, file);
569 code = (*ubik_dbase->setlabel) (dbase, file, avers);
570 memcpy(&ubik_dbase->version, avers, sizeof(struct ubik_version));
571 udisk_Invalidate(dbase, file); /* new dbase, flush disk buffers */
572 LWP_NoYieldSignal(&dbase->version);
577 ("Ubik: Synchronize database with server %s failed (error = %d)\n",
578 afs_inet_ntoa(otherHost), code);
580 ubik_print("Ubik: Synchronize database completed\n");
588 register struct rx_call *rxcall;
594 * Update the remote machine's addresses in my server list.
595 * Send my own addresses back to the caller of this RPC.
596 * Returns zero on success, else 1.
/* SDISK_UpdateInterfaceAddr: exchange interface address lists with a peer.
 *
 * rxcall:  the incoming rx call.
 * inAddr:  the caller's addresses; converted with htonl() before being
 *          compared with or stored into the server list.
 * outAddr: output; receives this server's ubik_host[] entries converted
 *          with ntohl().
 *
 * The server entry ts is located by matching its first address against the
 * caller's first address.  Every address the caller supplied is then checked
 * against all other server entries; an address found there, or the absence
 * of a matching entry, is reported as inconsistent cell information.
 * Otherwise addresses 1..UBIK_MAX_INTERFACE_ADDR-1 of ts are overwritten
 * with the caller's list and the result is printed. */
599 SDISK_UpdateInterfaceAddr(rxcall, inAddr, outAddr)
600 register struct rx_call *rxcall;
601 UbikInterfaceAddr *inAddr, *outAddr;
603 struct ubik_server *ts, *tmp;
604 afs_uint32 remoteAddr; /* in net byte order */
605 int i, j, found = 0, probableMatch = 0;
607 /* copy the output parameters */
608 for (i = 0; i < UBIK_MAX_INTERFACE_ADDR; i++)
609 outAddr->hostAddr[i] = ntohl(ubik_host[i]);
611 remoteAddr = htonl(inAddr->hostAddr[0]);
612 for (ts = ubik_servers; ts; ts = ts->next)
613 if (ts->addr[0] == remoteAddr) { /* both in net byte order */
619 /* verify that all addresses in the incoming RPC are
620 ** not part of other server entries in my CellServDB
/* Both loops stop as soon as found is set; the outer loop also stops at the
 * first zero entry in the caller's list. */
622 for (i = 0; !found && (i < UBIK_MAX_INTERFACE_ADDR)
623 && inAddr->hostAddr[i]; i++) {
624 remoteAddr = htonl(inAddr->hostAddr[i]);
625 for (tmp = ubik_servers; (!found && tmp); tmp = tmp->next) {
626 if (ts == tmp) /* this is my server */
628 for (j = 0; (j < UBIK_MAX_INTERFACE_ADDR) && tmp->addr[j];
630 if (remoteAddr == tmp->addr[j]) {
638 /* if (probableMatch) */
639 /* inconsistent addresses in CellServDB */
640 if (!probableMatch || found) {
641 ubik_print("Inconsistent Cell Info from server: ");
642 for (i = 0; i < UBIK_MAX_INTERFACE_ADDR && inAddr->hostAddr[i]; i++)
643 ubik_print("%s ", afs_inet_ntoa(htonl(inAddr->hostAddr[i])));
651 /* update our data structures */
/* Starts at index 1: addr[0] is the entry the server was matched on. */
652 for (i = 1; i < UBIK_MAX_INTERFACE_ADDR; i++)
653 ts->addr[i] = htonl(inAddr->hostAddr[i]);
655 ubik_print("ubik: A Remote Server has addresses: ");
656 for (i = 0; i < UBIK_MAX_INTERFACE_ADDR && ts->addr[i]; i++)
657 ubik_print("%s ", afs_inet_ntoa(ts->addr[i]));
/* NOTE(review): the line that names this function is not visible here; it is
 * presumably the definition of printServerInfo() -- confirm.
 *
 * Prints the local server list: a "Local CellServDB:" heading, then for each
 * entry of ubik_servers its index j and every non-zero address it holds. */
666 struct ubik_server *ts;
669 ubik_print("Local CellServDB:");
670 for (ts = ubik_servers; ts; ts = ts->next, j++) {
671 ubik_print("Server %d: ", j);
672 for (i = 0; (i < UBIK_MAX_INTERFACE_ADDR) && ts->addr[i]; i++)
673 ubik_print("%s ", afs_inet_ntoa(ts->addr[i]));
/* SDISK_SetVersion: relabel the database within the current remote write
 * transaction.
 *
 * rxcall:      the incoming rx call; authorized with ubik_CheckAuth() first.
 * atid:        transaction id supplied by the caller, checked through
 *              urecovery_CheckTid().
 * oldversionp: the version the caller believes this server holds; its epoch
 *              and counter must both equal those of ubik_dbVersion for the
 *              label to be set.
 * newversionp: the version to install with the database's setlabel
 *              operation, and to copy into ubik_dbase->version and
 *              ubik_dbVersion.
 *
 * Requires a current transaction of type UBIK_WRITETRANS, and is not
 * expected to arrive while this server is the sync site. */
679 SDISK_SetVersion(rxcall, atid, oldversionp, newversionp)
680 struct rx_call *rxcall;
681 struct ubik_tid *atid;
682 struct ubik_version *oldversionp;
683 struct ubik_version *newversionp;
686 struct ubik_dbase *dbase;
688 if ((code = ubik_CheckAuth(rxcall))) {
692 if (!ubik_currentTrans) {
695 /* sanity check to make sure only write trans appear here */
696 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
700 /* Should not get this for the sync site */
701 if (ubeacon_AmSyncSite()) {
705 dbase = ubik_currentTrans->dbase;
707 urecovery_CheckTid(atid);
708 if (!ubik_currentTrans) {
713 /* Set the label if its version matches the sync-site's */
714 if ((oldversionp->epoch == ubik_dbVersion.epoch)
715 && (oldversionp->counter == ubik_dbVersion.counter)) {
716 code = (*dbase->setlabel) (ubik_dbase, 0, newversionp);
718 ubik_dbase->version = *newversionp;
719 ubik_dbVersion = *newversionp;