2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include <afs/param.h>
16 #include <sys/types.h>
21 #include <netinet/in.h>
33 #include <afs/afsutil.h>
35 #define UBIK_INTERNALS
/* Optional security-check hook. When it is non-null, the authorization
 * code in this file calls it with ubik_CheckRXSecurityRock and the incoming
 * rx call and keeps its result as the status code. */
38 int (*ubik_CheckRXSecurityProc) ();
/* Opaque value handed back unchanged as the first argument of
 * ubik_CheckRXSecurityProc. */
39 char *ubik_CheckRXSecurityRock;
/* Forward declaration. NOTE(review): presumably the routine later in this
 * file that prints the local server list; its header line is not visible
 * in this excerpt -- confirm. */
40 void printServerInfo();
42 /* routines for handling requests remotely-submitted by the sync site. These are
43 only write transactions (we don't propagate read trans), and there is at most one
44 write transaction extant at any one time.
47 struct ubik_trans *ubik_currentTrans = 0;
/*
 * Authorization gate for an incoming call (presumably ubik_CheckAuth, the
 * name the SDISK_* handlers call; the header line is not visible here).
 *
 * acall: the rx call being vetted.
 *
 * If a security hook is installed in ubik_CheckRXSecurityProc, it is run
 * with the registered rock and acall and its result is stored in code.
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers branch on a non-zero result.
 */
51 register struct rx_call *acall;
53 register afs_int32 code;
54 if (ubik_CheckRXSecurityProc) {
55 code = (*ubik_CheckRXSecurityProc) (ubik_CheckRXSecurityRock, acall);
61 /* the rest of these guys handle remote execution of write
62 * transactions: this is the code executed on the other servers when a
63 * sync site is executing a write transaction.
/*
 * SDISK_Begin - open the write transaction identified by atid on this server.
 *
 * rxcall: incoming rx call, vetted through ubik_CheckAuth; a non-zero result
 *         takes the early-exit branch (its body is not visible here).
 * atid:   transaction id; its epoch and counter are copied onto the newly
 *         created transaction.
 *
 * A transaction still recorded in ubik_currentTrans is ended and the pointer
 * cleared before udisk_begin opens a UBIK_WRITETRANS on ubik_dbase.
 * NOTE(review): the return statement and any database locking around this
 * sequence are not visible in this excerpt -- confirm against the full file.
 */
66 SDISK_Begin(rxcall, atid)
67 register struct rx_call *rxcall;
68 struct ubik_tid *atid;
70 register afs_int32 code;
72 if ((code = ubik_CheckAuth(rxcall))) {
/* Give recovery a chance to compare atid with the transaction on record. */
76 urecovery_CheckTid(atid);
77 if (ubik_currentTrans) {
78 /* If the thread is not waiting for lock - ok to end it */
/* Without UBIK_PAUSE, udisk_end is skipped while the old transaction is in
 * LOCKWAIT; with UBIK_PAUSE defined it is always ended. */
79 #if !defined(UBIK_PAUSE)
80 if (ubik_currentTrans->locktype != LOCKWAIT) {
81 #endif /* UBIK_PAUSE */
82 udisk_end(ubik_currentTrans);
83 #if !defined(UBIK_PAUSE)
85 #endif /* UBIK_PAUSE */
/* Forget the old transaction before starting the new one. */
86 ubik_currentTrans = (struct ubik_trans *)0;
88 code = udisk_begin(ubik_dbase, UBIK_WRITETRANS, &ubik_currentTrans);
89 if (!code && ubik_currentTrans) {
90 /* label this trans with the right trans id */
91 ubik_currentTrans->tid.epoch = atid->epoch;
92 ubik_currentTrans->tid.counter = atid->counter;
/*
 * SDISK_Commit - commit the current remote write transaction.
 *
 * rxcall: incoming rx call, vetted through ubik_CheckAuth.
 * atid:   transaction id passed to urecovery_CheckTid.
 *
 * Requires a current transaction of type UBIK_WRITETRANS. ubik_currentTrans
 * is tested again after urecovery_CheckTid, so that call may clear it.
 * The transaction is then committed with udisk_commit and ubik_dbVersion is
 * refreshed from ubik_dbase->version.
 * NOTE(review): the bodies of the failure branches, any condition guarding
 * the version update, and the return statement are not visible here.
 */
100 SDISK_Commit(rxcall, atid)
101 register struct rx_call *rxcall;
102 struct ubik_tid *atid;
104 register afs_int32 code;
105 register struct ubik_dbase *dbase;
107 if ((code = ubik_CheckAuth(rxcall))) {
111 if (!ubik_currentTrans) {
115 * sanity check to make sure only write trans appear here
117 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
121 dbase = ubik_currentTrans->dbase;
123 urecovery_CheckTid(atid);
124 if (!ubik_currentTrans) {
129 code = udisk_commit(ubik_currentTrans);
131 /* sync site should now match */
132 ubik_dbVersion = ubik_dbase->version;
/*
 * SDISK_ReleaseLocks - finish the current remote write transaction.
 *
 * rxcall: incoming rx call, vetted through ubik_CheckAuth.
 * atid:   transaction id passed to urecovery_CheckTid.
 *
 * Requires a current transaction of type UBIK_WRITETRANS that is still on
 * record after urecovery_CheckTid. The transaction is ended with udisk_end
 * and ubik_currentTrans is reset to null.
 * NOTE(review): failure-branch bodies and the return statement are not
 * visible in this excerpt.
 */
139 SDISK_ReleaseLocks(rxcall, atid)
140 register struct rx_call *rxcall;
141 struct ubik_tid *atid;
143 register struct ubik_dbase *dbase;
144 register afs_int32 code;
146 if ((code = ubik_CheckAuth(rxcall))) {
150 if (!ubik_currentTrans) {
153 /* sanity check to make sure only write trans appear here */
154 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
158 dbase = ubik_currentTrans->dbase;
160 urecovery_CheckTid(atid);
161 if (!ubik_currentTrans) {
166 /* If the thread is not waiting for lock - ok to end it */
/* Without UBIK_PAUSE, udisk_end is skipped while the transaction is in
 * LOCKWAIT; with UBIK_PAUSE defined it is always ended. */
167 #if !defined(UBIK_PAUSE)
168 if (ubik_currentTrans->locktype != LOCKWAIT) {
169 #endif /* UBIK_PAUSE */
170 udisk_end(ubik_currentTrans);
171 #if !defined(UBIK_PAUSE)
173 #endif /* UBIK_PAUSE */
174 ubik_currentTrans = (struct ubik_trans *)0;
/*
 * SDISK_Abort - abort the current remote write transaction.
 *
 * rxcall: incoming rx call, vetted through ubik_CheckAuth.
 * atid:   transaction id passed to urecovery_CheckTid.
 *
 * Requires a current transaction of type UBIK_WRITETRANS that is still on
 * record after urecovery_CheckTid. udisk_abort is called first and its
 * result kept in code; the transaction is then ended and ubik_currentTrans
 * reset to null.
 * NOTE(review): failure-branch bodies and the return statement are not
 * visible in this excerpt.
 */
180 SDISK_Abort(rxcall, atid)
181 register struct rx_call *rxcall;
182 struct ubik_tid *atid;
184 register afs_int32 code;
185 register struct ubik_dbase *dbase;
187 if ((code = ubik_CheckAuth(rxcall))) {
191 if (!ubik_currentTrans) {
194 /* sanity check to make sure only write trans appear here */
195 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
199 dbase = ubik_currentTrans->dbase;
201 urecovery_CheckTid(atid);
202 if (!ubik_currentTrans) {
207 code = udisk_abort(ubik_currentTrans);
208 /* If the thread is not waiting for lock - ok to end it */
/* Without UBIK_PAUSE, udisk_end is skipped while the transaction is in
 * LOCKWAIT; with UBIK_PAUSE defined it is always ended. */
209 #if !defined(UBIK_PAUSE)
210 if (ubik_currentTrans->locktype != LOCKWAIT) {
211 #endif /* UBIK_PAUSE */
212 udisk_end(ubik_currentTrans);
213 #if !defined(UBIK_PAUSE)
215 #endif /* UBIK_PAUSE */
216 ubik_currentTrans = (struct ubik_trans *)0;
/*
 * SDISK_Lock - take a lock of type atype for the current remote write
 * transaction.
 *
 * rxcall: incoming rx call, vetted through ubik_CheckAuth.
 * atid:   transaction id passed to urecovery_CheckTid.
 * afile, apos, alen: no use of these is visible in this excerpt.
 * atype:  lock type forwarded to ulock_getLock.
 *
 * Requires a current transaction of type UBIK_WRITETRANS that is still on
 * record after urecovery_CheckTid. The transaction pointer is remembered in
 * ubik_thisTrans before ulock_getLock is called. If the call succeeds but
 * ubik_currentTrans no longer equals the remembered pointer, the remembered
 * transaction is ended with udisk_end.
 * NOTE(review): failure-branch bodies, whatever follows the udisk_end call,
 * and the return statement are not visible in this excerpt.
 */
222 SDISK_Lock(rxcall, atid, afile, apos, alen, atype)
223 register struct rx_call *rxcall;
224 struct ubik_tid *atid;
225 afs_int32 afile, apos, alen, atype; /* apos and alen are not used */
227 register afs_int32 code;
228 register struct ubik_dbase *dbase;
229 struct ubik_trans *ubik_thisTrans;
231 if ((code = ubik_CheckAuth(rxcall))) {
234 if (!ubik_currentTrans) {
237 /* sanity check to make sure only write trans appear here */
238 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
244 dbase = ubik_currentTrans->dbase;
246 urecovery_CheckTid(atid);
247 if (!ubik_currentTrans) {
252 ubik_thisTrans = ubik_currentTrans;
253 code = ulock_getLock(ubik_currentTrans, atype, 1);
255 /* While waiting, the transaction may have been ended/
256 * aborted from under us (urecovery_CheckTid). In that
257 * case, end the transaction here.
259 if (!code && (ubik_currentTrans != ubik_thisTrans)) {
260 udisk_end(ubik_thisTrans);
268 /* Write a vector of data */
/*
 * SDISK_WriteV - apply a batch of writes to the current remote write
 * transaction.
 *
 * rxcall:    incoming rx call, vetted through ubik_CheckAuth.
 * atid:      transaction id passed to urecovery_CheckTid.
 * io_vector: iovec_wrt_len entries, each read for file, position and length.
 * io_buffer: the data for all entries packed back to back; entry i starts
 *            at the running sum of the preceding lengths.
 *
 * Requires a current transaction of type UBIK_WRITETRANS that is still on
 * record after urecovery_CheckTid. Each entry is handed to udisk_write in
 * order.
 * NOTE(review): the declaration of iobuf, the bodies of the failure
 * branches, the handling of the udisk_write result and the return statement
 * are not visible in this excerpt.
 */
270 SDISK_WriteV(rxcall, atid, io_vector, io_buffer)
271 register struct rx_call *rxcall;
272 struct ubik_tid *atid;
273 iovec_wrt *io_vector;
274 iovec_buf *io_buffer;
276 afs_int32 code, i, offset;
277 struct ubik_dbase *dbase;
278 struct ubik_iovec *iovec;
281 if ((code = ubik_CheckAuth(rxcall))) {
284 if (!ubik_currentTrans) {
287 /* sanity check to make sure only write trans appear here */
288 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
292 dbase = ubik_currentTrans->dbase;
294 urecovery_CheckTid(atid);
295 if (!ubik_currentTrans) {
300 iovec = (struct ubik_iovec *)io_vector->iovec_wrt_val;
301 iobuf = (char *)io_buffer->iovec_buf_val;
302 for (i = 0, offset = 0; i < io_vector->iovec_wrt_len; i++) {
303 /* Sanity check for going off end of buffer */
/* NOTE(review): only the upper bound is tested here; whether a negative or
 * overflowing length can reach this point depends on types not visible in
 * this excerpt -- confirm. */
304 if ((offset + iovec[i].length) > io_buffer->iovec_buf_len) {
308 udisk_write(ubik_currentTrans, iovec[i].file, &iobuf[offset],
309 iovec[i].position, iovec[i].length);
/* Advance to the start of the next entry in the packed buffer. */
314 offset += iovec[i].length;
/*
 * SDISK_Write - apply a single write to the current remote write transaction.
 *
 * rxcall: incoming rx call, vetted through ubik_CheckAuth.
 * atid:   transaction id passed to urecovery_CheckTid.
 * afile:  file argument forwarded to udisk_write.
 * apos:   position argument forwarded to udisk_write.
 * adata:  bulkdata_val and bulkdata_len supply the data pointer and length.
 *
 * Requires a current transaction of type UBIK_WRITETRANS that is still on
 * record after urecovery_CheckTid.
 * NOTE(review): failure-branch bodies, the handling of the udisk_write
 * result and the return statement are not visible in this excerpt.
 */
322 SDISK_Write(rxcall, atid, afile, apos, adata)
323 register struct rx_call *rxcall;
324 struct ubik_tid *atid;
325 afs_int32 afile, apos;
326 register bulkdata *adata;
328 register afs_int32 code;
329 register struct ubik_dbase *dbase;
331 if ((code = ubik_CheckAuth(rxcall))) {
334 if (!ubik_currentTrans) {
337 /* sanity check to make sure only write trans appear here */
338 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
342 dbase = ubik_currentTrans->dbase;
344 urecovery_CheckTid(atid);
345 if (!ubik_currentTrans) {
350 udisk_write(ubik_currentTrans, afile, adata->bulkdata_val, apos,
351 adata->bulkdata_len);
/*
 * SDISK_Truncate - truncate a file within the current remote write
 * transaction.
 *
 * rxcall: incoming rx call, vetted through ubik_CheckAuth.
 * atid:   transaction id passed to urecovery_CheckTid.
 * afile:  file argument forwarded to udisk_truncate.
 * alen:   length argument forwarded to udisk_truncate.
 *
 * Requires a current transaction of type UBIK_WRITETRANS that is still on
 * record after urecovery_CheckTid; the udisk_truncate result is kept in code.
 * NOTE(review): the declarations of afile and alen, the failure-branch
 * bodies and the return statement are not visible in this excerpt.
 */
357 SDISK_Truncate(rxcall, atid, afile, alen)
358 register struct rx_call *rxcall;
359 struct ubik_tid *atid;
363 register afs_int32 code;
364 register struct ubik_dbase *dbase;
366 if ((code = ubik_CheckAuth(rxcall))) {
369 if (!ubik_currentTrans) {
372 /* sanity check to make sure only write trans appear here */
373 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
377 dbase = ubik_currentTrans->dbase;
379 urecovery_CheckTid(atid);
380 if (!ubik_currentTrans) {
384 code = udisk_truncate(ubik_currentTrans, afile, alen);
/*
 * SDISK_GetVersion - report the version label of the local database.
 *
 * rxcall:   incoming rx call, vetted through ubik_CheckAuth.
 * aversion: output; filled by the getlabel operation of ubik_dbase for
 *           file 0.
 *
 * The call takes a separate branch when ubeacon_AmSyncSite reports that
 * this server is the sync site (the rationale is given in the comment
 * inside the function). On the path that tells the caller there is no
 * database, aversion->counter is set to 0.
 * NOTE(review): the bodies of the auth and sync-site branches, the condition
 * that selects the no-database path, and the return statement are not
 * visible in this excerpt.
 */
390 SDISK_GetVersion(rxcall, aversion)
391 register struct rx_call *rxcall;
392 register struct ubik_version *aversion;
394 register afs_int32 code;
396 if ((code = ubik_CheckAuth(rxcall))) {
401 * If we are the sync site, recovery shouldn't be running on any
402 * other site. We shouldn't be getting this RPC as long as we are
403 * the sync site. To prevent any unforseen activity, we should
404 * reject this RPC until we have recognized that we are not the
405 * sync site anymore, and/or if we have any pending WRITE
406 * transactions that have to complete. This way we can be assured
407 * that this RPC would not block any pending transactions that
408 * should either fail or pass. If we have recognized the fact that
409 * we are not the sync site any more, all write transactions would
410 * fail with UNOQUORUM anyway.
412 if (ubeacon_AmSyncSite()) {
417 code = (*ubik_dbase->getlabel) (ubik_dbase, 0, aversion);
420 /* tell other side there's no dbase */
422 aversion->counter = 0;
/*
 * SDISK_GetFile - stream one database file to the caller over the rx call.
 *
 * rxcall:  incoming rx call, vetted through ubik_CheckAuth and used as the
 *          output stream.
 * file:    file number passed to the stat, read and getlabel operations.
 * version: output; filled by the getlabel operation after the data is sent.
 *
 * The file size comes from the stat operation. It is sent first as a 32-bit
 * value converted with htonl, and that rx_Write must report
 * sizeof(afs_int32). The contents then go out in pieces of at most
 * sizeof(tbuffer) bytes, each read with the read operation and written with
 * rx_Write.
 * NOTE(review): the declarations of tbuffer, tlen and length, the assignment
 * of dbase, the loop header, all error branches and the return statement are
 * not visible in this excerpt.
 */
428 SDISK_GetFile(rxcall, file, version)
429 register struct rx_call *rxcall;
430 register afs_int32 file;
431 struct ubik_version *version;
433 register afs_int32 code;
434 register struct ubik_dbase *dbase;
435 register afs_int32 offset;
436 struct ubik_stat ubikstat;
441 if ((code = ubik_CheckAuth(rxcall))) {
444 /* temporarily disabled because it causes problems for migration tool. Hey, it's just
445 * a sanity check, anyway.
446 if (ubeacon_AmSyncSite()) {
452 code = (*dbase->stat) (dbase, file, &ubikstat);
457 length = ubikstat.size;
458 tlen = htonl(length);
459 code = rx_Write(rxcall, &tlen, sizeof(afs_int32));
460 if (code != sizeof(afs_int32)) {
466 tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length);
467 code = (*dbase->read) (dbase, file, tbuffer, offset, tlen);
472 code = rx_Write(rxcall, tbuffer, tlen);
480 code = (*dbase->getlabel) (dbase, file, version); /* return the dbase, too */
/*
 * SDISK_SendFile - receive a complete database file from the caller and
 * install it locally.
 *
 * rxcall: incoming rx call, vetted through ubik_CheckAuth and used as the
 *         input stream.
 * file:   file number passed to the database operations.
 * length: number of bytes to read from the call.
 * avers:  version label written to the file, and copied into
 *         ubik_dbase->version, once the data has been synced.
 *
 * Steps visible here: compare the sender address with the known sync site,
 * abort active transactions, truncate the file and label it 0.0 while the
 * transfer is in flight, copy the data in pieces of at most sizeof(tbuffer)
 * bytes, sync, write the real label, invalidate the disk buffers and signal
 * waiters on dbase->version.
 * NOTE(review): the declarations of length, offset, tlen and tbuffer, the
 * assignment of dbase, the loop header, all error branches and the return
 * statement are not visible in this excerpt.
 */
486 SDISK_SendFile(rxcall, file, length, avers)
487 register struct rx_call *rxcall;
490 struct ubik_version *avers;
492 register afs_int32 code;
493 register struct ubik_dbase *dbase;
496 struct ubik_version tversion;
498 struct rx_peer *tpeer;
499 struct rx_connection *tconn;
500 afs_uint32 otherHost;
502 /* send the file back to the requester */
504 if ((code = ubik_CheckAuth(rxcall))) {
508 /* next, we do a sanity check to see if the guy sending us the database is
509 * the guy we think is the sync site. It turns out that we might not have
510 * decided yet that someone's the sync site, but they could have enough
511 * votes from others to be sync site anyway, and could send us the database
512 * in advance of getting our votes. This is fine, what we're really trying
513 * to check is that some authenticated bogon isn't sending a random database
514 * into another configuration. This could happen on a bad configuration
515 * screwup. Thus, we only object if we're sure we know who the sync site
516 * is, and it ain't the guy talking to us.
518 offset = uvote_GetSyncSite();
519 tconn = rx_ConnectionOf(rxcall);
520 tpeer = rx_PeerOf(tconn);
521 otherHost = ubikGetPrimaryInterfaceAddr(rx_HostOf(tpeer));
522 if (offset && offset != otherHost) {
523 /* we *know* this is the wrong guy */
531 /* abort any active trans that may scribble over the database */
532 urecovery_AbortAll(dbase);
534 ubik_print("Ubik: Synchronize database with server %s\n",
535 afs_inet_ntoa(otherHost));
/* From here on offset is presumably reused as the write position in the
 * file; its reset is not visible in this excerpt. */
538 (*dbase->truncate) (dbase, file, 0); /* truncate first */
539 tversion.epoch = 0; /* start off by labelling in-transit db as invalid */
540 tversion.counter = 0;
541 (*dbase->setlabel) (dbase, file, &tversion); /* setlabel does sync */
542 memcpy(&ubik_dbase->version, &tversion, sizeof(struct ubik_version));
/* Copy one piece of at most sizeof(tbuffer) bytes from the call to the file. */
544 tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length);
545 code = rx_Read(rxcall, tbuffer, tlen);
551 code = (*dbase->write) (dbase, file, tbuffer, offset, tlen);
561 /* sync data first, then write label and resync (resync done by setlabel call).
562 * This way, good label is only on good database. */
563 (*ubik_dbase->sync) (dbase, file);
564 code = (*ubik_dbase->setlabel) (dbase, file, avers);
565 memcpy(&ubik_dbase->version, avers, sizeof(struct ubik_version));
566 udisk_Invalidate(dbase, file); /* new dbase, flush disk buffers */
567 LWP_NoYieldSignal(&dbase->version);
/* Log the outcome; the condition choosing between the two messages is not
 * visible in this excerpt. */
572 ("Ubik: Synchronize database with server %s failed (error = %d)\n",
573 afs_inet_ntoa(otherHost), code);
575 ubik_print("Ubik: Synchronize database completed\n");
583 register struct rx_call *rxcall;
589 * Update remote machines addresses in my server list
590 * Send back my addresses to caller of this RPC
591 * Returns zero on success, else 1.
/*
 * SDISK_UpdateInterfaceAddr - exchange interface address lists with a peer.
 *
 * rxcall:  incoming rx call (no use of it is visible in this excerpt).
 * inAddr:  addresses of the calling server; each entry is converted with
 *          htonl before it is compared with or stored in the server table.
 * outAddr: output; receives every entry of ubik_host converted with ntohl.
 *
 * The caller is located in ubik_servers by matching its first address
 * against addr[0] of each entry. Its remaining addresses are then checked
 * against every other server entry. If no entry matched or a conflict was
 * found, the offending addresses are printed. Otherwise addr[1] and up of
 * the matching entry are overwritten with the caller's list and the result
 * is printed.
 * NOTE(review): the statements that set found and probableMatch, the loop
 * exits and the return statements are not visible in this excerpt.
 */
594 SDISK_UpdateInterfaceAddr(rxcall, inAddr, outAddr)
595 register struct rx_call *rxcall;
596 UbikInterfaceAddr *inAddr, *outAddr;
598 struct ubik_server *ts, *tmp;
599 afs_uint32 remoteAddr; /* in net byte order */
600 int i, j, found = 0, probableMatch = 0;
602 /* copy the output parameters */
603 for (i = 0; i < UBIK_MAX_INTERFACE_ADDR; i++)
604 outAddr->hostAddr[i] = ntohl(ubik_host[i]);
/* Look up the calling server by its first address. */
606 remoteAddr = htonl(inAddr->hostAddr[0]);
607 for (ts = ubik_servers; ts; ts = ts->next)
608 if (ts->addr[0] == remoteAddr) { /* both in net byte order */
614 /* verify that all addresses in the incoming RPC are
615 ** not part of other server entries in my CellServDB
617 for (i = 0; !found && (i < UBIK_MAX_INTERFACE_ADDR)
618 && inAddr->hostAddr[i]; i++) {
619 remoteAddr = htonl(inAddr->hostAddr[i]);
620 for (tmp = ubik_servers; (!found && tmp); tmp = tmp->next) {
621 if (ts == tmp) /* this is my server */
623 for (j = 0; (j < UBIK_MAX_INTERFACE_ADDR) && tmp->addr[j];
625 if (remoteAddr == tmp->addr[j]) {
633 /* if (probableMatch) */
634 /* inconsistent addresses in CellServDB */
635 if (!probableMatch || found) {
636 ubik_print("Inconsistent Cell Info from server: ");
637 for (i = 0; i < UBIK_MAX_INTERFACE_ADDR && inAddr->hostAddr[i]; i++)
638 ubik_print("%s ", afs_inet_ntoa(htonl(inAddr->hostAddr[i])));
646 /* update our data structures */
/* Slot 0 is left untouched; it is the address the entry was matched on. */
647 for (i = 1; i < UBIK_MAX_INTERFACE_ADDR; i++)
648 ts->addr[i] = htonl(inAddr->hostAddr[i]);
650 ubik_print("ubik: A Remote Server has addresses: ");
651 for (i = 0; i < UBIK_MAX_INTERFACE_ADDR && ts->addr[i]; i++)
652 ubik_print("%s ", afs_inet_ntoa(ts->addr[i]));
/*
 * Print the local server table (presumably the body of printServerInfo,
 * which is declared near the top of this file; the header line is not
 * visible here).
 *
 * After a banner line, each entry of ubik_servers is printed as a numbered
 * line listing its addresses up to the first zero slot, at most
 * UBIK_MAX_INTERFACE_ADDR of them.
 * NOTE(review): the declarations and initial values of i and j are not
 * visible in this excerpt.
 */
661 struct ubik_server *ts;
664 ubik_print("Local CellServDB:");
665 for (ts = ubik_servers; ts; ts = ts->next, j++) {
666 ubik_print("Server %d: ", j);
667 for (i = 0; (i < UBIK_MAX_INTERFACE_ADDR) && ts->addr[i]; i++)
668 ubik_print("%s ", afs_inet_ntoa(ts->addr[i]));
/*
 * SDISK_SetVersion - relabel the local database as part of the current
 * remote write transaction.
 *
 * rxcall:      incoming rx call, vetted through ubik_CheckAuth.
 * atid:        transaction id passed to urecovery_CheckTid.
 * oldversionp: version the caller expects this server to hold.
 * newversionp: version to install.
 *
 * Requires a current transaction of type UBIK_WRITETRANS that is still on
 * record after urecovery_CheckTid, and takes a separate branch when this
 * server is itself the sync site. The new label is written with the setlabel
 * operation only when oldversionp matches ubik_dbVersion in both epoch and
 * counter; ubik_dbase->version and ubik_dbVersion are then set to the new
 * version.
 * NOTE(review): the declaration of code, the failure-branch bodies, any
 * check of the setlabel result before the in-memory versions are updated,
 * the handling of a version mismatch and the return statement are not
 * visible in this excerpt.
 */
674 SDISK_SetVersion(rxcall, atid, oldversionp, newversionp)
675 struct rx_call *rxcall;
676 struct ubik_tid *atid;
677 struct ubik_version *oldversionp;
678 struct ubik_version *newversionp;
681 struct ubik_dbase *dbase;
683 if ((code = ubik_CheckAuth(rxcall))) {
687 if (!ubik_currentTrans) {
690 /* sanity check to make sure only write trans appear here */
691 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
695 /* Should not get this for the sync site */
696 if (ubeacon_AmSyncSite()) {
700 dbase = ubik_currentTrans->dbase;
702 urecovery_CheckTid(atid);
703 if (!ubik_currentTrans) {
708 /* Set the label if its version matches the sync-site's */
709 if ((oldversionp->epoch == ubik_dbVersion.epoch)
710 && (oldversionp->counter == ubik_dbVersion.counter)) {
711 code = (*dbase->setlabel) (ubik_dbase, 0, newversionp);
713 ubik_dbase->version = *newversionp;
714 ubik_dbVersion = *newversionp;