2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
11 #include <afs/param.h>
15 #include <sys/types.h>
20 #include <netinet/in.h>
33 #define UBIK_INTERNALS
/*
 * Optional security hook: when ubik_CheckRXSecurityProc is non-NULL, the
 * authorization check in this file calls it with ubik_CheckRXSecurityRock
 * and the incoming rx call.
 */
36 int (*ubik_CheckRXSecurityProc)();
37 char *ubik_CheckRXSecurityRock;
/* Forward declaration of a routine declared with unspecified arguments. */
38 void printServerInfo();
/*
 * ubik_currentTrans (declared below) is the transaction the SDISK_*
 * handlers in this file operate on; they test it for NULL before use and
 * reset it to NULL after ending it.
 */
40 /* routines for handling requests remotely-submitted by the sync site. These are
41 only write transactions (we don't propagate read trans), and there is at most one
42 write transaction extant at any one time.
45 struct ubik_trans *ubik_currentTrans = 0;
/*
 * Authorization check for an incoming rx call.
 *
 * NOTE(review): the function-name line is not visible in this listing; the
 * handlers below call ubik_CheckAuth(rxcall), which is presumably this
 * routine -- confirm.
 *
 * acall - the rx call being checked.
 *
 * When a hook has been installed in ubik_CheckRXSecurityProc, it is invoked
 * with ubik_CheckRXSecurityRock and acall and its result is stored in code.
 * The value returned to the caller is not visible here.
 */
49 register struct rx_call *acall; {
50 register afs_int32 code;
51 if (ubik_CheckRXSecurityProc) {
52 code = (*ubik_CheckRXSecurityProc)(ubik_CheckRXSecurityRock, acall);
58 /* the rest of these guys handle remote execution of write
59 * transactions: this is the code executed on the other servers when a
60 * sync site is executing a write transaction.
/*
 * SDISK_Begin - start a write transaction on this server for the caller.
 *
 * rxcall - incoming rx call; passed to ubik_CheckAuth.
 * atid   - transaction id supplied by the caller; its epoch and counter are
 *          copied into the newly created transaction.
 *
 * NOTE(review): braces and return statements are not visible in this
 * listing, so error-path return values cannot be confirmed from here.
 */
62 SDISK_Begin(rxcall, atid)
63 register struct rx_call *rxcall;
64 struct ubik_tid *atid;
66 register afs_int32 code;
/* Authorization check; a non-zero code enters the failure branch. */
68 if ((code = ubik_CheckAuth(rxcall))) {
72 urecovery_CheckTid(atid);
/* A transaction left over from earlier is discarded before starting anew. */
73 if (ubik_currentTrans) {
74 /* If the thread is not waiting for lock - ok to end it */
75 if (ubik_currentTrans->locktype != LOCKWAIT) {
76 udisk_end(ubik_currentTrans);
78 ubik_currentTrans = (struct ubik_trans *) 0;
/* Create the new write transaction on ubik_dbase; it becomes ubik_currentTrans. */
80 code = udisk_begin(ubik_dbase, UBIK_WRITETRANS, &ubik_currentTrans);
81 if (!code && ubik_currentTrans) {
82 /* label this trans with the right trans id */
83 ubik_currentTrans->tid.epoch = atid->epoch;
84 ubik_currentTrans->tid.counter = atid->counter;
/*
 * SDISK_Commit - commit the current remotely driven write transaction.
 *
 * rxcall - incoming rx call; passed to ubik_CheckAuth.
 * atid   - transaction id supplied by the caller; passed to
 *          urecovery_CheckTid.
 *
 * Preconditions checked in order: authorization succeeds, a current
 * transaction exists, and it is of type UBIK_WRITETRANS. The existence of
 * the transaction is tested again after urecovery_CheckTid(atid).
 *
 * NOTE(review): braces and return statements are not visible in this
 * listing, so the codes returned on each failure cannot be confirmed here.
 */
90 SDISK_Commit(rxcall, atid)
91 register struct rx_call *rxcall;
92 struct ubik_tid *atid;
94 register afs_int32 code;
95 register struct ubik_dbase *dbase;
97 if ((code = ubik_CheckAuth(rxcall))) {
101 if (!ubik_currentTrans) {
105 * sanity check to make sure only write trans appear here
107 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
111 dbase = ubik_currentTrans->dbase;
113 urecovery_CheckTid(atid);
114 if (!ubik_currentTrans) {
119 code = udisk_commit(ubik_currentTrans);
/* Record the database version in ubik_dbVersion (any guard on this is not visible here). */
121 /* sync site should now match */
122 ubik_dbVersion = ubik_dbase->version;
/*
 * SDISK_ReleaseLocks - end the current remotely driven write transaction.
 *
 * rxcall - incoming rx call; passed to ubik_CheckAuth.
 * atid   - transaction id supplied by the caller; passed to
 *          urecovery_CheckTid.
 *
 * After the usual checks (authorization, a current transaction exists, it
 * is a UBIK_WRITETRANS, and it still exists after urecovery_CheckTid), the
 * transaction is ended with udisk_end unless its locktype is LOCKWAIT, and
 * ubik_currentTrans is reset to NULL.
 *
 * NOTE(review): return statements are not visible in this listing.
 */
128 SDISK_ReleaseLocks(rxcall, atid)
129 register struct rx_call *rxcall;
130 struct ubik_tid *atid;
132 register struct ubik_dbase *dbase;
133 register afs_int32 code;
135 if ((code = ubik_CheckAuth(rxcall))) {
139 if (!ubik_currentTrans) {
142 /* sanity check to make sure only write trans appear here */
143 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
147 dbase = ubik_currentTrans->dbase;
149 urecovery_CheckTid(atid);
150 if (!ubik_currentTrans) {
155 /* If the thread is not waiting for lock - ok to end it */
156 if (ubik_currentTrans->locktype != LOCKWAIT) {
157 udisk_end(ubik_currentTrans);
159 ubik_currentTrans = (struct ubik_trans *) 0;
/*
 * SDISK_Abort - abort the current remotely driven write transaction.
 *
 * rxcall - incoming rx call; passed to ubik_CheckAuth.
 * atid   - transaction id supplied by the caller; passed to
 *          urecovery_CheckTid.
 *
 * After the usual checks (authorization, a current transaction exists, it
 * is a UBIK_WRITETRANS, and it still exists after urecovery_CheckTid), the
 * transaction is aborted with udisk_abort, ended with udisk_end unless its
 * locktype is LOCKWAIT, and ubik_currentTrans is reset to NULL.
 *
 * NOTE(review): return statements are not visible in this listing.
 */
164 SDISK_Abort(rxcall, atid)
165 register struct rx_call *rxcall;
166 struct ubik_tid *atid;
168 register afs_int32 code;
169 register struct ubik_dbase *dbase;
171 if ((code = ubik_CheckAuth(rxcall))) {
175 if (!ubik_currentTrans) {
178 /* sanity check to make sure only write trans appear here */
179 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
183 dbase = ubik_currentTrans->dbase;
185 urecovery_CheckTid(atid);
186 if (!ubik_currentTrans) {
/* code keeps the udisk_abort result; the calls below do not overwrite it. */
191 code = udisk_abort(ubik_currentTrans);
192 /* If the thread is not waiting for lock - ok to end it */
193 if (ubik_currentTrans->locktype != LOCKWAIT) {
194 udisk_end(ubik_currentTrans);
196 ubik_currentTrans = (struct ubik_trans *) 0;
/*
 * SDISK_Lock - acquire a lock on behalf of the current write transaction.
 *
 * rxcall - incoming rx call; passed to ubik_CheckAuth.
 * atid   - transaction id supplied by the caller; passed to
 *          urecovery_CheckTid.
 * afile, apos, alen - not referenced in the visible lines (apos and alen
 *          are documented as unused).
 * atype  - lock type handed to ulock_getLock.
 *
 * After the usual checks (authorization, a current transaction exists, it
 * is a UBIK_WRITETRANS, and it still exists after urecovery_CheckTid), the
 * lock is requested with ulock_getLock(ubik_currentTrans, atype, 1). If the
 * call succeeds but ubik_currentTrans no longer equals the transaction the
 * lock was requested for, that earlier transaction is ended with udisk_end.
 *
 * NOTE(review): return statements are not visible in this listing.
 */
201 SDISK_Lock(rxcall, atid, afile, apos, alen, atype)
202 register struct rx_call *rxcall;
203 struct ubik_tid *atid;
204 afs_int32 afile, apos, alen, atype; /* apos and alen are not used */
206 register afs_int32 code;
207 register struct ubik_dbase *dbase;
208 struct ubik_trans *ubik_thisTrans;
210 if ((code = ubik_CheckAuth(rxcall))) {
213 if (!ubik_currentTrans) {
216 /* sanity check to make sure only write trans appear here */
217 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
223 dbase = ubik_currentTrans->dbase;
225 urecovery_CheckTid(atid);
226 if (!ubik_currentTrans) {
/* Snapshot the transaction so a change of ubik_currentTrans during the lock call can be detected. */
231 ubik_thisTrans = ubik_currentTrans;
232 code = ulock_getLock(ubik_currentTrans, atype, 1);
234 /* While waiting, the transaction may have been ended/
235 * aborted from under us (urecovery_CheckTid). In that
236 * case, end the transaction here.
238 if (!code && (ubik_currentTrans != ubik_thisTrans)) {
239 udisk_end(ubik_thisTrans);
/*
 * SDISK_WriteV - apply a vector of writes to the current write transaction.
 *
 * rxcall    - incoming rx call; passed to ubik_CheckAuth.
 * atid      - transaction id supplied by the caller; passed to
 *             urecovery_CheckTid.
 * io_vector - iovec_wrt_len entries, each read as a struct ubik_iovec with
 *             file, position and length fields.
 * io_buffer - iovec_buf_len bytes holding the data for all entries, laid
 *             out back to back in entry order.
 *
 * For each entry, length bytes starting at the running offset in the buffer
 * are written with udisk_write to (file, position); the offset then
 * advances by length.
 *
 * NOTE(review): the declaration of iobuf, the body of the bounds-check
 * branch and the return statements are not visible in this listing.
 */
247 /* Write a vector of data */
248 SDISK_WriteV(rxcall, atid, io_vector, io_buffer)
249 register struct rx_call *rxcall;
250 struct ubik_tid *atid;
251 iovec_wrt *io_vector;
252 iovec_buf *io_buffer;
254 afs_int32 code, i, offset;
255 struct ubik_dbase *dbase;
256 struct ubik_iovec *iovec;
259 if ((code = ubik_CheckAuth(rxcall))) {
262 if (!ubik_currentTrans) {
265 /* sanity check to make sure only write trans appear here */
266 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
270 dbase = ubik_currentTrans->dbase;
272 urecovery_CheckTid(atid);
273 if (!ubik_currentTrans) {
278 iovec = (struct ubik_iovec *)io_vector->iovec_wrt_val;
279 iobuf = (char *)io_buffer->iovec_buf_val;
280 for (i=0, offset=0; i<io_vector->iovec_wrt_len; i++) {
281 /* Sanity check for going off end of buffer */
/* NOTE(review): if length is a signed type, a negative value would pass this test -- confirm the field's type and range. */
282 if ((offset + iovec[i].length) > io_buffer->iovec_buf_len) {
285 code = udisk_write(ubik_currentTrans, iovec[i].file, &iobuf[offset],
286 iovec[i].position, iovec[i].length);
/* Step past the bytes consumed by this entry. */
290 offset += iovec[i].length;
/*
 * SDISK_Write - write one buffer into the current write transaction.
 *
 * rxcall - incoming rx call; passed to ubik_CheckAuth.
 * atid   - transaction id supplied by the caller; passed to
 *          urecovery_CheckTid.
 * afile  - file argument handed to udisk_write.
 * apos   - position argument handed to udisk_write.
 * adata  - data to write: bulkdata_len bytes at bulkdata_val.
 *
 * After the usual checks (authorization, a current transaction exists, it
 * is a UBIK_WRITETRANS, and it still exists after urecovery_CheckTid), the
 * data is written with udisk_write and its result stored in code.
 *
 * NOTE(review): return statements are not visible in this listing.
 */
297 SDISK_Write(rxcall, atid, afile, apos, adata)
298 register struct rx_call *rxcall;
299 struct ubik_tid *atid;
300 afs_int32 afile, apos;
301 register bulkdata *adata;
303 register afs_int32 code;
304 register struct ubik_dbase *dbase;
306 if ((code = ubik_CheckAuth(rxcall))) {
309 if (!ubik_currentTrans) {
312 /* sanity check to make sure only write trans appear here */
313 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
317 dbase = ubik_currentTrans->dbase;
319 urecovery_CheckTid(atid);
320 if (!ubik_currentTrans) {
324 code = udisk_write(ubik_currentTrans, afile, adata->bulkdata_val, apos, adata->bulkdata_len);
/*
 * SDISK_Truncate - truncate a file within the current write transaction.
 *
 * rxcall - incoming rx call; passed to ubik_CheckAuth.
 * atid   - transaction id supplied by the caller; passed to
 *          urecovery_CheckTid.
 * afile  - file argument handed to udisk_truncate.
 * alen   - length argument handed to udisk_truncate.
 *
 * After the usual checks (authorization, a current transaction exists, it
 * is a UBIK_WRITETRANS, and it still exists after urecovery_CheckTid), the
 * truncation is done with udisk_truncate and its result stored in code.
 *
 * NOTE(review): the declarations of afile and alen and the return
 * statements are not visible in this listing.
 */
329 SDISK_Truncate(rxcall, atid, afile, alen)
330 register struct rx_call *rxcall;
331 struct ubik_tid *atid;
335 register afs_int32 code;
336 register struct ubik_dbase *dbase;
338 if ((code = ubik_CheckAuth(rxcall))) {
341 if (!ubik_currentTrans) {
344 /* sanity check to make sure only write trans appear here */
345 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
349 dbase = ubik_currentTrans->dbase;
351 urecovery_CheckTid(atid);
352 if (!ubik_currentTrans) {
356 code = udisk_truncate(ubik_currentTrans, afile, alen);
/*
 * SDISK_GetVersion - report the label (version) of the local database.
 *
 * rxcall   - incoming rx call; passed to ubik_CheckAuth.
 * aversion - output; filled in by the database's getlabel operation.
 *
 * The request takes a separate branch when this server believes it is the
 * sync site (ubeacon_AmSyncSite); the in-line comment below explains that
 * the RPC is meant to be rejected in that case. Otherwise the label of
 * file 0 of ubik_dbase is read into aversion.
 *
 * NOTE(review): the branch bodies, the condition guarding the final
 * assignment and the return statements are not visible in this listing.
 */
361 SDISK_GetVersion(rxcall, aversion)
362 register struct rx_call *rxcall;
363 register struct ubik_version *aversion;
365 register afs_int32 code;
367 if ((code = ubik_CheckAuth(rxcall))) {
372 * If we are the sync site, recovery shouldn't be running on any
373 * other site. We shouldn't be getting this RPC as long as we are
374 * the sync site. To prevent any unforseen activity, we should
375 * reject this RPC until we have recognized that we are not the
376 * sync site anymore, and/or if we have any pending WRITE
377 * transactions that have to complete. This way we can be assured
378 * that this RPC would not block any pending transactions that
379 * should either fail or pass. If we have recognized the fact that
380 * we are not the sync site any more, all write transactions would
381 * fail with UNOQUORUM anyway.
383 if (ubeacon_AmSyncSite()) {
388 code = (*ubik_dbase->getlabel) (ubik_dbase, 0, aversion);
391 /* tell other side there's no dbase */
393 aversion->counter = 0;
/*
 * SDISK_GetFile - stream a database file to the caller over the rx call.
 *
 * rxcall  - incoming rx call; checked with ubik_CheckAuth and used as the
 *           stream that rx_Write sends on.
 * file    - file argument handed to the database's stat, read and getlabel
 *           operations.
 * version - output; filled in by the database's getlabel operation after
 *           the data has been sent.
 *
 * Visible sequence: the file is stat'ed, its size is sent first as a 32-bit
 * value converted with htonl, then data is read with the database's read
 * operation into tbuffer in pieces of at most sizeof(tbuffer) bytes and
 * each piece is sent with rx_Write.
 *
 * NOTE(review): the declarations of length, tlen and tbuffer, the
 * assignment of dbase, the loop structure, the error branches and the
 * return statements are not visible in this listing.
 */
398 SDISK_GetFile(rxcall, file, version)
399 register struct rx_call *rxcall;
400 register afs_int32 file;
401 struct ubik_version *version;
403 register afs_int32 code;
404 register struct ubik_dbase *dbase;
405 register afs_int32 offset;
406 struct ubik_stat ubikstat;
411 if ((code = ubik_CheckAuth(rxcall))) {
414 /* temporarily disabled because it causes problems for migration tool. Hey, it's just
415 * a sanity check, anyway.
416 if (ubeacon_AmSyncSite()) {
422 code = (*dbase->stat) (dbase, file, &ubikstat);
427 length = ubikstat.size;
428 tlen = htonl(length);
429 code = rx_Write(rxcall, &tlen, sizeof(afs_int32));
430 if (code != sizeof(afs_int32)) {
436 tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length);
437 code = (*dbase->read)(dbase, file, tbuffer, offset, tlen);
442 code = rx_Write(rxcall, tbuffer, tlen);
450 code = (*dbase->getlabel)(dbase, file, version); /* return the dbase, too */
/*
 * SDISK_SendFile - receive a database file from the caller and install it.
 *
 * rxcall - incoming rx call; checked with ubik_CheckAuth, used to identify
 *          the peer, and used as the stream that rx_Read reads from.
 * file   - file argument handed to the database operations.
 * length - number of bytes to receive; data is read in pieces of at most
 *          sizeof(tbuffer) bytes.
 * avers  - version to label the database with once the data is written.
 *
 * Visible sequence: the peer address is compared with the sync site
 * reported by uvote_GetSyncSite; active transactions are aborted with
 * urecovery_AbortAll; the file is truncated and labelled 0.0 while the
 * transfer is in progress; data is read with rx_Read and stored with the
 * database's write operation; finally the file is synced, labelled with
 * avers, the in-memory version is updated, disk buffers are invalidated
 * and waiters on dbase->version are signalled.
 *
 * The local variable offset is used twice: first to hold the sync-site
 * address returned by uvote_GetSyncSite, later as the offset argument to
 * the database's write operation.
 *
 * NOTE(review): the declarations of file, length, offset, tlen and tbuffer,
 * the assignment of dbase, the loop structure, the error branches and the
 * return statements are not visible in this listing.
 */
455 SDISK_SendFile(rxcall, file, length, avers)
456 register struct rx_call *rxcall;
459 struct ubik_version *avers;
461 register afs_int32 code;
462 register struct ubik_dbase *dbase;
465 struct ubik_version tversion;
467 struct rx_peer *tpeer;
468 struct rx_connection *tconn;
469 afs_uint32 otherHost;
471 /* receive the database file sent by the caller */
473 if ((code = ubik_CheckAuth(rxcall))) {
477 /* next, we do a sanity check to see if the guy sending us the database is
478 * the guy we think is the sync site. It turns out that we might not have
479 * decided yet that someone's the sync site, but they could have enough
480 * votes from others to be sync site anyway, and could send us the database
481 * in advance of getting our votes. This is fine, what we're really trying
482 * to check is that some authenticated bogon isn't sending a random database
483 * into another configuration. This could happen on a bad configuration
484 * screwup. Thus, we only object if we're sure we know who the sync site
485 * is, and it ain't the guy talking to us.
487 offset = uvote_GetSyncSite();
488 tconn = rx_ConnectionOf(rxcall);
489 tpeer = rx_PeerOf(tconn);
490 otherHost = ubikGetPrimaryInterfaceAddr(rx_HostOf(tpeer));
491 if (offset && offset != otherHost ) {
492 /* we *know* this is the wrong guy */
500 /* abort any active trans that may scribble over the database */
501 urecovery_AbortAll(dbase);
503 ubik_print("Ubik: Synchronize database with server %s\n",
504 afs_inet_ntoa(otherHost));
507 (*dbase->truncate) (dbase, file, 0); /* truncate first */
508 tversion.epoch = 0; /* start off by labelling in-transit db as invalid */
509 tversion.counter = 0;
510 (*dbase->setlabel) (dbase, file, &tversion); /* setlabel does sync */
/* Receive at most one tbuffer's worth of data and store it at the current offset. */
512 tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length);
513 code = rx_Read(rxcall, tbuffer, tlen);
519 code = (*dbase->write)(dbase, file, tbuffer, offset, tlen);
529 /* sync data first, then write label and resync (resync done by setlabel call).
530 This way, good label is only on good database. */
531 (*ubik_dbase->sync)(dbase, file);
532 code = (*ubik_dbase->setlabel)(dbase, file, avers);
/* Mirror the new label into the in-memory database version. */
533 memcpy(&ubik_dbase->version, avers, sizeof(struct ubik_version));
534 udisk_Invalidate(dbase, file); /* new dbase, flush disk buffers */
535 LWP_NoYieldSignal(&dbase->version);
/* Report the outcome; the condition selecting between the two messages is not visible here. */
539 ubik_print("Ubik: Synchronize database with server %s failed (error = %d)\n",
540 afs_inet_ntoa(otherHost), code);
542 ubik_print("Ubik: Synchronize database completed\n");
/*
 * NOTE(review): parameter declaration of a definition whose name line and
 * body are not visible in this listing; rxcall is an rx call pointer, as in
 * the other handlers in this file.
 */
549 register struct rx_call *rxcall;
555 * Update remote machines addresses in my server list
556 * Send back my addresses to caller of this RPC
557 * Returns zero on success, else 1.
/*
 * SDISK_UpdateInterfaceAddr - exchange interface address lists with a peer.
 *
 * rxcall  - incoming rx call (not referenced in the visible lines).
 * inAddr  - the caller's addresses; converted with htonl before being
 *           compared with or stored in ubik_servers entries.
 * outAddr - output; receives this server's ubik_host[] entries converted
 *           with ntohl.
 *
 * The caller is matched to the ubik_servers entry whose addr[0] equals the
 * caller's first address. The remaining incoming addresses are then checked
 * against every other server entry; when no entry matched or an address
 * belongs to another entry, the incoming list is printed as inconsistent.
 * Otherwise the matched entry's additional addresses are replaced with the
 * caller's.
 *
 * NOTE(review): braces, the statements that set probableMatch and found,
 * and the return statements are not visible in this listing.
 */
559 SDISK_UpdateInterfaceAddr(rxcall, inAddr, outAddr)
560 register struct rx_call *rxcall;
561 UbikInterfaceAddr *inAddr, *outAddr;
563 struct ubik_server *ts, *tmp;
564 afs_uint32 remoteAddr; /* in net byte order */
565 int i, j, found=0, probableMatch=0;
567 /* copy the output parameters */
568 for ( i=0; i < UBIK_MAX_INTERFACE_ADDR; i++)
569 outAddr->hostAddr[i] = ntohl(ubik_host[i]);
/* Look up the caller by its first address. */
571 remoteAddr = htonl(inAddr->hostAddr[0]);
572 for(ts = ubik_servers; ts; ts=ts->next)
573 if ( ts->addr[0] == remoteAddr ) /* both in net byte order */
581 /* verify that all addresses in the incoming RPC are
582 ** not part of other server entries in my CellServDB
584 for ( i=0; !found && (i<UBIK_MAX_INTERFACE_ADDR)
585 && inAddr->hostAddr[i]; i++)
587 remoteAddr = htonl(inAddr->hostAddr[i]);
588 for(tmp = ubik_servers; (!found && tmp); tmp=tmp->next)
590 if ( ts == tmp ) /* this is my server */
592 for ( j=0; (j<UBIK_MAX_INTERFACE_ADDR) && tmp->addr[j]; j++)
593 if ( remoteAddr == tmp->addr[j] )
600 } /* if (probableMatch) */
602 /* inconsistent addresses in CellServDB */
603 if ( !probableMatch || found )
605 ubik_print("Inconsistent Cell Info from server: ");
606 for ( i=0; i < UBIK_MAX_INTERFACE_ADDR && inAddr->hostAddr[i]; i++)
607 ubik_print("%s ", afs_inet_ntoa(htonl(inAddr->hostAddr[i])));
613 /* update our data structures */
/* Index 0 is left untouched: it is the address the entry was matched on above. */
614 for ( i=1; i < UBIK_MAX_INTERFACE_ADDR; i++)
615 ts->addr[i] = htonl(inAddr->hostAddr[i]);
617 ubik_print("ubik: A Remote Server has addresses: ");
618 for ( i=0; i < UBIK_MAX_INTERFACE_ADDR && ts->addr[i]; i++)
619 ubik_print("%s ", afs_inet_ntoa(ts->addr[i]));
/*
 * Print every entry of ubik_servers: a heading, then for each server its
 * index j followed by its non-zero addresses (at most
 * UBIK_MAX_INTERFACE_ADDR of them).
 *
 * NOTE(review): the function-name line and the declarations and
 * initialisation of i and j are not visible in this listing; this is
 * presumably the printServerInfo routine forward-declared near the top of
 * the file -- confirm.
 */
628 struct ubik_server *ts;
631 ubik_print("Local CellServDB:");
632 for ( ts=ubik_servers; ts; ts= ts->next, j++)
634 ubik_print("Server %d: ", j);
635 for ( i=0; (i<UBIK_MAX_INTERFACE_ADDR) && ts->addr[i]; i++)
636 ubik_print("%s ", afs_inet_ntoa(ts->addr[i]));
/*
 * SDISK_SetVersion - relabel the local database within the current write
 * transaction.
 *
 * rxcall      - incoming rx call; passed to ubik_CheckAuth.
 * atid        - transaction id supplied by the caller; passed to
 *               urecovery_CheckTid.
 * oldversionp - version the caller expects this server to hold; compared
 *               with ubik_dbVersion.
 * newversionp - version to install.
 *
 * After the usual checks (authorization, a current transaction exists, it
 * is a UBIK_WRITETRANS, this server is not the sync site, and the
 * transaction still exists after urecovery_CheckTid), the new label is
 * written with the database's setlabel operation only when oldversionp
 * equals ubik_dbVersion in both epoch and counter; ubik_dbase->version and
 * ubik_dbVersion are then set to *newversionp.
 *
 * NOTE(review): the declaration of code, the condition guarding the two
 * final assignments, the mismatch branch and the return statements are not
 * visible in this listing.
 */
641 SDISK_SetVersion(rxcall, atid, oldversionp, newversionp)
642 struct rx_call *rxcall;
643 struct ubik_tid *atid;
644 struct ubik_version *oldversionp;
645 struct ubik_version *newversionp;
648 struct ubik_dbase *dbase;
650 if ((code = ubik_CheckAuth(rxcall))) {
654 if (!ubik_currentTrans) {
657 /* sanity check to make sure only write trans appear here */
658 if (ubik_currentTrans->type != UBIK_WRITETRANS) {
662 /* Should not get this for the sync site */
663 if (ubeacon_AmSyncSite()) {
667 dbase = ubik_currentTrans->dbase;
669 urecovery_CheckTid(atid);
670 if (!ubik_currentTrans) {
675 /* Set the label if its version matches the sync-site's */
676 if ((oldversionp->epoch == ubik_dbVersion.epoch) &&
677 (oldversionp->counter == ubik_dbVersion.counter)) {
678 code = (*dbase->setlabel) (ubik_dbase, 0, newversionp);
/* Keep both in-memory copies of the version in step with the new label. */
680 ubik_dbase->version = *newversionp;
681 ubik_dbVersion = *newversionp;