From 8a09c220f4c5f881ea45be585b07b793038924d5 Mon Sep 17 00:00:00 2001 From: Derrick Brashear Date: Tue, 16 Feb 2010 01:13:57 -0500 Subject: [PATCH] ubik sync client error recovery give ubik server "client mode" error recovery for token errors Change-Id: Ibd6cad6ecf067da7da5724491756576d1ffedb03 Reviewed-on: http://gerrit.openafs.org/3150 Reviewed-by: Derrick Brashear Tested-by: BuildBot --- src/auth/cellconfig.c | 33 +++++++++++++++++ src/auth/cellconfig.p.h | 1 + src/ubik/Makefile.in | 2 +- src/ubik/beacon.c | 94 ++++++++++++++++++++++++++++++++++++------------- src/ubik/recovery.c | 7 ++-- src/ubik/ubik.c | 12 +++---- src/ubik/ubik.p.h | 4 ++- 7 files changed, 119 insertions(+), 34 deletions(-) diff --git a/src/auth/cellconfig.c b/src/auth/cellconfig.c index 25c0a84..5830f91 100644 --- a/src/auth/cellconfig.c +++ b/src/auth/cellconfig.c @@ -1500,6 +1500,39 @@ afsconf_GetLocalCell(struct afsconf_dir *adir, char *aname, } int +afsconf_UpToDate(struct afsconf_dir *adir) +{ + char tbuffer[256]; +#ifdef AFS_NT40_ENV + char *p; +#endif + struct stat tstat; + afs_int32 code = 0; /* default to not up to date */ + LOCK_GLOBAL_MUTEX; +#ifdef AFS_NT40_ENV + /* NT client config dir has no KeyFile; don't risk attempting open + * because there might be a random file of this name if dir is shared. + */ + if (IsClientConfigDirectory(adir->name)) { + /* Not a server, nothing to reread */ + code = 1; + } else { +#endif + strcompose(tbuffer, 256, adir->name, "/", AFSDIR_KEY_FILE, NULL); + + /* did file change? 
*/ + code = stat(tbuffer, &tstat); + if ((code == 0) && (tstat.st_mtime <= adir->timeRead)) { + code = 1; + } +#ifdef AFS_NT40_ENV + } +#endif + UNLOCK_GLOBAL_MUTEX; + return code; +} + +int afsconf_Close(struct afsconf_dir *adir) { LOCK_GLOBAL_MUTEX; diff --git a/src/auth/cellconfig.p.h b/src/auth/cellconfig.p.h index 056f7fa..65b0244 100644 --- a/src/auth/cellconfig.p.h +++ b/src/auth/cellconfig.p.h @@ -111,6 +111,7 @@ extern int afsconf_GetCellInfo(struct afsconf_dir *adir, char *acellName, extern int afsconf_GetLocalCell(struct afsconf_dir *adir, char *aname, afs_int32 alen); extern int afsconf_Close(struct afsconf_dir *adir); +extern int afsconf_UpToDate(struct afsconf_dir *adir); extern int afsconf_IntGetKeys(struct afsconf_dir *adir); extern int afsconf_GetKeys(struct afsconf_dir *adir, struct afsconf_keys *astr); diff --git a/src/ubik/Makefile.in b/src/ubik/Makefile.in index 6b8fe74..e23d85f 100644 --- a/src/ubik/Makefile.in +++ b/src/ubik/Makefile.in @@ -22,7 +22,7 @@ INCLS=${TOP_INCDIR}/lwp.h ${TOP_INCDIR}/lock.h \ ${TOP_INCDIR}/rx/rx.h ${TOP_INCDIR}/rx/xdr.h \ ${TOP_INCDIR}/lock.h ubik.h ubik_int.h -LIBS=${TOP_LIBDIR}/librx.a ${TOP_LIBDIR}/liblwp.a \ +LIBS=${TOP_LIBDIR}/libauth.a ${TOP_LIBDIR}/librx.a ${TOP_LIBDIR}/liblwp.a \ ${TOP_LIBDIR}/libcom_err.a ${TOP_LIBDIR}/libcmd.a \ ${TOP_LIBDIR}/util.a ${TOP_LIBDIR}/libsys.a ${XLIBS} diff --git a/src/ubik/beacon.c b/src/ubik/beacon.c index e803d1f..2f7d66e 100644 --- a/src/ubik/beacon.c +++ b/src/ubik/beacon.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #ifndef AFS_NT40_ENV @@ -172,6 +173,52 @@ ubeacon_InitServerList(afs_uint32 ame, afs_uint32 aservers[]) return code; } +void +ubeacon_InitSecurityClass(void) +{ + int i; + /* get the security index to use, if we can */ + if (ubik_CRXSecurityProc) { + i = (*ubik_CRXSecurityProc) (ubik_CRXSecurityRock, &ubikSecClass, + &ubikSecIndex); + } else + i = 1; + if (i) { + /* don't have sec module yet */ + ubikSecIndex = 0; + ubikSecClass = 
rxnull_NewClientSecurityObject(); + } +} + +void +ubeacon_ReinitServer(struct ubik_server *ts) +{ + if (!afsconf_UpToDate(ubik_CRXSecurityRock)) { + struct rx_connection *disk_rxcid; + struct rx_connection *vote_rxcid; + struct rx_connection *tmp; + ubeacon_InitSecurityClass(); + disk_rxcid = + rx_NewConnection(rx_HostOf(rx_PeerOf(ts->disk_rxcid)), + ubik_callPortal, DISK_SERVICE_ID, + ubikSecClass, ubikSecIndex); + if (disk_rxcid) { + tmp = ts->disk_rxcid; + ts->disk_rxcid = disk_rxcid; + rx_PutConnection(tmp); + } + vote_rxcid = + rx_NewConnection(rx_HostOf(rx_PeerOf(ts->vote_rxcid)), + ubik_callPortal, VOTE_SERVICE_ID, + ubikSecClass, ubikSecIndex); + if (vote_rxcid) { + tmp = ts->vote_rxcid; + ts->vote_rxcid = vote_rxcid; + rx_PutConnection(tmp); + } + } +} + /*! * \brief setup server list * @@ -212,17 +259,8 @@ ubeacon_InitServerListCommon(afs_uint32 ame, struct afsconf_cell *info, if ((code = verifyInterfaceAddress(&ame, info, aservers))) return code; - /* get the security index to use, if we can */ - if (ubik_CRXSecurityProc) { - i = (*ubik_CRXSecurityProc) (ubik_CRXSecurityRock, &ubikSecClass, - &ubikSecIndex); - } else - i = 1; - if (i) { - /* don't have sec module yet */ - ubikSecIndex = 0; - ubikSecClass = rxnull_NewClientSecurityObject(); - } + ubeacon_InitSecurityClass(); + magicHost = ntohl(ame); /* do comparisons in host order */ magicServer = (struct ubik_server *)0; @@ -433,18 +471,26 @@ ubeacon_Interact(void *dummy) * the vote was computed, *not* the time the vote expires. 
We compute * the latter down below if we got enough votes to go with */ if (code > 0) { - ts->lastVoteTime = code; - if (code < oldestYesVote) - oldestYesVote = code; - ts->lastVote = 1; - if (!ts->isClone) - yesVotes += 2; - if (ts->magic) - yesVotes++; /* the extra epsilon */ - ts->up = 1; /* server is up (not really necessary: recovery does this for real) */ - ts->beaconSinceDown = 1; - ubik_dprint("yes vote from host %s\n", - afs_inet_ntoa_r(ts->addr[0], hoststr)); + if ((code & ~0xff) == ERROR_TABLE_BASE_RXK) { + ubik_dprint("token error %d from host %s\n", + code, afs_inet_ntoa_r(ts->addr[0], hoststr)); + ts->up = 0; + ts->beaconSinceDown = 0; + urecovery_LostServer(ts); + } else { + ts->lastVoteTime = code; + if (code < oldestYesVote) + oldestYesVote = code; + ts->lastVote = 1; + if (!ts->isClone) + yesVotes += 2; + if (ts->magic) + yesVotes++; /* the extra epsilon */ + ts->up = 1; /* server is up (not really necessary: recovery does this for real) */ + ts->beaconSinceDown = 1; + ubik_dprint("yes vote from host %s\n", + afs_inet_ntoa_r(ts->addr[0], hoststr)); + } } else if (code == 0) { ts->lastVoteTime = temp; ts->lastVote = 0; @@ -454,7 +500,7 @@ ubeacon_Interact(void *dummy) } else if (code < 0) { ts->up = 0; ts->beaconSinceDown = 0; - urecovery_LostServer(); + urecovery_LostServer(ts); ubik_dprint("time out from %s\n", afs_inet_ntoa_r(ts->addr[0], hoststr)); } diff --git a/src/ubik/recovery.c b/src/ubik/recovery.c index e47beb5..cc5d951 100644 --- a/src/ubik/recovery.c +++ b/src/ubik/recovery.c @@ -31,6 +31,7 @@ #include #include #include +#include #define UBIK_INTERNALS #include "ubik.h" @@ -84,13 +85,15 @@ urecovery_ResetState(void) * \brief sync site * * routine called when a non-sync site server goes down; restarts recovery - * process to send missing server the new db when it comes back up. + * process to send missing server the new db when it comes back up for + * non-sync site servers. 
* * \note This routine should not do anything with variables used by non-sync site servers. */ int -urecovery_LostServer(void) +urecovery_LostServer(struct ubik_server *ts) { + ubeacon_ReinitServer(ts); #if !defined(AFS_PTHREAD_ENV) /* No corresponding LWP_WaitProcess found anywhere for this -- klm */ LWP_NoYieldSignal(&urecovery_state); diff --git a/src/ubik/ubik.c b/src/ubik/ubik.c index 082b437..bd25f48 100644 --- a/src/ubik/ubik.c +++ b/src/ubik/ubik.c @@ -163,7 +163,7 @@ ContactQuorum_NoArguments(afs_int32 (*proc)(struct rx_connection *, ubik_tid *), ts->up = 0; /* mark as down now; beacons will no longer be sent */ ts->currentDB = 0; ts->beaconSinceDown = 0; - urecovery_LostServer(); /* tell recovery to try to resend dbase later */ + urecovery_LostServer(ts); /* tell recovery to try to resend dbase later */ } else { /* success */ if (!ts->isClone) okcalls++; /* count up how many worked */ @@ -209,7 +209,7 @@ ContactQuorum_DISK_Lock(struct ubik_trans *atrans, int aflags,afs_int32 file, ts->up = 0; /* mark as down now; beacons will no longer be sent */ ts->currentDB = 0; ts->beaconSinceDown = 0; - urecovery_LostServer(); /* tell recovery to try to resend dbase later */ + urecovery_LostServer(ts); /* tell recovery to try to resend dbase later */ } else { /* success */ if (!ts->isClone) okcalls++; /* count up how many worked */ @@ -255,7 +255,7 @@ ContactQuorum_DISK_Write(struct ubik_trans *atrans, int aflags, ts->up = 0; /* mark as down now; beacons will no longer be sent */ ts->currentDB = 0; ts->beaconSinceDown = 0; - urecovery_LostServer(); /* tell recovery to try to resend dbase later */ + urecovery_LostServer(ts); /* tell recovery to try to resend dbase later */ } else { /* success */ if (!ts->isClone) okcalls++; /* count up how many worked */ @@ -301,7 +301,7 @@ ContactQuorum_DISK_Truncate(struct ubik_trans *atrans, int aflags, ts->up = 0; /* mark as down now; beacons will no longer be sent */ ts->currentDB = 0; ts->beaconSinceDown = 0; - 
urecovery_LostServer(); /* tell recovery to try to resend dbase later */ + urecovery_LostServer(ts); /* tell recovery to try to resend dbase later */ } else { /* success */ if (!ts->isClone) okcalls++; /* count up how many worked */ @@ -382,7 +382,7 @@ ContactQuorum_DISK_WriteV(struct ubik_trans *atrans, int aflags, ts->up = 0; /* mark as down now; beacons will no longer be sent */ ts->currentDB = 0; ts->beaconSinceDown = 0; - urecovery_LostServer(); /* tell recovery to try to resend dbase later */ + urecovery_LostServer(ts); /* tell recovery to try to resend dbase later */ } else { /* success */ if (!ts->isClone) okcalls++; /* count up how many worked */ @@ -429,7 +429,7 @@ ContactQuorum_DISK_SetVersion(struct ubik_trans *atrans, int aflags, ts->up = 0; /* mark as down now; beacons will no longer be sent */ ts->currentDB = 0; ts->beaconSinceDown = 0; - urecovery_LostServer(); /* tell recovery to try to resend dbase later */ + urecovery_LostServer(ts); /* tell recovery to try to resend dbase later */ } else { /* success */ if (!ts->isClone) okcalls++; /* count up how many worked */ diff --git a/src/ubik/ubik.p.h b/src/ubik/ubik.p.h index ebe209a..0f4ebb7 100644 --- a/src/ubik/ubik.p.h +++ b/src/ubik/ubik.p.h @@ -368,7 +368,7 @@ extern void uphys_invalidate(struct ubik_dbase *adbase, /*! \name recovery.c */ extern int urecovery_ResetState(void); -extern int urecovery_LostServer(void); +extern int urecovery_LostServer(struct ubik_server *server); extern int urecovery_AllBetter(struct ubik_dbase *adbase, int areadAny); extern int urecovery_AbortAll(struct ubik_dbase *adbase); @@ -417,6 +417,8 @@ extern afs_uint32 ubikGetPrimaryInterfaceAddr(afs_uint32 addr); /*! \name beacon.c */ struct afsconf_cell; +extern void ubeacon_InitSecurityClass(void); +extern void ubeacon_ReinitServer(struct ubik_server *ts); extern void ubeacon_Debug(struct ubik_debug *aparm); extern int ubeacon_AmSyncSite(void); extern int ubeacon_InitServerListByInfo(afs_uint32 ame, -- 1.9.4