#include <afsconfig.h>
#include "afs/param.h"
-RCSID
- ("$Header$");
#include "afs/stds.h"
#include "afs/sysincludes.h" /* Standard vendor system headers */
#ifdef AFS_SGI62_ENV
#include "h/hashing.h"
#endif
-#if !defined(AFS_HPUX110_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_FBSD_ENV) && !defined(AFS_DARWIN60_ENV)
+#if !defined(AFS_HPUX110_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_FBSD_ENV) && !defined(AFS_DARWIN_ENV)
#include <netinet/in_var.h>
#endif
#endif /* !UKERNEL */
VLDB_Same(struct VenusFid *afid, struct vrequest *areq)
{
struct vrequest treq;
- struct conn *tconn;
+ struct afs_conn *tconn;
int i, type = 0;
union {
struct vldbentry tve;
return (changed ? DIFFERENT : SAME);
} /*VLDB_Same */
+/*------------------------------------------------------------------------
+ * afs_BlackListOnce
+ *
+ * Description:
+ *      Mark a server as invalid for further attempts of this request only.
+ *      Every host of the volume whose address is flagged SRVR_ISDOWN is
+ *      marked to be skipped as well.
+ *
+ * Arguments:
+ *      areq : The request record associated with this operation; its
+ *             skipserver[] slots are set for the hosts to avoid.
+ *      afid : The FID of the file involved in the action.  This argument
+ *             may be null if none was involved.
+ *      tsp  : pointer to a server struct for the server we wish to
+ *             blacklist.
+ *
+ * Returns:
+ *      Non-zero value if further servers are available to try,
+ *      zero otherwise.  When afid is null, or no volume is found for
+ *      it, 1 is returned and areq is left untouched.
+ *
+ * Environment:
+ *      This routine is typically called in situations where we believe
+ *      one server out of a pool may have an error condition.
+ *
+ * Side Effects:
+ *      As advertised.
+ *
+ * NOTE:
+ *      The afs_Conn* routines use the list of invalidated servers to
+ *      avoid reusing a server marked as invalid for this request.
+ *------------------------------------------------------------------------*/
+static afs_int32
+afs_BlackListOnce(struct vrequest *areq, struct VenusFid *afid,
+                  struct server *tsp)
+{
+    struct volume *tvp;
+    afs_int32 i;
+    afs_int32 serversleft = 0;
+
+    if (afid) {
+        tvp = afs_FindVolume(afid, READ_LOCK);
+        if (tvp) {
+            /* Pass 1: flag the offending server and any host already down. */
+            for (i = 0; i < AFS_MAXHOSTS; i++) {
+                if (tvp->serverHost[i] == tsp) {
+                    areq->skipserver[i] = 1;
+                }
+                if (tvp->serverHost[i] &&
+                    (tvp->serverHost[i]->addr->sa_flags &
+                     SRVR_ISDOWN)) {
+                    areq->skipserver[i] = 1;
+                }
+            }
+            /* Pass 2: is there any host left that has not been flagged? */
+            for (i = 0; i < AFS_MAXHOSTS; i++) {
+                if (tvp->serverHost[i] && areq->skipserver[i] == 0) {
+                    serversleft = 1;
+                    break;
+                }
+            }
+            /*
+             * Release the volume only after the last access to
+             * tvp->serverHost[]; dropping it before the scan above would
+             * read the volume after our reference has been given up.
+             */
+            afs_PutVolume(tvp, READ_LOCK);
+            return serversleft;
+        }
+    }
+    /* No FID or no volume: nothing was marked, let the caller keep trying. */
+    return 1;
+}
+
+
/*------------------------------------------------------------------------
* EXPORTED afs_Analyze
* if this is a temporary or permanent error.
*------------------------------------------------------------------------*/
int
-afs_Analyze(register struct conn *aconn, afs_int32 acode,
+afs_Analyze(register struct afs_conn *aconn, afs_int32 acode,
struct VenusFid *afid, register struct vrequest *areq, int op,
afs_int32 locktype, struct cell *cellp)
{
struct server *tsp;
struct volume *tvp;
afs_int32 shouldRetry = 0;
+ afs_int32 serversleft = 1;
struct afs_stats_RPCErrors *aerrP;
-
+ afs_int32 markeddown;
+
+
+
+ if (AFS_IS_DISCONNECTED && !AFS_IN_SYNC) {
+ /* On reconnection, act as connected. XXX: for now.... */
+ /* SXW - This may get very tired after a while. We should try and
+ * intercept all RPCs before they get here ... */
+ /*printf("afs_Analyze: disconnected\n");*/
+ afs_FinalizeReq(areq);
+ if (aconn) {
+ /* SXW - I suspect that this will _never_ happen - we shouldn't
+ * get a connection because we're disconnected !!!*/
+ afs_PutConn(aconn, locktype);
+ }
+ return 0;
+ }
+
AFS_STATCNT(afs_Analyze);
afs_Trace4(afs_iclSetp, CM_TRACE_ANALYZE, ICL_TYPE_INT32, op,
ICL_TYPE_POINTER, aconn, ICL_TYPE_INT32, acode, ICL_TYPE_LONG,
&& tvp->serverHost[0]->cell) ? tvp->serverHost[0]->
cell->cellName : ""));
- for (i = 0; i < MAXHOSTS; i++) {
+ for (i = 0; i < AFS_MAXHOSTS; i++) {
if (tvp->status[i] != not_busy && tvp->status[i] != offline) {
tvp->status[i] = not_busy;
}
return shouldRetry; /* should retry */
}
- if (!aconn) {
+ if (!aconn || !aconn->srvr) {
if (!areq->volumeError) {
if (aerrP)
(aerrP->err_Network)++;
if (areq->volumeError) {
tvp = afs_FindVolume(afid, READ_LOCK);
if (tvp) {
- for (i = 0; i < MAXHOSTS; i++) {
+ for (i = 0; i < AFS_MAXHOSTS; i++) {
if (tvp->serverHost[i] == tsp) {
tvp->status[i] = not_busy;
}
acode = 455;
#endif /* AFS_64BIT_CLIENT */
if ((acode < 0) && (acode != VRESTARTING)) {
- afs_ServerDown(sa);
- ForceNewConnections(sa); /*multi homed clients lock:afs_xsrvAddr? */
+ if (acode == RX_CALL_TIMEOUT) {
+ serversleft = afs_BlackListOnce(areq, afid, tsp);
+ if (afid)
+ tvp = afs_FindVolume(afid, READ_LOCK);
+ if (!afid || !tvp || (tvp->states & VRO))
+ areq->idleError++;
+ if ((serversleft == 0) && tvp &&
+ ((tvp->states & VRO) || (tvp->states & VBackup))) {
+ shouldRetry = 0;
+ } else {
+ shouldRetry = 1;
+ }
+ if (tvp)
+ afs_PutVolume(tvp, READ_LOCK);
+ /* By doing this, we avoid ever marking a server down
+ * in an idle timeout case. That's because the server is
+ * still responding and may only be letting a single vnode
+ * time out. We otherwise risk having the server continually
+ * be marked down, then up, then down again...
+ */
+ goto out;
+ }
+ markeddown = afs_ServerDown(sa);
+ ForceNewConnections(sa); /**multi homed clients lock:afs_xsrvAddr? */
if (aerrP)
(aerrP->err_Server)++;
+#if 0
+ /* retry *once* when the server is timed out in case of NAT */
+ if (markeddown && acode == RX_CALL_DEAD) {
+ aconn->forceConnectFS = 1;
+ shouldRetry = 1;
+ }
+#endif
}
if (acode == VBUSY || acode == VRESTARTING) {
tvp = afs_FindVolume(afid, READ_LOCK);
if (tvp) {
- for (i = 0; i < MAXHOSTS; i++) {
+ for (i = 0; i < AFS_MAXHOSTS; i++) {
if (tvp->serverHost[i] == tsp) {
tvp->status[i] = rdwr_busy; /* can't tell which yet */
/* to tell which, have to look at the op code. */
|| (acode & ~0xff) == ERROR_TABLE_BASE_RXK) {
/* any rxkad error is treated as token expiration */
struct unixuser *tu;
-
/*
* I'm calling these errors protection errors, since they involve
* faulty authentication.
tu = afs_FindUser(areq->uid, tsp->cell->cellNum, READ_LOCK);
if (tu) {
- if ((acode == VICETOKENDEAD) || (acode == RXKADEXPIRED))
+ if (acode == VICETOKENDEAD) {
+ aconn->forceConnectFS = 1;
+ } else if (acode == RXKADEXPIRED) {
+ aconn->forceConnectFS = 0; /* don't check until new tokens set */
+ aconn->user->states |= UTokensBad;
afs_warnuser
("afs: Tokens for user of AFS id %d for cell %s have expired\n",
tu->vid, aconn->srvr->server->cell->cellName);
- else
- afs_warnuser
- ("afs: Tokens for user of AFS id %d for cell %s are discarded (rxkad error=%d)\n",
- tu->vid, aconn->srvr->server->cell->cellName, acode);
+ } else {
+ serversleft = afs_BlackListOnce(areq, afid, tsp);
+ areq->tokenError++;
+
+ if (serversleft) {
+ afs_warnuser
+ ("afs: Tokens for user of AFS id %d for cell %s: rxkad error=%d\n",
+ tu->vid, aconn->srvr->server->cell->cellName, acode);
+ shouldRetry = 1;
+ } else {
+ areq->tokenError = 0;
+ aconn->forceConnectFS = 0; /* don't check until new tokens set */
+ aconn->user->states |= UTokensBad;
+ afs_warnuser
+ ("afs: Tokens for user of AFS id %d for cell %s are discarded (rxkad error=%d)\n",
+ tu->vid, aconn->srvr->server->cell->cellName, acode);
+ }
+ }
afs_PutUser(tu, READ_LOCK);
} else {
/* The else case shouldn't be possible and should probably be replaced by a panic? */
- if ((acode == VICETOKENDEAD) || (acode == RXKADEXPIRED))
+ if (acode == VICETOKENDEAD) {
+ aconn->forceConnectFS = 1;
+ } else if (acode == RXKADEXPIRED) {
+ aconn->forceConnectFS = 0; /* don't check until new tokens set */
+ aconn->user->states |= UTokensBad;
afs_warnuser
("afs: Tokens for user %d for cell %s have expired\n",
areq->uid, aconn->srvr->server->cell->cellName);
- else
+ } else {
+ aconn->forceConnectFS = 0; /* don't check until new tokens set */
+ aconn->user->states |= UTokensBad;
afs_warnuser
("afs: Tokens for user %d for cell %s are discarded (rxkad error = %d)\n",
areq->uid, aconn->srvr->server->cell->cellName, acode);
+ }
}
- aconn->forceConnectFS = 0; /* don't check until new tokens set */
- aconn->user->states |= UTokensBad;
shouldRetry = 1; /* Try again (as root). */
}
/* Check for access violation. */
same = VLDB_Same(afid, areq);
tvp = afs_FindVolume(afid, READ_LOCK);
if (tvp) {
- for (i = 0; i < MAXHOSTS && tvp->serverHost[i]; i++) {
+ for (i = 0; i < AFS_MAXHOSTS && tvp->serverHost[i]; i++) {
if (tvp->serverHost[i] == tsp) {
if (tvp->status[i] == end_not_busy)
tvp->status[i] = offline;
VSleep(1); /* Just a hack for desperate times. */
shouldRetry = 1;
}
-
+out:
/* now unlock the connection and return */
afs_PutConn(aconn, locktype);
return (shouldRetry);