/*
* Copyright 2000, International Business Machines Corporation and others.
* All Rights Reserved.
- *
+ *
* This software has been released under the terms of the IBM Public
* License. For details, see the LICENSE file in the top-level source
* directory or online at http://www.openafs.org/dl/license10.html
#include <afsconfig.h>
#include "afs/param.h"
-RCSID
- ("$Header$");
#include "afs/stds.h"
#include "afs/sysincludes.h" /* Standard vendor system headers */
#ifdef AFS_SGI62_ENV
#include "h/hashing.h"
#endif
-#if !defined(AFS_HPUX110_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_FBSD_ENV) && !defined(AFS_DARWIN60_ENV)
+#if !defined(AFS_HPUX110_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_FBSD_ENV) && !defined(AFS_DARWIN_ENV)
#include <netinet/in_var.h>
#endif
#endif /* !UKERNEL */
#include "afs/afs_util.h"
#include "afs/unified_afs.h"
-#if defined(AFS_SUN56_ENV)
+#if defined(AFS_SUN5_ENV)
#include <inet/led.h>
#include <inet/common.h>
-#if defined(AFS_SUN58_ENV)
#include <netinet/ip6.h>
-#endif
#include <inet/ip.h>
#endif
-
/* shouldn't do it this way, but for now will do */
#ifndef ERROR_TABLE_BASE_U
#define ERROR_TABLE_BASE_U (5376L)
#endif /* vlserver error base define */
-int afs_BusyWaitPeriod = 15; /* poll every 15 seconds */
-
-afs_int32 hm_retry_RO = 0; /* don't wait */
-afs_int32 hm_retry_RW = 0; /* don't wait */
-afs_int32 hm_retry_int = 0; /* don't wait */
-
-static int et2sys[512];
-
-void
-init_et_to_sys_error(void)
-{
- memset(&et2sys, 0, sizeof(et2sys));
- et2sys[(UAEPERM - ERROR_TABLE_BASE_uae)] = EPERM;
- et2sys[(UAENOENT - ERROR_TABLE_BASE_uae)] = ENOENT;
- et2sys[(UAESRCH - ERROR_TABLE_BASE_uae)] = ESRCH;
- et2sys[(UAEINTR - ERROR_TABLE_BASE_uae)] = EINTR;
- et2sys[(UAEIO - ERROR_TABLE_BASE_uae)] = EIO;
- et2sys[(UAENXIO - ERROR_TABLE_BASE_uae)] = ENXIO;
- et2sys[(UAE2BIG - ERROR_TABLE_BASE_uae)] = E2BIG;
- et2sys[(UAENOEXEC - ERROR_TABLE_BASE_uae)] = ENOEXEC;
- et2sys[(UAEBADF - ERROR_TABLE_BASE_uae)] = EBADF;
- et2sys[(UAECHILD - ERROR_TABLE_BASE_uae)] = ECHILD;
- et2sys[(UAEAGAIN - ERROR_TABLE_BASE_uae)] = EAGAIN;
- et2sys[(UAENOMEM - ERROR_TABLE_BASE_uae)] = ENOMEM;
- et2sys[(UAEACCES - ERROR_TABLE_BASE_uae)] = EACCES;
- et2sys[(UAEFAULT - ERROR_TABLE_BASE_uae)] = EFAULT;
- et2sys[(UAENOTBLK - ERROR_TABLE_BASE_uae)] = ENOTBLK;
- et2sys[(UAEBUSY - ERROR_TABLE_BASE_uae)] = EBUSY;
- et2sys[(UAEEXIST - ERROR_TABLE_BASE_uae)] = EEXIST;
- et2sys[(UAEXDEV - ERROR_TABLE_BASE_uae)] = EXDEV;
- et2sys[(UAENODEV - ERROR_TABLE_BASE_uae)] = ENODEV;
- et2sys[(UAENOTDIR - ERROR_TABLE_BASE_uae)] = ENOTDIR;
- et2sys[(UAEISDIR - ERROR_TABLE_BASE_uae)] = EISDIR;
- et2sys[(UAEINVAL - ERROR_TABLE_BASE_uae)] = EINVAL;
- et2sys[(UAENFILE - ERROR_TABLE_BASE_uae)] = ENFILE;
- et2sys[(UAEMFILE - ERROR_TABLE_BASE_uae)] = EMFILE;
- et2sys[(UAENOTTY - ERROR_TABLE_BASE_uae)] = ENOTTY;
- et2sys[(UAETXTBSY - ERROR_TABLE_BASE_uae)] = ETXTBSY;
- et2sys[(UAEFBIG - ERROR_TABLE_BASE_uae)] = EFBIG;
- et2sys[(UAENOSPC - ERROR_TABLE_BASE_uae)] = ENOSPC;
- et2sys[(UAESPIPE - ERROR_TABLE_BASE_uae)] = ESPIPE;
- et2sys[(UAEROFS - ERROR_TABLE_BASE_uae)] = EROFS;
- et2sys[(UAEMLINK - ERROR_TABLE_BASE_uae)] = EMLINK;
- et2sys[(UAEPIPE - ERROR_TABLE_BASE_uae)] = EPIPE;
- et2sys[(UAEDOM - ERROR_TABLE_BASE_uae)] = EDOM;
- et2sys[(UAERANGE - ERROR_TABLE_BASE_uae)] = ERANGE;
- et2sys[(UAEDEADLK - ERROR_TABLE_BASE_uae)] = EDEADLK;
- et2sys[(UAENAMETOOLONG - ERROR_TABLE_BASE_uae)] = ENAMETOOLONG;
- et2sys[(UAENOLCK - ERROR_TABLE_BASE_uae)] = ENOLCK;
- et2sys[(UAENOSYS - ERROR_TABLE_BASE_uae)] = ENOSYS;
- et2sys[(UAENOTEMPTY - ERROR_TABLE_BASE_uae)] = ENOTEMPTY;
- et2sys[(UAELOOP - ERROR_TABLE_BASE_uae)] = ELOOP;
- et2sys[(UAEWOULDBLOCK - ERROR_TABLE_BASE_uae)] = EWOULDBLOCK;
- et2sys[(UAENOMSG - ERROR_TABLE_BASE_uae)] = ENOMSG;
- et2sys[(UAEIDRM - ERROR_TABLE_BASE_uae)] = EIDRM;
- et2sys[(UAECHRNG - ERROR_TABLE_BASE_uae)] = ECHRNG;
- et2sys[(UAEL2NSYNC - ERROR_TABLE_BASE_uae)] = EL2NSYNC;
- et2sys[(UAEL3HLT - ERROR_TABLE_BASE_uae)] = EL3HLT;
- et2sys[(UAEL3RST - ERROR_TABLE_BASE_uae)] = EL3RST;
- et2sys[(UAELNRNG - ERROR_TABLE_BASE_uae)] = ELNRNG;
- et2sys[(UAEUNATCH - ERROR_TABLE_BASE_uae)] = EUNATCH;
- et2sys[(UAENOCSI - ERROR_TABLE_BASE_uae)] = ENOCSI;
- et2sys[(UAEL2HLT - ERROR_TABLE_BASE_uae)] = EL2HLT;
- et2sys[(UAEBADE - ERROR_TABLE_BASE_uae)] = EBADE;
- et2sys[(UAEBADR - ERROR_TABLE_BASE_uae)] = EBADR;
- et2sys[(UAEXFULL - ERROR_TABLE_BASE_uae)] = EXFULL;
- et2sys[(UAENOANO - ERROR_TABLE_BASE_uae)] = ENOANO;
- et2sys[(UAEBADRQC - ERROR_TABLE_BASE_uae)] = EBADRQC;
- et2sys[(UAEBADSLT - ERROR_TABLE_BASE_uae)] = EBADSLT;
- et2sys[(UAEBFONT - ERROR_TABLE_BASE_uae)] = EBFONT;
- et2sys[(UAENOSTR - ERROR_TABLE_BASE_uae)] = ENOSTR;
- et2sys[(UAENODATA - ERROR_TABLE_BASE_uae)] = ENODATA;
- et2sys[(UAETIME - ERROR_TABLE_BASE_uae)] = ETIME;
- et2sys[(UAENOSR - ERROR_TABLE_BASE_uae)] = ENOSR;
- et2sys[(UAENONET - ERROR_TABLE_BASE_uae)] = ENONET;
- et2sys[(UAENOPKG - ERROR_TABLE_BASE_uae)] = ENOPKG;
- et2sys[(UAEREMOTE - ERROR_TABLE_BASE_uae)] = EREMOTE;
- et2sys[(UAENOLINK - ERROR_TABLE_BASE_uae)] = ENOLINK;
- et2sys[(UAEADV - ERROR_TABLE_BASE_uae)] = EADV;
- et2sys[(UAESRMNT - ERROR_TABLE_BASE_uae)] = ESRMNT;
- et2sys[(UAECOMM - ERROR_TABLE_BASE_uae)] = ECOMM;
- et2sys[(UAEPROTO - ERROR_TABLE_BASE_uae)] = EPROTO;
- et2sys[(UAEMULTIHOP - ERROR_TABLE_BASE_uae)] = EMULTIHOP;
- et2sys[(UAEDOTDOT - ERROR_TABLE_BASE_uae)] = EDOTDOT;
- et2sys[(UAEBADMSG - ERROR_TABLE_BASE_uae)] = EBADMSG;
- et2sys[(UAEOVERFLOW - ERROR_TABLE_BASE_uae)] = EOVERFLOW;
- et2sys[(UAENOTUNIQ - ERROR_TABLE_BASE_uae)] = ENOTUNIQ;
- et2sys[(UAEBADFD - ERROR_TABLE_BASE_uae)] = EBADFD;
- et2sys[(UAEREMCHG - ERROR_TABLE_BASE_uae)] = EREMCHG;
- et2sys[(UAELIBACC - ERROR_TABLE_BASE_uae)] = ELIBACC;
- et2sys[(UAELIBBAD - ERROR_TABLE_BASE_uae)] = ELIBBAD;
- et2sys[(UAELIBSCN - ERROR_TABLE_BASE_uae)] = ELIBSCN;
- et2sys[(UAELIBMAX - ERROR_TABLE_BASE_uae)] = ELIBMAX;
- et2sys[(UAELIBEXEC - ERROR_TABLE_BASE_uae)] = ELIBEXEC;
- et2sys[(UAEILSEQ - ERROR_TABLE_BASE_uae)] = EILSEQ;
- et2sys[(UAERESTART - ERROR_TABLE_BASE_uae)] = ERESTART;
- et2sys[(UAESTRPIPE - ERROR_TABLE_BASE_uae)] = ESTRPIPE;
- et2sys[(UAEUSERS - ERROR_TABLE_BASE_uae)] = EUSERS;
- et2sys[(UAENOTSOCK - ERROR_TABLE_BASE_uae)] = ENOTSOCK;
- et2sys[(UAEDESTADDRREQ - ERROR_TABLE_BASE_uae)] = EDESTADDRREQ;
- et2sys[(UAEMSGSIZE - ERROR_TABLE_BASE_uae)] = EMSGSIZE;
- et2sys[(UAEPROTOTYPE - ERROR_TABLE_BASE_uae)] = EPROTOTYPE;
- et2sys[(UAENOPROTOOPT - ERROR_TABLE_BASE_uae)] = ENOPROTOOPT;
- et2sys[(UAEPROTONOSUPPORT - ERROR_TABLE_BASE_uae)] = EPROTONOSUPPORT;
- et2sys[(UAESOCKTNOSUPPORT - ERROR_TABLE_BASE_uae)] = ESOCKTNOSUPPORT;
- et2sys[(UAEOPNOTSUPP - ERROR_TABLE_BASE_uae)] = EOPNOTSUPP;
- et2sys[(UAEPFNOSUPPORT - ERROR_TABLE_BASE_uae)] = EPFNOSUPPORT;
- et2sys[(UAEAFNOSUPPORT - ERROR_TABLE_BASE_uae)] = EAFNOSUPPORT;
- et2sys[(UAEADDRINUSE - ERROR_TABLE_BASE_uae)] = EADDRINUSE;
- et2sys[(UAEADDRNOTAVAIL - ERROR_TABLE_BASE_uae)] = EADDRNOTAVAIL;
- et2sys[(UAENETDOWN - ERROR_TABLE_BASE_uae)] = ENETDOWN;
- et2sys[(UAENETUNREACH - ERROR_TABLE_BASE_uae)] = ENETUNREACH;
- et2sys[(UAENETRESET - ERROR_TABLE_BASE_uae)] = ENETRESET;
- et2sys[(UAECONNABORTED - ERROR_TABLE_BASE_uae)] = ECONNABORTED;
- et2sys[(UAECONNRESET - ERROR_TABLE_BASE_uae)] = ECONNRESET;
- et2sys[(UAENOBUFS - ERROR_TABLE_BASE_uae)] = ENOBUFS;
- et2sys[(UAEISCONN - ERROR_TABLE_BASE_uae)] = EISCONN;
- et2sys[(UAENOTCONN - ERROR_TABLE_BASE_uae)] = ENOTCONN;
- et2sys[(UAESHUTDOWN - ERROR_TABLE_BASE_uae)] = ESHUTDOWN;
- et2sys[(UAETOOMANYREFS - ERROR_TABLE_BASE_uae)] = ETOOMANYREFS;
- et2sys[(UAETIMEDOUT - ERROR_TABLE_BASE_uae)] = ETIMEDOUT;
- et2sys[(UAECONNREFUSED - ERROR_TABLE_BASE_uae)] = ECONNREFUSED;
- et2sys[(UAEHOSTDOWN - ERROR_TABLE_BASE_uae)] = EHOSTDOWN;
- et2sys[(UAEHOSTUNREACH - ERROR_TABLE_BASE_uae)] = EHOSTUNREACH;
- et2sys[(UAEALREADY - ERROR_TABLE_BASE_uae)] = EALREADY;
- et2sys[(UAEINPROGRESS - ERROR_TABLE_BASE_uae)] = EINPROGRESS;
- et2sys[(UAESTALE - ERROR_TABLE_BASE_uae)] = ESTALE;
- et2sys[(UAEUCLEAN - ERROR_TABLE_BASE_uae)] = EUCLEAN;
- et2sys[(UAENOTNAM - ERROR_TABLE_BASE_uae)] = ENOTNAM;
- et2sys[(UAENAVAIL - ERROR_TABLE_BASE_uae)] = ENAVAIL;
- et2sys[(UAEISNAM - ERROR_TABLE_BASE_uae)] = EISNAM;
- et2sys[(UAEREMOTEIO - ERROR_TABLE_BASE_uae)] = EREMOTEIO;
- et2sys[(UAEDQUOT - ERROR_TABLE_BASE_uae)] = EDQUOT;
- et2sys[(UAENOMEDIUM - ERROR_TABLE_BASE_uae)] = ENOMEDIUM;
- et2sys[(UAEMEDIUMTYPE - ERROR_TABLE_BASE_uae)] = EMEDIUMTYPE;
-}
-
-static afs_int32
-et_to_sys_error(afs_int32 in)
-{
- if (in < ERROR_TABLE_BASE_uae || in >= ERROR_TABLE_BASE_uae + 512)
- return in;
- if (et2sys[in - ERROR_TABLE_BASE_uae] != 0)
- return et2sys[in - ERROR_TABLE_BASE_uae];
- return in;
-}
-
-void
-afs_CopyError(register struct vrequest *afrom, register struct vrequest *ato)
-{
- AFS_STATCNT(afs_CopyError);
- if (!afrom->initd)
- return;
- afs_FinalizeReq(ato);
- if (afrom->accessError)
- ato->accessError = 1;
- if (afrom->volumeError)
- ato->volumeError = 1;
- if (afrom->networkError)
- ato->networkError = 1;
- if (afrom->permWriteError)
- ato->permWriteError = 1;
-
-}
-
-void
-afs_FinalizeReq(register struct vrequest *areq)
-{
- AFS_STATCNT(afs_FinalizeReq);
- if (areq->initd)
- return;
- areq->busyCount = 0;
- areq->accessError = 0;
- areq->volumeError = 0;
- areq->networkError = 0;
- areq->permWriteError = 0;
- areq->initd = 1;
-
-}
-
-int
-afs_CheckCode(afs_int32 acode, struct vrequest *areq, int where)
-{
- AFS_STATCNT(afs_CheckCode);
- if (acode) {
- afs_Trace2(afs_iclSetp, CM_TRACE_CHECKCODE, ICL_TYPE_INT32, acode,
- ICL_TYPE_INT32, where);
- }
- if ((acode & ~0xff) == ERROR_TABLE_BASE_uae)
- acode = et_to_sys_error(acode);
- if (!areq || !areq->initd)
- return acode;
- if (areq->networkError)
- return ETIMEDOUT;
- if (acode == 0)
- return 0;
- if (areq->accessError)
- return EACCES;
- if (areq->volumeError == VOLMISSING)
- return ENODEV;
- if (areq->volumeError == VOLBUSY)
- return EWOULDBLOCK;
- if (acode == VNOVNODE)
- return ENOENT;
- if (acode == VDISKFULL)
- return ENOSPC;
- if (acode == VOVERQUOTA)
- return
-#ifdef EDQUOT
- EDQUOT
-#else
- ENOSPC
-#endif
- ;
-
- return acode;
-
-} /*afs_CheckCode */
+int afs_BusyWaitPeriod = 15; /**< poll period, in seconds */
+afs_int32 hm_retry_RO = 0; /**< enable read-only hard-mount retry */
+afs_int32 hm_retry_RW = 0; /**< enable read-write hard-mount retry */
+afs_int32 hm_retry_int = 0; /**< hard-mount retry interval, in seconds */
#define VSleep(at) afs_osi_Wait((at)*1000, 0, 0)
int lastcode;
-/* returns:
- * 0 if the vldb record for a specific volume is different from what
- * we have cached -- perhaps the volume has moved.
- * 1 if the vldb record is the same
- * 2 if we can't tell if it's the same or not.
- *
- * If 0, the caller will probably start over at the beginning of our
- * list of servers for this volume and try to find one that is up. If
- * not 0, we will probably just keep plugging with what we have
- * cached. If we fail to contact the VL server, we should just keep
- * trying with the information we have, rather than failing. */
#define DIFFERENT 0
#define SAME 1
#define DUNNO 2
+/*!
+ * \brief
+ * Request the vldb record to determine whether it has changed.
+ *
+ * \retval 0 if the vldb record for a specific volume is different from what
+ * we have cached -- perhaps the volume has moved.
+ * \retval 1 if the vldb record is the same
+ * \retval 2 if we can't tell if it's the same or not.
+ *
+ * \note
+ * If 0 returned, the caller will probably start over at the beginning of our
+ * list of servers for this volume and try to find one that is up. If
+ * not 0, we will probably just keep plugging with what we have
+ * cached. If we fail to contact the VL server, we should just keep
+ * trying with the information we have, rather than failing.
+ */
static int
VLDB_Same(struct VenusFid *afid, struct vrequest *areq)
{
struct vrequest treq;
- struct conn *tconn;
+ struct afs_conn *tconn;
int i, type = 0;
union {
struct vldbentry tve;
char *bp, tbuf[CVBS]; /* biggest volume id is 2^32, ~ 4*10^9 */
unsigned int changed;
struct server *(oldhosts[NMAXNSERVERS]);
+ struct rx_connection *rxconn;
AFS_STATCNT(CheckVLDB);
afs_FinalizeReq(areq);
if ((i = afs_InitReq(&treq, afs_osi_credp)))
return DUNNO;
v = afs_osi_Alloc(sizeof(*v));
+ osi_Assert(v != NULL);
tcell = afs_GetCell(afid->Cell, READ_LOCK);
bp = afs_cv2string(&tbuf[CVBS], afid->Fid.Volume);
do {
VSleep(2); /* Better safe than sorry. */
tconn =
afs_ConnByMHosts(tcell->cellHosts, tcell->vlport, tcell->cellNum,
- &treq, SHARED_LOCK);
+ &treq, SHARED_LOCK, 0, &rxconn);
if (tconn) {
- if (tconn->srvr->server->flags & SNO_LHOSTS) {
+ if ( tconn->parent->srvr->server->flags & SNO_LHOSTS) {
type = 0;
RX_AFS_GUNLOCK();
- i = VL_GetEntryByNameO(tconn->id, bp, &v->tve);
+ i = VL_GetEntryByNameO(rxconn, bp, &v->tve);
RX_AFS_GLOCK();
- } else if (tconn->srvr->server->flags & SYES_LHOSTS) {
+ } else if (tconn->parent->srvr->server->flags & SYES_LHOSTS) {
type = 1;
RX_AFS_GUNLOCK();
- i = VL_GetEntryByNameN(tconn->id, bp, &v->ntve);
+ i = VL_GetEntryByNameN(rxconn, bp, &v->ntve);
RX_AFS_GLOCK();
} else {
type = 2;
RX_AFS_GUNLOCK();
- i = VL_GetEntryByNameU(tconn->id, bp, &v->utve);
+ i = VL_GetEntryByNameU(rxconn, bp, &v->utve);
RX_AFS_GLOCK();
- if (!(tconn->srvr->server->flags & SVLSRV_UUID)) {
+ if (!(tconn->parent->srvr->server->flags & SVLSRV_UUID)) {
if (i == RXGEN_OPCODE) {
type = 1;
RX_AFS_GUNLOCK();
- i = VL_GetEntryByNameN(tconn->id, bp, &v->ntve);
+ i = VL_GetEntryByNameN(rxconn, bp, &v->ntve);
RX_AFS_GLOCK();
if (i == RXGEN_OPCODE) {
type = 0;
- tconn->srvr->server->flags |= SNO_LHOSTS;
+ tconn->parent->srvr->server->flags |= SNO_LHOSTS;
RX_AFS_GUNLOCK();
- i = VL_GetEntryByNameO(tconn->id, bp, &v->tve);
+ i = VL_GetEntryByNameO(rxconn, bp, &v->tve);
RX_AFS_GLOCK();
} else if (!i)
- tconn->srvr->server->flags |= SYES_LHOSTS;
+ tconn->parent->srvr->server->flags |= SYES_LHOSTS;
} else if (!i)
- tconn->srvr->server->flags |= SVLSRV_UUID;
+ tconn->parent->srvr->server->flags |= SVLSRV_UUID;
}
lastcode = i;
}
} else
i = -1;
- } while (afs_Analyze(tconn, i, NULL, &treq, -1, /* no op code for this */
+ } while (afs_Analyze(tconn, rxconn, i, NULL, &treq, -1, /* no op code for this */
SHARED_LOCK, tcell));
afs_PutCell(tcell, READ_LOCK);
for (i = 0; i < NMAXNSERVERS && tvp->serverHost[i]; i++) {
oldhosts[i] = tvp->serverHost[i];
}
+ ReleaseWriteLock(&tvp->lock);
if (type == 2) {
- InstallUVolumeEntry(tvp, &v->utve, afid->Cell, tcell, &treq);
+ LockAndInstallUVolumeEntry(tvp, &v->utve, afid->Cell, tcell, &treq);
} else if (type == 1) {
- InstallNVolumeEntry(tvp, &v->ntve, afid->Cell);
+ LockAndInstallNVolumeEntry(tvp, &v->ntve, afid->Cell);
} else {
- InstallVolumeEntry(tvp, &v->tve, afid->Cell);
+ LockAndInstallVolumeEntry(tvp, &v->tve, afid->Cell);
}
if (i < NMAXNSERVERS && tvp->serverHost[i]) {
return (changed ? DIFFERENT : SAME);
} /*VLDB_Same */
+/*!
+ * \brief
+ *	Exclude a server from further attempts of this request only.
+ *
+ * \param[in,out] areq  The request record associated with this operation;
+ *			its skipserver[] slots are set for excluded servers.
+ * \param[in] afid	The FID of the file involved in the action. May be
+ *			null, in which case nothing is marked.
+ * \param[in] tsp	The server to exclude. It is only compared against
+ *			the volume's server list, never modified.
+ *
+ * \returns
+ *	Non-zero if the volume still lists at least one server that is not
+ *	being skipped for this request; zero otherwise, including when afid
+ *	is null or no volume is found for it.
+ *
+ * \note
+ *	Besides tsp itself, every listed server whose address carries
+ *	SRVR_ISDOWN is marked as skipped as well.
+ *
+ * \note
+ *	This routine is typically called in situations where we believe
+ *	one server out of a pool may have an error condition. The afs_Conn*
+ *	routines use the list of skipped servers to avoid reusing a server
+ *	marked as invalid for this request.
+ */
+static afs_int32
+afs_BlackListOnce(struct vrequest *areq, struct VenusFid *afid,
+                  struct server *tsp)
+{
+    struct volume *vol;
+    struct server *host;
+    afs_int32 slot;
+    afs_int32 remaining = 0;
+
+    if (!afid)
+        return 0;
+
+    vol = afs_FindVolume(afid, READ_LOCK);
+    if (!vol)
+        return 0;
+
+    /*
+     * A slot's skip flag is only ever written while visiting that slot,
+     * so marking and counting can share one pass over the host table.
+     */
+    for (slot = 0; slot < AFS_MAXHOSTS; slot++) {
+        host = vol->serverHost[slot];
+
+        if (host == tsp)
+            areq->skipserver[slot] = 1;
+        if (host && (host->addr->sa_flags & SRVR_ISDOWN))
+            areq->skipserver[slot] = 1;
+
+        if (host && areq->skipserver[slot] == 0)
+            remaining = 1;
+    }
+
+    afs_PutVolume(vol, READ_LOCK);
+    return remaining;
+}
+
+/*!
+ * \brief
+ *	Discard the cached status of a file after a failed data-mutating RPC
+ *	and report whether that RPC may be retried.
+ *
+ * \param[in] afid The FID of the file involved in the action. Used to look
+ *		up the volume when avp is null, and to look up the vcache.
+ *		NOTE(review): afid is handed to afs_FindVCache() whenever a
+ *		volume is available, so a caller that supplies avp presumably
+ *		must also supply a non-null afid -- confirm.
+ * \param[in] op which RPC we are analyzing.
+ * \param[in] avp A pointer to the struct volume, if we already have one.
+ *		When null, the volume is found via afid and put back before
+ *		returning; a caller-supplied volume is not put back here.
+ *
+ * \returns
+ *	1 if op is not a write operation (nothing is cleared in that case) or
+ *	if AFS_STATS_FS_RPCIDXES_WRITE_RETRIABLE(op) holds; 0 otherwise.
+ *
+ * \note
+ *	When a volume is available and a vcache is found for afid, the
+ *	CStatd and CUnique bits are cleared from that vcache's states.
+ *
+ * \note
+ *	This routine is called when we got a network error,
+ *	and discards state if the operation was a data-mutating
+ *	operation.
+ */
+static int
+afs_ClearStatus(struct VenusFid *afid, int op, struct volume *avp)
+{
+ struct volume *tvp = NULL;
+
+ /* if it's not a write op, we have nothing to veto and shouldn't clear. */
+ if (!AFS_STATS_FS_RPCIDXES_ISWRITE(op)) {
+ return 1;
+ }
+
+ if (avp)
+ tvp = avp;
+ else if (afid)
+ tvp = afs_FindVolume(afid, READ_LOCK);
+
+ /* don't assume just discarding will fix if no cached volume */
+ if (tvp) {
+ struct vcache *tvc;
+ ObtainReadLock(&afs_xvcache);
+ if ((tvc = afs_FindVCache(afid, 0, 0))) {
+ ReleaseReadLock(&afs_xvcache);
+ tvc->f.states &= ~(CStatd | CUnique);
+ afs_PutVCache(tvc);
+ } else {
+ ReleaseReadLock(&afs_xvcache);
+ }
+ if (!avp)
+ afs_PutVolume(tvp, READ_LOCK);
+ }
-/*------------------------------------------------------------------------
- * EXPORTED afs_Analyze
+ if (AFS_STATS_FS_RPCIDXES_WRITE_RETRIABLE(op))
+ return 1;
+
+ /* not retriable: we may have raced ourselves */
+ return 0;
+}
+
+/*!
+ * \brief
+ * Print the last errors from the servers for the volume on
+ * this request.
+ *
+ * \param[in] areq The request record associated with this operation.
+ * \param[in] afid The FID of the file involved in the action. This argument
+ * may be null if none was involved.
+ *
+ * \return
+ * None
*
- * Description:
+ * \note
+ * This routine is called before a hard-mount retry, to display
+ * the servers by primary address and the errors encountered.
+ */
+static void
+afs_PrintServerErrors(struct vrequest *areq, struct VenusFid *afid)
+{
+    struct volume *vol = NULL;
+    char *lead = " (";    /* text emitted in front of the next server entry */
+    char *tail = "";      /* line ending; becomes ")" once an entry is printed */
+    int slot;
+
+    if (afid)
+        vol = afs_FindVolume(afid, READ_LOCK);
+
+    if (vol) {
+        for (slot = 0; slot < AFS_MAXHOSTS; slot++) {
+            struct srvAddr *srv_addr;
+            afs_uint32 ip;
+
+            if (!vol->serverHost[slot])
+                continue;
+            srv_addr = vol->serverHost[slot]->addr;
+            if (!srv_addr)
+                continue;
+
+            /* Host byte order lets the octets be peeled off by shifting. */
+            ip = ntohl(srv_addr->sa_ip);
+            afs_warnuser("%s%d.%d.%d.%d code=%d", lead,
+                         (ip >> 24), (ip >> 16) & 0xff,
+                         (ip >> 8) & 0xff, (ip) & 0xff,
+                         areq->lasterror[slot]);
+            lead = ", ";
+            tail = ")";
+        }
+        afs_PutVolume(vol, READ_LOCK);
+    }
+
+    /* Always finish the line, even when no server entry was printed. */
+    afs_warnuser("%s\n", tail);
+}
+
+/*!
+ * \brief
* Analyze the outcome of an RPC operation, taking whatever support
* actions are necessary.
*
- * Arguments:
- * aconn : Ptr to the relevant connection on which the call was made.
- * acode : The return code experienced by the RPC.
- * afid : The FID of the file involved in the action. This argument
- * may be null if none was involved.
- * areq : The request record associated with this operation.
- * op : which RPC we are analyzing.
- * cellp : pointer to a cell struct. Must provide either fid or cell.
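+ * \param[in] aconn Ptr to the relevant connection on which the call was made.
+ * \param[in] rxconn The rx connection that accompanies aconn; it is handed
+ *                   back together with aconn through afs_PutConn().
+ * \param[in] acode The return code experienced by the RPC.
+ * \param[in] afid The FID of the file involved in the action. This argument
+ *                 may be null if none was involved.
+ * \param[in,out] areq The request record associated with this operation.
+ * \param[in] op which RPC we are analyzing.
+ * \param[in] locktype The lock type passed to afs_PutConn() when the
+ *                     connection is released.
+ * \param[in] cellp pointer to a cell struct. Must provide either afid or cellp.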
*
- * Returns:
+ * \returns
* Non-zero value if the related RPC operation should be retried,
* zero otherwise.
*
- * Environment:
+ * \note
* This routine is typically called in a do-while loop, causing the
* embedded RPC operation to be called repeatedly if appropriate
* until whatever error condition (if any) is intolerable.
*
- * Side Effects:
- * As advertised.
- *
- * NOTE:
+ * \note
* The retry return value is used by afs_StoreAllSegments to determine
* if this is a temporary or permanent error.
- *------------------------------------------------------------------------*/
+ */
int
-afs_Analyze(register struct conn *aconn, afs_int32 acode,
- struct VenusFid *afid, register struct vrequest *areq, int op,
- afs_int32 locktype, struct cell *cellp)
+afs_Analyze(struct afs_conn *aconn, struct rx_connection *rxconn,
+ afs_int32 acode, struct VenusFid *afid, struct vrequest *areq,
+ int op, afs_int32 locktype, struct cell *cellp)
{
afs_int32 i;
struct srvAddr *sa;
struct server *tsp;
- struct volume *tvp;
+ struct volume *tvp = NULL;
afs_int32 shouldRetry = 0;
+ afs_int32 serversleft = 1;
struct afs_stats_RPCErrors *aerrP;
+ afs_uint32 address;
+
+ if (AFS_IS_DISCONNECTED && !AFS_IN_SYNC) {
+ /* On reconnection, act as connected. XXX: for now.... */
+ /* SXW - This may get very tired after a while. We should try and
+ * intercept all RPCs before they get here ... */
+ /*printf("afs_Analyze: disconnected\n");*/
+ afs_FinalizeReq(areq);
+ if (aconn) {
+ /* SXW - I suspect that this will _never_ happen - we shouldn't
+ * get a connection because we're disconnected !!!*/
+ afs_PutConn(aconn, rxconn, locktype);
+ }
+ return 0;
+ }
AFS_STATCNT(afs_Analyze);
afs_Trace4(afs_iclSetp, CM_TRACE_ANALYZE, ICL_TYPE_INT32, op,
&& tvp->serverHost[0]->cell) ? tvp->serverHost[0]->
cell->cellName : ""));
- for (i = 0; i < MAXHOSTS; i++) {
+ for (i = 0; i < AFS_MAXHOSTS; i++) {
if (tvp->status[i] != not_busy && tvp->status[i] != offline) {
tvp->status[i] = not_busy;
}
return shouldRetry; /* should retry */
}
- if (!aconn) {
+ if (!aconn || !aconn->parent->srvr) {
if (!areq->volumeError) {
if (aerrP)
(aerrP->err_Network)++;
((afid && afs_IsPrimaryCellNum(afid->Cell))
|| (cellp && afs_IsPrimaryCell(cellp)))) {
if (!afid) {
- afs_warnuser
- ("afs: hard-mount waiting for a vlserver to return to service\n");
+ static int afs_vl_hm = 0;
+ int warn = 0;
+ if (!afs_vl_hm) {
+ afs_vl_hm = warn = 1;
+ }
+ if (warn) {
+ afs_warnuser
+ ("afs: hard-mount waiting for a vlserver to return to service\n");
+ }
VSleep(hm_retry_int);
afs_CheckServers(1, cellp);
shouldRetry = 1;
+
+ if (warn) {
+ afs_vl_hm = 0;
+ }
} else {
+ static int afs_unknown_vhm = 0;
+ int warn = 0, vp_vhm = 0;
+
tvp = afs_FindVolume(afid, READ_LOCK);
if (!tvp || (tvp->states & VRO)) {
shouldRetry = hm_retry_RO;
} else {
shouldRetry = hm_retry_RW;
}
+
+ /* Set 'warn' if we should afs_warnuser. Only let one
+ * caller call afs_warnuser per hm_retry_int interval per
+ * volume. */
+ if (shouldRetry) {
+ if (tvp) {
+ if (!(tvp->states & VHardMount)) {
+ tvp->states |= VHardMount;
+ warn = vp_vhm = 1;
+ }
+ } else {
+ if (!afs_unknown_vhm) {
+ afs_unknown_vhm = 1;
+ warn = 1;
+ }
+ }
+ }
+
if (tvp)
afs_PutVolume(tvp, READ_LOCK);
+
if (shouldRetry) {
- afs_warnuser
- ("afs: hard-mount waiting for volume %u\n",
- afid->Fid.Volume);
+ if (warn) {
+ afs_warnuser
+ ("afs: hard-mount waiting for volume %u",
+ afid->Fid.Volume);
+ afs_PrintServerErrors(areq, afid);
+ }
+
VSleep(hm_retry_int);
afs_CheckServers(1, cellp);
+ /* clear the black listed servers on this request. */
+ memset(areq->skipserver, 0, sizeof(areq->skipserver));
+
+ if (vp_vhm) {
+ tvp = afs_FindVolume(afid, READ_LOCK);
+ if (tvp) {
+ tvp->states &= ~VHardMount;
+ afs_PutVolume(tvp, READ_LOCK);
+ }
+ } else if (warn) {
+ afs_unknown_vhm = 0;
+ }
}
}
} /* if (hm_retry_int ... */
else {
- areq->networkError = 1;
+ if (acode == RX_MSGSIZE)
+ shouldRetry = 1;
+ else {
+ areq->networkError = 1;
+ /* do not promote to shouldRetry if not already */
+ if (afs_ClearStatus(afid, op, NULL) == 0)
+ shouldRetry = 0;
+ }
}
}
return shouldRetry;
}
/* Find server associated with this connection. */
- sa = aconn->srvr;
+ sa = aconn->parent->srvr;
tsp = sa->server;
+ address = ntohl(sa->sa_ip);
/* Before we do anything with acode, make sure we translate it back to
* a system error */
if (areq->volumeError) {
tvp = afs_FindVolume(afid, READ_LOCK);
if (tvp) {
- for (i = 0; i < MAXHOSTS; i++) {
+ for (i = 0; i < AFS_MAXHOSTS; i++) {
if (tvp->serverHost[i] == tsp) {
tvp->status[i] = not_busy;
}
}
}
- afs_PutConn(aconn, locktype);
+ afs_PutConn(aconn, rxconn, locktype);
return 0;
}
+ /* Save the last code of this server on this request. */
+ tvp = afs_FindVolume(afid, READ_LOCK);
+ if (tvp) {
+ for (i = 0; i < AFS_MAXHOSTS; i++) {
+ if (tvp->serverHost[i] == tsp) {
+ areq->lasterror[i] = acode;
+ }
+ }
+ afs_PutVolume(tvp, READ_LOCK);
+ }
+
/* If network troubles, mark server as having bogued out again. */
- /* VRESTARTING is < 0 because of backward compatibility issues
+ /* VRESTARTING is < 0 because of backward compatibility issues
* with 3.4 file servers and older cache managers */
#ifdef AFS_64BIT_CLIENT
if (acode == -455)
acode = 455;
#endif /* AFS_64BIT_CLIENT */
if ((acode < 0) && (acode != VRESTARTING)) {
- afs_ServerDown(sa);
- ForceNewConnections(sa); /*multi homed clients lock:afs_xsrvAddr? */
+ if (acode == RX_MSGSIZE || acode == RX_CALL_BUSY) {
+ shouldRetry = 1;
+ goto out;
+ }
+ if (acode == RX_CALL_TIMEOUT || acode == RX_CALL_IDLE) {
+ serversleft = afs_BlackListOnce(areq, afid, tsp);
+ if (afid)
+ tvp = afs_FindVolume(afid, READ_LOCK);
+ if ((serversleft == 0) && tvp &&
+ ((tvp->states & VRO) || (tvp->states & VBackup))) {
+ shouldRetry = 0;
+ } else {
+ shouldRetry = 1;
+ }
+ if (!afid || !tvp || (tvp->states & VRO))
+ areq->idleError++;
+ else if (afs_ClearStatus(afid, op, tvp) == 0)
+ shouldRetry = 0;
+
+ if (tvp)
+ afs_PutVolume(tvp, READ_LOCK);
+ /* By doing this, we avoid ever marking a server down
+ * in an idle timeout case. That's because the server is
+ * still responding and may only be letting a single vnode
+ * time out. We otherwise risk having the server continually
+ * be marked down, then up, then down again...
+ */
+ goto out;
+ }
+ afs_ServerDown(sa, acode);
+ ForceNewConnections(sa); /* multi homed clients lock:afs_xsrvAddr? */
if (aerrP)
(aerrP->err_Server)++;
}
tvp = afs_FindVolume(afid, READ_LOCK);
if (tvp) {
- for (i = 0; i < MAXHOSTS; i++) {
+ for (i = 0; i < AFS_MAXHOSTS; i++) {
if (tvp->serverHost[i] == tsp) {
tvp->status[i] = rdwr_busy; /* can't tell which yet */
/* to tell which, have to look at the op code. */
}
afs_PutVolume(tvp, READ_LOCK);
} else {
- afs_warnuser("afs: Waiting for busy volume %u in cell %s\n",
- (afid ? afid->Fid.Volume : 0), tsp->cell->cellName);
+ afs_warnuser("afs: Waiting for busy volume %u in cell %s (server %d.%d.%d.%d)\n",
+ (afid ? afid->Fid.Volume : 0), tsp->cell->cellName,
+ (address >> 24), (address >> 16) & 0xff,
+ (address >> 8) & 0xff, (address) & 0xff);
VSleep(afs_BusyWaitPeriod); /* poll periodically */
}
shouldRetry = 1;
|| (acode & ~0xff) == ERROR_TABLE_BASE_RXK) {
/* any rxkad error is treated as token expiration */
struct unixuser *tu;
-
/*
* I'm calling these errors protection errors, since they involve
* faulty authentication.
tu = afs_FindUser(areq->uid, tsp->cell->cellNum, READ_LOCK);
if (tu) {
- if ((acode == VICETOKENDEAD) || (acode == RXKADEXPIRED))
- afs_warnuser
- ("afs: Tokens for user of AFS id %d for cell %s have expired\n",
- tu->vid, aconn->srvr->server->cell->cellName);
- else
+ if (acode == VICETOKENDEAD) {
+ aconn->forceConnectFS = 1;
+ } else if (acode == RXKADEXPIRED) {
+ aconn->forceConnectFS = 0; /* don't check until new tokens set */
+ aconn->parent->user->states |= UTokensBad;
+ afs_NotifyUser(tu, UTokensDropped);
afs_warnuser
- ("afs: Tokens for user of AFS id %d for cell %s are discarded (rxkad error=%d)\n",
- tu->vid, aconn->srvr->server->cell->cellName, acode);
+ ("afs: Tokens for user of AFS id %d for cell %s have expired (server %d.%d.%d.%d)\n",
+ tu->viceId, aconn->parent->srvr->server->cell->cellName,
+ (address >> 24), (address >> 16) & 0xff,
+ (address >> 8) & 0xff, (address) & 0xff);
+ } else {
+ serversleft = afs_BlackListOnce(areq, afid, tsp);
+ areq->tokenError++;
+
+ if (serversleft) {
+ afs_warnuser
+ ("afs: Tokens for user of AFS id %d for cell %s: rxkad error=%d (server %d.%d.%d.%d)\n",
+ tu->viceId, aconn->parent->srvr->server->cell->cellName, acode,
+ (address >> 24), (address >> 16) & 0xff,
+ (address >> 8) & 0xff, (address) & 0xff);
+ shouldRetry = 1;
+ } else {
+ areq->tokenError = 0;
+ aconn->forceConnectFS = 0; /* don't check until new tokens set */
+ aconn->parent->user->states |= UTokensBad;
+ afs_NotifyUser(tu, UTokensDropped);
+ afs_warnuser
+ ("afs: Tokens for user of AFS id %d for cell %s are discarded (rxkad error=%d, server %d.%d.%d.%d)\n",
+ tu->viceId, aconn->parent->srvr->server->cell->cellName, acode,
+ (address >> 24), (address >> 16) & 0xff,
+ (address >> 8) & 0xff, (address) & 0xff);
+ }
+ }
afs_PutUser(tu, READ_LOCK);
} else {
/* The else case shouldn't be possible and should probably be replaced by a panic? */
- if ((acode == VICETOKENDEAD) || (acode == RXKADEXPIRED))
+ if (acode == VICETOKENDEAD) {
+ aconn->forceConnectFS = 1;
+ } else if (acode == RXKADEXPIRED) {
+ aconn->forceConnectFS = 0; /* don't check until new tokens set */
+ aconn->parent->user->states |= UTokensBad;
+ afs_NotifyUser(tu, UTokensDropped);
afs_warnuser
- ("afs: Tokens for user %d for cell %s have expired\n",
- areq->uid, aconn->srvr->server->cell->cellName);
- else
+ ("afs: Tokens for user %d for cell %s have expired (server %d.%d.%d.%d)\n",
+ areq->uid, aconn->parent->srvr->server->cell->cellName,
+ (address >> 24), (address >> 16) & 0xff,
+ (address >> 8) & 0xff, (address) & 0xff);
+ } else {
+ aconn->forceConnectFS = 0; /* don't check until new tokens set */
+ aconn->parent->user->states |= UTokensBad;
+ afs_NotifyUser(tu, UTokensDropped);
afs_warnuser
- ("afs: Tokens for user %d for cell %s are discarded (rxkad error = %d)\n",
- areq->uid, aconn->srvr->server->cell->cellName, acode);
+ ("afs: Tokens for user %d for cell %s are discarded (rxkad error = %d, server %d.%d.%d.%d)\n",
+ areq->uid, aconn->parent->srvr->server->cell->cellName,
+ acode,
+ (address >> 24), (address >> 16) & 0xff,
+ (address >> 8) & 0xff, (address) & 0xff);
+
+ }
}
- aconn->forceConnectFS = 0; /* don't check until new tokens set */
- aconn->user->states |= UTokensBad;
shouldRetry = 1; /* Try again (as root). */
}
/* Check for access violation. */
}
/* check for ubik errors; treat them like crashed servers */
else if (acode >= ERROR_TABLE_BASE_U && acode < ERROR_TABLE_BASE_U + 255) {
- afs_ServerDown(sa);
+ afs_ServerDown(sa, acode);
if (aerrP)
(aerrP->err_Server)++;
shouldRetry = 1; /* retryable (maybe one is working) */
same = VLDB_Same(afid, areq);
tvp = afs_FindVolume(afid, READ_LOCK);
if (tvp) {
- for (i = 0; i < MAXHOSTS && tvp->serverHost[i]; i++) {
+ for (i = 0; i < AFS_MAXHOSTS && tvp->serverHost[i]; i++) {
if (tvp->serverHost[i] == tsp) {
if (tvp->status[i] == end_not_busy)
tvp->status[i] = offline;
areq->permWriteError = 1;
shouldRetry = 0; /* Other random Vice error. */
} else if (acode == RX_MSGSIZE) { /* same meaning as EMSGSIZE... */
+ afs_warnuser
+ ("afs: Path MTU may have been exceeded, retrying (server %d.%d.%d.%d)\n",
+ (address >> 24), (address >> 16) & 0xff,
+ (address >> 8) & 0xff, (address) & 0xff);
+
VSleep(1); /* Just a hack for desperate times. */
if (aerrP)
(aerrP->err_Other)++;
* retry in case there is another server. However, if we find
* no connection (aconn == 0) we set the networkError flag.
*/
- afs_MarkServerUpOrDown(sa, SRVR_ISDOWN);
+ afs_ServerDown(sa, acode);
if (aerrP)
(aerrP->err_Server)++;
VSleep(1); /* Just a hack for desperate times. */
shouldRetry = 1;
}
-
+out:
/* now unlock the connection and return */
- afs_PutConn(aconn, locktype);
+ afs_PutConn(aconn, rxconn, locktype);
return (shouldRetry);
} /*afs_Analyze */