2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
13 #include <afsconfig.h>
14 #include "afs/param.h"
18 #include "afs/sysincludes.h" /* Standard vendor system headers */
21 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_FBSD_ENV)
23 #include <netinet/in.h>
27 #include "h/hashing.h"
29 #if !defined(AFS_HPUX110_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_FBSD_ENV) && !defined(AFS_DARWIN_ENV)
30 #include <netinet/in_var.h>
34 #include "afsincludes.h" /* Afs-based standard headers */
35 #include "afs/afs_stats.h" /* afs statistics */
36 #include "afs/afs_util.h"
37 #include "afs/unified_afs.h"
39 #if defined(AFS_SUN5_ENV)
41 #include <inet/common.h>
42 #include <netinet/ip6.h>
46 /* shouldn't do it this way, but for now will do */
47 #ifndef ERROR_TABLE_BASE_U
48 #define ERROR_TABLE_BASE_U (5376L)
49 #endif /* ubik error base define */
51 /* shouldn't do it this way, but for now will do */
52 #ifndef ERROR_TABLE_BASE_uae
53 #define ERROR_TABLE_BASE_uae (49733376L)
54 #endif /* unified afs error base define */
56 /* same hack for vlserver error base as for ubik error base */
57 #ifndef ERROR_TABLE_BASE_VL
58 #define ERROR_TABLE_BASE_VL (363520L)
59 #define VL_NOENT (363524L)
60 #endif /* vlserver error base define */
63 int afs_BusyWaitPeriod = 15; /**< poll period, in seconds */
65 afs_int32 hm_retry_RO = 0; /**< enable read-only hard-mount retry */
66 afs_int32 hm_retry_RW = 0; /**< enable read-write hard-mount retry */
67 afs_int32 hm_retry_int = 0; /**< hard-mount retry interval, in seconds */
/*
 * VSleep(at): wait for at seconds. The argument is multiplied by 1000
 * before being handed to afs_osi_Wait (presumably a millisecond timeout --
 * confirm against afs_osi_Wait); the two trailing zero arguments are passed
 * through unchanged.
 */
69 #define VSleep(at) afs_osi_Wait((at)*1000, 0, 0)
78 * Request vldb record to determine if it has changed.
80 * \retval 0 if the vldb record for a specific volume is different from what
81 * we have cached -- perhaps the volume has moved.
82 * \retval 1 if the vldb record is the same
83 * \retval 2 if we can't tell if it's the same or not.
86 * If 0 returned, the caller will probably start over at the beginning of our
87 * list of servers for this volume and try to find one that is up. If
88 * not 0, we will probably just keep plugging with what we have
89 * cached. If we fail to contact the VL server, we should just keep
90 * trying with the information we have, rather than failing.
/*
 * VLDB_Same: fetch the VLDB entry for the volume named by afid again and
 * report whether the server list cached for that volume changed.
 *
 * afid - FID whose Cell and Fid.Volume select the entry to look up.
 * areq - request of the caller; in the visible code it is only finalized.
 *        The lookups themselves run under a private request (treq).
 *
 * The final return yields DIFFERENT when a change was detected and SAME
 * otherwise. Other return statements are on lines missing from this view.
 *
 * NOTE(review): this view of the function is missing lines (several
 * declarations, braces, the loop opener and early returns); the comments
 * below describe only the statements that are visible.
 */
93 VLDB_Same(struct VenusFid *afid, struct vrequest *areq)
96 struct afs_conn *tconn;
100 struct nvldbentry ntve;
101 struct uvldbentry utve;
105 char *bp, tbuf[CVBS]; /* biggest volume id is 2^32, ~ 4*10^9 */
106 unsigned int changed;
107 struct server *(oldhosts[NMAXNSERVERS]);
108 struct rx_connection *rxconn;
110 AFS_STATCNT(CheckVLDB);
111 afs_FinalizeReq(areq);
/* The VL lookups use a fresh request initialized with afs_osi_credp. */
113 if ((i = afs_InitReq(&treq, afs_osi_credp)))
/*
 * v is heap scratch space holding the three entry formats filled in below
 * (tve, ntve, utve); a failed allocation trips osi_Assert.
 */
115 v = afs_osi_Alloc(sizeof(*v));
116 osi_Assert(v != NULL);
117 tcell = afs_GetCell(afid->Cell, READ_LOCK);
/*
 * The volume id is rendered as a string for the by-name RPCs. The helper is
 * handed the address one past the end of tbuf, so presumably it writes the
 * digits backwards from there -- verify against afs_cv2string.
 */
118 bp = afs_cv2string(&tbuf[CVBS], afid->Fid.Volume);
120 VSleep(2); /* Better safe than sorry. */
122 afs_ConnByMHosts(tcell->cellHosts, tcell->vlport, tcell->cellNum,
123 &treq, SHARED_LOCK, 0, &rxconn);
/*
 * Choose the RPC variant from the capability flags remembered on the
 * server: SNO_LHOSTS selects the O call, SYES_LHOSTS the N call, and
 * otherwise the U call is tried.
 */
125 if ( tconn->parent->srvr->server->flags & SNO_LHOSTS) {
128 i = VL_GetEntryByNameO(rxconn, bp, &v->tve);
130 } else if (tconn->parent->srvr->server->flags & SYES_LHOSTS) {
133 i = VL_GetEntryByNameN(rxconn, bp, &v->ntve);
138 i = VL_GetEntryByNameU(rxconn, bp, &v->utve);
/*
 * Capability not recorded yet: on RXGEN_OPCODE step down from the U call to
 * the N call and then to the O call, and remember the outcome in the server
 * flags (SNO_LHOSTS, SYES_LHOSTS or SVLSRV_UUID).
 */
140 if (!(tconn->parent->srvr->server->flags & SVLSRV_UUID)) {
141 if (i == RXGEN_OPCODE) {
144 i = VL_GetEntryByNameN(rxconn, bp, &v->ntve);
146 if (i == RXGEN_OPCODE) {
148 tconn->parent->srvr->server->flags |= SNO_LHOSTS;
150 i = VL_GetEntryByNameO(rxconn, bp, &v->tve);
153 tconn->parent->srvr->server->flags |= SYES_LHOSTS;
155 tconn->parent->srvr->server->flags |= SVLSRV_UUID;
/* afs_Analyze decides whether the lookup is attempted again. */
161 } while (afs_Analyze(tconn, rxconn, i, NULL, &treq, -1, /* no op code for this */
162 SHARED_LOCK, tcell));
164 afs_PutCell(tcell, READ_LOCK);
/*
 * NOTE(review): afid is already a struct VenusFid pointer, so &afid hands
 * the tracer a pointer to that pointer -- confirm what ICL_TYPE_FID expects.
 */
165 afs_Trace2(afs_iclSetp, CM_TRACE_CHECKVLDB, ICL_TYPE_FID, &afid,
169 afs_osi_Free(v, sizeof(*v));
172 /* have info, copy into serverHost array */
174 tvp = afs_FindVolume(afid, WRITE_LOCK);
/*
 * Snapshot the current server list under the volume lock so it can be
 * compared with the list present after the new entry is installed.
 */
176 ObtainWriteLock(&tvp->lock, 107);
177 for (i = 0; i < NMAXNSERVERS && tvp->serverHost[i]; i++) {
178 oldhosts[i] = tvp->serverHost[i];
180 ReleaseWriteLock(&tvp->lock);
/*
 * Install whichever entry format was fetched, selected by type (the
 * assignments to type are not visible in this view). The volume lock is
 * released further down without a visible matching obtain, so presumably
 * the install helpers return with it held -- confirm.
 */
183 LockAndInstallUVolumeEntry(tvp, &v->utve, afid->Cell, tcell, &treq);
184 } else if (type == 1) {
185 LockAndInstallNVolumeEntry(tvp, &v->ntve, afid->Cell);
187 LockAndInstallVolumeEntry(tvp, &v->tve, afid->Cell);
/*
 * i still holds the length of the old list. A populated slot at that index
 * means the new list is longer; the body of this test is not visible here,
 * presumably it records a change.
 */
190 if (i < NMAXNSERVERS && tvp->serverHost[i]) {
/* Walk back over the old slots until a differing server pointer is found. */
193 for (--i; !changed && i >= 0; i--) {
194 if (tvp->serverHost[i] != oldhosts[i]) {
195 changed = 1; /* also happens if prefs change. big deal. */
199 ReleaseWriteLock(&tvp->lock);
200 afs_PutVolume(tvp, WRITE_LOCK);
201 } else { /* can't find volume */
202 tvp = afs_GetVolume(afid, &treq, WRITE_LOCK);
204 afs_PutVolume(tvp, WRITE_LOCK);
205 afs_osi_Free(v, sizeof(*v));
208 afs_osi_Free(v, sizeof(*v));
213 afs_osi_Free(v, sizeof(*v));
214 return (changed ? DIFFERENT : SAME);
219 * Mark a server as invalid for further attempts of this request only.
221 * \param[in,out] areq The request record associated with this operation.
222 * \param[in] afid The FID of the file involved in the action. This argument
223 * may be null if none was involved.
224 * \param[in,out] tsp pointer to a server struct for the server we wish to
228 * Non-zero value if further servers are available to try,
232 * This routine is typically called in situations where we believe
233 * one server out of a pool may have an error condition.
236 * The afs_Conn* routines use the list of invalidated servers to
237 * avoid reusing a server marked as invalid for this request.
/*
 * afs_BlackListOnce: flag server slots of the volume named by afid in
 * areq->skipserver so this request avoids them, then look for a slot that
 * is still usable.
 *
 * NOTE(review): the rest of the parameter list, some declarations, braces
 * and the return statement are on lines missing from this view; the
 * comments describe only the visible statements.
 */
240 afs_BlackListOnce(struct vrequest *areq, struct VenusFid *afid,
245 afs_int32 serversleft = 0;
248 tvp = afs_FindVolume(afid, READ_LOCK);
/*
 * First pass: mark the slot whose server pointer equals tsp, and also any
 * populated slot whose address flags match the mask tested below (the mask
 * itself is on a missing line).
 */
250 for (i = 0; i < AFS_MAXHOSTS; i++) {
251 if (tvp->serverHost[i] == tsp) {
252 areq->skipserver[i] = 1;
254 if (tvp->serverHost[i] &&
255 (tvp->serverHost[i]->addr->sa_flags &
257 areq->skipserver[i] = 1;
/*
 * Second pass: search for a populated slot that is not marked skipped. The
 * body is not visible; presumably it sets serversleft -- confirm.
 */
260 for (i = 0; i < AFS_MAXHOSTS; i++) {
261 if (tvp->serverHost[i] && areq->skipserver[i] == 0) {
266 afs_PutVolume(tvp, READ_LOCK);
275 * Decide whether cached status for a file must be discarded after a failed
276 * RPC, and whether that RPC may be retried.
278 * \param[in] afid The FID of the file involved in the action. This argument
279 * may be null if none was involved.
280 * \param[in] op which RPC we are analyzing.
281 * \param[in,out] avp A pointer to the struct volume, if we already have one.
284 * Non-zero value if the related RPC operation can be retried,
288 * This routine is called when we got a network error,
289 * and discards state if the operation was a data-mutating
/*
 * afs_ClearStatus: after a failed RPC, drop the cached status bits of the
 * vcache for afid when op is a write operation.
 *
 * afid - FID of the file involved.
 * op   - RPC index, tested with AFS_STATS_FS_RPCIDXES_ISWRITE and
 *        AFS_STATS_FS_RPCIDXES_WRITE_RETRIABLE.
 * avp  - volume supplied by the caller, if any. Its use is on lines missing
 *        from this view; presumably it replaces the afs_FindVolume lookup
 *        when non-null -- confirm.
 *
 * NOTE(review): the return statements and several braces are missing from
 * this view; the comments describe only the visible statements.
 */
293 afs_ClearStatus(struct VenusFid *afid, int op, struct volume *avp)
295 struct volume *tvp = NULL;
297 /* if it's not a write op, we have nothing to veto and shouldn't clear. */
298 if (!AFS_STATS_FS_RPCIDXES_ISWRITE(op)) {
305 tvp = afs_FindVolume(afid, READ_LOCK);
307 /* don't assume just discarding will fix if no cached volume */
/*
 * The vcache is looked up under the afs_xvcache read lock. When found, the
 * lock is dropped first and then CStatd and CUnique are cleared from its
 * state bits.
 */
310 ObtainReadLock(&afs_xvcache);
311 if ((tvc = afs_FindVCache(afid, 0, 0))) {
312 ReleaseReadLock(&afs_xvcache);
313 tvc->f.states &= ~(CStatd | CUnique);
316 ReleaseReadLock(&afs_xvcache);
319 afs_PutVolume(tvp, READ_LOCK);
/* The result depends on whether this write operation is retriable. */
322 if (AFS_STATS_FS_RPCIDXES_WRITE_RETRIABLE(op))
325 /* not retriable: we may have raced ourselves */
331 * Analyze the outcome of an RPC operation, taking whatever support
332 * actions are necessary.
334 * \param[in] aconn Ptr to the relevant connection on which the call was made.
335 * \param[in] acode The return code experienced by the RPC.
336 * \param[in] fid The FID of the file involved in the action. This argument
337 * may be null if none was involved.
338 * \param[in,out] areq The request record associated with this operation.
339 * \param[in] op which RPC we are analyzing.
340 * \param[in] cellp pointer to a cell struct. Must provide either fid or cell.
343 * Non-zero value if the related RPC operation should be retried,
347 * This routine is typically called in a do-while loop, causing the
348 * embedded RPC operation to be called repeatedly if appropriate
349 * until whatever error condition (if any) is intolerable.
352 * The retry return value is used by afs_StoreAllSegments to determine
353 * if this is a temporary or permanent error.
/*
 * afs_Analyze: classify the result code of one RPC attempt, update request,
 * volume, server and token state accordingly, and tell the caller whether
 * the RPC should be issued again.
 *
 * aconn, rxconn - connection the call was made on; released with
 *                 afs_PutConn on the paths visible here. aconn may be null.
 * acode         - RPC result code; codes in the unified-AFS error table are
 *                 converted with et_to_sys_error before classification.
 * afid          - FID involved, or null.
 * areq          - request record; finalized here, and its busyCount,
 *                 volumeError, networkError, accessError, permWriteError,
 *                 tokenError and skipserver fields are read or updated.
 * op            - RPC index; when it lies in [0, AFS_STATS_NUM_FS_RPC_OPS)
 *                 it also selects the error counters reached through aerrP.
 * locktype      - passed through to afs_PutConn.
 * cellp         - cell, used for the primary-cell test and passed to
 *                 afs_CheckServers.
 *
 * The final statement returns shouldRetry.
 *
 * NOTE(review): this view of the function is missing lines (some
 * declarations, braces, else arms and early returns), so the comments added
 * below describe only the statements that are visible.
 */
356 afs_Analyze(struct afs_conn *aconn, struct rx_connection *rxconn,
357 afs_int32 acode, struct VenusFid *afid, struct vrequest *areq,
358 int op, afs_int32 locktype, struct cell *cellp)
363 struct volume *tvp = NULL;
364 afs_int32 shouldRetry = 0;
365 afs_int32 serversleft = 1;
366 struct afs_stats_RPCErrors *aerrP;
/*
 * Disconnected and not syncing: the request is finalized and the
 * connection is released. The return for this path is not visible here.
 */
369 if (AFS_IS_DISCONNECTED && !AFS_IN_SYNC) {
370 /* On reconnection, act as connected. XXX: for now.... */
371 /* SXW - This may get very tired after a while. We should try and
372 * intercept all RPCs before they get here ... */
373 /*printf("afs_Analyze: disconnected\n");*/
374 afs_FinalizeReq(areq);
376 /* SXW - I suspect that this will _never_ happen - we shouldn't
377 * get a connection because we're disconnected !!!*/
378 afs_PutConn(aconn, rxconn, locktype);
383 AFS_STATCNT(afs_Analyze);
384 afs_Trace4(afs_iclSetp, CM_TRACE_ANALYZE, ICL_TYPE_INT32, op,
385 ICL_TYPE_POINTER, aconn, ICL_TYPE_INT32, acode, ICL_TYPE_LONG,
/*
 * aerrP stays null unless op is a valid file-server RPC index.
 * NOTE(review): the counter increments through aerrP further down are
 * presumably guarded by null checks on lines missing from this view --
 * confirm.
 */
388 aerrP = (struct afs_stats_RPCErrors *)0;
390 if ((op >= 0) && (op < AFS_STATS_NUM_FS_RPC_OPS))
391 aerrP = &(afs_stats_cmfullperf.rpc.fsRPCErrors[op]);
393 afs_FinalizeReq(areq);
/*
 * No connection, but earlier attempts reported a busy volume: warn the
 * user, reset per-host status entries that are neither not_busy nor
 * offline back to not_busy, and poll again after afs_BusyWaitPeriod
 * seconds. Once busyCount exceeds 100 the request records VOLBUSY.
 */
394 if (!aconn && areq->busyCount) { /* one RPC or more got VBUSY/VRESTARTING */
396 tvp = afs_FindVolume(afid, READ_LOCK);
398 afs_warnuser("afs: Waiting for busy volume %u (%s) in cell %s\n",
399 (afid ? afid->Fid.Volume : 0),
400 (tvp->name ? tvp->name : ""),
402 && tvp->serverHost[0]->cell) ? tvp->serverHost[0]->
403 cell->cellName : ""));
405 for (i = 0; i < AFS_MAXHOSTS; i++) {
406 if (tvp->status[i] != not_busy && tvp->status[i] != offline) {
407 tvp->status[i] = not_busy;
409 if (tvp->status[i] == not_busy)
412 afs_PutVolume(tvp, READ_LOCK);
414 afs_warnuser("afs: Waiting for busy volume %u\n",
415 (afid ? afid->Fid.Volume : 0));
418 if (areq->busyCount > 100) {
420 (aerrP->err_Volume)++;
421 areq->volumeError = VOLBUSY;
424 VSleep(afs_BusyWaitPeriod); /* poll periodically */
426 if (shouldRetry != 0)
429 return shouldRetry; /* should retry */
/* No connection, or no server address behind the connection. */
432 if (!aconn || !aconn->parent->srvr) {
433 if (!areq->volumeError) {
435 (aerrP->err_Network)++;
/*
 * Hard-mount handling: with hm_retry_int set, a request without O_NONBLOCK
 * in the primary cell sleeps hm_retry_int seconds and probes the servers
 * again with afs_CheckServers. The function-level statics declared below
 * relate to limiting how often the warning is printed; the code that tests
 * and resets them is only partly visible here.
 */
436 if (hm_retry_int && !(areq->flags & O_NONBLOCK) && /* "hard" mount */
437 ((afid && afs_IsPrimaryCellNum(afid->Cell))
438 || (cellp && afs_IsPrimaryCell(cellp)))) {
440 static int afs_vl_hm = 0;
443 afs_vl_hm = warn = 1;
447 ("afs: hard-mount waiting for a vlserver to return to service\n");
449 VSleep(hm_retry_int);
450 afs_CheckServers(1, cellp);
457 static int afs_unknown_vhm = 0;
458 int warn = 0, vp_vhm = 0;
460 tvp = afs_FindVolume(afid, READ_LOCK);
461 if (!tvp || (tvp->states & VRO)) {
462 shouldRetry = hm_retry_RO;
464 shouldRetry = hm_retry_RW;
467 /* Set 'warn' if we should afs_warnuser. Only let one
468 * caller call afs_warnuser per hm_retry_int interval per
472 if (!(tvp->states & VHardMount)) {
473 tvp->states |= VHardMount;
477 if (!afs_unknown_vhm) {
485 afs_PutVolume(tvp, READ_LOCK);
490 ("afs: hard-mount waiting for volume %u\n",
494 VSleep(hm_retry_int);
495 afs_CheckServers(1, cellp);
496 /* clear the black listed servers on this request. */
497 memset(areq->skipserver, 0, sizeof(areq->skipserver));
500 tvp = afs_FindVolume(afid, READ_LOCK);
502 tvp->states &= ~VHardMount;
503 afs_PutVolume(tvp, READ_LOCK);
510 } /* if (hm_retry_int ... */
512 if (acode == RX_MSGSIZE)
515 areq->networkError = 1;
516 /* do not promote to shouldRetry if not already */
517 if (afs_ClearStatus(afid, op, NULL) == 0)
525 /* Find server associated with this connection. */
526 sa = aconn->parent->srvr;
528 address = ntohl(sa->sa_ip);
530 /* Before we do anything with acode, make sure we translate it back to
532 if ((acode & ~0xff) == ERROR_TABLE_BASE_uae)
533 acode = et_to_sys_error(acode);
536 /* If we previously took an error, mark this volume not busy */
537 if (areq->volumeError) {
538 tvp = afs_FindVolume(afid, READ_LOCK);
540 for (i = 0; i < AFS_MAXHOSTS; i++) {
541 if (tvp->serverHost[i] == tsp) {
542 tvp->status[i] = not_busy;
545 afs_PutVolume(tvp, READ_LOCK);
549 afs_PutConn(aconn, rxconn, locktype);
553 /* If network troubles, mark server as having bogued out again. */
554 /* VRESTARTING is < 0 because of backward compatibility issues
555 * with 3.4 file servers and older cache managers */
556 #ifdef AFS_64BIT_CLIENT
559 #endif /* AFS_64BIT_CLIENT */
/*
 * Negative codes other than VRESTARTING are handled as network or server
 * trouble. RX_CALL_TIMEOUT and RX_CALL_IDLE skip the server for this
 * request only, through afs_BlackListOnce; afs_ServerDown and
 * ForceNewConnections appear further down, on a path whose exact
 * condition is not visible in this view.
 */
560 if ((acode < 0) && (acode != VRESTARTING)) {
561 if (acode == RX_MSGSIZE || acode == RX_CALL_BUSY) {
565 if (acode == RX_CALL_TIMEOUT || acode == RX_CALL_IDLE) {
566 serversleft = afs_BlackListOnce(areq, afid, tsp);
568 tvp = afs_FindVolume(afid, READ_LOCK);
569 if ((serversleft == 0) && tvp &&
570 ((tvp->states & VRO) || (tvp->states & VBackup))) {
575 if (!afid || !tvp || (tvp->states & VRO))
577 else if (afs_ClearStatus(afid, op, tvp) == 0)
581 afs_PutVolume(tvp, READ_LOCK);
582 /* By doing this, we avoid ever marking a server down
583 * in an idle timeout case. That's because the server is
584 * still responding and may only be letting a single vnode
585 * time out. We otherwise risk having the server continually
586 * be marked down, then up, then down again...
590 afs_ServerDown(sa, acode);
591 ForceNewConnections(sa); /* multi homed clients lock:afs_xsrvAddr? */
593 (aerrP->err_Server)++;
/*
 * Busy or restarting volume: the status slot of this server is set to
 * rdwr_busy, the user is warned, and the caller sleeps for
 * afs_BusyWaitPeriod seconds.
 */
596 if (acode == VBUSY || acode == VRESTARTING) {
597 if (acode == VBUSY) {
600 (aerrP->err_VolumeBusies)++;
604 tvp = afs_FindVolume(afid, READ_LOCK);
606 for (i = 0; i < AFS_MAXHOSTS; i++) {
607 if (tvp->serverHost[i] == tsp) {
608 tvp->status[i] = rdwr_busy; /* can't tell which yet */
609 /* to tell which, have to look at the op code. */
612 afs_PutVolume(tvp, READ_LOCK);
614 afs_warnuser("afs: Waiting for busy volume %u in cell %s (server %d.%d.%d.%d)\n",
615 (afid ? afid->Fid.Volume : 0), tsp->cell->cellName,
616 (address >> 24), (address >> 16) & 0xff,
617 (address >> 8) & 0xff, (address) & 0xff);
618 VSleep(afs_BusyWaitPeriod); /* poll periodically */
/*
 * Token trouble: VICETOKENDEAD sets forceConnectFS on the connection; for
 * RXKADEXPIRED the user of the connection is marked UTokensBad and notified
 * with UTokensDropped. Other rxkad codes go through afs_BlackListOnce
 * before the tokens are discarded.
 */
622 } else if (acode == VICETOKENDEAD
623 || (acode & ~0xff) == ERROR_TABLE_BASE_RXK) {
624 /* any rxkad error is treated as token expiration */
627 * I'm calling these errors protection errors, since they involve
628 * faulty authentication.
631 (aerrP->err_Protection)++;
633 tu = afs_FindUser(areq->uid, tsp->cell->cellNum, READ_LOCK);
635 if (acode == VICETOKENDEAD) {
636 aconn->forceConnectFS = 1;
637 } else if (acode == RXKADEXPIRED) {
638 aconn->forceConnectFS = 0; /* don't check until new tokens set */
639 aconn->parent->user->states |= UTokensBad;
640 afs_NotifyUser(tu, UTokensDropped);
642 ("afs: Tokens for user of AFS id %d for cell %s have expired (server %d.%d.%d.%d)\n",
643 tu->viceId, aconn->parent->srvr->server->cell->cellName,
644 (address >> 24), (address >> 16) & 0xff,
645 (address >> 8) & 0xff, (address) & 0xff);
647 serversleft = afs_BlackListOnce(areq, afid, tsp);
652 ("afs: Tokens for user of AFS id %d for cell %s: rxkad error=%d (server %d.%d.%d.%d)\n",
653 tu->viceId, aconn->parent->srvr->server->cell->cellName, acode,
654 (address >> 24), (address >> 16) & 0xff,
655 (address >> 8) & 0xff, (address) & 0xff);
658 areq->tokenError = 0;
659 aconn->forceConnectFS = 0; /* don't check until new tokens set */
660 aconn->parent->user->states |= UTokensBad;
661 afs_NotifyUser(tu, UTokensDropped);
663 ("afs: Tokens for user of AFS id %d for cell %s are discarded (rxkad error=%d, server %d.%d.%d.%d)\n",
664 tu->viceId, aconn->parent->srvr->server->cell->cellName, acode,
665 (address >> 24), (address >> 16) & 0xff,
666 (address >> 8) & 0xff, (address) & 0xff);
669 afs_PutUser(tu, READ_LOCK);
671 /* The else case shouldn't be possible and should probably be replaced by a panic? */
/*
 * NOTE(review): the statements below print areq->uid instead of tu->viceId,
 * which suggests this is the arm taken when afs_FindUser returned null, yet
 * afs_NotifyUser is still handed tu. Confirm the branch structure and
 * whether afs_NotifyUser tolerates a null user.
 */
672 if (acode == VICETOKENDEAD) {
673 aconn->forceConnectFS = 1;
674 } else if (acode == RXKADEXPIRED) {
675 aconn->forceConnectFS = 0; /* don't check until new tokens set */
676 aconn->parent->user->states |= UTokensBad;
677 afs_NotifyUser(tu, UTokensDropped);
679 ("afs: Tokens for user %d for cell %s have expired (server %d.%d.%d.%d)\n",
680 areq->uid, aconn->parent->srvr->server->cell->cellName,
681 (address >> 24), (address >> 16) & 0xff,
682 (address >> 8) & 0xff, (address) & 0xff);
684 aconn->forceConnectFS = 0; /* don't check until new tokens set */
685 aconn->parent->user->states |= UTokensBad;
686 afs_NotifyUser(tu, UTokensDropped);
688 ("afs: Tokens for user %d for cell %s are discarded (rxkad error = %d, server %d.%d.%d.%d)\n",
689 areq->uid, aconn->parent->srvr->server->cell->cellName,
691 (address >> 24), (address >> 16) & 0xff,
692 (address >> 8) & 0xff, (address) & 0xff);
696 shouldRetry = 1; /* Try again (as root). */
698 /* Check for access violation. */
699 else if (acode == EACCES) {
700 /* should mark access error in non-existent per-user global structure */
702 (aerrP->err_Protection)++;
703 areq->accessError = 1;
704 if (op == AFS_STATS_FS_RPCIDX_STOREDATA)
705 areq->permWriteError = 1;
708 /* check for ubik errors; treat them like crashed servers */
709 else if (acode >= ERROR_TABLE_BASE_U && acode < ERROR_TABLE_BASE_U + 255) {
710 afs_ServerDown(sa, acode);
712 (aerrP->err_Server)++;
713 shouldRetry = 1; /* retryable (maybe one is working) */
714 VSleep(1); /* just in case */
716 /* Check for bad volume data base / missing volume. */
717 else if (acode == VSALVAGE || acode == VOFFLINE || acode == VNOVOL
718 || acode == VNOSERVICE || acode == VMOVED) {
723 areq->volumeError = VOLMISSING;
725 (aerrP->err_Volume)++;
/*
 * With a FID in a known cell, VLDB_Same is asked whether the volume
 * location changed. Per-host status is then updated: the slot of this
 * server is examined (end_not_busy becomes offline) and the other slots
 * are reset to not_busy.
 */
726 if (afid && (tcell = afs_GetCell(afid->Cell, 0))) {
727 same = VLDB_Same(afid, areq);
728 tvp = afs_FindVolume(afid, READ_LOCK);
730 for (i = 0; i < AFS_MAXHOSTS && tvp->serverHost[i]; i++) {
731 if (tvp->serverHost[i] == tsp) {
732 if (tvp->status[i] == end_not_busy)
733 tvp->status[i] = offline;
737 tvp->status[i] = not_busy; /* reset the others */
740 afs_PutVolume(tvp, READ_LOCK);
/*
 * NOTE(review): this range test uses an inclusive upper bound (<=) while
 * the ubik range test earlier in this function uses an exclusive one (<)
 * -- confirm which bound is intended.
 */
743 } else if (acode >= ERROR_TABLE_BASE_VL && acode <= ERROR_TABLE_BASE_VL + 255) { /* vlserver errors */
745 areq->volumeError = VOLMISSING;
746 } else if (acode >= 0) {
748 (aerrP->err_Other)++;
749 if (op == AFS_STATS_FS_RPCIDX_STOREDATA)
750 areq->permWriteError = 1;
751 shouldRetry = 0; /* Other random Vice error. */
752 } else if (acode == RX_MSGSIZE) { /* same meaning as EMSGSIZE... */
754 ("afs: Path MTU may have been exceeded, retrying (server %d.%d.%d.%d)\n",
755 (address >> 24), (address >> 16) & 0xff,
756 (address >> 8) & 0xff, (address) & 0xff);
758 VSleep(1); /* Just a hack for desperate times. */
760 (aerrP->err_Other)++;
761 shouldRetry = 1; /* packet was too big, please retry call */
/*
 * Remaining negative codes (not RX_MSGSIZE, not VRESTARTING): the server is
 * marked down and the caller pauses for one second.
 */
764 if (acode < 0 && acode != RX_MSGSIZE && acode != VRESTARTING) {
765 /* If we get here, code < 0 and we have network/Server troubles.
766 * areq->networkError is not set here, since we always
767 * retry in case there is another server. However, if we find
768 * no connection (aconn == 0) we set the networkError flag.
770 afs_ServerDown(sa, acode);
772 (aerrP->err_Server)++;
773 VSleep(1); /* Just a hack for desperate times. */
777 /* now unlock the connection and return */
778 afs_PutConn(aconn, rxconn, locktype);
779 return (shouldRetry);