2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
13 #include <afsconfig.h>
14 #include "afs/param.h"
18 #include "afs/sysincludes.h" /* Standard vendor system headers */
21 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_FBSD_ENV)
23 #include <netinet/in.h>
27 #include "h/hashing.h"
29 #if !defined(AFS_HPUX110_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_FBSD_ENV) && !defined(AFS_DARWIN_ENV)
30 #include <netinet/in_var.h>
34 #include "afsincludes.h" /* Afs-based standard headers */
35 #include "afs/afs_stats.h" /* afs statistics */
36 #include "afs/afs_util.h"
37 #include "afs/unified_afs.h"
39 #if defined(AFS_SUN56_ENV)
41 #include <inet/common.h>
42 #if defined(AFS_SUN58_ENV)
43 #include <netinet/ip6.h>
/*
 * Fallback definitions of error-table base values. Each one takes effect
 * only when the corresponding macro has not already been defined.
 * afs_Analyze() compares RPC return codes against these bases to recognise
 * ubik, unified-AFS (uae) and vlserver error ranges.
 */
48 /* shouldn't do it this way, but for now will do */
49 #ifndef ERROR_TABLE_BASE_U
50 #define ERROR_TABLE_BASE_U (5376L)
51 #endif /* ubik error base define */
53 /* shouldn't do it this way, but for now will do */
54 #ifndef ERROR_TABLE_BASE_uae
55 #define ERROR_TABLE_BASE_uae (49733376L)
56 #endif /* unified afs error base define */
58 /* same hack for vlserver error base as for ubik error base */
59 #ifndef ERROR_TABLE_BASE_VL
60 #define ERROR_TABLE_BASE_VL (363520L)
/* VL_NOENT is ERROR_TABLE_BASE_VL + 4; it is only defined here when the
 * base itself was missing. */
61 #define VL_NOENT (363524L)
62 #endif /* vlserver error base define */
/*
 * Tunables read by afs_Analyze():
 *   afs_BusyWaitPeriod - value handed to VSleep() while waiting on a busy
 *                        volume.
 *   hm_retry_RO / _RW  - copied into the retry decision on the hard-mount
 *                        path, depending on whether the volume is
 *                        read-only (or not found) or not.
 *   hm_retry_int       - gates the hard-mount wait (0 disables it) and is
 *                        the value handed to VSleep() on that path.
 */
65 int afs_BusyWaitPeriod = 15; /* poll every 15 seconds */
67 afs_int32 hm_retry_RO = 0; /* don't wait */
68 afs_int32 hm_retry_RW = 0; /* don't wait */
69 afs_int32 hm_retry_int = 0; /* don't wait */
/* VSleep(at): wait via afs_osi_Wait() with the argument scaled by 1000.
 * Presumably "at" is in seconds and afs_osi_Wait() takes milliseconds --
 * confirm against afs_osi_Wait(). */
71 #define VSleep(at) afs_osi_Wait((at)*1000, 0, 0)
76 * 0 if the vldb record for a specific volume is different from what
77 * we have cached -- perhaps the volume has moved.
78 * 1 if the vldb record is the same
79 * 2 if we can't tell if it's the same or not.
81 * If 0, the caller will probably start over at the beginning of our
82 * list of servers for this volume and try to find one that is up. If
83 * not 0, we will probably just keep plugging with what we have
84 * cached. If we fail to contact the VL server, we should just keep
85 * trying with the information we have, rather than failing. */
/*
 * VLDB_Same: look up the VLDB entry for the volume named by afid again and
 * report whether the cached server list for that volume changed.
 *
 * afid - fid whose Cell and Fid.Volume identify the volume to look up.
 * areq - caller's request; it is finalized here. The lookup itself runs
 *        under a separate request (treq) initialized with afs_osi_credp.
 *
 * The final return is DIFFERENT when a change was detected and SAME
 * otherwise.
 */
90 VLDB_Same(struct VenusFid *afid, struct vrequest *areq)
93 struct afs_conn *tconn;
97 struct nvldbentry ntve;
98 struct uvldbentry utve;
102 char *bp, tbuf[CVBS]; /* biggest volume id is 2^32, ~ 4*10^9 */
103 unsigned int changed;
104 struct server *(oldhosts[NMAXNSERVERS]);
105 struct rx_connection *rxconn;
107 AFS_STATCNT(CheckVLDB);
108 afs_FinalizeReq(areq);
110 if ((i = afs_InitReq(&treq, afs_osi_credp)))
/* Scratch space holding the entry variants used below (tve / ntve / utve);
 * it is released with afs_osi_Free(v, sizeof(*v)) further down. */
112 v = afs_osi_Alloc(sizeof(*v));
113 osi_Assert(v != NULL);
114 tcell = afs_GetCell(afid->Cell, READ_LOCK);
/* afs_cv2string() is handed the address just past the end of tbuf and
 * returns the start of the text -- presumably it writes the digits
 * backwards from that point; confirm against its definition. */
115 bp = afs_cv2string(&tbuf[CVBS], afid->Fid.Volume);
117 VSleep(2); /* Better safe than sorry. */
119 afs_ConnByMHosts(tcell->cellHosts, tcell->vlport, tcell->cellNum,
120 &treq, SHARED_LOCK, &rxconn);
/* Pick the lookup RPC from what is already known about this vlserver:
 * SNO_LHOSTS -> O entry, SYES_LHOSTS -> N entry, otherwise try the U
 * entry first. */
122 if ( tconn->parent->srvr->server->flags & SNO_LHOSTS) {
125 i = VL_GetEntryByNameO(rxconn, bp, &v->tve);
127 } else if (tconn->parent->srvr->server->flags & SYES_LHOSTS) {
130 i = VL_GetEntryByNameN(rxconn, bp, &v->ntve);
135 i = VL_GetEntryByNameU(rxconn, bp, &v->utve);
/* Capability probing: an RXGEN_OPCODE reply makes us fall back from the U
 * call to the N call and then to the O call; the outcome is remembered in
 * the server flags (SNO_LHOSTS, SYES_LHOSTS or SVLSRV_UUID). */
137 if (!(tconn->parent->srvr->server->flags & SVLSRV_UUID)) {
138 if (i == RXGEN_OPCODE) {
141 i = VL_GetEntryByNameN(rxconn, bp, &v->ntve);
143 if (i == RXGEN_OPCODE) {
145 tconn->parent->srvr->server->flags |= SNO_LHOSTS;
147 i = VL_GetEntryByNameO(rxconn, bp, &v->tve);
150 tconn->parent->srvr->server->flags |= SYES_LHOSTS;
152 tconn->parent->srvr->server->flags |= SVLSRV_UUID;
/* The lookup is repeated for as long as afs_Analyze() asks for a retry. */
158 } while (afs_Analyze(tconn, rxconn, i, NULL, &treq, -1, /* no op code for this */
159 SHARED_LOCK, tcell));
161 afs_PutCell(tcell, READ_LOCK);
/* NOTE(review): &afid is the address of the pointer parameter, not of the
 * fid it points to -- confirm that this is what ICL_TYPE_FID expects. */
162 afs_Trace2(afs_iclSetp, CM_TRACE_CHECKVLDB, ICL_TYPE_FID, &afid,
166 afs_osi_Free(v, sizeof(*v));
169 /* have info, copy into serverHost array */
171 tvp = afs_FindVolume(afid, WRITE_LOCK);
173 ObtainWriteLock(&tvp->lock, 107);
/* Snapshot the current server list so it can be compared once the new
 * entry has been installed; i ends up as the number of old hosts. */
174 for (i = 0; i < NMAXNSERVERS && tvp->serverHost[i]; i++) {
175 oldhosts[i] = tvp->serverHost[i];
179 InstallUVolumeEntry(tvp, &v->utve, afid->Cell, tcell, &treq);
180 } else if (type == 1) {
181 InstallNVolumeEntry(tvp, &v->ntve, afid->Cell);
183 InstallVolumeEntry(tvp, &v->tve, afid->Cell);
/* A host at index i in the new list means the new list is longer than the
 * old one. */
186 if (i < NMAXNSERVERS && tvp->serverHost[i]) {
/* Compare slot by slot, walking back from the last old host and stopping
 * at the first difference. */
189 for (--i; !changed && i >= 0; i--) {
190 if (tvp->serverHost[i] != oldhosts[i]) {
191 changed = 1; /* also happens if prefs change. big deal. */
195 ReleaseWriteLock(&tvp->lock);
196 afs_PutVolume(tvp, WRITE_LOCK);
197 } else { /* can't find volume */
/* afs_FindVolume() did not return the volume; ask afs_GetVolume() for it
 * using the private request. */
198 tvp = afs_GetVolume(afid, &treq, WRITE_LOCK);
200 afs_PutVolume(tvp, WRITE_LOCK);
201 afs_osi_Free(v, sizeof(*v));
204 afs_osi_Free(v, sizeof(*v));
209 afs_osi_Free(v, sizeof(*v));
210 return (changed ? DIFFERENT : SAME);
213 /*------------------------------------------------------------------------
217 * Mark a server as invalid for further attempts of this request only.
220 * areq : The request record associated with this operation.
221 * afid : The FID of the file involved in the action. This argument
222 * may be null if none was involved.
223 * tsp : pointer to a server struct for the server we wish to
227 * Non-zero value if further servers are available to try,
231 * This routine is typically called in situations where we believe
232 * one server out of a pool may have an error condition.
238 * The afs_Conn* routines use the list of invalidated servers to
239 * avoid reusing a server marked as invalid for this request.
240 *------------------------------------------------------------------------*/
/*
 * afs_BlackListOnce: mark tsp in areq->skipserver[] for the volume named by
 * afid, then scan the volume's server slots for one that is still usable.
 * serversleft starts at 0.
 */
242 afs_BlackListOnce(struct vrequest *areq, struct VenusFid *afid,
247 afs_int32 serversleft = 0;
250 tvp = afs_FindVolume(afid, READ_LOCK);
252 for (i = 0; i < AFS_MAXHOSTS; i++) {
/* Skip the slot that holds the server being blacklisted. */
253 if (tvp->serverHost[i] == tsp) {
254 areq->skipserver[i] = 1;
/* Also skip populated slots for which the addr->sa_flags test below is
 * true. */
256 if (tvp->serverHost[i] &&
257 (tvp->serverHost[i]->addr->sa_flags &
259 areq->skipserver[i] = 1;
/* NOTE(review): the scan that follows still reads tvp->serverHost[] after
 * the reference has been dropped with afs_PutVolume(). Confirm the volume
 * is guaranteed to stay valid here, or move the release after the scan. */
262 afs_PutVolume(tvp, READ_LOCK);
/* Look for a populated slot that has not been marked to skip. */
263 for (i = 0; i < AFS_MAXHOSTS; i++) {
264 if (tvp->serverHost[i] && areq->skipserver[i] == 0) {
276 /*------------------------------------------------------------------------
277 * EXPORTED afs_Analyze
280 * Analyze the outcome of an RPC operation, taking whatever support
281 * actions are necessary.
284 * aconn : Ptr to the relevant connection on which the call was made.
285 * acode : The return code experienced by the RPC.
286 * afid : The FID of the file involved in the action. This argument
287 * may be null if none was involved.
288 * areq : The request record associated with this operation.
289 * op : which RPC we are analyzing.
290 * cellp : pointer to a cell struct. Must provide either fid or cell.
293 * Non-zero value if the related RPC operation should be retried,
297 * This routine is typically called in a do-while loop, causing the
298 * embedded RPC operation to be called repeatedly if appropriate
299 * until whatever error condition (if any) is intolerable.
305 * The retry return value is used by afs_StoreAllSegments to determine
306 * if this is a temporary or permanent error.
307 *------------------------------------------------------------------------*/
/*
 * afs_Analyze: inspect the result (acode) of an RPC made on aconn/rxconn
 * and decide whether the caller should retry; shouldRetry starts at 0 and
 * is what the function finally returns.
 *
 * Order of the checks as written below:
 *   1. disconnected operation (AFS_IS_DISCONNECTED && !AFS_IN_SYNC);
 *   2. no connection while areq->busyCount is set: wait for a busy volume;
 *   3. no connection, or no server address behind it: hard-mount wait or
 *      network error;
 *   4. otherwise dispatch on acode: negative Rx codes, VBUSY/VRESTARTING,
 *      token errors, EACCES, ubik errors, volume errors, vlserver errors,
 *      other non-negative codes, RX_MSGSIZE.
 * The common exit at the bottom releases the connection with afs_PutConn().
 */
309 afs_Analyze(struct afs_conn *aconn, struct rx_connection *rxconn,
310 afs_int32 acode, struct VenusFid *afid, struct vrequest *areq,
311 int op, afs_int32 locktype, struct cell *cellp)
316 struct volume *tvp = NULL;
317 afs_int32 shouldRetry = 0;
318 afs_int32 serversleft = 1;
319 struct afs_stats_RPCErrors *aerrP;
320 afs_int32 markeddown;
/* Disconnected and not syncing: the request is finalized and any
 * connection that was handed in is released. */
323 if (AFS_IS_DISCONNECTED && !AFS_IN_SYNC) {
324 /* On reconnection, act as connected. XXX: for now.... */
325 /* SXW - This may get very tired after a while. We should try and
326 * intercept all RPCs before they get here ... */
327 /*printf("afs_Analyze: disconnected\n");*/
328 afs_FinalizeReq(areq);
330 /* SXW - I suspect that this will _never_ happen - we shouldn't
331 * get a connection because we're disconnected !!!*/
332 afs_PutConn(aconn, rxconn, locktype);
337 AFS_STATCNT(afs_Analyze);
338 afs_Trace4(afs_iclSetp, CM_TRACE_ANALYZE, ICL_TYPE_INT32, op,
339 ICL_TYPE_POINTER, aconn, ICL_TYPE_INT32, acode, ICL_TYPE_LONG,
/* aerrP stays NULL unless op indexes one of the file-server RPC slots; an
 * op of -1 (as passed by VLDB_Same()) therefore leaves it NULL. */
342 aerrP = (struct afs_stats_RPCErrors *)0;
344 if ((op >= 0) && (op < AFS_STATS_NUM_FS_RPC_OPS))
345 aerrP = &(afs_stats_cmfullperf.rpc.fsRPCErrors[op]);
347 afs_FinalizeReq(areq);
348 if (!aconn && areq->busyCount) { /* one RPC or more got VBUSY/VRESTARTING */
350 tvp = afs_FindVolume(afid, READ_LOCK);
352 afs_warnuser("afs: Waiting for busy volume %u (%s) in cell %s\n",
353 (afid ? afid->Fid.Volume : 0),
354 (tvp->name ? tvp->name : ""),
356 && tvp->serverHost[0]->cell) ? tvp->serverHost[0]->
357 cell->cellName : ""));
/* Any slot that is neither not_busy nor offline is put back to not_busy. */
359 for (i = 0; i < AFS_MAXHOSTS; i++) {
360 if (tvp->status[i] != not_busy && tvp->status[i] != offline) {
361 tvp->status[i] = not_busy;
363 if (tvp->status[i] == not_busy)
366 afs_PutVolume(tvp, READ_LOCK);
368 afs_warnuser("afs: Waiting for busy volume %u\n",
369 (afid ? afid->Fid.Volume : 0));
/* After more than 100 busy polls the request is tagged with VOLBUSY. */
372 if (areq->busyCount > 100) {
374 (aerrP->err_Volume)++;
375 areq->volumeError = VOLBUSY;
378 VSleep(afs_BusyWaitPeriod); /* poll periodically */
380 if (shouldRetry != 0)
383 return shouldRetry; /* should retry */
/* No connection at all, or a connection with no server address. */
386 if (!aconn || !aconn->parent->srvr) {
387 if (!areq->volumeError) {
389 (aerrP->err_Network)++;
/* Hard-mount wait: only when hm_retry_int is non-zero, the request is not
 * O_NONBLOCK, and either the fid's cell or cellp is the primary cell. */
390 if (hm_retry_int && !(areq->flags & O_NONBLOCK) && /* "hard" mount */
391 ((afid && afs_IsPrimaryCellNum(afid->Cell))
392 || (cellp && afs_IsPrimaryCell(cellp)))) {
395 ("afs: hard-mount waiting for a vlserver to return to service\n");
396 VSleep(hm_retry_int);
397 afs_CheckServers(1, cellp);
400 tvp = afs_FindVolume(afid, READ_LOCK);
/* The retry decision comes from hm_retry_RO when the volume is not found
 * or is read-only (VRO), and from hm_retry_RW otherwise. */
401 if (!tvp || (tvp->states & VRO)) {
402 shouldRetry = hm_retry_RO;
404 shouldRetry = hm_retry_RW;
407 afs_PutVolume(tvp, READ_LOCK);
410 ("afs: hard-mount waiting for volume %u\n",
412 VSleep(hm_retry_int);
413 afs_CheckServers(1, cellp);
416 } /* if (hm_retry_int ... */
418 if (acode == RX_MSGSIZE)
421 areq->networkError = 1;
427 /* Find server associated with this connection. */
428 sa = aconn->parent->srvr;
/* Host byte order copy of the server address, used for the dotted-quad
 * values printed in the warnings below. */
430 address = ntohl(sa->sa_ip);
/* Codes whose upper bits equal ERROR_TABLE_BASE_uae are mapped through
 * et_to_sys_error() before any further checks. */
432 /* Before we do anything with acode, make sure we translate it back to
434 if ((acode & ~0xff) == ERROR_TABLE_BASE_uae)
435 acode = et_to_sys_error(acode);
438 /* If we previously took an error, mark this volume not busy */
439 if (areq->volumeError) {
440 tvp = afs_FindVolume(afid, READ_LOCK);
442 for (i = 0; i < AFS_MAXHOSTS; i++) {
443 if (tvp->serverHost[i] == tsp) {
444 tvp->status[i] = not_busy;
447 afs_PutVolume(tvp, READ_LOCK);
451 afs_PutConn(aconn, rxconn, locktype);
455 /* If network troubles, mark server as having bogued out again. */
456 /* VRESTARTING is < 0 because of backward compatibility issues
457 * with 3.4 file servers and older cache managers */
458 #ifdef AFS_64BIT_CLIENT
461 #endif /* AFS_64BIT_CLIENT */
462 if ((acode < 0) && (acode != VRESTARTING)) {
463 if (acode == RX_MSGSIZE) {
/* RX_CALL_TIMEOUT: blacklist this server for the current request through
 * afs_BlackListOnce(), which also reports whether other servers remain. */
467 if (acode == RX_CALL_TIMEOUT) {
468 serversleft = afs_BlackListOnce(areq, afid, tsp);
470 tvp = afs_FindVolume(afid, READ_LOCK);
471 if (!afid || !tvp || (tvp->states & VRO))
473 if ((serversleft == 0) && tvp &&
474 ((tvp->states & VRO) || (tvp->states & VBackup))) {
480 afs_PutVolume(tvp, READ_LOCK);
481 /* By doing this, we avoid ever marking a server down
482 * in an idle timeout case. That's because the server is
483 * still responding and may only be letting a single vnode
484 * time out. We otherwise risk having the server continually
485 * be marked down, then up, then down again...
489 markeddown = afs_ServerDown(sa);
490 ForceNewConnections(sa); /**multi homed clients lock:afs_xsrvAddr? */
492 (aerrP->err_Server)++;
494 /* retry *once* when the server is timed out in case of NAT */
495 if (markeddown && acode == RX_CALL_DEAD) {
496 aconn->forceConnectFS = 1;
/* Busy or restarting volume: the slot for this server is marked
 * rdwr_busy, the user is warned, and we sleep for afs_BusyWaitPeriod. */
502 if (acode == VBUSY || acode == VRESTARTING) {
503 if (acode == VBUSY) {
506 (aerrP->err_VolumeBusies)++;
510 tvp = afs_FindVolume(afid, READ_LOCK);
512 for (i = 0; i < AFS_MAXHOSTS; i++) {
513 if (tvp->serverHost[i] == tsp) {
514 tvp->status[i] = rdwr_busy; /* can't tell which yet */
515 /* to tell which, have to look at the op code. */
518 afs_PutVolume(tvp, READ_LOCK);
520 afs_warnuser("afs: Waiting for busy volume %u in cell %s (server %d.%d.%d.%d)\n",
521 (afid ? afid->Fid.Volume : 0), tsp->cell->cellName,
522 (address >> 24), (address >> 16) & 0xff,
523 (address >> 8) & 0xff, (address) & 0xff);
524 VSleep(afs_BusyWaitPeriod); /* poll periodically */
528 } else if (acode == VICETOKENDEAD
529 || (acode & ~0xff) == ERROR_TABLE_BASE_RXK) {
530 /* any rxkad error is treated as token expiration */
533 * I'm calling these errors protection errors, since they involve
534 * faulty authentication.
537 (aerrP->err_Protection)++;
/* Look up the user record for this request's uid in the server's cell. */
539 tu = afs_FindUser(areq->uid, tsp->cell->cellNum, READ_LOCK);
541 if (acode == VICETOKENDEAD) {
542 aconn->forceConnectFS = 1;
543 } else if (acode == RXKADEXPIRED) {
544 aconn->forceConnectFS = 0; /* don't check until new tokens set */
545 aconn->parent->user->states |= UTokensBad;
546 afs_NotifyUser(tu, UTokensDropped);
548 ("afs: Tokens for user of AFS id %d for cell %s have expired (server %d.%d.%d.%d)\n",
549 tu->viceId, aconn->parent->srvr->server->cell->cellName,
550 (address >> 24), (address >> 16) & 0xff,
551 (address >> 8) & 0xff, (address) & 0xff);
553 serversleft = afs_BlackListOnce(areq, afid, tsp);
558 ("afs: Tokens for user of AFS id %d for cell %s: rxkad error=%d (server %d.%d.%d.%d)\n",
559 tu->viceId, aconn->parent->srvr->server->cell->cellName, acode,
560 (address >> 24), (address >> 16) & 0xff,
561 (address >> 8) & 0xff, (address) & 0xff);
564 areq->tokenError = 0;
565 aconn->forceConnectFS = 0; /* don't check until new tokens set */
566 aconn->parent->user->states |= UTokensBad;
567 afs_NotifyUser(tu, UTokensDropped);
569 ("afs: Tokens for user of AFS id %d for cell %s are discarded (rxkad error=%d, server %d.%d.%d.%d)\n",
570 tu->viceId, aconn->parent->srvr->server->cell->cellName, acode,
571 (address >> 24), (address >> 16) & 0xff,
572 (address >> 8) & 0xff, (address) & 0xff);
575 afs_PutUser(tu, READ_LOCK);
577 /* The else case shouldn't be possible and should probably be replaced by a panic? */
/* NOTE(review): the messages below print areq->uid rather than tu->viceId,
 * which suggests this is the path taken when afs_FindUser() found no user.
 * If so, tu is NULL when it is handed to afs_NotifyUser() -- confirm that
 * afs_NotifyUser() tolerates a NULL user. */
578 if (acode == VICETOKENDEAD) {
579 aconn->forceConnectFS = 1;
580 } else if (acode == RXKADEXPIRED) {
581 aconn->forceConnectFS = 0; /* don't check until new tokens set */
582 aconn->parent->user->states |= UTokensBad;
583 afs_NotifyUser(tu, UTokensDropped);
585 ("afs: Tokens for user %d for cell %s have expired (server %d.%d.%d.%d)\n",
586 areq->uid, aconn->parent->srvr->server->cell->cellName,
587 (address >> 24), (address >> 16) & 0xff,
588 (address >> 8) & 0xff, (address) & 0xff);
590 aconn->forceConnectFS = 0; /* don't check until new tokens set */
591 aconn->parent->user->states |= UTokensBad;
592 afs_NotifyUser(tu, UTokensDropped);
594 ("afs: Tokens for user %d for cell %s are discarded (rxkad error = %d, server %d.%d.%d.%d)\n",
595 areq->uid, aconn->parent->srvr->server->cell->cellName,
597 (address >> 24), (address >> 16) & 0xff,
598 (address >> 8) & 0xff, (address) & 0xff);
602 shouldRetry = 1; /* Try again (as root). */
604 /* Check for access violation. */
605 else if (acode == EACCES) {
606 /* should mark access error in non-existent per-user global structure */
608 (aerrP->err_Protection)++;
609 areq->accessError = 1;
610 if (op == AFS_STATS_FS_RPCIDX_STOREDATA)
611 areq->permWriteError = 1;
614 /* check for ubik errors; treat them like crashed servers */
/* NOTE(review): this range test uses "< base + 255" while the vlserver
 * test further down uses "<= base + 255" -- confirm which bound is
 * intended. */
615 else if (acode >= ERROR_TABLE_BASE_U && acode < ERROR_TABLE_BASE_U + 255) {
618 (aerrP->err_Server)++;
619 shouldRetry = 1; /* retryable (maybe one is working) */
620 VSleep(1); /* just in case */
622 /* Check for bad volume data base / missing volume. */
623 else if (acode == VSALVAGE || acode == VOFFLINE || acode == VNOVOL
624 || acode == VNOSERVICE || acode == VMOVED) {
629 areq->volumeError = VOLMISSING;
631 (aerrP->err_Volume)++;
/* With a fid whose cell can be found, VLDB_Same() is asked whether the
 * location data changed, and the per-server status is then updated. */
632 if (afid && (tcell = afs_GetCell(afid->Cell, 0))) {
633 same = VLDB_Same(afid, areq);
634 tvp = afs_FindVolume(afid, READ_LOCK);
636 for (i = 0; i < AFS_MAXHOSTS && tvp->serverHost[i]; i++) {
/* For the slot of the server that answered, end_not_busy becomes offline;
 * presumably the reset below applies to the remaining slots -- confirm. */
637 if (tvp->serverHost[i] == tsp) {
638 if (tvp->status[i] == end_not_busy)
639 tvp->status[i] = offline;
643 tvp->status[i] = not_busy; /* reset the others */
646 afs_PutVolume(tvp, READ_LOCK);
649 } else if (acode >= ERROR_TABLE_BASE_VL && acode <= ERROR_TABLE_BASE_VL + 255) { /* vlserver errors */
651 areq->volumeError = VOLMISSING;
652 } else if (acode >= 0) {
654 (aerrP->err_Other)++;
655 if (op == AFS_STATS_FS_RPCIDX_STOREDATA)
656 areq->permWriteError = 1;
657 shouldRetry = 0; /* Other random Vice error. */
658 } else if (acode == RX_MSGSIZE) { /* same meaning as EMSGSIZE... */
660 ("afs: Path MTU may have been exceeded, retrying (server %d.%d.%d.%d)\n",
661 (address >> 24), (address >> 16) & 0xff,
662 (address >> 8) & 0xff, (address) & 0xff);
664 VSleep(1); /* Just a hack for desperate times. */
666 (aerrP->err_Other)++;
667 shouldRetry = 1; /* packet was too big, please retry call */
/* Remaining negative codes: the server address is marked down. */
670 if (acode < 0 && acode != RX_MSGSIZE && acode != VRESTARTING) {
671 /* If we get here, code < 0 and we have network/Server troubles.
672 * areq->networkError is not set here, since we always
673 * retry in case there is another server. However, if we find
674 * no connection (aconn == 0) we set the networkError flag.
676 afs_MarkServerUpOrDown(sa, SRVR_ISDOWN);
678 (aerrP->err_Server)++;
679 VSleep(1); /* Just a hack for desperate times. */
683 /* now unlock the connection and return */
684 afs_PutConn(aconn, rxconn, locktype);
685 return (shouldRetry);