2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
13 #include "../afs/param.h" /* Should be always first */
14 #include "../afs/stds.h"
15 #include "../afs/sysincludes.h" /* Standard vendor system headers */
18 #ifndef AFS_LINUX20_ENV
20 #include <netinet/in.h>
24 #include "../h/hashing.h"
26 #if !defined(AFS_HPUX110_ENV) && !defined(AFS_LINUX20_ENV)
27 #include <netinet/in_var.h>
31 #include "../afs/afsincludes.h" /* Afs-based standard headers */
32 #include "../afs/afs_stats.h" /* afs statistics */
33 #include "../afs/afs_util.h"
34 #include "../afs/afs_prototypes.h"
36 #if defined(AFS_SUN56_ENV)
38 #include <inet/common.h>
/* Fallback error-table bases.  Each group below only takes effect when the
 * corresponding ERROR_TABLE_BASE_* macro has not already been defined.
 * afs_Analyze compares RPC return codes against these bases to recognise
 * ubik and vlserver errors. */
43 /* shouldn't do it this way, but for now will do */
44 #ifndef ERROR_TABLE_BASE_u
45 #define ERROR_TABLE_BASE_u (5376L)
46 #endif /* ubik error base define */
/* VL_NOENT sits inside the same guard as the vlserver base, so it is only
 * defined here when ERROR_TABLE_BASE_vl was not already defined. */
48 /* same hack for vlserver error base as for ubik error base */
49 #ifndef ERROR_TABLE_BASE_vl
50 #define ERROR_TABLE_BASE_vl (363520L)
51 #define VL_NOENT (363524L)
52 #endif /* vlserver error base define */
/* Interval handed to VSleep() by afs_Analyze while a volume reports busy. */
55 int afs_BusyWaitPeriod = 15; /* poll every 15 seconds */
/* Hard-mount retry tunables read by afs_Analyze.  hm_retry_int acts both as
 * the enable test and as the VSleep() interval between attempts.
 * hm_retry_RO and hm_retry_RW are the values assigned to the retry result:
 * hm_retry_RO when the volume is not found or has VRO set, hm_retry_RW
 * presumably in the remaining case (that branch line is not visible here).
 * All three default to 0. */
58 afs_int32 hm_retry_RO=0; /* don't wait */
59 afs_int32 hm_retry_RW=0; /* don't wait */
60 afs_int32 hm_retry_int=0; /* don't wait */
/* afs_CopyError: propagate error flags from one request record to another.
 *
 *   afrom : request whose error flags are read.
 *   ato   : request whose error flags are set.
 *
 * Flags are only ever set to 1 in ato by the visible statements; nothing
 * visible here clears a flag in ato.
 * NOTE(review): several lines of this function are absent from this listing,
 * so any guard or reset performed before the copies cannot be confirmed. */
62 void afs_CopyError(afrom, ato)
63 register struct vrequest *afrom;
64 register struct vrequest *ato;
67 AFS_STATCNT(afs_CopyError);
/* NOTE(review): the statements controlled by the next two tests are not
 * visible; presumably they set the matching fields in ato -- confirm. */
71 if (afrom->accessError)
73 if (afrom->volumeError)
75 if (afrom->networkError)
76 ato->networkError = 1;
77 if (afrom->permWriteError)
78 ato->permWriteError = 1;
/* afs_FinalizeReq: reset the per-request error state.
 *
 *   areq : request record whose accessError, volumeError, networkError and
 *          permWriteError fields are cleared to 0.
 *
 * NOTE(review): the lines between the statistics counter and the first
 * assignment are absent from this listing, so it cannot be told from here
 * whether the reset is conditional. */
83 void afs_FinalizeReq(areq)
84 register struct vrequest *areq;
87 AFS_STATCNT(afs_FinalizeReq);
91 areq->accessError = 0;
92 areq->volumeError = 0;
93 areq->networkError = 0;
94 areq->permWriteError = 0;
/* afs_CheckCode: combine a raw return code with the error state recorded in
 * a request.
 *
 *   acode : return code being examined.
 *   areq  : request record consulted for error flags; may be null.
 *   where : caller-supplied value that is only passed to the trace call.
 *
 * The tests are made in this order: missing or uninitialised request,
 * networkError, accessError, volumeError equal to VOLMISSING, volumeError
 * equal to VOLBUSY, and finally acode equal to VNOVNODE.
 * NOTE(review): the statement attached to each test (the value returned) is
 * absent from this listing, so the actual mapping cannot be documented here. */
100 afs_CheckCode(acode, areq, where)
102 struct vrequest *areq;
106 AFS_STATCNT(afs_CheckCode);
108 afs_Trace2(afs_iclSetp, CM_TRACE_CHECKCODE,
109 ICL_TYPE_INT32, acode, ICL_TYPE_INT32, where);
/* A null areq, or one whose initd flag is clear, is handled before any of
 * the error flags are looked at. */
111 if (!areq || !areq->initd)
113 if (areq->networkError)
117 if (areq->accessError)
119 if (areq->volumeError == VOLMISSING)
121 if (areq->volumeError == VOLBUSY)
123 if (acode == VNOVNODE)
/* VSleep(at): wait by calling afs_osi_Wait with at multiplied by 1000.
 * Callers in this file pass values described as seconds (for example
 * afs_BusyWaitPeriod), so the scaled argument is presumably milliseconds;
 * the meaning of the two trailing zero arguments is not visible here. */
130 #define VSleep(at) afs_osi_Wait((at)*1000, 0, 0)
135 * 0 if the vldb record for a specific volume is different from what
136 * we have cached -- perhaps the volume has moved.
137 * 1 if the vldb record is the same
138 * 2 if we can't tell if it's the same or not.
140 * If 0, the caller will probably start over at the beginning of our
141 * list of servers for this volume and try to find one that is up. If
142 * not 0, we will probably just keep plugging with what we have
143 * cached. If we fail to contact the VL server, we should just keep
144 * trying with the information we have, rather than failing. */
/* VLDB_Same: re-fetch the VLDB entry for the volume named by afid and report
 * whether the server list cached for that volume changed.
 *
 *   afid : fid identifying the cell and volume to look up.
 *   areq : request of the caller; it is only passed to afs_FinalizeReq here.
 *          The lookups themselves run under the local request treq.
 *
 * Returns DUNNO when afs_InitReq fails, otherwise DIFFERENT or SAME
 * according to the changed flag at the final return.
 * NOTE(review): a number of lines (local declarations such as tcell, tconn,
 * tvp, i, type and the union v, plus some branch bodies) are absent from
 * this listing; other return paths may exist on those lines. */
148 static int VLDB_Same (afid, areq)
149 struct VenusFid *afid;
150 struct vrequest *areq;
152 struct vrequest treq;
/* The three entry layouts below are reached as v.tve, v.ntve and v.utve in
 * the RPC calls, so they are members of an aggregate named v whose opening
 * and closing lines are not visible. */
156 struct vldbentry tve;
157 struct nvldbentry ntve;
158 struct uvldbentry utve;
162 char *bp, tbuf[CVBS]; /* biggest volume id is 2^32, ~ 4*10^9 */
163 unsigned int changed;
/* Snapshot of the server pointers held before the entry is reinstalled. */
164 struct server *(oldhosts[NMAXNSERVERS]);
166 AFS_STATCNT(CheckVLDB);
167 afs_FinalizeReq(areq);
169 if (i = afs_InitReq(&treq, &afs_osi_cred)) return DUNNO;
/* NOTE(review): tcell is dereferenced below with no visible null check. */
170 tcell = afs_GetCell(afid->Cell, READ_LOCK);
/* The volume number is rendered as text because the lookup RPCs take a name.
 * afs_cv2string is handed the end of tbuf; presumably it fills the buffer
 * backwards and returns the start of the digits -- confirm. */
171 bp = afs_cv2string(&tbuf[CVBS], afid->Fid.Volume);
/* Retry loop: each pass waits, picks a VL server connection, issues one of
 * the GetEntryByName variants, and lets afs_Analyze decide whether to go
 * round again. */
173 VSleep(2); /* Better safe than sorry. */
174 tconn = afs_ConnByMHosts(tcell->cellHosts, tcell->vlport,
175 tcell->cell, &treq, SHARED_LOCK);
/* Server flags remember which variant a server understands: SNO_LHOSTS
 * selects the O call, SYES_LHOSTS the N call, anything else tries U. */
177 if (tconn->srvr->server->flags & SNO_LHOSTS) {
179 #ifdef RX_ENABLE_LOCKS
181 #endif /* RX_ENABLE_LOCKS */
182 i = VL_GetEntryByNameO(tconn->id, bp, &v.tve);
183 #ifdef RX_ENABLE_LOCKS
185 #endif /* RX_ENABLE_LOCKS */
186 } else if (tconn->srvr->server->flags & SYES_LHOSTS) {
188 #ifdef RX_ENABLE_LOCKS
190 #endif /* RX_ENABLE_LOCKS */
191 i = VL_GetEntryByNameN(tconn->id, bp, &v.ntve);
192 #ifdef RX_ENABLE_LOCKS
194 #endif /* RX_ENABLE_LOCKS */
197 #ifdef RX_ENABLE_LOCKS
199 #endif /* RX_ENABLE_LOCKS */
200 i = VL_GetEntryByNameU(tconn->id, bp, &v.utve);
201 #ifdef RX_ENABLE_LOCKS
203 #endif /* RX_ENABLE_LOCKS */
/* For a server not yet marked SVLSRV_UUID, RXGEN_OPCODE from the U call
 * triggers a fallback to N, and RXGEN_OPCODE from N a fallback to O.  The
 * outcome is recorded in the server flags for later passes. */
204 if (!(tconn->srvr->server->flags & SVLSRV_UUID)) {
205 if (i == RXGEN_OPCODE) {
207 #ifdef RX_ENABLE_LOCKS
209 #endif /* RX_ENABLE_LOCKS */
210 i = VL_GetEntryByNameN(tconn->id, bp, &v.ntve);
211 #ifdef RX_ENABLE_LOCKS
213 #endif /* RX_ENABLE_LOCKS */
214 if (i == RXGEN_OPCODE) {
216 tconn->srvr->server->flags |= SNO_LHOSTS;
217 #ifdef RX_ENABLE_LOCKS
219 #endif /* RX_ENABLE_LOCKS */
220 i = VL_GetEntryByNameO(tconn->id, bp, &v.tve);
221 #ifdef RX_ENABLE_LOCKS
223 #endif /* RX_ENABLE_LOCKS */
225 tconn->srvr->server->flags |= SYES_LHOSTS;
227 tconn->srvr->server->flags |= SVLSRV_UUID;
/* No fid is passed to afs_Analyze here; the cell is supplied instead, and
 * the op argument of -1 keeps it from selecting a per-op error counter. */
233 } while (afs_Analyze(tconn, i, (struct VenusFid *) 0, &treq,
234 -1, /* no op code for this */
235 SHARED_LOCK, tcell));
237 afs_PutCell(tcell, READ_LOCK);
/* NOTE(review): the trace call is given the address of the pointer
 * parameter afid rather than afid itself -- confirm that this is what
 * ICL_TYPE_FID expects. */
238 afs_Trace2(afs_iclSetp, CM_TRACE_CHECKVLDB, ICL_TYPE_FID, &afid,
244 /* have info, copy into serverHost array */
246 tvp = afs_FindVolume(afid, WRITE_LOCK);
248 ObtainWriteLock(&tvp->lock,107);
/* Remember the current hosts; i ends up as the number of hosts saved. */
249 for (i=0; i < NMAXNSERVERS && tvp->serverHost[i]; i++) {
250 oldhosts[i] = tvp->serverHost[i];
/* Install whichever entry layout was fetched.  The variable type is set on
 * lines not visible here; the value 1 selects the N layout. */
254 InstallUVolumeEntry(tvp, &v.utve, afid->Cell, tcell, &treq);
256 else if (type == 1) {
257 InstallNVolumeEntry(tvp, &v.ntve, afid->Cell);
260 InstallVolumeEntry(tvp, &v.tve, afid->Cell);
/* A host now present in the slot just past the old list is tested first;
 * after that the saved slots are compared from the last one downwards until
 * a difference is found. */
263 if (i < NMAXNSERVERS && tvp->serverHost[i]) {
266 for (--i;!changed && i >= 0; i--) {
267 if (tvp->serverHost[i] != oldhosts[i]) {
268 changed = 1; /* also happens if prefs change. big deal. */
272 ReleaseWriteLock(&tvp->lock);
273 afs_PutVolume(tvp, WRITE_LOCK);
275 else { /* can't find volume */
276 tvp = afs_GetVolume(afid, &treq, WRITE_LOCK);
278 afs_PutVolume(tvp, WRITE_LOCK);
284 return (changed ? DIFFERENT : SAME);
288 /*------------------------------------------------------------------------
289 * EXPORTED afs_Analyze
292 * Analyze the outcome of an RPC operation, taking whatever support
293 * actions are necessary.
296 * aconn : Ptr to the relevant connection on which the call was made.
297 * acode : The return code experienced by the RPC.
298 * afid : The FID of the file involved in the action. This argument
299 * may be null if none was involved.
300 * areq : The request record associated with this operation.
301 * op : which RPC we are analyzing.
302 * cellp : pointer to a cell struct. Must provide either fid or cell.
305 * Non-zero value if the related RPC operation should be retried, zero otherwise.
309 * This routine is typically called in a do-while loop, causing the
310 * embedded RPC operation to be called repeatedly if appropriate
311 * until whatever error condition (if any) is intolerable.
317 * The retry return value is used by afs_StoreAllSegments to determine
318 * if this is a temporary or permanent error.
319 *------------------------------------------------------------------------*/
/* Reading guide for the body below.  With no connection (aconn null) the
 * function deals with busy-volume polling and the hard-mount wait.  With a
 * connection it classifies acode in this order: VBUSY or VRESTARTING, token
 * and rxkad errors, EACCES, ubik errors, missing or moved volume codes,
 * vlserver errors, other non-negative codes, and RX_MSGSIZE.  Remaining
 * negative codes mark the server down.
 * NOTE(review): many lines (local declarations such as tvp, tsp, sa, tu, i,
 * same and tcell, several branch bodies and all lone braces) are absent
 * from this listing, so nesting is inferred from the visible statements. */
320 int afs_Analyze(aconn, acode, afid, areq, op, locktype, cellp)
321 register struct conn *aconn;
323 register struct vrequest *areq;
324 struct VenusFid *afid;
334 afs_int32 shouldRetry = 0;
335 struct afs_stats_RPCErrors *aerrP;
338 AFS_STATCNT(afs_Analyze);
339 afs_Trace4(afs_iclSetp, CM_TRACE_ANALYZE, ICL_TYPE_INT32, op,
340 ICL_TYPE_POINTER, aconn,
341 ICL_TYPE_INT32, acode, ICL_TYPE_LONG, areq->uid);
/* Per-op error counters exist only for op values inside the table; for any
 * other op (VLDB_Same passes -1) aerrP stays null. */
343 aerrP = (struct afs_stats_RPCErrors *) 0;
345 if ((op >= 0) && (op < AFS_STATS_NUM_FS_RPC_OPS))
346 aerrP = &(afs_stats_cmfullperf.rpc.fsRPCErrors[op]);
348 afs_FinalizeReq(areq);
349 if (!aconn && areq->busyCount) { /* one RPC or more got VBUSY/VRESTARTING */
351 tvp = afs_FindVolume(afid, READ_LOCK);
353 afs_warnuser("afs: Waiting for busy volume %u (%s) in cell %s\n",
354 (afid ? afid->Fid.Volume : 0),
355 (tvp->name ? tvp->name : ""),
356 ((tvp->serverHost[0] && tvp->serverHost[0]->cell) ?
357 tvp->serverHost[0]->cell->cellName : ""));
/* Every host slot that is neither not_busy nor offline is reset to not_busy
 * before the next round of attempts. */
359 for (i=0; i < MAXHOSTS; i++) {
360 if (tvp->status[i] != not_busy && tvp->status[i] != offline) {
361 tvp->status[i] = not_busy;
363 if (tvp->status[i] == not_busy)
366 afs_PutVolume(tvp, READ_LOCK);
368 afs_warnuser("afs: Waiting for busy volume %u\n",
369 (afid ? afid->Fid.Volume : 0));
/* After more than 100 busy rounds the request is marked VOLBUSY; the visible
 * alternative is to sleep for afs_BusyWaitPeriod before returning. */
372 if (areq->busyCount > 100) {
374 (aerrP->err_Volume)++;
375 areq->volumeError = VOLBUSY;
378 VSleep(afs_BusyWaitPeriod); /* poll periodically */
380 return shouldRetry; /* should retry */
/* NOTE(review): the test that opens this section is not visible; from the
 * statements that follow it appears to be the no-connection path. */
384 if (!areq->volumeError) {
386 (aerrP->err_Network)++;
/* The hard-mount wait applies only when hm_retry_int is non-zero, the
 * request does not carry O_NONBLOCK, and the fid or the cell refers to
 * LOCALCELL. */
387 if (hm_retry_int && !(areq->flags & O_NONBLOCK) && /* "hard" mount */
388 ((afid && afid->Cell == LOCALCELL) ||
389 (cellp && cellp->cell == LOCALCELL))) {
391 afs_warnuser("afs: hard-mount waiting for a vlserver to return to service\n");
392 VSleep(hm_retry_int);
393 afs_CheckServers(1,cellp);
396 tvp = afs_FindVolume(afid, READ_LOCK);
397 if (!tvp || (tvp->states & VRO)) {
398 shouldRetry = hm_retry_RO;
400 shouldRetry = hm_retry_RW;
403 afs_PutVolume(tvp, READ_LOCK);
405 afs_warnuser("afs: hard-mount waiting for volume %u\n",
407 VSleep(hm_retry_int);
408 afs_CheckServers(1,cellp);
411 } /* if (hm_retry_int ... */
413 areq->networkError = 1;
419 /* Find server associated with this connection. */
424 /* If we previously took an error, mark this volume not busy */
425 if (areq->volumeError) {
426 tvp = afs_FindVolume(afid, READ_LOCK);
428 for (i=0; i<MAXHOSTS ; i++) {
429 if (tvp->serverHost[i] == tsp) {
430 tvp->status[i] = not_busy ;
433 afs_PutVolume(tvp, READ_LOCK);
/* NOTE(review): this release of the connection presumably belongs to an
 * early return whose test and return statement are not visible. */
437 afs_PutConn(aconn, locktype);
441 /* If network troubles, mark server as having bogued out again. */
442 /* VRESTARTING is < 0 because of backward compatibility issues
443 * with 3.4 file servers and older cache managers */
444 if ((acode < 0) && (acode != VRESTARTING)) {
446 ForceNewConnections(sa); /*multi homed clients lock:afs_xsrvAddr?*/
448 (aerrP->err_Server)++;
/* Busy or restarting volume: the slot of this server in the volume is
 * marked rdwr_busy, the user is warned, and the call sleeps for
 * afs_BusyWaitPeriod. */
451 if (acode == VBUSY || acode == VRESTARTING) {
452 if (acode == VBUSY) {
455 (aerrP->err_VolumeBusies)++;
457 else areq->busyCount = 1;
459 tvp = afs_FindVolume(afid, READ_LOCK);
461 for (i=0; i < MAXHOSTS ; i++ ) {
462 if (tvp->serverHost[i] == tsp) {
463 tvp->status[i] = rdwr_busy ; /* can't tell which yet */
464 /* to tell which, have to look at the op code. */
467 afs_PutVolume(tvp, READ_LOCK);
470 afs_warnuser("afs: Waiting for busy volume %u in cell %s\n",
471 (afid? afid->Fid.Volume : 0), tsp->cell->cellName);
472 VSleep(afs_BusyWaitPeriod); /* poll periodically */
/* Token problems: VICETOKENDEAD, or any code whose bits above the low byte
 * equal ERROR_TABLE_BASE_rxk.  The user of the connection is flagged
 * UTokensBad and the call is retried. */
477 else if (acode == VICETOKENDEAD || (acode & ~0xff) == ERROR_TABLE_BASE_rxk) {
478 /* any rxkad error is treated as token expiration */
482 * I'm calling these errors protection errors, since they involve
483 * faulty authentication.
486 (aerrP->err_Protection)++;
488 tu = afs_FindUser(areq->uid, tsp->cell->cell, READ_LOCK);
490 if ((acode == VICETOKENDEAD) || (acode == RXKADEXPIRED))
491 afs_warnuser("afs: Tokens for user of AFS id %d for cell %s have expired\n",
492 tu->vid, aconn->srvr->server->cell->cellName);
494 afs_warnuser("afs: Tokens for user of AFS id %d for cell %s are discarded (rxkad error=%d)\n",
495 tu->vid, aconn->srvr->server->cell->cellName, acode);
496 afs_PutUser(tu, READ_LOCK);
498 /* The else case shouldn't be possible and should probably be replaced by a panic? */
499 if ((acode == VICETOKENDEAD) || (acode == RXKADEXPIRED))
500 afs_warnuser("afs: Tokens for user %d for cell %s have expired\n",
501 areq->uid, aconn->srvr->server->cell->cellName);
503 afs_warnuser("afs: Tokens for user %d for cell %s are discarded (rxkad error = %d)\n",
504 areq->uid, aconn->srvr->server->cell->cellName, acode);
506 aconn->forceConnectFS = 0; /* don't check until new tokens set */
507 aconn->user->states |= UTokensBad;
508 shouldRetry = 1; /* Try again (as root). */
510 /* Check for access violation. */
511 else if (acode == EACCES) {
512 /* should mark access error in non-existent per-user global structure */
514 (aerrP->err_Protection)++;
515 areq->accessError = 1;
/* A failed store is additionally recorded as a permanent write error. */
516 if (op == AFS_STATS_FS_RPCIDX_STOREDATA)
517 areq->permWriteError = 1;
/* NOTE(review): the upper bound here is exclusive (base + 255 is left out)
 * while the vlserver range test further down is inclusive -- confirm which
 * is intended. */
520 /* check for ubik errors; treat them like crashed servers */
521 else if (acode >= ERROR_TABLE_BASE_u && acode < ERROR_TABLE_BASE_u+255) {
524 (aerrP->err_Server)++;
525 shouldRetry = 1; /* retryable (maybe one is working) */
526 VSleep(1); /* just in case */
528 /* Check for bad volume data base / missing volume. */
529 else if (acode == VSALVAGE || acode == VOFFLINE
530 || acode == VNOVOL || acode == VNOSERVICE || acode == VMOVED) {
535 areq->volumeError = VOLMISSING;
537 (aerrP->err_Volume)++;
/* NOTE(review): the cell obtained by afs_GetCell in the next test has no
 * visible matching release in this listing -- confirm it is put back. */
538 if (afid && (tcell = afs_GetCell(afid->Cell, 0))) {
/* The VLDB entry is refreshed, then the per-host status of the volume is
 * updated.  The value of same is used on lines not visible here. */
539 same = VLDB_Same(afid, areq);
540 tvp = afs_FindVolume(afid, READ_LOCK);
542 for (i=0; i < MAXHOSTS && tvp->serverHost[i]; i++ ) {
543 if (tvp->serverHost[i] == tsp) {
544 if (tvp->status[i] == end_not_busy)
545 tvp->status[i] = offline ;
550 tvp->status[i] = not_busy; /* reset the others */
553 afs_PutVolume(tvp, READ_LOCK);
557 else if (acode >= ERROR_TABLE_BASE_vl
558 && acode <= ERROR_TABLE_BASE_vl + 255) /* vlserver errors */ {
560 areq->volumeError = VOLMISSING;
/* Any other non-negative code is not retried; for a store it is recorded as
 * a permanent write error. */
562 else if (acode >= 0) {
564 (aerrP->err_Other)++;
565 if (op == AFS_STATS_FS_RPCIDX_STOREDATA)
566 areq->permWriteError = 1;
567 shouldRetry = 0; /* Other random Vice error. */
568 } else if (acode == RX_MSGSIZE) { /* same meaning as EMSGSIZE... */
569 VSleep(1); /* Just a hack for desperate times. */
571 (aerrP->err_Other)++;
572 shouldRetry = 1; /* packet was too big, please retry call */
575 if (acode < 0 && acode != RX_MSGSIZE && acode != VRESTARTING) {
576 /* If we get here, code < 0 and we have network/Server troubles.
577 * areq->networkError is not set here, since we always
578 * retry in case there is another server. However, if we find
579 * no connection (aconn == 0) we set the networkError flag.
581 afs_MarkServerUpOrDown(sa, SRVR_ISDOWN);
583 (aerrP->err_Server)++;
584 VSleep(1); /* Just a hack for desperate times. */
588 /* now unlock the connection and return */
589 afs_PutConn(aconn, locktype);
590 return (shouldRetry);