2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
13 #include <afsconfig.h>
14 #include "../afs/param.h"
18 #include "../afs/stds.h"
19 #include "../afs/sysincludes.h" /* Standard vendor system headers */
22 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_FBSD_ENV)
24 #include <netinet/in.h>
28 #include "../h/hashing.h"
30 #if !defined(AFS_HPUX110_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_FBSD_ENV) && !defined(AFS_DARWIN60_ENV)
31 #include <netinet/in_var.h>
35 #include "../afs/afsincludes.h" /* Afs-based standard headers */
36 #include "../afs/afs_stats.h" /* afs statistics */
37 #include "../afs/afs_util.h"
39 #if defined(AFS_SUN56_ENV)
41 #include <inet/common.h>
42 #if defined(AFS_SUN58_ENV)
43 #include <netinet/ip6.h>
/* Fallback definitions of the ubik and vlserver error-table bases, used only
 * when nothing included earlier has already defined them.  afs_Analyze
 * compares RPC return codes against these bases to recognise ubik and
 * vlserver errors. */
49 /* shouldn't do it this way, but for now will do */
50 #ifndef ERROR_TABLE_BASE_U
51 #define ERROR_TABLE_BASE_U (5376L)
52 #endif /* ubik error base define */
54 /* same hack for vlserver error base as for ubik error base */
55 #ifndef ERROR_TABLE_BASE_VL
56 #define ERROR_TABLE_BASE_VL (363520L)
/* NOTE: VL_NOENT (base + 4) is defined only inside this fallback branch, i.e.
 * only when ERROR_TABLE_BASE_VL was not already defined. */
57 #define VL_NOENT (363524L)
58 #endif /* vlserver error base define */
/* Retry tunables read by afs_Analyze.
 *   afs_BusyWaitPeriod : argument given to VSleep between retries while a
 *                        volume is busy.
 *   hm_retry_RO        : value afs_Analyze uses as its retry indication after
 *                        a network failure when the volume is not found or is
 *                        flagged VRO.
 *   hm_retry_RW        : the same, for the remaining (non-VRO) case.
 *   hm_retry_int       : argument given to VSleep while hard-mount waiting;
 *                        a value of 0 makes afs_Analyze skip that waiting. */
61 int afs_BusyWaitPeriod = 15; /* poll every 15 seconds */
63 afs_int32 hm_retry_RO=0; /* don't wait */
64 afs_int32 hm_retry_RW=0; /* don't wait */
65 afs_int32 hm_retry_int=0; /* don't wait */
/* afs_CopyError: propagate the error indications recorded in one request
 * record into another.
 *
 *   afrom : request whose accessError, volumeError, networkError and
 *           permWriteError fields are tested.
 *   ato   : request that receives the corresponding indications.
 *
 * NOTE(review): this listing omits several lines of the function (the
 * embedded numbering jumps 67->69->73->75->77 and stops at 80), including the
 * statements executed for accessError and volumeError and anything done
 * before the first test.  Confirm against the complete source. */
67 void afs_CopyError(register struct vrequest *afrom, register struct vrequest *ato)
69 AFS_STATCNT(afs_CopyError);
73 if (afrom->accessError)
75 if (afrom->volumeError)
77 if (afrom->networkError)
78 ato->networkError = 1;
79 if (afrom->permWriteError)
80 ato->permWriteError = 1;
/* afs_FinalizeReq: clear the error fields of a request record.
 *
 *   areq : request whose accessError, volumeError, networkError and
 *          permWriteError fields are set to 0.
 *
 * Called by VLDB_Same and afs_Analyze before they inspect the request.
 *
 * NOTE(review): the lines between the statistics counter and the first reset
 * (embedded numbers 87-89) and after the last reset are absent from this
 * listing; they may hold a guard or further field initialisation (other code
 * here tests areq->initd and areq->busyCount) -- TODO confirm. */
84 void afs_FinalizeReq(register struct vrequest *areq)
86 AFS_STATCNT(afs_FinalizeReq);
90 areq->accessError = 0;
91 areq->volumeError = 0;
92 areq->networkError = 0;
93 areq->permWriteError = 0;
/* afs_CheckCode: translate an RPC result plus the error state recorded in a
 * request record into the code handed back to the caller.
 *
 *   acode : result code of the operation.
 *   areq  : request record; may be NULL (tested below).
 *   where : caller-supplied tag, used here only in the trace record.
 *
 * The conditions are examined in this order: request absent or not
 * initialised, networkError, accessError, volumeError == VOLMISSING,
 * volumeError == VOLBUSY, acode == VNOVNODE.
 *
 * NOTE(review): the statement executed for each condition (and the final
 * return) is on lines absent from this listing, so the actual return values
 * cannot be documented from here. */
98 int afs_CheckCode(afs_int32 acode, struct vrequest *areq, int where)
100 AFS_STATCNT(afs_CheckCode);
102 afs_Trace2(afs_iclSetp, CM_TRACE_CHECKCODE,
103 ICL_TYPE_INT32, acode, ICL_TYPE_INT32, where);
105 if (!areq || !areq->initd)
107 if (areq->networkError)
111 if (areq->accessError)
113 if (areq->volumeError == VOLMISSING)
115 if (areq->volumeError == VOLBUSY)
117 if (acode == VNOVNODE)
/* VSleep(at): wait via afs_osi_Wait with the first argument scaled by 1000.
 * Callers pass small counts (1, 2, afs_BusyWaitPeriod), so `at` is presumably
 * seconds and afs_osi_Wait presumably takes milliseconds -- TODO confirm.
 * The two trailing 0 arguments are passed through unchanged. */
124 #define VSleep(at) afs_osi_Wait((at)*1000, 0, 0)
129 * 0 if the vldb record for a specific volume is different from what
130 * we have cached -- perhaps the volume has moved.
131 * 1 if the vldb record is the same
132 * 2 if we can't tell if it's the same or not.
134 * If 0, the caller will probably start over at the beginning of our
135 * list of servers for this volume and try to find one that is up. If
136 * not 0, we will probably just keep plugging with what we have
137 * cached. If we fail to contact the VL server, we should just keep
138 * trying with the information we have, rather than failing. */
/* VLDB_Same: fetch the VLDB entry for the volume named by afid from a
 * vlserver of its cell, install it, and report whether the cached list of
 * servers for the volume changed.
 *
 *   afid : fid whose Cell and Fid.Volume select the cell and volume.
 *   areq : request record of the caller; in the visible code it is only
 *          passed to afs_FinalizeReq.  The VL calls run under a private
 *          request (treq) initialised with afs_osi_cred.
 *
 * Returns DUNNO when the private request cannot be initialised; at the end
 * returns DIFFERENT when a change was detected and SAME otherwise.
 *
 * NOTE(review): this listing omits many lines of the function (braces, the
 * declaration wrapper around tve/ntve/utve, the declarations of i, type,
 * tcell, tconn and tvp, and several statements), so other return paths may
 * exist -- confirm against the complete source. */
142 static int VLDB_Same (struct VenusFid *afid, struct vrequest *areq)
144 struct vrequest treq;
/* The code below refers to these as v.tve, v.ntve and v.utve, so they are
 * presumably members of an aggregate named v whose opening and closing lines
 * are not present in this listing. */
148 struct vldbentry tve;
149 struct nvldbentry ntve;
150 struct uvldbentry utve;
154 char *bp, tbuf[CVBS]; /* biggest volume id is 2^32, ~ 4*10^9 */
155 unsigned int changed;
156 struct server *(oldhosts[NMAXNSERVERS]);
158 AFS_STATCNT(CheckVLDB);
159 afs_FinalizeReq(areq);
161 if ((i = afs_InitReq(&treq, &afs_osi_cred))) return DUNNO;
/* NOTE(review): no NULL check of tcell is visible before tcell->cellHosts is
 * dereferenced below -- verify that afs_GetCell cannot fail here. */
162 tcell = afs_GetCell(afid->Cell, READ_LOCK);
/* Volume id as text for the by-name lookups.  afs_cv2string is handed the
 * END of tbuf, so it presumably builds the digits backwards and returns the
 * start of the string -- TODO confirm. */
163 bp = afs_cv2string(&tbuf[CVBS], afid->Fid.Volume);
165 VSleep(2); /* Better safe than sorry. */
166 tconn = afs_ConnByMHosts(tcell->cellHosts, tcell->vlport,
167 tcell->cellNum, &treq, SHARED_LOCK);
/* Choose the lookup RPC from what is already recorded about this vlserver:
 * SNO_LHOSTS -> the O call, SYES_LHOSTS -> the N call, otherwise the U call. */
169 if (tconn->srvr->server->flags & SNO_LHOSTS) {
172 i = VL_GetEntryByNameO(tconn->id, bp, &v.tve);
174 } else if (tconn->srvr->server->flags & SYES_LHOSTS) {
177 i = VL_GetEntryByNameN(tconn->id, bp, &v.ntve);
182 i = VL_GetEntryByNameU(tconn->id, bp, &v.utve);
/* SVLSRV_UUID not yet recorded for this server: when a call comes back with
 * RXGEN_OPCODE, step down from the U call to the N call and then to the O
 * call, and record the outcome in the server flags (SNO_LHOSTS, SYES_LHOSTS
 * or SVLSRV_UUID) for later lookups. */
184 if (!(tconn->srvr->server->flags & SVLSRV_UUID)) {
185 if (i == RXGEN_OPCODE) {
188 i = VL_GetEntryByNameN(tconn->id, bp, &v.ntve);
190 if (i == RXGEN_OPCODE) {
192 tconn->srvr->server->flags |= SNO_LHOSTS;
194 i = VL_GetEntryByNameO(tconn->id, bp, &v.tve);
197 tconn->srvr->server->flags |= SYES_LHOSTS;
199 tconn->srvr->server->flags |= SVLSRV_UUID;
/* Repeat the lookup for as long as afs_Analyze asks for a retry. */
205 } while (afs_Analyze(tconn, i, NULL, &treq,
206 -1, /* no op code for this */
207 SHARED_LOCK, tcell));
209 afs_PutCell(tcell, READ_LOCK);
/* NOTE(review): afid is already a pointer, so &afid is the address of the
 * parameter itself; check whether ICL_TYPE_FID expects afid instead. */
210 afs_Trace2(afs_iclSetp, CM_TRACE_CHECKVLDB, ICL_TYPE_FID, &afid,
216 /* have info, copy into serverHost array */
218 tvp = afs_FindVolume(afid, WRITE_LOCK);
220 ObtainWriteLock(&tvp->lock,107);
/* Snapshot the current server list; on loop exit i is the number of entries
 * that were in use. */
221 for (i=0; i < NMAXNSERVERS && tvp->serverHost[i]; i++) {
222 oldhosts[i] = tvp->serverHost[i];
/* Install the freshly fetched entry using the routine matching the RPC
 * flavour that succeeded.
 * NOTE(review): tcell is passed here after afs_PutCell(tcell, ...) above --
 * verify that the cell reference is still safe to use. */
226 InstallUVolumeEntry(tvp, &v.utve, afid->Cell, tcell, &treq);
228 else if (type == 1) {
229 InstallNVolumeEntry(tvp, &v.ntve, afid->Cell);
232 InstallVolumeEntry(tvp, &v.tve, afid->Cell);
/* A host now present at index i (the old length) means the list grew;
 * otherwise walk the old entries backwards looking for any difference. */
235 if (i < NMAXNSERVERS && tvp->serverHost[i]) {
238 for (--i;!changed && i >= 0; i--) {
239 if (tvp->serverHost[i] != oldhosts[i]) {
240 changed = 1; /* also happens if prefs change. big deal. */
244 ReleaseWriteLock(&tvp->lock);
245 afs_PutVolume(tvp, WRITE_LOCK);
247 else { /* can't find volume */
248 tvp = afs_GetVolume(afid, &treq, WRITE_LOCK);
250 afs_PutVolume(tvp, WRITE_LOCK);
256 return (changed ? DIFFERENT : SAME);
260 /*------------------------------------------------------------------------
261 * EXPORTED afs_Analyze
264 * Analyze the outcome of an RPC operation, taking whatever support
265 * actions are necessary.
268 * aconn : Ptr to the relevant connection on which the call was made.
269 * acode : The return code experienced by the RPC.
270 * afid : The FID of the file involved in the action. This argument
271 * may be null if none was involved.
272 * areq : The request record associated with this operation.
273 * op : which RPC we are analyzing.
274 * cellp : pointer to a cell struct. Must provide either fid or cell.
277 * Non-zero value if the related RPC operation should be retried,
281 * This routine is typically called in a do-while loop, causing the
282 * embedded RPC operation to be called repeatedly if appropriate
283 * until whatever error condition (if any) is intolerable.
289 * The retry return value is used by afs_StoreAllSegments to determine
290 * if this is a temporary or permanent error.
291 *------------------------------------------------------------------------*/
/* Reader notes for afs_Analyze (summary of the code visible below).
 *
 * areq must be non-NULL: areq->uid is read unconditionally for the trace.
 * Error counters are selected through aerrP, which is non-NULL only when
 * 0 <= op < AFS_STATS_NUM_FS_RPC_OPS; the guards around the increments are
 * presumably on lines absent from this listing -- TODO confirm.
 *
 * Without a connection:
 *   - busyCount set: warn, reset per-server busy status, set volumeError to
 *     VOLBUSY once busyCount exceeds 100, sleep afs_BusyWaitPeriod and
 *     return shouldRetry.
 *   - otherwise, with no volumeError: count a network error; if
 *     hm_retry_int is non-zero, O_NONBLOCK is not set in areq->flags and the
 *     cell is the primary cell, sleep hm_retry_int and re-check servers;
 *     networkError is set to 1 (the existing comment near the end ties this
 *     to aconn == 0).
 *
 * With a connection, acode is classified by this chain:
 *   acode < 0 and not VRESTARTING   : ForceNewConnections(sa).
 *   VBUSY / VRESTARTING             : mark the slot of this server rdwr_busy,
 *                                     warn, sleep afs_BusyWaitPeriod.
 *   VICETOKENDEAD or rxkad table    : warn, clear forceConnectFS, set
 *                                     UTokensBad on the user, retry.
 *   EACCES                          : accessError = 1 (permWriteError too for
 *                                     the STOREDATA op).
 *   ubik table range                : VSleep(1) and retry.
 *   VSALVAGE, VOFFLINE, VNOVOL,
 *   VNOSERVICE, VMOVED              : volumeError = VOLMISSING, re-check the
 *                                     VLDB through VLDB_Same, update status.
 *   vlserver table range            : volumeError = VOLMISSING.
 *   any other acode >= 0            : permWriteError for STOREDATA, no retry.
 *   RX_MSGSIZE                      : VSleep(1) and retry.
 * Afterwards, acode < 0 other than RX_MSGSIZE and VRESTARTING marks the
 * server address down and sleeps one more second.  The connection is
 * released with afs_PutConn before shouldRetry is returned.
 *
 * NOTE(review): many lines of this function (braces, guards, some
 * assignments and the declarations of i, tvp, tsp, sa, tu, tcell and same)
 * are absent from this listing; the nesting described above is inferred
 * from the visible statements and should be confirmed against the complete
 * source. */
292 int afs_Analyze(register struct conn *aconn, afs_int32 acode,
293 struct VenusFid *afid, register struct vrequest *areq, int op,
294 afs_int32 locktype, struct cell *cellp)
300 afs_int32 shouldRetry = 0;
301 struct afs_stats_RPCErrors *aerrP;
304 AFS_STATCNT(afs_Analyze);
305 afs_Trace4(afs_iclSetp, CM_TRACE_ANALYZE, ICL_TYPE_INT32, op,
306 ICL_TYPE_POINTER, aconn,
307 ICL_TYPE_INT32, acode, ICL_TYPE_LONG, areq->uid);
/* Per-op error counters exist only for valid file-server op indices; callers
 * such as VLDB_Same pass op == -1 and get no counters. */
309 aerrP = (struct afs_stats_RPCErrors *) 0;
311 if ((op >= 0) && (op < AFS_STATS_NUM_FS_RPC_OPS))
312 aerrP = &(afs_stats_cmfullperf.rpc.fsRPCErrors[op]);
314 afs_FinalizeReq(areq);
315 if (!aconn && areq->busyCount) { /* one RPC or more got VBUSY/VRESTARTING */
/* NOTE(review): afid may be NULL here (the warnings below test for it), so
 * this relies on afs_FindVolume accepting a NULL fid -- TODO confirm. */
317 tvp = afs_FindVolume(afid, READ_LOCK);
319 afs_warnuser("afs: Waiting for busy volume %u (%s) in cell %s\n",
320 (afid ? afid->Fid.Volume : 0),
321 (tvp->name ? tvp->name : ""),
322 ((tvp->serverHost[0] && tvp->serverHost[0]->cell) ?
323 tvp->serverHost[0]->cell->cellName : ""));
/* Give every server that is neither not_busy nor offline another chance. */
325 for (i=0; i < MAXHOSTS; i++) {
326 if (tvp->status[i] != not_busy && tvp->status[i] != offline) {
327 tvp->status[i] = not_busy;
329 if (tvp->status[i] == not_busy)
332 afs_PutVolume(tvp, READ_LOCK);
334 afs_warnuser("afs: Waiting for busy volume %u\n",
335 (afid ? afid->Fid.Volume : 0));
/* Stop treating the volume as merely busy after more than 100 busy results. */
338 if (areq->busyCount > 100) {
340 (aerrP->err_Volume)++;
341 areq->volumeError = VOLBUSY;
344 VSleep(afs_BusyWaitPeriod); /* poll periodically */
346 return shouldRetry; /* should retry */
350 if (!areq->volumeError) {
352 (aerrP->err_Network)++;
353 if (hm_retry_int && !(areq->flags & O_NONBLOCK) && /* "hard" mount */
354 ((afid && afs_IsPrimaryCellNum(afid->Cell)) ||
355 (cellp && afs_IsPrimaryCell(cellp)))) {
357 afs_warnuser("afs: hard-mount waiting for a vlserver to return to service\n");
358 VSleep(hm_retry_int);
359 afs_CheckServers(1,cellp);
362 tvp = afs_FindVolume(afid, READ_LOCK);
/* The retry indication depends on the volume: hm_retry_RO when it is not
 * found or flagged VRO, hm_retry_RW otherwise. */
363 if (!tvp || (tvp->states & VRO)) {
364 shouldRetry = hm_retry_RO;
366 shouldRetry = hm_retry_RW;
369 afs_PutVolume(tvp, READ_LOCK);
371 afs_warnuser("afs: hard-mount waiting for volume %u\n",
373 VSleep(hm_retry_int);
374 afs_CheckServers(1,cellp);
377 } /* if (hm_retry_int ... */
379 areq->networkError = 1;
385 /* Find server associated with this connection. */
390 /* If we previously took an error, mark this volume not busy */
391 if (areq->volumeError) {
392 tvp = afs_FindVolume(afid, READ_LOCK);
394 for (i=0; i<MAXHOSTS ; i++) {
395 if (tvp->serverHost[i] == tsp) {
396 tvp->status[i] = not_busy ;
399 afs_PutVolume(tvp, READ_LOCK);
403 afs_PutConn(aconn, locktype);
407 /* If network troubles, mark server as having bogued out again. */
408 /* VRESTARTING is < 0 because of backward compatibility issues
409 * with 3.4 file servers and older cache managers */
410 #ifdef AFS_64BIT_CLIENT
413 #endif /* AFS_64BIT_CLIENT */
414 if ((acode < 0) && (acode != VRESTARTING)) {
416 ForceNewConnections(sa); /*multi homed clients lock:afs_xsrvAddr?*/
418 (aerrP->err_Server)++;
421 if (acode == VBUSY || acode == VRESTARTING) {
422 if (acode == VBUSY) {
425 (aerrP->err_VolumeBusies)++;
427 else areq->busyCount = 1;
429 tvp = afs_FindVolume(afid, READ_LOCK);
431 for (i=0; i < MAXHOSTS ; i++ ) {
432 if (tvp->serverHost[i] == tsp) {
433 tvp->status[i] = rdwr_busy ; /* can't tell which yet */
434 /* to tell which, have to look at the op code. */
437 afs_PutVolume(tvp, READ_LOCK);
440 afs_warnuser("afs: Waiting for busy volume %u in cell %s\n",
441 (afid? afid->Fid.Volume : 0), tsp->cell->cellName);
442 VSleep(afs_BusyWaitPeriod); /* poll periodically */
447 else if (acode == VICETOKENDEAD || (acode & ~0xff) == ERROR_TABLE_BASE_RXK) {
448 /* any rxkad error is treated as token expiration */
452 * I'm calling these errors protection errors, since they involve
453 * faulty authentication.
456 (aerrP->err_Protection)++;
458 tu = afs_FindUser(areq->uid, tsp->cell->cellNum, READ_LOCK);
460 if ((acode == VICETOKENDEAD) || (acode == RXKADEXPIRED))
461 afs_warnuser("afs: Tokens for user of AFS id %d for cell %s have expired\n",
462 tu->vid, aconn->srvr->server->cell->cellName);
464 afs_warnuser("afs: Tokens for user of AFS id %d for cell %s are discarded (rxkad error=%d)\n",
465 tu->vid, aconn->srvr->server->cell->cellName, acode);
466 afs_PutUser(tu, READ_LOCK);
468 /* The else case shouldn't be possible and should probably be replaced by a panic? */
469 if ((acode == VICETOKENDEAD) || (acode == RXKADEXPIRED))
470 afs_warnuser("afs: Tokens for user %d for cell %s have expired\n",
471 areq->uid, aconn->srvr->server->cell->cellName);
473 afs_warnuser("afs: Tokens for user %d for cell %s are discarded (rxkad error = %d)\n",
474 areq->uid, aconn->srvr->server->cell->cellName, acode);
476 aconn->forceConnectFS = 0; /* don't check until new tokens set */
477 aconn->user->states |= UTokensBad;
478 shouldRetry = 1; /* Try again (as root). */
480 /* Check for access violation. */
481 else if (acode == EACCES) {
482 /* should mark access error in non-existent per-user global structure */
484 (aerrP->err_Protection)++;
485 areq->accessError = 1;
486 if (op == AFS_STATS_FS_RPCIDX_STOREDATA)
487 areq->permWriteError = 1;
490 /* check for ubik errors; treat them like crashed servers */
491 else if (acode >= ERROR_TABLE_BASE_U && acode < ERROR_TABLE_BASE_U+255) {
494 (aerrP->err_Server)++;
495 shouldRetry = 1; /* retryable (maybe one is working) */
496 VSleep(1); /* just in case */
498 /* Check for bad volume data base / missing volume. */
499 else if (acode == VSALVAGE || acode == VOFFLINE
500 || acode == VNOVOL || acode == VNOSERVICE || acode == VMOVED) {
505 areq->volumeError = VOLMISSING;
507 (aerrP->err_Volume)++;
508 if (afid && (tcell = afs_GetCell(afid->Cell, 0))) {
509 same = VLDB_Same(afid, areq);
510 tvp = afs_FindVolume(afid, READ_LOCK);
512 for (i=0; i < MAXHOSTS && tvp->serverHost[i]; i++ ) {
513 if (tvp->serverHost[i] == tsp) {
514 if (tvp->status[i] == end_not_busy)
515 tvp->status[i] = offline ;
520 tvp->status[i] = not_busy; /* reset the others */
523 afs_PutVolume(tvp, READ_LOCK);
527 else if (acode >= ERROR_TABLE_BASE_VL
528 && acode <= ERROR_TABLE_BASE_VL + 255) /* vlserver errors */ {
530 areq->volumeError = VOLMISSING;
532 else if (acode >= 0) {
534 (aerrP->err_Other)++;
535 if (op == AFS_STATS_FS_RPCIDX_STOREDATA)
536 areq->permWriteError = 1;
537 shouldRetry = 0; /* Other random Vice error. */
538 } else if (acode == RX_MSGSIZE) { /* same meaning as EMSGSIZE... */
539 VSleep(1); /* Just a hack for desperate times. */
541 (aerrP->err_Other)++;
542 shouldRetry = 1; /* packet was too big, please retry call */
545 if (acode < 0 && acode != RX_MSGSIZE && acode != VRESTARTING) {
546 /* If we get here, code < 0 and we have network/Server troubles.
547 * areq->networkError is not set here, since we always
548 * retry in case there is another server. However, if we find
549 * no connection (aconn == 0) we set the networkError flag.
551 afs_MarkServerUpOrDown(sa, SRVR_ISDOWN);
553 (aerrP->err_Server)++;
554 VSleep(1); /* Just a hack for desperate times. */
558 /* now unlock the connection and return */
559 afs_PutConn(aconn, locktype);
560 return (shouldRetry);