1 /* Copyright (C) 1995, 1989, 1998 Transarc Corporation - All rights reserved */
3 * (C) COPYRIGHT IBM CORPORATION 1987, 1988
4 * LICENSED MATERIALS - PROPERTY OF IBM
11 #include "../afs/param.h" /* Should be always first */
12 #include "../afs/stds.h"
13 #include "../afs/sysincludes.h" /* Standard vendor system headers */
16 #ifndef AFS_LINUX20_ENV
18 #include <netinet/in.h>
22 #include "../h/hashing.h"
24 #if !defined(AFS_HPUX110_ENV) && !defined(AFS_LINUX20_ENV)
25 #include <netinet/in_var.h>
29 #include "../afs/afsincludes.h" /* Afs-based standard headers */
30 #include "../afs/afs_stats.h" /* afs statistics */
31 #include "../afs/afs_util.h"
32 #include "../afs/afs_prototypes.h"
34 #if defined(AFS_SUN56_ENV)
36 #include <inet/common.h>
41 /* shouldn't do it this way, but for now will do */
42 #ifndef ERROR_TABLE_BASE_u
43 #define ERROR_TABLE_BASE_u (5376L)
44 #endif /* ubik error base define */
46 /* same hack for vlserver error base as for ubik error base */
47 #ifndef ERROR_TABLE_BASE_vl
48 #define ERROR_TABLE_BASE_vl (363520L)
49 #define VL_NOENT (363524L)
50 #endif /* vlserver error base define */
/*
 * Retry tuning used by afs_Analyze in this file.
 *
 * afs_BusyWaitPeriod is the value handed to VSleep between polls of a
 * busy volume.
 */
53 int afs_BusyWaitPeriod = 15; /* poll every 15 seconds */
/*
 * "Hard-mount" settings, all 0 by default.  afs_Analyze enters its
 * hard-mount wait path only when hm_retry_int is non-zero, and then passes
 * hm_retry_int to VSleep.  hm_retry_RO is the retry value assigned when the
 * volume is not found or has VRO set in its states; hm_retry_RW is assigned
 * on the next visible line (the branch separating the two assignments is
 * not visible in this listing -- presumably an else; confirm).
 */
56 afs_int32 hm_retry_RO=0; /* don't wait */
57 afs_int32 hm_retry_RW=0; /* don't wait */
58 afs_int32 hm_retry_int=0; /* don't wait */
/*
 * afs_CopyError
 *	Carry error indications from one request record over to another.
 *
 * afrom : request whose accessError, volumeError, networkError and
 *	   permWriteError fields are examined.
 * ato	 : request that receives the flags; the visible assignments set
 *	   networkError and permWriteError to 1 when the corresponding
 *	   field of afrom is non-zero.
 *
 * NOTE(review): the statements controlled by the accessError and
 * volumeError tests are not visible in this listing; presumably they set
 * the matching fields of ato in the same way -- confirm.
 */
60 void afs_CopyError(afrom, ato)
61 register struct vrequest *afrom;
62 register struct vrequest *ato;
65 AFS_STATCNT(afs_CopyError);
69 if (afrom->accessError)
71 if (afrom->volumeError)
73 if (afrom->networkError)
74 ato->networkError = 1;
75 if (afrom->permWriteError)
76 ato->permWriteError = 1;
/*
 * afs_FinalizeReq
 *	Reset the error state of a request record: accessError, volumeError,
 *	networkError and permWriteError are all set to 0.
 *
 * areq : the request record to reset.
 *
 * NOTE(review): the lines between the statistics counter and the first
 * assignment are not visible in this listing.  afs_CheckCode tests
 * areq->initd, so this routine may also check or set that field -- confirm
 * before relying on it being unconditional.
 */
81 void afs_FinalizeReq(areq)
82 register struct vrequest *areq;
85 AFS_STATCNT(afs_FinalizeReq);
89 areq->accessError = 0;
90 areq->volumeError = 0;
91 areq->networkError = 0;
92 areq->permWriteError = 0;
/*
 * afs_CheckCode
 *	Combine a raw return code with the error state recorded in a request.
 *
 * acode : the code being checked.
 * areq	 : request record consulted for error flags; may be null.
 * where : caller-supplied value that is only written to the trace log here.
 *
 * No return type is declared, so the K&R default (int) applies.
 *
 * The visible tests are made in this order: request missing or not yet
 * initialised (initd clear), networkError, accessError, volumeError equal
 * to VOLMISSING, volumeError equal to VOLBUSY, and finally acode equal to
 * VNOVNODE.  NOTE(review): the value chosen for each case is on lines not
 * visible in this listing.
 */
98 afs_CheckCode(acode, areq, where)
100 struct vrequest *areq;
104 AFS_STATCNT(afs_CheckCode);
/* Record the incoming code and the caller's location tag in the trace log. */
106 afs_Trace2(afs_iclSetp, CM_TRACE_CHECKCODE,
107 ICL_TYPE_INT32, acode, ICL_TYPE_INT32, where);
109 if (!areq || !areq->initd)
111 if (areq->networkError)
115 if (areq->accessError)
117 if (areq->volumeError == VOLMISSING)
119 if (areq->volumeError == VOLBUSY)
121 if (acode == VNOVNODE)
/*
 * VSleep(at): wait by calling afs_osi_Wait with at*1000 and two zero
 * arguments.  Callers in this file pass counts described as seconds (see
 * afs_BusyWaitPeriod), so the first argument of afs_osi_Wait is presumably
 * in milliseconds -- confirm against afs_osi_Wait.
 */
128 #define VSleep(at) afs_osi_Wait((at)*1000, 0, 0)
133 * 0 if the vldb record for a specific volume is different from what
134 * we have cached -- perhaps the volume has moved.
135 * 1 if the vldb record is the same
136 * 2 if we can't tell if it's the same or not.
138 * If 0, the caller will probably start over at the beginning of our
139 * list of servers for this volume and try to find one that is up. If
140 * not 0, we will probably just keep plugging with what we have
141 * cached. If we fail to contact the VL server, we should just keep
142 * trying with the information we have, rather than failing. */
/*
 * VLDB_Same
 *	Look the volume named by afid up again at a VL server and refresh the
 *	cached volume from the answer.
 *
 * afid : fid whose Cell and Fid.Volume identify the volume.
 * areq : caller's request; in the visible lines it is only passed to
 *	  afs_FinalizeReq.  The VL lookup itself runs under a local request
 *	  (treq) initialised with afs_osi_cred.
 *
 * Returns DUNNO when afs_InitReq fails, and DIFFERENT or SAME from the
 * final statement depending on whether the volume's serverHost list was
 * seen to change.  NOTE(review): other return paths may exist on lines not
 * visible in this listing.
 */
146 static int VLDB_Same (afid, areq)
147 struct VenusFid *afid;
148 struct vrequest *areq;
150 struct vrequest treq;
154 struct vldbentry tve;
155 struct nvldbentry ntve;
156 struct uvldbentry utve;
160 char *bp, tbuf[CVBS]; /* biggest volume id is 2^32, ~ 4*10^9 */
161 unsigned int changed;
162 struct server *(oldhosts[NMAXNSERVERS]);
/* NOTE(review): the statistics counter is named CheckVLDB, not VLDB_Same. */
164 AFS_STATCNT(CheckVLDB);
165 afs_FinalizeReq(areq);
167 if (i = afs_InitReq(&treq, &afs_osi_cred)) return DUNNO;
/*
 * NOTE(review): tcell is dereferenced below with no visible null check --
 * confirm afs_GetCell cannot return null for afid->Cell.
 */
168 tcell = afs_GetCell(afid->Cell, READ_LOCK);
/*
 * afs_cv2string is handed the address just past the end of tbuf; presumably
 * it formats the volume id backwards from there and returns the start of
 * the text -- confirm.  bp is then used as the name in the VL lookups.
 */
169 bp = afs_cv2string(&tbuf[CVBS], afid->Fid.Volume);
/*
 * Each lookup attempt is preceded by VSleep(2) and uses a connection chosen
 * by afs_ConnByMHosts from the cell's VL hosts; afs_Analyze (below) decides
 * whether another attempt is made.
 */
171 VSleep(2); /* Better safe than sorry. */
172 tconn = afs_ConnByMHosts(tcell->cellHosts, tcell->vlport,
173 tcell->cell, &treq, SHARED_LOCK);
/*
 * Pick the lookup RPC from what is already recorded in the server flags:
 * SNO_LHOSTS -> VL_GetEntryByNameO (fills v.tve), SYES_LHOSTS ->
 * VL_GetEntryByNameN (fills v.ntve), otherwise VL_GetEntryByNameU (fills
 * v.utve).
 */
175 if (tconn->srvr->server->flags & SNO_LHOSTS) {
177 #ifdef RX_ENABLE_LOCKS
179 #endif /* RX_ENABLE_LOCKS */
180 i = VL_GetEntryByNameO(tconn->id, bp, &v.tve);
181 #ifdef RX_ENABLE_LOCKS
183 #endif /* RX_ENABLE_LOCKS */
184 } else if (tconn->srvr->server->flags & SYES_LHOSTS) {
186 #ifdef RX_ENABLE_LOCKS
188 #endif /* RX_ENABLE_LOCKS */
189 i = VL_GetEntryByNameN(tconn->id, bp, &v.ntve);
190 #ifdef RX_ENABLE_LOCKS
192 #endif /* RX_ENABLE_LOCKS */
195 #ifdef RX_ENABLE_LOCKS
197 #endif /* RX_ENABLE_LOCKS */
198 i = VL_GetEntryByNameU(tconn->id, bp, &v.utve);
199 #ifdef RX_ENABLE_LOCKS
201 #endif /* RX_ENABLE_LOCKS */
/*
 * Server not yet marked SVLSRV_UUID: when a call returns RXGEN_OPCODE the
 * code falls back from the U call to the N call and then to the O call.
 * SNO_LHOSTS, SYES_LHOSTS and SVLSRV_UUID are recorded in the server flags
 * along the way; the exact branch each assignment belongs to is only
 * partly visible in this listing.
 */
202 if (!(tconn->srvr->server->flags & SVLSRV_UUID)) {
203 if (i == RXGEN_OPCODE) {
205 #ifdef RX_ENABLE_LOCKS
207 #endif /* RX_ENABLE_LOCKS */
208 i = VL_GetEntryByNameN(tconn->id, bp, &v.ntve);
209 #ifdef RX_ENABLE_LOCKS
211 #endif /* RX_ENABLE_LOCKS */
212 if (i == RXGEN_OPCODE) {
214 tconn->srvr->server->flags |= SNO_LHOSTS;
215 #ifdef RX_ENABLE_LOCKS
217 #endif /* RX_ENABLE_LOCKS */
218 i = VL_GetEntryByNameO(tconn->id, bp, &v.tve);
219 #ifdef RX_ENABLE_LOCKS
221 #endif /* RX_ENABLE_LOCKS */
223 tconn->srvr->server->flags |= SYES_LHOSTS;
225 tconn->srvr->server->flags |= SVLSRV_UUID;
231 } while (afs_Analyze(tconn, i, (struct VenusFid *) 0, &treq,
232 -1, /* no op code for this */
233 SHARED_LOCK, tcell));
235 afs_PutCell(tcell, READ_LOCK);
/*
 * NOTE(review): &afid is the address of the pointer parameter, not of the
 * fid it points to -- confirm that is what ICL_TYPE_FID expects.
 */
236 afs_Trace2(afs_iclSetp, CM_TRACE_CHECKVLDB, ICL_TYPE_FID, &afid,
242 /* have info, copy into serverHost array */
244 tvp = afs_FindVolume(afid, WRITE_LOCK);
246 ObtainWriteLock(&tvp->lock,107);
/* Snapshot the current server list so it can be compared after the install. */
247 for (i=0; i < NMAXNSERVERS && tvp->serverHost[i]; i++) {
248 oldhosts[i] = tvp->serverHost[i];
/* Install the entry using the routine that matches the flavour fetched. */
252 InstallUVolumeEntry(tvp, &v.utve, afid->Cell, tcell, &treq);
254 else if (type == 1) {
255 InstallNVolumeEntry(tvp, &v.ntve, afid->Cell);
258 InstallVolumeEntry(tvp, &v.tve, afid->Cell);
/*
 * i is the number of hosts saved above.  A host now present at index i
 * means the list grew; otherwise the saved slots are compared one by one,
 * walking down from i-1 and stopping at the first difference.
 */
261 if (i < NMAXNSERVERS && tvp->serverHost[i]) {
264 for (--i;!changed && i >= 0; i--) {
265 if (tvp->serverHost[i] != oldhosts[i]) {
266 changed = 1; /* also happens if prefs change. big deal. */
270 ReleaseWriteLock(&tvp->lock);
271 afs_PutVolume(tvp, WRITE_LOCK);
273 else { /* can't find volume */
/* Not cached: afs_GetVolume is called with the local request instead. */
274 tvp = afs_GetVolume(afid, &treq, WRITE_LOCK);
276 afs_PutVolume(tvp, WRITE_LOCK);
282 return (changed ? DIFFERENT : SAME);
286 /*------------------------------------------------------------------------
287 * EXPORTED afs_Analyze
290 * Analyze the outcome of an RPC operation, taking whatever support
291 * actions are necessary.
294 * aconn : Ptr to the relevant connection on which the call was made.
295 * acode : The return code experienced by the RPC.
296 * afid : The FID of the file involved in the action. This argument
297 * may be null if none was involved.
298 * areq : The request record associated with this operation.
299 * op : which RPC we are analyzing.
300 * cellp : pointer to a cell struct. Must provide either fid or cell.
303 * Non-zero value if the related RPC operation should be retried, zero otherwise.
307 * This routine is typically called in a do-while loop, causing the
308 * embedded RPC operation to be called repeatedly if appropriate
309 * until whatever error condition (if any) is intolerable.
315 * The retry return value is used by afs_StoreAllSegments to determine
316 * if this is a temporary or permanent error.
317 *------------------------------------------------------------------------*/
/*
 * Outline of the visible handling.  NOTE(review): some lines of this
 * routine are not visible in this listing, so the outline is partial.
 *  - No connection but areq->busyCount set: warn about the busy volume,
 *    reset per-host busy status, record VOLBUSY once busyCount exceeds 100;
 *    the visible path also sleeps afs_BusyWaitPeriod and returns
 *    shouldRetry.
 *  - No volume error recorded in areq: optionally wait in the hard-mount
 *    path, then set areq->networkError (presumably still the no-connection
 *    case -- confirm).
 *  - Otherwise classify acode (busy/restarting, token, access, ubik,
 *    volume, vlserver, other, RX_MSGSIZE) and set shouldRetry and the
 *    areq error fields accordingly.
 * The value returned by the final statement is shouldRetry, which starts
 * at 0.
 */
318 int afs_Analyze(aconn, acode, afid, areq, op, locktype, cellp)
319 register struct conn *aconn;
321 register struct vrequest *areq;
322 struct VenusFid *afid;
332 afs_int32 shouldRetry = 0;
333 struct afs_stats_RPCErrors *aerrP;
336 AFS_STATCNT(afs_Analyze);
337 afs_Trace4(afs_iclSetp, CM_TRACE_ANALYZE, ICL_TYPE_INT32, op,
338 ICL_TYPE_POINTER, aconn,
339 ICL_TYPE_INT32, acode, ICL_TYPE_LONG, areq->uid);
/*
 * aerrP selects the per-RPC error counters; it stays null unless op is a
 * valid index into fsRPCErrors.  NOTE(review): the counter increments
 * below are presumably each guarded by a null test on a line not visible
 * here -- confirm, since VLDB_Same calls this routine with op == -1.
 */
341 aerrP = (struct afs_stats_RPCErrors *) 0;
343 if ((op >= 0) && (op < AFS_STATS_NUM_FS_RPC_OPS))
344 aerrP = &(afs_stats_cmfullperf.rpc.fsRPCErrors[op]);
346 afs_FinalizeReq(areq);
347 if (!aconn && areq->busyCount) { /* one RPC or more got VBUSY/VRESTARTING */
349 tvp = afs_FindVolume(afid, READ_LOCK);
351 afs_warnuser("afs: Waiting for busy volume %u (%s) in cell %s\n",
352 (afid ? afid->Fid.Volume : 0),
353 (tvp->name ? tvp->name : ""),
354 ((tvp->serverHost[0] && tvp->serverHost[0]->cell) ?
355 tvp->serverHost[0]->cell->cellName : ""));
/* Any per-host status other than not_busy or offline goes back to not_busy. */
357 for (i=0; i < MAXHOSTS; i++) {
358 if (tvp->status[i] != not_busy && tvp->status[i] != offline) {
359 tvp->status[i] = not_busy;
361 if (tvp->status[i] == not_busy)
364 afs_PutVolume(tvp, READ_LOCK);
366 afs_warnuser("afs: Waiting for busy volume %u\n",
367 (afid ? afid->Fid.Volume : 0));
/* After more than 100 busy polls the request is marked VOLBUSY. */
370 if (areq->busyCount > 100) {
372 (aerrP->err_Volume)++;
373 areq->volumeError = VOLBUSY;
376 VSleep(afs_BusyWaitPeriod); /* poll periodically */
378 return shouldRetry; /* should retry */
382 if (!areq->volumeError) {
384 (aerrP->err_Network)++;
/*
 * Hard-mount wait: taken only when hm_retry_int is non-zero, the request
 * does not have O_NONBLOCK set, and either the fid's cell or the supplied
 * cell is LOCALCELL.
 */
385 if (hm_retry_int && !(areq->flags & O_NONBLOCK) && /* "hard" mount */
386 ((afid && afid->Cell == LOCALCELL) ||
387 (cellp && cellp->cell == LOCALCELL))) {
389 afs_warnuser("afs: hard-mount waiting for a vlserver to return to service\n");
390 VSleep(hm_retry_int);
391 afs_CheckServers(1,cellp);
394 tvp = afs_FindVolume(afid, READ_LOCK);
/*
 * A volume that is not found, or that has VRO set, takes hm_retry_RO;
 * hm_retry_RW is assigned on the next visible line (branch not shown).
 */
395 if (!tvp || (tvp->states & VRO)) {
396 shouldRetry = hm_retry_RO;
398 shouldRetry = hm_retry_RW;
401 afs_PutVolume(tvp, READ_LOCK);
403 afs_warnuser("afs: hard-mount waiting for volume %u\n",
405 VSleep(hm_retry_int);
406 afs_CheckServers(1,cellp);
409 } /* if (hm_retry_int ... */
411 areq->networkError = 1;
417 /* Find server associated with this connection. */
422 /* If we previously took an error, mark this volume not busy */
423 if (areq->volumeError) {
424 tvp = afs_FindVolume(afid, READ_LOCK);
426 for (i=0; i<MAXHOSTS ; i++) {
427 if (tvp->serverHost[i] == tsp) {
428 tvp->status[i] = not_busy ;
431 afs_PutVolume(tvp, READ_LOCK);
435 afs_PutConn(aconn, locktype);
439 /* If network troubles, mark server as having bogued out again. */
440 /* VRESTARTING is < 0 because of backward compatibility issues
441 * with 3.4 file servers and older cache managers */
442 if ((acode < 0) && (acode != VRESTARTING)) {
444 ForceNewConnections(sa); /*multi homed clients lock:afs_xsrvAddr?*/
446 (aerrP->err_Server)++;
/*
 * Busy or restarting volume: areq->busyCount is set to 1 on the non-VBUSY
 * branch, the slot for this server in the volume's status array is marked
 * rdwr_busy, and the caller is made to wait afs_BusyWaitPeriod.
 */
449 if (acode == VBUSY || acode == VRESTARTING) {
450 if (acode == VBUSY) {
453 (aerrP->err_VolumeBusies)++;
455 else areq->busyCount = 1;
457 tvp = afs_FindVolume(afid, READ_LOCK);
459 for (i=0; i < MAXHOSTS ; i++ ) {
460 if (tvp->serverHost[i] == tsp) {
461 tvp->status[i] = rdwr_busy ; /* can't tell which yet */
462 /* to tell which, have to look at the op code. */
465 afs_PutVolume(tvp, READ_LOCK);
468 afs_warnuser("afs: Waiting for busy volume %u in cell %s\n",
469 (afid? afid->Fid.Volume : 0), tsp->cell->cellName);
470 VSleep(afs_BusyWaitPeriod); /* poll periodically */
/*
 * Token problems: VICETOKENDEAD, or any code whose bits above the low
 * byte equal ERROR_TABLE_BASE_rxk.
 */
475 else if (acode == VICETOKENDEAD || (acode & ~0xff) == ERROR_TABLE_BASE_rxk) {
476 /* any rxkad error is treated as token expiration */
480 * I'm calling these errors protection errors, since they involve
481 * faulty authentication.
484 (aerrP->err_Protection)++;
486 tu = afs_FindUser(areq->uid, tsp->cell->cell, READ_LOCK);
488 if ((acode == VICETOKENDEAD) || (acode == RXKADEXPIRED))
489 afs_warnuser("afs: Tokens for user of AFS id %d for cell %s have expired\n",
490 tu->vid, aconn->srvr->server->cell->cellName);
492 afs_warnuser("afs: Tokens for user of AFS id %d for cell %s are discarded (rxkad error=%d)\n",
493 tu->vid, aconn->srvr->server->cell->cellName, acode);
494 afs_PutUser(tu, READ_LOCK);
496 /* The else case shouldn't be possible and should probably be replaced by a panic? */
497 if ((acode == VICETOKENDEAD) || (acode == RXKADEXPIRED))
498 afs_warnuser("afs: Tokens for user %d for cell %s have expired\n",
499 areq->uid, aconn->srvr->server->cell->cellName);
501 afs_warnuser("afs: Tokens for user %d for cell %s are discarded (rxkad error = %d)\n",
502 areq->uid, aconn->srvr->server->cell->cellName, acode);
504 aconn->forceConnectFS = 0; /* don't check until new tokens set */
505 aconn->user->states |= UTokensBad;
506 shouldRetry = 1; /* Try again (as root). */
508 /* Check for access violation. */
509 else if (acode == EACCES) {
510 /* should mark access error in non-existent per-user global structure */
512 (aerrP->err_Protection)++;
513 areq->accessError = 1;
/* A refused store is additionally remembered as a permanent write error. */
514 if (op == AFS_STATS_FS_RPCIDX_STOREDATA)
515 areq->permWriteError = 1;
518 /* check for ubik errors; treat them like crashed servers */
/*
 * NOTE(review): this range excludes ERROR_TABLE_BASE_u+255, whereas the
 * vlserver range test further down includes its +255 value -- confirm
 * which bound is intended.
 */
519 else if (acode >= ERROR_TABLE_BASE_u && acode < ERROR_TABLE_BASE_u+255) {
522 (aerrP->err_Server)++;
523 shouldRetry = 1; /* retryable (maybe one is working) */
524 VSleep(1); /* just in case */
526 /* Check for bad volume data base / missing volume. */
527 else if (acode == VSALVAGE || acode == VOFFLINE
528 || acode == VNOVOL || acode == VNOSERVICE || acode == VMOVED) {
533 areq->volumeError = VOLMISSING;
535 (aerrP->err_Volume)++;
/*
 * With a fid whose cell can be found, VLDB_Same is asked whether the
 * cached location data is still current, and the per-host status entries
 * of the volume are then adjusted.  NOTE(review): the release of tcell
 * and the use made of `same` are not visible in this listing.
 */
536 if (afid && (tcell = afs_GetCell(afid->Cell, 0))) {
537 same = VLDB_Same(afid, areq);
538 tvp = afs_FindVolume(afid, READ_LOCK);
540 for (i=0; i < MAXHOSTS && tvp->serverHost[i]; i++ ) {
541 if (tvp->serverHost[i] == tsp) {
542 if (tvp->status[i] == end_not_busy)
543 tvp->status[i] = offline ;
548 tvp->status[i] = not_busy; /* reset the others */
551 afs_PutVolume(tvp, READ_LOCK);
555 else if (acode >= ERROR_TABLE_BASE_vl
556 && acode <= ERROR_TABLE_BASE_vl + 255) /* vlserver errors */ {
558 areq->volumeError = VOLMISSING;
/*
 * Any remaining non-negative code is not retried; a failed store is
 * flagged as a permanent write error.
 */
560 else if (acode >= 0) {
562 (aerrP->err_Other)++;
563 if (op == AFS_STATS_FS_RPCIDX_STOREDATA)
564 areq->permWriteError = 1;
565 shouldRetry = 0; /* Other random Vice error. */
566 } else if (acode == RX_MSGSIZE) { /* same meaning as EMSGSIZE... */
567 VSleep(1); /* Just a hack for desperate times. */
569 (aerrP->err_Other)++;
570 shouldRetry = 1; /* packet was too big, please retry call */
573 if (acode < 0 && acode != RX_MSGSIZE && acode != VRESTARTING) {
574 /* If we get here, code < 0 and we have network/Server troubles.
575 * areq->networkError is not set here, since we always
576 * retry in case there is another server. However, if we find
577 * no connection (aconn == 0) we set the networkError flag.
579 afs_MarkServerUpOrDown(sa, SRVR_ISDOWN);
581 (aerrP->err_Server)++;
582 VSleep(1); /* Just a hack for desperate times. */
586 /* now unlock the connection and return */
587 afs_PutConn(aconn, locktype);
588 return (shouldRetry);