src/afs/afs_analyze.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 /*
  11  * Implements:
  12  */
  13 #include <afsconfig.h>
  14 #include "afs/param.h"
  15
  16
  17 #include "afs/stds.h"
  18 #include "afs/sysincludes.h"    /* Standard vendor system headers */
  19
  20 #ifndef UKERNEL
  21 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_FBSD_ENV)
  22 #include <net/if.h>
  23 #include <netinet/in.h>
  24 #endif
  25
  26 #ifdef AFS_SGI62_ENV
  27 #include "h/hashing.h"
  28 #endif
  29 #if !defined(AFS_HPUX110_ENV) && !defined(AFS_LINUX20_ENV) && !defined(AFS_FBSD_ENV) && !defined(AFS_DARWIN_ENV)
  30 #include <netinet/in_var.h>
  31 #endif
  32 #endif /* !UKERNEL */
  33
  34 #include "afsincludes.h"        /* Afs-based standard headers */
  35 #include "afs/afs_stats.h"      /* afs statistics */
  36 #include "afs/afs_util.h"
  37 #include "afs/unified_afs.h"
  38
  39 #if     defined(AFS_SUN5_ENV)
  40 #include <inet/led.h>
  41 #include <inet/common.h>
  42 #include <netinet/ip6.h>
  43 #include <inet/ip.h>
  44 #endif
  45
/* shouldn't do it this way, but for now will do */
#ifndef ERROR_TABLE_BASE_U
#define ERROR_TABLE_BASE_U      (5376L)
#endif /* ubik error base define */

/* shouldn't do it this way, but for now will do */
#ifndef ERROR_TABLE_BASE_uae
#define ERROR_TABLE_BASE_uae    (49733376L)
#endif /* unified afs error base define */

/* same hack for vlserver error base as for ubik error base */
#ifndef ERROR_TABLE_BASE_VL
#define ERROR_TABLE_BASE_VL     (363520L)
#define VL_NOENT                (363524L)
#endif /* vlserver error base define */


int afs_BusyWaitPeriod = 15;    /**< poll period, in seconds */

afs_int32 hm_retry_RO = 0;      /**< enable read-only hard-mount retry */
afs_int32 hm_retry_RW = 0;      /**< enable read-write hard-mount retry */
afs_int32 hm_retry_int = 0;     /**< hard-mount retry interval, in seconds */

/* Sleep for 'at' seconds; the value is scaled by 1000 before being handed to
 * afs_osi_Wait (presumably a millisecond count -- confirm against afs_osi_Wait). */
#define VSleep(at)      afs_osi_Wait((at)*1000, 0, 0)


/* Result code of the last VL_GetEntryByName* attempt made by VLDB_Same() on
 * its capability-probing path (the branch that starts with the "U" RPC). */
int lastcode;
/* Return values of VLDB_Same(). */
#define DIFFERENT 0             /* VLDB record differs from the cached one */
#define SAME 1                  /* VLDB record matches the cached one */
#define DUNNO 2                 /* could not determine either way */
  76 /*!
  77  * \brief
  78  *      Request vldb record to determined if it has changed.
  79  *
  80  * \retval 0 if the vldb record for a specific volume is different from what
  81  *           we have cached -- perhaps the volume has moved.
  82  * \retval 1 if the vldb record is the same
  83  * \retval 2 if we can't tell if it's the same or not.
  84  *
  85  * \note
  86  *      If 0 returned, the caller will probably start over at the beginning of our
  87  *      list of servers for this volume and try to find one that is up.  If
  88  *      not 0, we will probably just keep plugging with what we have
  89  *      cached.   If we fail to contact the VL server, we  should just keep
  90  *      trying with the information we have, rather than failing.
  91  */
  92 static int
  93 VLDB_Same(struct VenusFid *afid, struct vrequest *areq)
  94 {
  95     struct vrequest treq;
  96     struct afs_conn *tconn;
  97     int i, type = 0;
  98     union {
  99         struct vldbentry tve;
 100         struct nvldbentry ntve;
 101         struct uvldbentry utve;
 102     } *v;
 103     struct volume *tvp;
 104     struct cell *tcell;
 105     char *bp, tbuf[CVBS];       /* biggest volume id is 2^32, ~ 4*10^9 */
 106     unsigned int changed;
 107     struct server *(oldhosts[NMAXNSERVERS]);
 108     struct rx_connection *rxconn;
 109
 110     AFS_STATCNT(CheckVLDB);
 111     afs_FinalizeReq(areq);
 112
 113     if ((i = afs_InitReq(&treq, afs_osi_credp)))
 114         return DUNNO;
 115     v = afs_osi_Alloc(sizeof(*v));
 116     osi_Assert(v != NULL);
 117     tcell = afs_GetCell(afid->Cell, READ_LOCK);
 118     bp = afs_cv2string(&tbuf[CVBS], afid->Fid.Volume);
 119     do {
 120         VSleep(2);              /* Better safe than sorry. */
 121         tconn =
 122             afs_ConnByMHosts(tcell->cellHosts, tcell->vlport, tcell->cellNum,
 123                              &treq, SHARED_LOCK, 0, &rxconn);
 124         if (tconn) {
 125             if ( tconn->parent->srvr->server->flags & SNO_LHOSTS) {
 126                 type = 0;
 127                 RX_AFS_GUNLOCK();
 128                 i = VL_GetEntryByNameO(rxconn, bp, &v->tve);
 129                 RX_AFS_GLOCK();
 130             } else if (tconn->parent->srvr->server->flags & SYES_LHOSTS) {
 131                 type = 1;
 132                 RX_AFS_GUNLOCK();
 133                 i = VL_GetEntryByNameN(rxconn, bp, &v->ntve);
 134                 RX_AFS_GLOCK();
 135             } else {
 136                 type = 2;
 137                 RX_AFS_GUNLOCK();
 138                 i = VL_GetEntryByNameU(rxconn, bp, &v->utve);
 139                 RX_AFS_GLOCK();
 140                 if (!(tconn->parent->srvr->server->flags & SVLSRV_UUID)) {
 141                     if (i == RXGEN_OPCODE) {
 142                         type = 1;
 143                         RX_AFS_GUNLOCK();
 144                         i = VL_GetEntryByNameN(rxconn, bp, &v->ntve);
 145                         RX_AFS_GLOCK();
 146                         if (i == RXGEN_OPCODE) {
 147                             type = 0;
 148                             tconn->parent->srvr->server->flags |= SNO_LHOSTS;
 149                             RX_AFS_GUNLOCK();
 150                             i = VL_GetEntryByNameO(rxconn, bp, &v->tve);
 151                             RX_AFS_GLOCK();
 152                         } else if (!i)
 153                             tconn->parent->srvr->server->flags |= SYES_LHOSTS;
 154                     } else if (!i)
 155                         tconn->parent->srvr->server->flags |= SVLSRV_UUID;
 156                 }
 157                 lastcode = i;
 158             }
 159         } else
 160             i = -1;
 161     } while (afs_Analyze(tconn, rxconn, i, NULL, &treq, -1,     /* no op code for this */
 162                          SHARED_LOCK, tcell));
 163
 164     afs_PutCell(tcell, READ_LOCK);
 165     afs_Trace2(afs_iclSetp, CM_TRACE_CHECKVLDB, ICL_TYPE_FID, &afid,
 166                ICL_TYPE_INT32, i);
 167
 168     if (i) {
 169         afs_osi_Free(v, sizeof(*v));
 170         return DUNNO;
 171     }
 172     /* have info, copy into serverHost array */
 173     changed = 0;
 174     tvp = afs_FindVolume(afid, WRITE_LOCK);
 175     if (tvp) {
 176         ObtainWriteLock(&tvp->lock, 107);
 177         for (i = 0; i < NMAXNSERVERS && tvp->serverHost[i]; i++) {
 178             oldhosts[i] = tvp->serverHost[i];
 179         }
 180         ReleaseWriteLock(&tvp->lock);
 181
 182         if (type == 2) {
 183             LockAndInstallUVolumeEntry(tvp, &v->utve, afid->Cell, tcell, &treq);
 184         } else if (type == 1) {
 185             LockAndInstallNVolumeEntry(tvp, &v->ntve, afid->Cell);
 186         } else {
 187             LockAndInstallVolumeEntry(tvp, &v->tve, afid->Cell);
 188         }
 189
 190         if (i < NMAXNSERVERS && tvp->serverHost[i]) {
 191             changed = 1;
 192         }
 193         for (--i; !changed && i >= 0; i--) {
 194             if (tvp->serverHost[i] != oldhosts[i]) {
 195                 changed = 1;    /* also happens if prefs change.  big deal. */
 196             }
 197         }
 198
 199         ReleaseWriteLock(&tvp->lock);
 200         afs_PutVolume(tvp, WRITE_LOCK);
 201     } else {                    /* can't find volume */
 202         tvp = afs_GetVolume(afid, &treq, WRITE_LOCK);
 203         if (tvp) {
 204             afs_PutVolume(tvp, WRITE_LOCK);
 205             afs_osi_Free(v, sizeof(*v));
 206             return DIFFERENT;
 207         } else {
 208             afs_osi_Free(v, sizeof(*v));
 209             return DUNNO;
 210         }
 211     }
 212
 213     afs_osi_Free(v, sizeof(*v));
 214     return (changed ? DIFFERENT : SAME);
 215 }                               /*VLDB_Same */
 216
 217 /*!
 218  * \brief
 219  *      Mark a server as invalid for further attempts of this request only.
 220  *
 221  * \param[in,out] areq  The request record associated with this operation.
 222  * \param[in]     afid  The FID of the file involved in the action.  This argument
 223  *                      may be null if none was involved.
 224  * \param[in,out] tsp   pointer to a server struct for the server we wish to
 225  *                      blacklist.
 226  *
 227  * \returns
 228  *      Non-zero value if further servers are available to try,
 229  *      zero otherwise.
 230  *
 231  * \note
 232  *      This routine is typically called in situations where we believe
 233  *      one server out of a pool may have an error condition.
 234  *
 235  * \note
 236  *      The afs_Conn* routines use the list of invalidated servers to
 237  *      avoid reusing a server marked as invalid for this request.
 238  */
 239 static afs_int32
 240 afs_BlackListOnce(struct vrequest *areq, struct VenusFid *afid,
 241                   struct server *tsp)
 242 {
 243     struct volume *tvp;
 244     afs_int32 i;
 245     afs_int32 serversleft = 0;
 246
 247     if (afid) {
 248         tvp = afs_FindVolume(afid, READ_LOCK);
 249         if (tvp) {
 250             for (i = 0; i < AFS_MAXHOSTS; i++) {
 251                 if (tvp->serverHost[i] == tsp) {
 252                     areq->skipserver[i] = 1;
 253                 }
 254                 if (tvp->serverHost[i] &&
 255                     (tvp->serverHost[i]->addr->sa_flags &
 256                       SRVR_ISDOWN)) {
 257                     areq->skipserver[i] = 1;
 258                 }
 259             }
 260             for (i = 0; i < AFS_MAXHOSTS; i++) {
 261                 if (tvp->serverHost[i] && areq->skipserver[i] == 0) {
 262                     serversleft = 1;
 263                     break;
 264                 }
 265             }
 266             afs_PutVolume(tvp, READ_LOCK);
 267             return serversleft;
 268         }
 269     }
 270     return serversleft;
 271 }
 272
 273 /*!
 274  * \brief
 275  *      Analyze the outcome of an RPC operation, taking whatever support
 276  *      actions are necessary.
 277  *
 278  * \param[in]     afid   The FID of the file involved in the action.  This argument
 279  *                       may be null if none was involved.
 280  * \param[in]     op     which RPC we are analyzing.
 281  * \param[in,out] avp    A pointer to the struct volume, if we already have one.
 282  *
 283  * \returns
 284  *      Non-zero value if the related RPC operation can be retried,
 285  *      zero otherwise.
 286  *
 287  * \note
 288  *      This routine is called when we got a network error,
 289  *      and discards state if the operation was a data-mutating
 290  *      operation.
 291  */
 292 static int
 293 afs_ClearStatus(struct VenusFid *afid, int op, struct volume *avp)
 294 {
 295     struct volume *tvp = NULL;
 296
 297     /* if it's not a write op, we have nothing to veto and shouldn't clear. */
 298     if (!AFS_STATS_FS_RPCIDXES_ISWRITE(op)) {
 299         return 1;
 300     }
 301
 302     if (avp)
 303         tvp = avp;
 304     else if (afid)
 305         tvp = afs_FindVolume(afid, READ_LOCK);
 306
 307     /* don't assume just discarding will fix if no cached volume */
 308     if (tvp) {
 309         struct vcache *tvc;
 310         ObtainReadLock(&afs_xvcache);
 311         if ((tvc = afs_FindVCache(afid, 0, 0))) {
 312             ReleaseReadLock(&afs_xvcache);
 313             tvc->f.states &= ~(CStatd | CUnique);
 314             afs_PutVCache(tvc);
 315         } else {
 316             ReleaseReadLock(&afs_xvcache);
 317         }
 318         if (!avp)
 319             afs_PutVolume(tvp, READ_LOCK);
 320     }
 321
 322     if (AFS_STATS_FS_RPCIDXES_WRITE_RETRIABLE(op))
 323         return 1;
 324
 325     /* not retriable: we may have raced ourselves */
 326     return 0;
 327 }
 328
 329 /*!
 330  * \brief
 331  *      Analyze the outcome of an RPC operation, taking whatever support
 332  *      actions are necessary.
 333  *
 334  * \param[in]     aconn  Ptr to the relevant connection on which the call was made.
 335  * \param[in]     acode  The return code experienced by the RPC.
 336  * \param[in]     fid    The FID of the file involved in the action.  This argument
 337  *                       may be null if none was involved.
 338  * \param[in,out] areq   The request record associated with this operation.
 339  * \param[in]     op     which RPC we are analyzing.
 340  * \param[in]     cellp  pointer to a cell struct.  Must provide either fid or cell.
 341  *
 342  * \returns
 343  *      Non-zero value if the related RPC operation should be retried,
 344  *      zero otherwise.
 345  *
 346  * \note
 347  *      This routine is typically called in a do-while loop, causing the
 348  *      embedded RPC operation to be called repeatedly if appropriate
 349  *      until whatever error condition (if any) is intolerable.
 350  *
 351  * \note
 352  *      The retry return value is used by afs_StoreAllSegments to determine
 353  *      if this is a temporary or permanent error.
 354  */
 355 int
 356 afs_Analyze(struct afs_conn *aconn, struct rx_connection *rxconn,
 357             afs_int32 acode, struct VenusFid *afid, struct vrequest *areq,
 358             int op, afs_int32 locktype, struct cell *cellp)
 359 {
 360     afs_int32 i;
 361     struct srvAddr *sa;
 362     struct server *tsp;
 363     struct volume *tvp = NULL;
 364     afs_int32 shouldRetry = 0;
 365     afs_int32 serversleft = 1;
 366     struct afs_stats_RPCErrors *aerrP;
 367     afs_uint32 address;
 368
 369     if (AFS_IS_DISCONNECTED && !AFS_IN_SYNC) {
 370         /* On reconnection, act as connected. XXX: for now.... */
 371         /* SXW - This may get very tired after a while. We should try and
 372          *       intercept all RPCs before they get here ... */
 373         /*printf("afs_Analyze: disconnected\n");*/
 374         afs_FinalizeReq(areq);
 375         if (aconn) {
 376             /* SXW - I suspect that this will _never_ happen - we shouldn't
 377              *       get a connection because we're disconnected !!!*/
 378             afs_PutConn(aconn, rxconn, locktype);
 379         }
 380         return 0;
 381     }
 382
 383     AFS_STATCNT(afs_Analyze);
 384     afs_Trace4(afs_iclSetp, CM_TRACE_ANALYZE, ICL_TYPE_INT32, op,
 385                ICL_TYPE_POINTER, aconn, ICL_TYPE_INT32, acode, ICL_TYPE_LONG,
 386                areq->uid);
 387
 388     aerrP = (struct afs_stats_RPCErrors *)0;
 389
 390     if ((op >= 0) && (op < AFS_STATS_NUM_FS_RPC_OPS))
 391         aerrP = &(afs_stats_cmfullperf.rpc.fsRPCErrors[op]);
 392
 393     afs_FinalizeReq(areq);
 394     if (!aconn && areq->busyCount) {    /* one RPC or more got VBUSY/VRESTARTING */
 395
 396         tvp = afs_FindVolume(afid, READ_LOCK);
 397         if (tvp) {
 398             afs_warnuser("afs: Waiting for busy volume %u (%s) in cell %s\n",
 399                          (afid ? afid->Fid.Volume : 0),
 400                          (tvp->name ? tvp->name : ""),
 401                          ((tvp->serverHost[0]
 402                            && tvp->serverHost[0]->cell) ? tvp->serverHost[0]->
 403                           cell->cellName : ""));
 404
 405             for (i = 0; i < AFS_MAXHOSTS; i++) {
 406                 if (tvp->status[i] != not_busy && tvp->status[i] != offline) {
 407                     tvp->status[i] = not_busy;
 408                 }
 409                 if (tvp->status[i] == not_busy)
 410                     shouldRetry = 1;
 411             }
 412             afs_PutVolume(tvp, READ_LOCK);
 413         } else {
 414             afs_warnuser("afs: Waiting for busy volume %u\n",
 415                          (afid ? afid->Fid.Volume : 0));
 416         }
 417
 418         if (areq->busyCount > 100) {
 419             if (aerrP)
 420                 (aerrP->err_Volume)++;
 421             areq->volumeError = VOLBUSY;
 422             shouldRetry = 0;
 423         } else {
 424             VSleep(afs_BusyWaitPeriod); /* poll periodically */
 425         }
 426         if (shouldRetry != 0)
 427             areq->busyCount++;
 428
 429         return shouldRetry;     /* should retry */
 430     }
 431
 432     if (!aconn || !aconn->parent->srvr) {
 433         if (!areq->volumeError) {
 434             if (aerrP)
 435                 (aerrP->err_Network)++;
 436             if (hm_retry_int && !(areq->flags & O_NONBLOCK) &&  /* "hard" mount */
 437                 ((afid && afs_IsPrimaryCellNum(afid->Cell))
 438                  || (cellp && afs_IsPrimaryCell(cellp)))) {
 439                 if (!afid) {
 440                     static int afs_vl_hm = 0;
 441                     int warn = 0;
 442                     if (!afs_vl_hm) {
 443                         afs_vl_hm = warn = 1;
 444                     }
 445                     if (warn) {
 446                         afs_warnuser
 447                             ("afs: hard-mount waiting for a vlserver to return to service\n");
 448                     }
 449                     VSleep(hm_retry_int);
 450                     afs_CheckServers(1, cellp);
 451                     shouldRetry = 1;
 452
 453                     if (warn) {
 454                         afs_vl_hm = 0;
 455                     }
 456                 } else {
 457                     static int afs_unknown_vhm = 0;
 458                     int warn = 0, vp_vhm = 0;
 459
 460                     tvp = afs_FindVolume(afid, READ_LOCK);
 461                     if (!tvp || (tvp->states & VRO)) {
 462                         shouldRetry = hm_retry_RO;
 463                     } else {
 464                         shouldRetry = hm_retry_RW;
 465                     }
 466
 467                     /* Set 'warn' if we should afs_warnuser. Only let one
 468                      * caller call afs_warnuser per hm_retry_int interval per
 469                      * volume. */
 470                     if (shouldRetry) {
 471                         if (tvp) {
 472                             if (!(tvp->states & VHardMount)) {
 473                                 tvp->states |= VHardMount;
 474                                 warn = vp_vhm = 1;
 475                             }
 476                         } else {
 477                             if (!afs_unknown_vhm) {
 478                                 afs_unknown_vhm = 1;
 479                                 warn = 1;
 480                             }
 481                         }
 482                     }
 483
 484                     if (tvp)
 485                         afs_PutVolume(tvp, READ_LOCK);
 486
 487                     if (shouldRetry) {
 488                         if (warn) {
 489                             afs_warnuser
 490                                 ("afs: hard-mount waiting for volume %u\n",
 491                                  afid->Fid.Volume);
 492                         }
 493
 494                         VSleep(hm_retry_int);
 495                         afs_CheckServers(1, cellp);
 496                         /* clear the black listed servers on this request. */
 497                         memset(areq->skipserver, 0, sizeof(areq->skipserver));
 498
 499                         if (vp_vhm) {
 500                             tvp = afs_FindVolume(afid, READ_LOCK);
 501                             if (tvp) {
 502                                 tvp->states &= ~VHardMount;
 503                                 afs_PutVolume(tvp, READ_LOCK);
 504                             }
 505                         } else if (warn) {
 506                             afs_unknown_vhm = 0;
 507                         }
 508                     }
 509                 }
 510             } /* if (hm_retry_int ... */
 511             else {
 512                 if (acode == RX_MSGSIZE)
 513                     shouldRetry = 1;
 514                 else {
 515                     areq->networkError = 1;
 516                     /* do not promote to shouldRetry if not already */
 517                     if (afs_ClearStatus(afid, op, NULL) == 0)
 518                         shouldRetry = 0;
 519                 }
 520             }
 521         }
 522         return shouldRetry;
 523     }
 524
 525     /* Find server associated with this connection. */
 526     sa = aconn->parent->srvr;
 527     tsp = sa->server;
 528     address = ntohl(sa->sa_ip);
 529
 530     /* Before we do anything with acode, make sure we translate it back to
 531      * a system error */
 532     if ((acode & ~0xff) == ERROR_TABLE_BASE_uae)
 533         acode = et_to_sys_error(acode);
 534
 535     if (acode == 0) {
 536         /* If we previously took an error, mark this volume not busy */
 537         if (areq->volumeError) {
 538             tvp = afs_FindVolume(afid, READ_LOCK);
 539             if (tvp) {
 540                 for (i = 0; i < AFS_MAXHOSTS; i++) {
 541                     if (tvp->serverHost[i] == tsp) {
 542                         tvp->status[i] = not_busy;
 543                     }
 544                 }
 545                 afs_PutVolume(tvp, READ_LOCK);
 546             }
 547         }
 548
 549         afs_PutConn(aconn, rxconn, locktype);
 550         return 0;
 551     }
 552
 553     /* If network troubles, mark server as having bogued out again. */
 554     /* VRESTARTING is < 0 because of backward compatibility issues
 555      * with 3.4 file servers and older cache managers */
 556 #ifdef AFS_64BIT_CLIENT
 557     if (acode == -455)
 558         acode = 455;
 559 #endif /* AFS_64BIT_CLIENT */
 560     if ((acode < 0) && (acode != VRESTARTING)) {
 561         if (acode == RX_MSGSIZE || acode == RX_CALL_BUSY) {
 562             shouldRetry = 1;
 563             goto out;
 564         }
 565         if (acode == RX_CALL_TIMEOUT || acode == RX_CALL_IDLE) {
 566             serversleft = afs_BlackListOnce(areq, afid, tsp);
 567             if (afid)
 568                 tvp = afs_FindVolume(afid, READ_LOCK);
 569             if ((serversleft == 0) && tvp &&
 570                 ((tvp->states & VRO) || (tvp->states & VBackup))) {
 571                 shouldRetry = 0;
 572             } else {
 573                 shouldRetry = 1;
 574             }
 575             if (!afid || !tvp || (tvp->states & VRO))
 576                 areq->idleError++;
 577             else if (afs_ClearStatus(afid, op, tvp) == 0)
 578                 shouldRetry = 0;
 579
 580             if (tvp)
 581                 afs_PutVolume(tvp, READ_LOCK);
 582             /* By doing this, we avoid ever marking a server down
 583              * in an idle timeout case. That's because the server is
 584              * still responding and may only be letting a single vnode
 585              * time out. We otherwise risk having the server continually
 586              * be marked down, then up, then down again...
 587              */
 588             goto out;
 589         }
 590         afs_ServerDown(sa, acode);
 591         ForceNewConnections(sa); /* multi homed clients lock:afs_xsrvAddr? */
 592         if (aerrP)
 593             (aerrP->err_Server)++;
 594     }
 595
 596     if (acode == VBUSY || acode == VRESTARTING) {
 597         if (acode == VBUSY) {
 598             areq->busyCount++;
 599             if (aerrP)
 600                 (aerrP->err_VolumeBusies)++;
 601         } else
 602             areq->busyCount = 1;
 603
 604         tvp = afs_FindVolume(afid, READ_LOCK);
 605         if (tvp) {
 606             for (i = 0; i < AFS_MAXHOSTS; i++) {
 607                 if (tvp->serverHost[i] == tsp) {
 608                     tvp->status[i] = rdwr_busy; /* can't tell which yet */
 609                     /* to tell which, have to look at the op code. */
 610                 }
 611             }
 612             afs_PutVolume(tvp, READ_LOCK);
 613         } else {
 614             afs_warnuser("afs: Waiting for busy volume %u in cell %s (server %d.%d.%d.%d)\n",
 615                          (afid ? afid->Fid.Volume : 0), tsp->cell->cellName,
 616                          (address >> 24), (address >> 16) & 0xff,
 617                          (address >> 8) & 0xff, (address) & 0xff);
 618             VSleep(afs_BusyWaitPeriod); /* poll periodically */
 619         }
 620         shouldRetry = 1;
 621         acode = 0;
 622     } else if (acode == VICETOKENDEAD
 623                || (acode & ~0xff) == ERROR_TABLE_BASE_RXK) {
 624         /* any rxkad error is treated as token expiration */
 625         struct unixuser *tu;
 626         /*
 627          * I'm calling these errors protection errors, since they involve
 628          * faulty authentication.
 629          */
 630         if (aerrP)
 631             (aerrP->err_Protection)++;
 632
 633         tu = afs_FindUser(areq->uid, tsp->cell->cellNum, READ_LOCK);
 634         if (tu) {
 635             if (acode == VICETOKENDEAD) {
 636                 aconn->forceConnectFS = 1;
 637             } else if (acode == RXKADEXPIRED) {
 638                 aconn->forceConnectFS = 0;      /* don't check until new tokens set */
 639                 aconn->parent->user->states |= UTokensBad;
 640                 afs_NotifyUser(tu, UTokensDropped);
 641                 afs_warnuser
 642                     ("afs: Tokens for user of AFS id %d for cell %s have expired (server %d.%d.%d.%d)\n",
 643                      tu->viceId, aconn->parent->srvr->server->cell->cellName,
 644                      (address >> 24), (address >> 16) & 0xff,
 645                      (address >> 8) & 0xff, (address) & 0xff);
 646             } else {
 647                 serversleft = afs_BlackListOnce(areq, afid, tsp);
 648                 areq->tokenError++;
 649
 650                 if (serversleft) {
 651                     afs_warnuser
 652                         ("afs: Tokens for user of AFS id %d for cell %s: rxkad error=%d (server %d.%d.%d.%d)\n",
 653                          tu->viceId, aconn->parent->srvr->server->cell->cellName, acode,
 654                          (address >> 24), (address >> 16) & 0xff,
 655                          (address >> 8) & 0xff, (address) & 0xff);
 656                     shouldRetry = 1;
 657                 } else {
 658                     areq->tokenError = 0;
 659                     aconn->forceConnectFS = 0;  /* don't check until new tokens set */
 660                     aconn->parent->user->states |= UTokensBad;
 661                     afs_NotifyUser(tu, UTokensDropped);
 662                     afs_warnuser
 663                         ("afs: Tokens for user of AFS id %d for cell %s are discarded (rxkad error=%d, server %d.%d.%d.%d)\n",
 664                          tu->viceId, aconn->parent->srvr->server->cell->cellName, acode,
 665                          (address >> 24), (address >> 16) & 0xff,
 666                          (address >> 8) & 0xff, (address) & 0xff);
 667                 }
 668             }
 669             afs_PutUser(tu, READ_LOCK);
 670         } else {
 671             /* The else case shouldn't be possible and should probably be replaced by a panic? */
 672             if (acode == VICETOKENDEAD) {
 673                 aconn->forceConnectFS = 1;
 674             } else if (acode == RXKADEXPIRED) {
 675                 aconn->forceConnectFS = 0;      /* don't check until new tokens set */
 676                 aconn->parent->user->states |= UTokensBad;
 677                 afs_NotifyUser(tu, UTokensDropped);
 678                 afs_warnuser
 679                     ("afs: Tokens for user %d for cell %s have expired (server %d.%d.%d.%d)\n",
 680                      areq->uid, aconn->parent->srvr->server->cell->cellName,
 681                      (address >> 24), (address >> 16) & 0xff,
 682                      (address >> 8) & 0xff, (address) & 0xff);
 683             } else {
 684                 aconn->forceConnectFS = 0;      /* don't check until new tokens set */
 685                 aconn->parent->user->states |= UTokensBad;
 686                 afs_NotifyUser(tu, UTokensDropped);
 687                 afs_warnuser
 688                     ("afs: Tokens for user %d for cell %s are discarded (rxkad error = %d, server %d.%d.%d.%d)\n",
 689                      areq->uid, aconn->parent->srvr->server->cell->cellName,
 690                      acode,
 691                      (address >> 24), (address >> 16) & 0xff,
 692                      (address >> 8) & 0xff, (address) & 0xff);
 693
 694             }
 695         }
 696         shouldRetry = 1;        /* Try again (as root). */
 697     }
 698     /* Check for access violation. */
 699     else if (acode == EACCES) {
 700         /* should mark access error in non-existent per-user global structure */
 701         if (aerrP)
 702             (aerrP->err_Protection)++;
 703         areq->accessError = 1;
 704         if (op == AFS_STATS_FS_RPCIDX_STOREDATA)
 705             areq->permWriteError = 1;
 706         shouldRetry = 0;
 707     }
 708     /* check for ubik errors; treat them like crashed servers */
 709     else if (acode >= ERROR_TABLE_BASE_U && acode < ERROR_TABLE_BASE_U + 255) {
 710         afs_ServerDown(sa, acode);
 711         if (aerrP)
 712             (aerrP->err_Server)++;
 713         shouldRetry = 1;        /* retryable (maybe one is working) */
 714         VSleep(1);              /* just in case */
 715     }
 716     /* Check for bad volume data base / missing volume. */
 717     else if (acode == VSALVAGE || acode == VOFFLINE || acode == VNOVOL
 718              || acode == VNOSERVICE || acode == VMOVED) {
 719         struct cell *tcell;
 720         int same;
 721
 722         shouldRetry = 1;
 723         areq->volumeError = VOLMISSING;
 724         if (aerrP)
 725             (aerrP->err_Volume)++;
 726         if (afid && (tcell = afs_GetCell(afid->Cell, 0))) {
 727             same = VLDB_Same(afid, areq);
 728             tvp = afs_FindVolume(afid, READ_LOCK);
 729             if (tvp) {
 730                 for (i = 0; i < AFS_MAXHOSTS && tvp->serverHost[i]; i++) {
 731                     if (tvp->serverHost[i] == tsp) {
 732                         if (tvp->status[i] == end_not_busy)
 733                             tvp->status[i] = offline;
 734                         else
 735                             tvp->status[i]++;
 736                     } else if (!same) {
 737                         tvp->status[i] = not_busy;      /* reset the others */
 738                     }
 739                 }
 740                 afs_PutVolume(tvp, READ_LOCK);
 741             }
 742         }
 743     } else if (acode >= ERROR_TABLE_BASE_VL && acode <= ERROR_TABLE_BASE_VL + 255) {    /* vlserver errors */
 744         shouldRetry = 0;
 745         areq->volumeError = VOLMISSING;
 746     } else if (acode >= 0) {
 747         if (aerrP)
 748             (aerrP->err_Other)++;
 749         if (op == AFS_STATS_FS_RPCIDX_STOREDATA)
 750             areq->permWriteError = 1;
 751         shouldRetry = 0;        /* Other random Vice error. */
 752     } else if (acode == RX_MSGSIZE) {   /* same meaning as EMSGSIZE... */
 753         afs_warnuser
 754             ("afs: Path MTU may have been exceeded, retrying (server %d.%d.%d.%d)\n",
 755              (address >> 24), (address >> 16) & 0xff,
 756              (address >> 8) & 0xff, (address) & 0xff);
 757
 758         VSleep(1);              /* Just a hack for desperate times. */
 759         if (aerrP)
 760             (aerrP->err_Other)++;
 761         shouldRetry = 1;        /* packet was too big, please retry call */
 762     }
 763
 764     if (acode < 0 && acode != RX_MSGSIZE && acode != VRESTARTING) {
 765         /* If we get here, code < 0 and we have network/Server troubles.
 766          * areq->networkError is not set here, since we always
 767          * retry in case there is another server.  However, if we find
 768          * no connection (aconn == 0) we set the networkError flag.
 769          */
 770         afs_ServerDown(sa, acode);
 771         if (aerrP)
 772             (aerrP->err_Server)++;
 773         VSleep(1);              /* Just a hack for desperate times. */
 774         shouldRetry = 1;
 775     }
 776 out:
 777     /* now unlock the connection and return */
 778     afs_PutConn(aconn, rxconn, locktype);
 779     return (shouldRetry);
 780 }                               /*afs_Analyze */