src/afs/VNOPS/afs_vnop_lookup.c

   1 /*
   2  * Copyright 2000, International Business Machines Corporation and others.
   3  * All Rights Reserved.
   4  *
   5  * This software has been released under the terms of the IBM Public
   6  * License.  For details, see the LICENSE file in the top-level source
   7  * directory or online at http://www.openafs.org/dl/license10.html
   8  */
   9
  10 /*
  11  * Implements:
  12  * afs_lookup
  13  * EvalMountPoint
  14  * afs_DoBulkStat
  15  */
  16
  17 #include <afsconfig.h>
  18 #include "afs/param.h"
  19
  20 RCSID
  21     ("$Header$");
  22
  23 #include "afs/sysincludes.h"    /* Standard vendor system headers */
  24 #include "afsincludes.h"        /* Afs-based standard headers */
  25 #include "afs/afs_stats.h"      /* statistics */
  26 #include "afs/afs_cbqueue.h"
  27 #include "afs/nfsclient.h"
  28 #include "afs/exporter.h"
  29 #include "afs/afs_osidnlc.h"
  30
  31
  32 extern struct DirEntry *afs_dir_GetBlob();
  33
  34 #ifdef AFS_LINUX22_ENV
  35 extern struct inode_operations afs_symlink_iops, afs_dir_iops;
  36 #endif
  37
  38
/* Consulted by EvalMountPoint: when non-zero, a regular mount point that is
 * crossed from a backup volume within the same cell prefers the BK volume. */
afs_int32 afs_bkvolpref = 0;
/* NOTE(review): not referenced in the visible part of this file; presumably a
 * count of completed bulk stat calls -- confirm against its users. */
afs_int32 afs_bulkStatsDone;
static int bulkStatCounter = 0; /* counter for bulk stat seq. numbers */
int afs_fakestat_enable = 0;    /* 1: fakestat-all, 2: fakestat-crosscell */
  43
  44
  45 /* this would be faster if it did comparison as int32word, but would be
  46  * dependant on byte-order and alignment, and I haven't figured out
  47  * what "@sys" is in binary... */
  48 #define AFS_EQ_ATSYS(name) (((name)[0]=='@')&&((name)[1]=='s')&&((name)[2]=='y')&&((name)[3]=='s')&&(!(name)[4]))
  49
  50 /* call under write lock, evaluate mvid field from a mt pt.
  51  * avc is the vnode of the mount point object; must be write-locked.
  52  * advc is the vnode of the containing directory (optional; if NULL and
  53  *   EvalMountPoint succeeds, caller must initialize *avolpp->dotdot)
  54  * avolpp is where we return a pointer to the volume named by the mount pt, if success
  55  * areq is the identity of the caller.
  56  *
  57  * NOTE: this function returns a held volume structure in *volpp if it returns 0!
  58  */
  59 int
  60 EvalMountPoint(register struct vcache *avc, struct vcache *advc,
  61                struct volume **avolpp, register struct vrequest *areq)
  62 {
  63     afs_int32 code;
  64     struct volume *tvp = 0;
  65     struct VenusFid tfid;
  66     struct cell *tcell;
  67     char *cpos, *volnamep;
  68     char type, *buf;
  69     afs_int32 prefetch;         /* 1=>None  2=>RO  3=>BK */
  70     afs_int32 mtptCell, assocCell, hac = 0;
  71     afs_int32 samecell, roname, len;
  72
  73     AFS_STATCNT(EvalMountPoint);
  74 #ifdef notdef
  75     if (avc->mvid && (avc->states & CMValid))
  76         return 0;               /* done while racing */
  77 #endif
  78     *avolpp = NULL;
  79     code = afs_HandleLink(avc, areq);
  80     if (code)
  81         return code;
  82
  83     /* Determine which cell and volume the mointpoint goes to */
  84     type = avc->linkData[0];    /* '#'=>Regular '%'=>RW */
  85     cpos = afs_strchr(&avc->linkData[1], ':');  /* if cell name present */
  86     if (cpos) {
  87         volnamep = cpos + 1;
  88         *cpos = 0;
  89         tcell = afs_GetCellByName(&avc->linkData[1], READ_LOCK);
  90         *cpos = ':';
  91     } else {
  92         volnamep = &avc->linkData[1];
  93         tcell = afs_GetCell(avc->fid.Cell, READ_LOCK);
  94     }
  95     if (!tcell)
  96         return ENODEV;
  97
  98     mtptCell = tcell->cellNum;  /* The cell for the mountpoint */
  99     if (tcell->lcellp) {
 100         hac = 1;                /* has associated cell */
 101         assocCell = tcell->lcellp->cellNum;     /* The associated cell */
 102     }
 103     afs_PutCell(tcell, READ_LOCK);
 104
 105     /* Is volume name a "<n>.backup" or "<n>.readonly" name */
 106     len = strlen(volnamep);
 107     roname = ((len > 9) && (strcmp(&volnamep[len - 9], ".readonly") == 0))
 108         || ((len > 7) && (strcmp(&volnamep[len - 7], ".backup") == 0));
 109
 110     /* When we cross mountpoint, do we stay in the same cell */
 111     samecell = (avc->fid.Cell == mtptCell) || (hac
 112                                                && (avc->fid.Cell ==
 113                                                    assocCell));
 114
 115     /* Decide whether to prefetch the BK, or RO.  Also means we want the BK or
 116      * RO.
 117      * If this is a regular mountpoint with a RW volume name
 118      * - If BK preference is enabled AND we remain within the same cell AND
 119      *   start from a BK volume, then we will want to prefetch the BK volume.
 120      * - If we cross a cell boundary OR start from a RO volume, then we will
 121      *   want to prefetch the RO volume.
 122      */
 123     if ((type == '#') && !roname) {
 124         if (afs_bkvolpref && samecell && (avc->states & CBackup))
 125             prefetch = 3;       /* Prefetch the BK */
 126         else if (!samecell || (avc->states & CRO))
 127             prefetch = 2;       /* Prefetch the RO */
 128         else
 129             prefetch = 1;       /* Do not prefetch */
 130     } else {
 131         prefetch = 1;           /* Do not prefetch */
 132     }
 133
 134     /* Get the volume struct. Unless this volume name has ".readonly" or
 135      * ".backup" in it, this will get the volume struct for the RW volume.
 136      * The RO volume will be prefetched if requested (but not returned).
 137      */
 138     tvp = afs_GetVolumeByName(volnamep, mtptCell, prefetch, areq, WRITE_LOCK);
 139
 140     /* If no volume was found in this cell, try the associated linked cell */
 141     if (!tvp && hac && areq->volumeError) {
 142         tvp =
 143             afs_GetVolumeByName(volnamep, assocCell, prefetch, areq,
 144                                 WRITE_LOCK);
 145     }
 146
 147     /* Still not found. If we are looking for the RO, then perhaps the RW
 148      * doesn't exist? Try adding ".readonly" to volname and look for that.
 149      * Don't know why we do this. Would have still found it in above call - jpm.
 150      */
 151     if (!tvp && (prefetch == 2) && len < AFS_SMALLOCSIZ - 10) {
 152         buf = (char *)osi_AllocSmallSpace(len + 10);
 153
 154         strcpy(buf, volnamep);
 155         afs_strcat(buf, ".readonly");
 156
 157         tvp = afs_GetVolumeByName(buf, mtptCell, 1, areq, WRITE_LOCK);
 158
 159         /* Try the associated linked cell if failed */
 160         if (!tvp && hac && areq->volumeError) {
 161             tvp = afs_GetVolumeByName(buf, assocCell, 1, areq, WRITE_LOCK);
 162         }
 163         osi_FreeSmallSpace(buf);
 164     }
 165
 166     if (!tvp)
 167         return ENODEV;          /* Couldn't find the volume */
 168
 169     /* Don't cross mountpoint from a BK to a BK volume */
 170     if ((avc->states & CBackup) && (tvp->states & VBackup)) {
 171         afs_PutVolume(tvp, WRITE_LOCK);
 172         return ENODEV;
 173     }
 174
 175     /* If we want (prefetched) the BK and it exists, then drop the RW volume
 176      * and get the BK.
 177      * Otherwise, if we want (prefetched0 the RO and it exists, then drop the
 178      * RW volume and get the RO.
 179      * Otherwise, go with the RW.
 180      */
 181     if ((prefetch == 3) && tvp->backVol) {
 182         tfid.Fid.Volume = tvp->backVol; /* remember BK volume */
 183         tfid.Cell = tvp->cell;
 184         afs_PutVolume(tvp, WRITE_LOCK); /* release old volume */
 185         tvp = afs_GetVolume(&tfid, areq, WRITE_LOCK);   /* get the new one */
 186         if (!tvp)
 187             return ENODEV;      /* oops, can't do it */
 188     } else if ((prefetch >= 2) && tvp->roVol) {
 189         tfid.Fid.Volume = tvp->roVol;   /* remember RO volume */
 190         tfid.Cell = tvp->cell;
 191         afs_PutVolume(tvp, WRITE_LOCK); /* release old volume */
 192         tvp = afs_GetVolume(&tfid, areq, WRITE_LOCK);   /* get the new one */
 193         if (!tvp)
 194             return ENODEV;      /* oops, can't do it */
 195     }
 196
 197     if (avc->mvid == 0)
 198         avc->mvid =
 199             (struct VenusFid *)osi_AllocSmallSpace(sizeof(struct VenusFid));
 200     avc->mvid->Cell = tvp->cell;
 201     avc->mvid->Fid.Volume = tvp->volume;
 202     avc->mvid->Fid.Vnode = 1;
 203     avc->mvid->Fid.Unique = 1;
 204     avc->states |= CMValid;
 205
 206     /* Used to: if the mount point is stored within a backup volume,
 207      * then we should only update the parent pointer information if
 208      * there's none already set, so as to avoid updating a volume's ..
 209      * info with something in an OldFiles directory.
 210      *
 211      * Next two lines used to be under this if:
 212      *
 213      * if (!(avc->states & CBackup) || tvp->dotdot.Fid.Volume == 0)
 214      *
 215      * Now: update mount point back pointer on every call, so that we handle
 216      * multiple mount points better.  This way, when du tries to go back
 217      * via chddir(".."), it will end up exactly where it started, yet
 218      * cd'ing via a new path to a volume will reset the ".." pointer
 219      * to the new path.
 220      */
 221     tvp->mtpoint = avc->fid;    /* setup back pointer to mtpoint */
 222     if (advc)
 223         tvp->dotdot = advc->fid;
 224
 225     *avolpp = tvp;
 226     return 0;
 227 }
 228
 229 /*
 230  * afs_InitFakeStat
 231  *
 232  * Must be called on an afs_fakestat_state object before calling
 233  * afs_EvalFakeStat or afs_PutFakeStat.  Calling afs_PutFakeStat
 234  * without calling afs_EvalFakeStat is legal, as long as this
 235  * function is called.
 236  */
 237 void
 238 afs_InitFakeStat(struct afs_fakestat_state *state)
 239 {
 240     if (!afs_fakestat_enable)
 241         return;
 242
 243     state->valid = 1;
 244     state->did_eval = 0;
 245     state->need_release = 0;
 246 }
 247
 248 /*
 249  * afs_EvalFakeStat_int
 250  *
 251  * The actual implementation of afs_EvalFakeStat and afs_TryEvalFakeStat,
 252  * which is called by those wrapper functions.
 253  *
 254  * Only issues RPCs if canblock is non-zero.
 255  */
 256 int
 257 afs_EvalFakeStat_int(struct vcache **avcp, struct afs_fakestat_state *state,
 258                      struct vrequest *areq, int canblock)
 259 {
 260     struct vcache *tvc, *root_vp;
 261     struct volume *tvolp = NULL;
 262     int code = 0;
 263
 264     if (!afs_fakestat_enable)
 265         return 0;
 266
 267     osi_Assert(state->valid == 1);
 268     osi_Assert(state->did_eval == 0);
 269     state->did_eval = 1;
 270
 271     tvc = *avcp;
 272     if (tvc->mvstat != 1)
 273         return 0;
 274
 275     /* Is the call to VerifyVCache really necessary? */
 276     code = afs_VerifyVCache(tvc, areq);
 277     if (code)
 278         goto done;
 279     if (canblock) {
 280         ObtainWriteLock(&tvc->lock, 599);
 281         code = EvalMountPoint(tvc, NULL, &tvolp, areq);
 282         ReleaseWriteLock(&tvc->lock);
 283         if (code)
 284             goto done;
 285         if (tvolp) {
 286             tvolp->dotdot = tvc->fid;
 287             tvolp->dotdot.Fid.Vnode = tvc->parentVnode;
 288             tvolp->dotdot.Fid.Unique = tvc->parentUnique;
 289         }
 290     }
 291     if (tvc->mvid && (tvc->states & CMValid)) {
 292         if (!canblock) {
 293             afs_int32 retry;
 294
 295             do {
 296                 retry = 0;
 297                 ObtainWriteLock(&afs_xvcache, 597);
 298                 root_vp = afs_FindVCache(tvc->mvid, &retry, 0);
 299                 if (root_vp && retry) {
 300                     ReleaseWriteLock(&afs_xvcache);
 301                     afs_PutVCache(root_vp);
 302                 }
 303             } while (root_vp && retry);
 304             ReleaseWriteLock(&afs_xvcache);
 305         } else {
 306             root_vp = afs_GetVCache(tvc->mvid, areq, NULL, NULL);
 307         }
 308         if (!root_vp) {
 309             code = canblock ? ENOENT : 0;
 310             goto done;
 311         }
 312         if (tvolp) {
 313             /* Is this always kosher?  Perhaps we should instead use
 314              * NBObtainWriteLock to avoid potential deadlock.
 315              */
 316             ObtainWriteLock(&root_vp->lock, 598);
 317             if (!root_vp->mvid)
 318                 root_vp->mvid = osi_AllocSmallSpace(sizeof(struct VenusFid));
 319             *root_vp->mvid = tvolp->dotdot;
 320             ReleaseWriteLock(&root_vp->lock);
 321         }
 322         state->need_release = 1;
 323         state->root_vp = root_vp;
 324         *avcp = root_vp;
 325         code = 0;
 326     } else {
 327         code = canblock ? ENOENT : 0;
 328     }
 329
 330   done:
 331     if (tvolp)
 332         afs_PutVolume(tvolp, WRITE_LOCK);
 333     return code;
 334 }
 335
 336 /*
 337  * afs_EvalFakeStat
 338  *
 339  * Automatically does the equivalent of EvalMountPoint for vcache entries
 340  * which are mount points.  Remembers enough state to properly release
 341  * the volume root vcache when afs_PutFakeStat() is called.
 342  *
 343  * State variable must be initialized by afs_InitFakeState() beforehand.
 344  *
 345  * Returns 0 when everything succeeds and *avcp points to the vcache entry
 346  * that should be used for the real vnode operation.  Returns non-zero if
 347  * something goes wrong and the error code should be returned to the user.
 348  */
 349 int
 350 afs_EvalFakeStat(struct vcache **avcp, struct afs_fakestat_state *state,
 351                  struct vrequest *areq)
 352 {
 353     return afs_EvalFakeStat_int(avcp, state, areq, 1);
 354 }
 355
 356 /*
 357  * afs_TryEvalFakeStat
 358  *
 359  * Same as afs_EvalFakeStat, but tries not to talk to remote servers
 360  * and only evaluate the mount point if all the data is already in
 361  * local caches.
 362  *
 363  * Returns 0 if everything succeeds and *avcp points to a valid
 364  * vcache entry (possibly evaluated).
 365  */
 366 int
 367 afs_TryEvalFakeStat(struct vcache **avcp, struct afs_fakestat_state *state,
 368                     struct vrequest *areq)
 369 {
 370     return afs_EvalFakeStat_int(avcp, state, areq, 0);
 371 }
 372
 373 /*
 374  * afs_PutFakeStat
 375  *
 376  * Perform any necessary cleanup at the end of a vnode op, given that
 377  * afs_InitFakeStat was previously called with this state.
 378  */
 379 void
 380 afs_PutFakeStat(struct afs_fakestat_state *state)
 381 {
 382     if (!afs_fakestat_enable)
 383         return;
 384
 385     osi_Assert(state->valid == 1);
 386     if (state->need_release)
 387         afs_PutVCache(state->root_vp);
 388     state->valid = 0;
 389 }
 390
 391 int
 392 afs_ENameOK(register char *aname)
 393 {
 394     register int tlen;
 395
 396     AFS_STATCNT(ENameOK);
 397     tlen = strlen(aname);
 398     if (tlen >= 4 && strcmp(aname + tlen - 4, "@sys") == 0)
 399         return 0;
 400     return 1;
 401 }
 402
 403 static int
 404 afs_getsysname(register struct vrequest *areq, register struct vcache *adp,
 405                register char *bufp, int *num, char **sysnamelist[])
 406 {
 407     register struct unixuser *au;
 408     register afs_int32 error;
 409
 410     AFS_STATCNT(getsysname);
 411
 412     *sysnamelist = afs_sysnamelist;
 413
 414     if (!afs_nfsexporter)
 415         strcpy(bufp, (*sysnamelist)[0]);
 416     else {
 417         au = afs_GetUser(areq->uid, adp->fid.Cell, 0);
 418         if (au->exporter) {
 419             error = EXP_SYSNAME(au->exporter, (char *)0, sysnamelist, num);
 420             if (error) {
 421                 strcpy(bufp, "@sys");
 422                 afs_PutUser(au, 0);
 423                 return -1;
 424             } else {
 425                 strcpy(bufp, (*sysnamelist)[0]);
 426             }
 427         } else
 428             strcpy(bufp, afs_sysname);
 429         afs_PutUser(au, 0);
 430     }
 431     return 0;
 432 }
 433
 434 void
 435 Check_AtSys(register struct vcache *avc, char *aname,
 436             struct sysname_info *state, struct vrequest *areq)
 437 {
 438     int num = 0;
 439     char **sysnamelist[MAXSYSNAME];
 440
 441     if (AFS_EQ_ATSYS(aname)) {
 442         state->offset = 0;
 443         state->name = (char *)osi_AllocLargeSpace(AFS_SMALLOCSIZ);
 444         state->allocked = 1;
 445         state->index =
 446             afs_getsysname(areq, avc, state->name, &num, sysnamelist);
 447     } else {
 448         state->offset = -1;
 449         state->allocked = 0;
 450         state->index = 0;
 451         state->name = aname;
 452     }
 453 }
 454
 455 int
 456 Next_AtSys(register struct vcache *avc, struct vrequest *areq,
 457            struct sysname_info *state)
 458 {
 459     int num = afs_sysnamecount;
 460     char **sysnamelist[MAXSYSNAME];
 461
 462     if (state->index == -1)
 463         return 0;               /* No list */
 464
 465     /* Check for the initial state of aname != "@sys" in Check_AtSys */
 466     if (state->offset == -1 && state->allocked == 0) {
 467         register char *tname;
 468
 469         /* Check for .*@sys */
 470         for (tname = state->name; *tname; tname++)
 471             /*Move to the end of the string */ ;
 472
 473         if ((tname > state->name + 4) && (AFS_EQ_ATSYS(tname - 4))) {
 474             state->offset = (tname - 4) - state->name;
 475             tname = (char *)osi_AllocLargeSpace(AFS_LRALLOCSIZ);
 476             strncpy(tname, state->name, state->offset);
 477             state->name = tname;
 478             state->allocked = 1;
 479             num = 0;
 480             state->index =
 481                 afs_getsysname(areq, avc, state->name + state->offset, &num,
 482                                sysnamelist);
 483             return 1;
 484         } else
 485             return 0;           /* .*@sys doesn't match either */
 486     } else {
 487         register struct unixuser *au;
 488         register afs_int32 error;
 489
 490         *sysnamelist = afs_sysnamelist;
 491
 492         if (afs_nfsexporter) {
 493             au = afs_GetUser(areq->uid, avc->fid.Cell, 0);
 494             if (au->exporter) {
 495                 error =
 496                     EXP_SYSNAME(au->exporter, (char *)0, sysnamelist, num);
 497                 if (error) {
 498                     return 0;
 499                 }
 500             }
 501             afs_PutUser(au, 0);
 502         }
 503         if (++(state->index) >= num || !(*sysnamelist)[state->index])
 504             return 0;           /* end of list */
 505     }
 506     strcpy(state->name + state->offset, (*sysnamelist)[state->index]);
 507     return 1;
 508 }
 509
 510 #if (defined(AFS_SGI62_ENV) || defined(AFS_SUN57_64BIT_ENV))
 511 extern int BlobScan(ino64_t * afile, afs_int32 ablob);
 512 #else
 513 #if defined(AFS_HPUX1123_ENV)
 514 /* DEE should use the new afs_inode_t  for all */
 515 extern int BlobScan(ino_t * afile, afs_int32 ablob);
 516 #else
 517 #if defined AFS_LINUX_64BIT_KERNEL
 518 extern int BlobScan(long *afile, afs_int32 ablob);
 519 #else
 520 extern int BlobScan(afs_int32 * afile, afs_int32 ablob);
 521 #endif
 522 #endif
 523 #endif
 524
 525
 526 /* called with an unlocked directory and directory cookie.  Areqp
 527  * describes who is making the call.
 528  * Scans the next N (about 30, typically) directory entries, and does
 529  * a bulk stat call to stat them all.
 530  *
 * Must be very careful when merging in RPC responses, since we don't
 * want to overwrite newer info that was added by a file-system-mutating
 * call that ran concurrently with our bulk stat call.
 534  *
 535  * We do that, as described below, by not merging in our info (always
 536  * safe to skip the merge) if the status info is valid in the vcache entry.
 537  *
 538  * If adapt ever implements the bulk stat RPC, then this code will need to
 539  * ensure that vcaches created for failed RPC's to older servers have the
 540  * CForeign bit set.
 541  */
/* NOTE(review): file-scope vcache pointer, initially NULL.  It is not
 * referenced in the visible portion of this file -- confirm whether it is
 * still used before relying on it. */
static struct vcache *BStvc = NULL;
 543
 544 int
 545 afs_DoBulkStat(struct vcache *adp, long dirCookie, struct vrequest *areqp)
 546 {
 547     int nentries;               /* # of entries to prefetch */
 548     int nskip;                  /* # of slots in the LRU queue to skip */
 549     struct vcache *lruvcp;      /* vcache ptr of our goal pos in LRU queue */
 550     struct dcache *dcp;         /* chunk containing the dir block */
 551     char *statMemp;             /* status memory block */
 552     char *cbfMemp;              /* callback and fid memory block */
 553     afs_size_t temp;            /* temp for holding chunk length, &c. */
 554     struct AFSFid *fidsp;       /* file IDs were collecting */
 555     struct AFSCallBack *cbsp;   /* call back pointers */
 556     struct AFSCallBack *tcbp;   /* temp callback ptr */
 557     struct AFSFetchStatus *statsp;      /* file status info */
 558     struct AFSVolSync volSync;  /* vol sync return info */
 559     struct vcache *tvcp;        /* temp vcp */
 560     struct afs_q *tq;           /* temp queue variable */
 561     AFSCBFids fidParm;          /* file ID parm for bulk stat */
 562     AFSBulkStats statParm;      /* stat info parm for bulk stat */
 563     int fidIndex;               /* which file were stating */
 564     struct conn *tcp;           /* conn for call */
 565     AFSCBs cbParm;              /* callback parm for bulk stat */
 566     struct server *hostp = 0;   /* host we got callback from */
 567     long startTime;             /* time we started the call,
 568                                  * for callback expiration base
 569                                  */
 570     afs_size_t statSeqNo;       /* Valued of file size to detect races */
 571     int code;                   /* error code */
 572     long newIndex;              /* new index in the dir */
 573     struct DirEntry *dirEntryp; /* dir entry we are examining */
 574     int i;
 575     struct VenusFid afid;       /* file ID we are using now */
 576     struct VenusFid tfid;       /* another temp. file ID */
 577     afs_int32 retry;            /* handle low-level SGI MP race conditions */
 578     long volStates;             /* flags from vol structure */
 579     struct volume *volp = 0;    /* volume ptr */
 580     struct VenusFid dotdot;
 581     int flagIndex;              /* First file with bulk fetch flag set */
 582     int inlinebulk = 0;         /* Did we use InlineBulk RPC or not? */
 583     XSTATS_DECLS;
 584     /* first compute some basic parameters.  We dont want to prefetch more
 585      * than a fraction of the cache in any given call, and we want to preserve
 586      * a portion of the LRU queue in any event, so as to avoid thrashing
 587      * the entire stat cache (we will at least leave some of it alone).
 588      * presently dont stat more than 1/8 the cache in any one call.      */
 589     nentries = afs_cacheStats / 8;
 590
 591     /* dont bother prefetching more than one calls worth of info */
 592     if (nentries > AFSCBMAX)
 593         nentries = AFSCBMAX;
 594
 595     /* heuristic to make sure that things fit in 4K.  This means that
 596      * we shouldnt make it any bigger than 47 entries.  I am typically
 597      * going to keep it a little lower, since we don't want to load
 598      * too much of the stat cache.
 599      */
 600     if (nentries > 30)
 601         nentries = 30;
 602
 603     /* now, to reduce the stack size, well allocate two 4K blocks,
 604      * one for fids and callbacks, and one for stat info.  Well set
 605      * up our pointers to the memory from there, too.
 606      */
 607     statMemp = osi_AllocLargeSpace(nentries * sizeof(AFSFetchStatus));
 608     statsp = (struct AFSFetchStatus *)statMemp;
 609     cbfMemp =
 610         osi_AllocLargeSpace(nentries *
 611                             (sizeof(AFSCallBack) + sizeof(AFSFid)));
 612     fidsp = (AFSFid *) cbfMemp;
 613     cbsp = (AFSCallBack *) (cbfMemp + nentries * sizeof(AFSFid));
 614
 615     /* next, we must iterate over the directory, starting from the specified
 616      * cookie offset (dirCookie), and counting out nentries file entries.
 617      * We skip files that already have stat cache entries, since we
 618      * dont want to bulk stat files that are already in the cache.
 619      */
 620   tagain:
 621     code = afs_VerifyVCache(adp, areqp);
 622     if (code)
 623         goto done;
 624
 625     dcp = afs_GetDCache(adp, (afs_size_t) 0, areqp, &temp, &temp, 1);
 626     if (!dcp) {
 627         code = ENOENT;
 628         goto done;
 629     }
 630
 631     /* lock the directory cache entry */
 632     ObtainReadLock(&adp->lock);
 633     ObtainReadLock(&dcp->lock);
 634
 635     /*
 636      * Make sure that the data in the cache is current. There are two
 637      * cases we need to worry about:
 638      * 1. The cache data is being fetched by another process.
 639      * 2. The cache data is no longer valid
 640      */
 641     while ((adp->states & CStatd)
 642            && (dcp->dflags & DFFetching)
 643            && hsame(adp->m.DataVersion, dcp->f.versionNo)) {
 644         afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING,
 645                    __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER, dcp,
 646                    ICL_TYPE_INT32, dcp->dflags);
 647         ReleaseReadLock(&dcp->lock);
 648         ReleaseReadLock(&adp->lock);
 649         afs_osi_Sleep(&dcp->validPos);
 650         ObtainReadLock(&adp->lock);
 651         ObtainReadLock(&dcp->lock);
 652     }
 653     if (!(adp->states & CStatd)
 654         || !hsame(adp->m.DataVersion, dcp->f.versionNo)) {
 655         ReleaseReadLock(&dcp->lock);
 656         ReleaseReadLock(&adp->lock);
 657         afs_PutDCache(dcp);
 658         goto tagain;
 659     }
 660
 661     /* Generate a sequence number so we can tell whether we should
 662      * store the attributes when processing the response. This number is
 663      * stored in the file size when we set the CBulkFetching bit. If the
 664      * CBulkFetching is still set and this value hasn't changed, then
 665      * we know we were the last to set CBulkFetching bit for this file,
 666      * and it is safe to set the status information for this file.
 667      */
 668     statSeqNo = bulkStatCounter++;
 669
 670     /* now we have dir data in the cache, so scan the dir page */
 671     fidIndex = 0;
 672     flagIndex = 0;
 673     while (1) {                 /* Should probably have some constant bound */
 674         /* look for first safe entry to examine in the directory.  BlobScan
 675          * looks for a the 1st allocated dir after the dirCookie slot.
 676          */
 677         newIndex = BlobScan(&dcp->f.inode, (dirCookie >> 5));
 678         if (newIndex == 0)
 679             break;
 680
 681         /* remember the updated directory cookie */
 682         dirCookie = newIndex << 5;
 683
 684         /* get a ptr to the dir entry */
 685         dirEntryp =
 686             (struct DirEntry *)afs_dir_GetBlob(&dcp->f.inode, newIndex);
 687         if (!dirEntryp)
 688             break;
 689
 690         /* dont copy more than we have room for */
 691         if (fidIndex >= nentries) {
 692             DRelease((struct buffer *)dirEntryp, 0);
 693             break;
 694         }
 695
 696         /* now, if the dir entry looks good, copy it out to our list.  Vnode
 697          * 0 means deleted, although it should also be free were it deleted.
 698          */
 699         if (dirEntryp->fid.vnode != 0) {
 700             /* dont copy entries we have in our cache.  This check will
 701              * also make us skip "." and probably "..", unless it has
 702              * disappeared from the cache since we did our namei call.
 703              */
 704             tfid.Cell = adp->fid.Cell;
 705             tfid.Fid.Volume = adp->fid.Fid.Volume;
 706             tfid.Fid.Vnode = ntohl(dirEntryp->fid.vnode);
 707             tfid.Fid.Unique = ntohl(dirEntryp->fid.vunique);
 708             do {
 709                 retry = 0;
 710                 ObtainWriteLock(&afs_xvcache, 130);
 711                 tvcp = afs_FindVCache(&tfid, &retry, 0 /* no stats | LRU */ );
 712                 if (tvcp && retry) {
 713                     ReleaseWriteLock(&afs_xvcache);
 714                     afs_PutVCache(tvcp);
 715                 }
 716             } while (tvcp && retry);
 717             if (!tvcp) {        /* otherwise, create manually */
 718                 tvcp = afs_NewVCache(&tfid, hostp);
 719                 ObtainWriteLock(&tvcp->lock, 505);
 720                 ReleaseWriteLock(&afs_xvcache);
 721                 afs_RemoveVCB(&tfid);
 722                 ReleaseWriteLock(&tvcp->lock);
 723             } else {
 724                 ReleaseWriteLock(&afs_xvcache);
 725             }
 726             if (!tvcp)
 727                 goto done;      /* can't happen at present, more's the pity */
 728
 729             /* WARNING: afs_DoBulkStat uses the Length field to store a
 730              * sequence number for each bulk status request. Under no
 731              * circumstances should afs_DoBulkStat store a sequence number
 732              * if the new length will be ignored when afs_ProcessFS is
 733              * called with new stats. */
 734 #ifdef AFS_SGI_ENV
 735             if (!(tvcp->states & (CStatd | CBulkFetching))
 736                 && (tvcp->execsOrWriters <= 0)
 737                 && !afs_DirtyPages(tvcp)
 738                 && !AFS_VN_MAPPED((vnode_t *) tvcp))
 739 #else
 740             if (!(tvcp->states & (CStatd | CBulkFetching))
 741                 && (tvcp->execsOrWriters <= 0)
 742                 && !afs_DirtyPages(tvcp))
 743 #endif
 744
 745             {
 746                 /* this entry doesnt exist in the cache, and is not
 747                  * already being fetched by someone else, so add it to the
 748                  * list of file IDs to obtain.
 749                  *
 750                  * We detect a callback breaking race condition by checking the
 751                  * CBulkFetching state bit and the value in the file size.
 752                  * It is safe to set the status only if the CBulkFetching
 753                  * flag is still set and the value in the file size does
 754                  * not change.
 755                  *
 756                  * Don't fetch status for dirty files. We need to
 757                  * preserve the value of the file size. We could
 758                  * flush the pages, but it wouldn't be worthwhile.
 759                  */
 760                 memcpy((char *)(fidsp + fidIndex), (char *)&tfid.Fid,
 761                        sizeof(*fidsp));
 762                 tvcp->states |= CBulkFetching;
 763                 tvcp->m.Length = statSeqNo;
 764                 fidIndex++;
 765             }
 766             afs_PutVCache(tvcp);
 767         }
 768
 769         /* if dir vnode has non-zero entry */
 770         /* move to the next dir entry by adding in the # of entries
 771          * used by this dir entry.
 772          */
 773         temp = afs_dir_NameBlobs(dirEntryp->name) << 5;
 774         DRelease((struct buffer *)dirEntryp, 0);
 775         if (temp <= 0)
 776             break;
 777         dirCookie += temp;
 778     }                           /* while loop over all dir entries */
 779
 780     /* now release the dir lock and prepare to make the bulk RPC */
 781     ReleaseReadLock(&dcp->lock);
 782     ReleaseReadLock(&adp->lock);
 783
 784     /* release the chunk */
 785     afs_PutDCache(dcp);
 786
 787     /* dont make a null call */
 788     if (fidIndex == 0)
 789         goto done;
 790
 791     do {
 792         /* setup the RPC parm structures */
 793         fidParm.AFSCBFids_len = fidIndex;
 794         fidParm.AFSCBFids_val = fidsp;
 795         statParm.AFSBulkStats_len = fidIndex;
 796         statParm.AFSBulkStats_val = statsp;
 797         cbParm.AFSCBs_len = fidIndex;
 798         cbParm.AFSCBs_val = cbsp;
 799
 800         /* start the timer; callback expirations are relative to this */
 801         startTime = osi_Time();
 802
 803         tcp = afs_Conn(&adp->fid, areqp, SHARED_LOCK);
 804         if (tcp) {
 805             hostp = tcp->srvr->server;
 806             XSTATS_START_TIME(AFS_STATS_FS_RPCIDX_BULKSTATUS);
 807             RX_AFS_GUNLOCK();
 808
 809             if (!(tcp->srvr->server->flags & SNO_INLINEBULK)) {
 810                 code =
 811                     RXAFS_InlineBulkStatus(tcp->id, &fidParm, &statParm,
 812                                            &cbParm, &volSync);
 813                 if (code == RXGEN_OPCODE) {
 814                     tcp->srvr->server->flags |= SNO_INLINEBULK;
 815                     inlinebulk = 0;
 816                     code =
 817                         RXAFS_BulkStatus(tcp->id, &fidParm, &statParm,
 818                                          &cbParm, &volSync);
 819                 } else
 820                     inlinebulk = 1;
 821             } else {
 822                 inlinebulk = 0;
 823                 code =
 824                     RXAFS_BulkStatus(tcp->id, &fidParm, &statParm, &cbParm,
 825                                      &volSync);
 826             }
 827             RX_AFS_GLOCK();
 828             XSTATS_END_TIME;
 829         } else
 830             code = -1;
 831     } while (afs_Analyze
 832              (tcp, code, &adp->fid, areqp, AFS_STATS_FS_RPCIDX_BULKSTATUS,
 833               SHARED_LOCK, NULL));
 834
 835     /* now, if we didnt get the info, bail out. */
 836     if (code)
 837         goto done;
 838
 839     /* we need vol flags to create the entries properly */
 840     dotdot.Fid.Volume = 0;
 841     volp = afs_GetVolume(&adp->fid, areqp, READ_LOCK);
 842     if (volp) {
 843         volStates = volp->states;
 844         if (volp->dotdot.Fid.Volume != 0)
 845             dotdot = volp->dotdot;
 846     } else
 847         volStates = 0;
 848
 849     /* find the place to merge the info into  We do this by skipping
 850      * nskip entries in the LRU queue.  The more we skip, the more
 851      * we preserve, since the head of the VLRU queue is the most recently
 852      * referenced file.
 853      */
 854   reskip:
 855     nskip = afs_cacheStats / 2; /* preserved fraction of the cache */
 856     ObtainReadLock(&afs_xvcache);
 857     if (QEmpty(&VLRU)) {
 858         /* actually a serious error, probably should panic. Probably will
 859          * panic soon, oh well. */
 860         ReleaseReadLock(&afs_xvcache);
 861         afs_warnuser("afs_DoBulkStat: VLRU empty!");
 862         goto done;
 863     }
 864     if ((VLRU.next->prev != &VLRU) || (VLRU.prev->next != &VLRU)) {
 865         refpanic("Bulkstat VLRU inconsistent");
 866     }
 867     for (tq = VLRU.next; tq != &VLRU; tq = QNext(tq)) {
 868         if (--nskip <= 0)
 869             break;
 870         else if (QNext(QPrev(tq)) != tq) {
 871             BStvc = QTOV(tq);
 872             refpanic("BulkStat VLRU inconsistent");
 873         }
 874     }
 875     if (tq != &VLRU)
 876         lruvcp = QTOV(tq);
 877     else
 878         lruvcp = QTOV(VLRU.next);
 879
 880     /* now we have to hold this entry, so that it does not get moved
 881      * into the free list while we're running.  It could still get
 882      * moved within the lru queue, but hopefully that will be rare; it
 883      * doesn't hurt nearly as much.
 884      */
 885     retry = 0;
 886     osi_vnhold(lruvcp, &retry);
 887     ReleaseReadLock(&afs_xvcache);      /* could be read lock */
 888     if (retry)
 889         goto reskip;
 890
 891     /* otherwise, merge in the info.  We have to be quite careful here,
 892      * since we need to ensure that we don't merge old info over newer
 893      * stuff in a stat cache entry.  We're very conservative here: we don't
 894      * do the merge at all unless we ourselves create the stat cache
 895      * entry.  That's pretty safe, and should work pretty well, since we
 896      * typically expect to do the stat cache creation ourselves.
 897      *
 898      * We also have to take into account racing token revocations.
 899      */
 900     for (i = 0; i < fidIndex; i++) {
 901         if ((&statsp[i])->errorCode)
 902             continue;
 903         afid.Cell = adp->fid.Cell;
 904         afid.Fid.Volume = adp->fid.Fid.Volume;
 905         afid.Fid.Vnode = fidsp[i].Vnode;
 906         afid.Fid.Unique = fidsp[i].Unique;
 907         do {
 908             retry = 0;
 909             ObtainReadLock(&afs_xvcache);
 910             tvcp = afs_FindVCache(&afid, &retry, 0 /* !stats&!lru */ );
 911             ReleaseReadLock(&afs_xvcache);
 912         } while (tvcp && retry);
 913
 914         /* The entry may no longer exist */
 915         if (tvcp == NULL) {
 916             continue;
 917         }
 918
 919         /* now we have the entry held, but we need to fill it in */
 920         ObtainWriteLock(&tvcp->lock, 131);
 921
 922         /* if CBulkFetching is not set, or if the file size no longer
 923          * matches the value we placed there when we set the CBulkFetching
 924          * flag, then someone else has done something with this node,
 925          * and we may not have the latest status information for this
 926          * file.  Leave the entry alone.
 927          */
 928         if (!(tvcp->states & CBulkFetching) || (tvcp->m.Length != statSeqNo)) {
 929             flagIndex++;
 930             ReleaseWriteLock(&tvcp->lock);
 931             afs_PutVCache(tvcp);
 932             continue;
 933         }
 934
 935         /* now copy ".." entry back out of volume structure, if necessary */
 936         if (tvcp->mvstat == 2 && (dotdot.Fid.Volume != 0)) {
 937             if (!tvcp->mvid)
 938                 tvcp->mvid = (struct VenusFid *)
 939                     osi_AllocSmallSpace(sizeof(struct VenusFid));
 940             *tvcp->mvid = dotdot;
 941         }
 942
 943         ObtainWriteLock(&afs_xvcache, 132);
 944         if ((VLRU.next->prev != &VLRU) || (VLRU.prev->next != &VLRU)) {
 945             refpanic("Bulkstat VLRU inconsistent2");
 946         }
 947         if ((QNext(QPrev(&tvcp->vlruq)) != &tvcp->vlruq)
 948             || (QPrev(QNext(&tvcp->vlruq)) != &tvcp->vlruq)) {
 949             refpanic("Bulkstat VLRU inconsistent4");
 950         }
 951         if ((QNext(QPrev(&lruvcp->vlruq)) != &lruvcp->vlruq)
 952             || (QPrev(QNext(&lruvcp->vlruq)) != &lruvcp->vlruq)) {
 953             refpanic("Bulkstat VLRU inconsistent5");
 954         }
 955
 956         if (tvcp != lruvcp) {   /* if they are == don't move it, don't corrupt vlru */
 957             QRemove(&tvcp->vlruq);
 958             QAdd(&lruvcp->vlruq, &tvcp->vlruq);
 959         }
 960
 961         if ((VLRU.next->prev != &VLRU) || (VLRU.prev->next != &VLRU)) {
 962             refpanic("Bulkstat VLRU inconsistent3");
 963         }
 964         if ((QNext(QPrev(&tvcp->vlruq)) != &tvcp->vlruq)
 965             || (QPrev(QNext(&tvcp->vlruq)) != &tvcp->vlruq)) {
 966             refpanic("Bulkstat VLRU inconsistent5");
 967         }
 968         if ((QNext(QPrev(&lruvcp->vlruq)) != &lruvcp->vlruq)
 969             || (QPrev(QNext(&lruvcp->vlruq)) != &lruvcp->vlruq)) {
 970             refpanic("Bulkstat VLRU inconsistent6");
 971         }
 972         ReleaseWriteLock(&afs_xvcache);
 973
 974         ObtainWriteLock(&afs_xcbhash, 494);
 975
 976         /* We need to check the flags again. We may have missed
 977          * something while we were waiting for a lock.
 978          */
 979         if (!(tvcp->states & CBulkFetching) || (tvcp->m.Length != statSeqNo)) {
 980             flagIndex++;
 981             ReleaseWriteLock(&tvcp->lock);
 982             ReleaseWriteLock(&afs_xcbhash);
 983             afs_PutVCache(tvcp);
 984             continue;
 985         }
 986
 987         /* now merge in the resulting status back into the vnode.
 988          * We only do this if the entry looks clear.
 989          */
 990         afs_ProcessFS(tvcp, &statsp[i], areqp);
 991 #ifdef AFS_LINUX22_ENV
 992         /* overwrite the ops if it's a directory or symlink. */
 993         if (vType(tvcp) == VDIR)
 994             tvcp->v.v_op = &afs_dir_iops;
 995         else if (vType(tvcp) == VLNK)
 996             tvcp->v.v_op = &afs_symlink_iops;
 997 #endif
 998
 999         /* do some accounting for bulk stats: mark this entry as
1000          * loaded, so we can tell if we use it before it gets
1001          * recycled.
1002          */
1003         tvcp->states |= CBulkStat;
1004         tvcp->states &= ~CBulkFetching;
1005         flagIndex++;
1006         afs_bulkStatsDone++;
1007
1008         /* merge in vol info */
1009         if (volStates & VRO)
1010             tvcp->states |= CRO;
1011         if (volStates & VBackup)
1012             tvcp->states |= CBackup;
1013         if (volStates & VForeign)
1014             tvcp->states |= CForeign;
1015
1016         /* merge in the callback info */
1017         tvcp->states |= CTruth;
1018
1019         /* get ptr to the callback we are interested in */
1020         tcbp = cbsp + i;
1021
1022         if (tcbp->ExpirationTime != 0) {
1023             tvcp->cbExpires = tcbp->ExpirationTime + startTime;
1024             tvcp->callback = hostp;
1025             tvcp->states |= CStatd;
1026             afs_QueueCallback(tvcp, CBHash(tcbp->ExpirationTime), volp);
1027         } else if (tvcp->states & CRO) {
1028             /* ordinary callback on a read-only volume -- AFS 3.2 style */
1029             tvcp->cbExpires = 3600 + startTime;
1030             tvcp->callback = hostp;
1031             tvcp->states |= CStatd;
1032             afs_QueueCallback(tvcp, CBHash(3600), volp);
1033         } else {
1034             tvcp->callback = 0;
1035             tvcp->states &= ~(CStatd | CUnique);
1036             afs_DequeueCallback(tvcp);
1037             if ((tvcp->states & CForeign) || (vType(tvcp) == VDIR))
1038                 osi_dnlc_purgedp(tvcp); /* if it (could be) a directory */
1039         }
1040         ReleaseWriteLock(&afs_xcbhash);
1041
1042         ReleaseWriteLock(&tvcp->lock);
1043         /* finally, we're done with the entry */
1044         afs_PutVCache(tvcp);
1045     }                           /* for all files we got back */
1046
1047     /* finally return the pointer into the LRU queue */
1048     afs_PutVCache(lruvcp);
1049
1050   done:
1051     /* Be sure to turn off the CBulkFetching flags */
1052     for (i = flagIndex; i < fidIndex; i++) {
1053         afid.Cell = adp->fid.Cell;
1054         afid.Fid.Volume = adp->fid.Fid.Volume;
1055         afid.Fid.Vnode = fidsp[i].Vnode;
1056         afid.Fid.Unique = fidsp[i].Unique;
1057         do {
1058             retry = 0;
1059             ObtainReadLock(&afs_xvcache);
1060             tvcp = afs_FindVCache(&afid, &retry, 0 /* !stats&!lru */ );
1061             ReleaseReadLock(&afs_xvcache);
1062         } while (tvcp && retry);
1063         if (tvcp != NULL && (tvcp->states & CBulkFetching)
1064             && (tvcp->m.Length == statSeqNo)) {
1065             tvcp->states &= ~CBulkFetching;
1066         }
1067         if (tvcp != NULL) {
1068             afs_PutVCache(tvcp);
1069         }
1070     }
1071     if (volp)
1072         afs_PutVolume(volp, READ_LOCK);
1073
1074     /* If we did the InlineBulk RPC pull out the return code */
1075     if (inlinebulk) {
1076         if ((&statsp[0])->errorCode) {
1077             afs_Analyze(tcp, (&statsp[0])->errorCode, &adp->fid, areqp,
1078                         AFS_STATS_FS_RPCIDX_BULKSTATUS, SHARED_LOCK, NULL);
1079             code = (&statsp[0])->errorCode;
1080         }
1081     } else {
1082         code = 0;
1083     }
1084     osi_FreeLargeSpace(statMemp);
1085     osi_FreeLargeSpace(cbfMemp);
1086     return code;
1087 }
1088
 1089 /* was: (AFS_DEC_ENV) || defined(AFS_OSF30_ENV) || defined(AFS_NCR_ENV)
       *
       * File-local switch for bulk-stat prefetching: afs_lookup only attempts
       * its afs_DoBulkStat prefetch while this is non-zero (the directory
       * must additionally be open, non-foreign and not the dynroot). */
 1090 static int AFSDOBULK = 1;
1091
1092 int
1093 #ifdef  AFS_OSF_ENV
1094 afs_lookup(adp, ndp)
1095      struct vcache *adp;
1096      struct nameidata *ndp;
1097 {
1098     char aname[MAXNAMLEN + 1];  /* XXX */
1099     struct vcache **avcp = (struct vcache **)&(ndp->ni_vp);
1100     struct ucred *acred = ndp->ni_cred;
1101     int wantparent = ndp->ni_nameiop & WANTPARENT;
1102     int opflag = ndp->ni_nameiop & OPFLAG;
1103 #else                           /* AFS_OSF_ENV */
1104 #if     defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
1105 afs_lookup(OSI_VC_ARG(adp), aname, avcp, pnp, flags, rdir, acred)
1106      struct pathname *pnp;
1107      int flags;
1108      struct vnode *rdir;
1109 #else
1110 #if defined(UKERNEL)
1111 afs_lookup(adp, aname, avcp, acred, flags)
1112      int flags;
1113 #else
1114 afs_lookup(adp, aname, avcp, acred)
1115 #endif                          /* UKERNEL */
1116 #endif                          /* SUN5 || SGI */
1117      OSI_VC_DECL(adp);
1118      struct vcache **avcp;
1119      char *aname;
1120      struct AFS_UCRED *acred;
1121 {
1122 #endif
1123     struct vrequest treq;
1124     char *tname = NULL;
1125     register struct vcache *tvc = 0;
1126     register afs_int32 code;
1127     register afs_int32 bulkcode = 0;
1128     int pass = 0, hit = 0;
1129     long dirCookie;
1130     extern afs_int32 afs_mariner;       /*Writing activity to log? */
1131     afs_hyper_t versionNo;
1132     int no_read_access = 0;
1133     struct sysname_info sysState;       /* used only for @sys checking */
1134     int dynrootRetry = 1;
1135     struct afs_fakestat_state fakestate;
1136     int tryEvalOnly = 0;
1137     OSI_VC_CONVERT(adp);
1138
1139     AFS_STATCNT(afs_lookup);
1140     afs_InitFakeStat(&fakestate);
1141
1142     if ((code = afs_InitReq(&treq, acred)))
1143         goto done;
1144
1145 #ifdef  AFS_OSF_ENV
1146     ndp->ni_dvp = AFSTOV(adp);
1147     memcpy(aname, ndp->ni_ptr, ndp->ni_namelen);
1148     aname[ndp->ni_namelen] = '\0';
1149 #endif /* AFS_OSF_ENV */
1150
1151 #if defined(AFS_DARWIN_ENV)
1152     /* Workaround for MacOSX Finder, which tries to look for
1153      * .DS_Store and Contents under every directory.
1154      */
1155     if (afs_fakestat_enable && adp->mvstat == 1) {
1156         if (strcmp(aname, ".DS_Store") == 0)
1157             tryEvalOnly = 1;
1158         if (strcmp(aname, "Contents") == 0)
1159             tryEvalOnly = 1;
1160     }
1161 #endif
1162
1163     if (tryEvalOnly)
1164         code = afs_TryEvalFakeStat(&adp, &fakestate, &treq);
1165     else
1166         code = afs_EvalFakeStat(&adp, &fakestate, &treq);
1167     if (tryEvalOnly && adp->mvstat == 1)
1168         code = ENOENT;
1169     if (code)
1170         goto done;
1171
1172     *avcp = NULL;               /* Since some callers don't initialize it */
1173
1174     /* come back to here if we encounter a non-existent object in a read-only
1175      * volume's directory */
1176
1177   redo:
1178     *avcp = NULL;               /* Since some callers don't initialize it */
1179     bulkcode = 0;
1180
1181     if (!(adp->states & CStatd)) {
1182         if ((code = afs_VerifyVCache2(adp, &treq))) {
1183             goto done;
1184         }
1185     } else
1186         code = 0;
1187
1188     /* watch for ".." in a volume root */
1189     if (adp->mvstat == 2 && aname[0] == '.' && aname[1] == '.' && !aname[2]) {
1190         /* looking up ".." in root via special hacks */
1191         if (adp->mvid == (struct VenusFid *)0 || adp->mvid->Fid.Volume == 0) {
1192 #ifdef  AFS_OSF_ENV
1193             extern struct vcache *afs_globalVp;
1194             if (adp == afs_globalVp) {
1195                 struct vnode *rvp = AFSTOV(adp);
1196 /*
1197                 ndp->ni_vp = rvp->v_vfsp->vfs_vnodecovered;
1198                 ndp->ni_dvp = ndp->ni_vp;
1199                 VN_HOLD(*avcp);
1200 */
1201                 code = ENODEV;
1202                 goto done;
1203             }
1204 #endif
1205             code = ENODEV;
1206             goto done;
1207         }
1208         /* otherwise we have the fid here, so we use it */
1209         tvc = afs_GetVCache(adp->mvid, &treq, NULL, NULL);
1210         afs_Trace3(afs_iclSetp, CM_TRACE_GETVCDOTDOT, ICL_TYPE_FID, adp->mvid,
1211                    ICL_TYPE_POINTER, tvc, ICL_TYPE_INT32, code);
1212         *avcp = tvc;
1213         code = (tvc ? 0 : ENOENT);
1214         hit = 1;
1215         if (tvc && !VREFCOUNT(tvc)) {
1216             osi_Panic("TT1");
1217         }
1218         if (code) {
1219             /*printf("LOOKUP GETVCDOTDOT -> %d\n", code); */
1220         }
1221         goto done;
1222     }
1223
1224     /* now check the access */
1225     if (treq.uid != adp->last_looker) {
1226         if (!afs_AccessOK(adp, PRSFS_LOOKUP, &treq, CHECK_MODE_BITS)) {
1227             *avcp = NULL;
1228             code = EACCES;
1229             goto done;
1230         } else
1231             adp->last_looker = treq.uid;
1232     }
1233
1234     /* Check for read access as well.  We need read access in order to
1235      * stat files, but not to stat subdirectories. */
1236     if (!afs_AccessOK(adp, PRSFS_LOOKUP, &treq, CHECK_MODE_BITS))
1237         no_read_access = 1;
1238
1239     /* special case lookup of ".".  Can we check for it sooner in this code,
1240      * for instance, way up before "redo:" ??
1241      * I'm not fiddling with the LRUQ here, either, perhaps I should, or else
1242      * invent a lightweight version of GetVCache.
1243      */
1244     if (aname[0] == '.' && !aname[1]) { /* special case */
1245         ObtainReadLock(&afs_xvcache);
1246         osi_vnhold(adp, 0);
1247         ReleaseReadLock(&afs_xvcache);
1248         code = 0;
1249         *avcp = tvc = adp;
1250         hit = 1;
1251         if (adp && !VREFCOUNT(adp)) {
1252             osi_Panic("TT2");
1253         }
1254         goto done;
1255     }
1256
1257     Check_AtSys(adp, aname, &sysState, &treq);
1258     tname = sysState.name;
1259
1260     /* 1st Check_AtSys and lookup by tname is required here, for now,
1261      * because the dnlc is *not* told to remove entries for the parent
1262      * dir of file/dir op that afs_LocalHero likes, but dnlc is informed
1263      * if the cached entry for the parent dir is invalidated for a
1264      * non-local change.
1265      * Otherwise, we'd be able to do a dnlc lookup on an entry ending
1266      * w/@sys and know the dnlc was consistent with reality. */
1267     tvc = osi_dnlc_lookup(adp, tname, WRITE_LOCK);
1268     *avcp = tvc;                /* maybe wasn't initialized, but it is now */
1269     if (tvc) {
1270         if (no_read_access && vType(tvc) != VDIR && vType(tvc) != VLNK) {
1271             /* need read access on dir to stat non-directory / non-link */
1272             afs_PutVCache(tvc);
1273             *avcp = NULL;
1274             code = EACCES;
1275             goto done;
1276         }
1277 #ifdef AFS_LINUX22_ENV
1278         if (tvc->mvstat == 2) { /* we don't trust the dnlc for root vcaches */
1279             AFS_RELE(tvc);
1280             *avcp = 0;
1281         } else {
1282             code = 0;
1283             hit = 1;
1284             goto done;
1285         }
1286 #else /* non - LINUX */
1287         code = 0;
1288         hit = 1;
1289         goto done;
1290 #endif /* linux22 */
1291     }
1292
1293     {                           /* sub-block just to reduce stack usage */
1294         register struct dcache *tdc;
1295         afs_size_t dirOffset, dirLen;
1296         ino_t theDir;
1297         struct VenusFid tfid;
1298
1299         /* now we have to lookup the next fid */
1300         tdc =
1301             afs_GetDCache(adp, (afs_size_t) 0, &treq, &dirOffset, &dirLen, 1);
1302         if (!tdc) {
1303             *avcp = NULL;       /* redundant, but harmless */
1304             code = EIO;
1305             goto done;
1306         }
1307
1308         /* now we will just call dir package with appropriate inode.
1309          * Dirs are always fetched in their entirety for now */
1310         ObtainReadLock(&adp->lock);
1311         ObtainReadLock(&tdc->lock);
1312
1313         /*
1314          * Make sure that the data in the cache is current. There are two
1315          * cases we need to worry about:
1316          * 1. The cache data is being fetched by another process.
1317          * 2. The cache data is no longer valid
1318          */
1319         while ((adp->states & CStatd)
1320                && (tdc->dflags & DFFetching)
1321                && hsame(adp->m.DataVersion, tdc->f.versionNo)) {
1322             ReleaseReadLock(&tdc->lock);
1323             ReleaseReadLock(&adp->lock);
1324             afs_osi_Sleep(&tdc->validPos);
1325             ObtainReadLock(&adp->lock);
1326             ObtainReadLock(&tdc->lock);
1327         }
1328         if (!(adp->states & CStatd)
1329             || !hsame(adp->m.DataVersion, tdc->f.versionNo)) {
1330             ReleaseReadLock(&tdc->lock);
1331             ReleaseReadLock(&adp->lock);
1332             afs_PutDCache(tdc);
1333             if (tname && tname != aname)
1334                 osi_FreeLargeSpace(tname);
1335             goto redo;
1336         }
1337
1338         /* Save the version number for when we call osi_dnlc_enter */
1339         hset(versionNo, tdc->f.versionNo);
1340
1341         /*
1342          * check for, and handle "@sys" if it's there.  We should be able
1343          * to avoid the alloc and the strcpy with a little work, but it's
1344          * not pressing.  If there aren't any remote users (ie, via the
1345          * NFS translator), we have a slightly easier job.
1346          * the faster way to do this is to check for *aname == '@' and if
1347          * it's there, check for @sys, otherwise, assume there's no @sys
1348          * then, if the lookup fails, check for .*@sys...
1349          */
1350         /* above now implemented by Check_AtSys and Next_AtSys */
1351
1352         /* lookup the name in the appropriate dir, and return a cache entry
1353          * on the resulting fid */
1354         theDir = tdc->f.inode;
1355         code =
1356             afs_dir_LookupOffset(&theDir, sysState.name, &tfid.Fid,
1357                                  &dirCookie);
1358
1359         /* If the first lookup doesn't succeed, maybe it's got @sys in the name */
1360         while (code == ENOENT && Next_AtSys(adp, &treq, &sysState))
1361             code =
1362                 afs_dir_LookupOffset(&theDir, sysState.name, &tfid.Fid,
1363                                      &dirCookie);
1364         tname = sysState.name;
1365
1366         ReleaseReadLock(&tdc->lock);
1367         afs_PutDCache(tdc);
1368
1369         if (code == ENOENT && afs_IsDynroot(adp) && dynrootRetry) {
1370             ReleaseReadLock(&adp->lock);
1371             dynrootRetry = 0;
1372             if (tname[0] == '.')
1373                 afs_LookupAFSDB(tname + 1);
1374             else
1375                 afs_LookupAFSDB(tname);
1376             if (tname && tname != aname)
1377                 osi_FreeLargeSpace(tname);
1378             goto redo;
1379         } else {
1380             ReleaseReadLock(&adp->lock);
1381         }
1382
1383         /* new fid has same cell and volume */
1384         tfid.Cell = adp->fid.Cell;
1385         tfid.Fid.Volume = adp->fid.Fid.Volume;
1386         afs_Trace4(afs_iclSetp, CM_TRACE_LOOKUP, ICL_TYPE_POINTER, adp,
1387                    ICL_TYPE_STRING, tname, ICL_TYPE_FID, &tfid,
1388                    ICL_TYPE_INT32, code);
1389
1390         if (code) {
1391             if (code != ENOENT) {
1392                 printf("LOOKUP dirLookupOff -> %d\n", code);
1393             }
1394             goto done;
1395         }
1396
1397         /* prefetch some entries, if the dir is currently open.  The variable
1398          * dirCookie tells us where to start prefetching from.
1399          */
1400         if (AFSDOBULK && adp->opens > 0 && !(adp->states & CForeign)
1401             && !afs_IsDynroot(adp)) {
1402             afs_int32 retry;
1403             /* if the entry is not in the cache, or is in the cache,
1404              * but hasn't been statd, then do a bulk stat operation.
1405              */
1406             do {
1407                 retry = 0;
1408                 ObtainReadLock(&afs_xvcache);
1409                 tvc = afs_FindVCache(&tfid, &retry, 0 /* !stats,!lru */ );
1410                 ReleaseReadLock(&afs_xvcache);
1411             } while (tvc && retry);
1412
1413             if (!tvc || !(tvc->states & CStatd))
1414                 bulkcode = afs_DoBulkStat(adp, dirCookie, &treq);
1415             else
1416                 bulkcode = 0;
1417
1418             /* if the vcache isn't usable, release it */
1419             if (tvc && !(tvc->states & CStatd)) {
1420                 afs_PutVCache(tvc);
1421                 tvc = NULL;
1422             }
1423         } else {
1424             tvc = NULL;
1425             bulkcode = 0;
1426         }
1427
1428         /* now get the status info, if we don't already have it */
1429         /* This is kind of weird, but we might wind up accidentally calling
1430          * RXAFS_Lookup because we happened upon a file which legitimately
1431          * has a 0 uniquifier. That is the result of allowing unique to wrap
1432          * to 0. This was fixed in AFS 3.4. For CForeign, Unique == 0 means that
1433          * the file has not yet been looked up.
1434          */
1435         if (!tvc) {
1436             afs_int32 cached = 0;
1437             if (!tfid.Fid.Unique && (adp->states & CForeign)) {
1438                 tvc = afs_LookupVCache(&tfid, &treq, &cached, adp, tname);
1439             }
1440             if (!tvc && !bulkcode) {    /* lookup failed or wasn't called */
1441                 tvc = afs_GetVCache(&tfid, &treq, &cached, NULL);
1442             }
1443         }                       /* if !tvc */
1444     }                           /* sub-block just to reduce stack usage */
1445
1446     if (tvc) {
1447         int force_eval = afs_fakestat_enable ? 0 : 1;
1448
1449         if (adp->states & CForeign)
1450             tvc->states |= CForeign;
1451         tvc->parentVnode = adp->fid.Fid.Vnode;
1452         tvc->parentUnique = adp->fid.Fid.Unique;
1453         tvc->states &= ~CBulkStat;
1454
1455         if (afs_fakestat_enable == 2 && tvc->mvstat == 1) {
1456             ObtainSharedLock(&tvc->lock, 680);
1457             if (!tvc->linkData) {
1458                 UpgradeSToWLock(&tvc->lock, 681);
1459                 code = afs_HandleLink(tvc, &treq);
1460                 ConvertWToRLock(&tvc->lock);
1461             } else {
1462                 ConvertSToRLock(&tvc->lock);
1463                 code = 0;
1464             }
1465             if (!code && !afs_strchr(tvc->linkData, ':'))
1466                 force_eval = 1;
1467             ReleaseReadLock(&tvc->lock);
1468         }
1469 #if defined(UKERNEL) && defined(AFS_WEB_ENHANCEMENTS)
1470         if (!(flags & AFS_LOOKUP_NOEVAL))
1471             /* don't eval mount points */
1472 #endif /* UKERNEL && AFS_WEB_ENHANCEMENTS */
1473             if (tvc->mvstat == 1 && force_eval) {
1474                 /* a mt point, possibly unevaluated */
1475                 struct volume *tvolp;
1476
1477                 ObtainWriteLock(&tvc->lock, 133);
1478                 code = EvalMountPoint(tvc, adp, &tvolp, &treq);
1479                 ReleaseWriteLock(&tvc->lock);
1480
1481                 if (code) {
1482                     afs_PutVCache(tvc);
1483                     if (tvolp)
1484                         afs_PutVolume(tvolp, WRITE_LOCK);
1485                     goto done;
1486                 }
1487
1488                 /* next, we want to continue using the target of the mt point */
1489                 if (tvc->mvid && (tvc->states & CMValid)) {
1490                     struct vcache *uvc;
1491                     /* now lookup target, to set .. pointer */
1492                     afs_Trace2(afs_iclSetp, CM_TRACE_LOOKUP1,
1493                                ICL_TYPE_POINTER, tvc, ICL_TYPE_FID,
1494                                &tvc->fid);
1495                     uvc = tvc;  /* remember for later */
1496
1497                     if (tvolp && (tvolp->states & VForeign)) {
1498                         /* XXXX tvolp has ref cnt on but not locked! XXX */
1499                         tvc =
1500                             afs_GetRootVCache(tvc->mvid, &treq, NULL, tvolp);
1501                     } else {
1502                         tvc = afs_GetVCache(tvc->mvid, &treq, NULL, NULL);
1503                     }
1504                     afs_PutVCache(uvc); /* we're done with it */
1505
1506                     if (!tvc) {
1507                         code = ENOENT;
1508                         if (tvolp) {
1509                             afs_PutVolume(tvolp, WRITE_LOCK);
1510                         }
1511                         goto done;
1512                     }
1513
1514                     /* now, if we came via a new mt pt (say because of a new
1515                      * release of a R/O volume), we must reevaluate the ..
1516                      * ptr to point back to the appropriate place */
1517                     if (tvolp) {
1518                         ObtainWriteLock(&tvc->lock, 134);
1519                         if (tvc->mvid == NULL) {
1520                             tvc->mvid = (struct VenusFid *)
1521                                 osi_AllocSmallSpace(sizeof(struct VenusFid));
1522                         }
1523                         /* setup backpointer */
1524                         *tvc->mvid = tvolp->dotdot;
1525                         ReleaseWriteLock(&tvc->lock);
1526                         afs_PutVolume(tvolp, WRITE_LOCK);
1527                     }
1528                 } else {
1529                     afs_PutVCache(tvc);
1530                     code = ENOENT;
1531                     if (tvolp)
1532                         afs_PutVolume(tvolp, WRITE_LOCK);
1533                     goto done;
1534                 }
1535             }
1536         *avcp = tvc;
1537         if (tvc && !VREFCOUNT(tvc)) {
1538             osi_Panic("TT3");
1539         }
1540         code = 0;
1541     } else {
1542         /* if we get here, we found something in a directory that couldn't
1543          * be located (a Multics "connection failure").  If the volume is
1544          * read-only, we try flushing this entry from the cache and trying
1545          * again. */
1546         if (pass == 0) {
1547             struct volume *tv;
1548             tv = afs_GetVolume(&adp->fid, &treq, READ_LOCK);
1549             if (tv) {
1550                 if (tv->states & VRO) {
1551                     pass = 1;   /* try this *once* */
1552                     ObtainWriteLock(&afs_xcbhash, 495);
1553                     afs_DequeueCallback(adp);
1554                     /* re-stat to get later version */
1555                     adp->states &= ~CStatd;
1556                     ReleaseWriteLock(&afs_xcbhash);
1557                     osi_dnlc_purgedp(adp);
1558                     afs_PutVolume(tv, READ_LOCK);
1559                     goto redo;
1560                 }
1561                 afs_PutVolume(tv, READ_LOCK);
1562             }
1563         }
1564         code = ENOENT;
1565     }
1566
1567   done:
1568     /* put the network buffer back, if need be */
1569     if (tname != aname && tname)
1570         osi_FreeLargeSpace(tname);
1571     if (code == 0) {
1572 #ifdef  AFS_OSF_ENV
1573         /* Handle RENAME; only need to check rename "."  */
1574         if (opflag == RENAME && wantparent && *ndp->ni_next == 0) {
1575             if (!FidCmp(&(tvc->fid), &(adp->fid))) {
1576                 afs_PutVCache(*avcp);
1577                 *avcp = NULL;
1578                 afs_PutFakeStat(&fakestate);
1579                 return afs_CheckCode(EISDIR, &treq, 18);
1580             }
1581         }
1582 #endif /* AFS_OSF_ENV */
1583
1584         if (afs_mariner)
1585             afs_AddMarinerName(aname, tvc);
1586
1587 #if defined(UKERNEL) && defined(AFS_WEB_ENHANCEMENTS)
1588         if (!(flags & AFS_LOOKUP_NOEVAL))
1589             /* Here we don't enter the name into the DNLC because we want the
1590              * evaluated mount dir to be there (the vcache for the mounted volume)
1591              * rather than the vc of the mount point itself.  we can still find the
1592              * mount point's vc in the vcache by its fid. */
1593 #endif /* UKERNEL && AFS_WEB_ENHANCEMENTS */
1594             if (!hit) {
1595                 osi_dnlc_enter(adp, aname, tvc, &versionNo);
1596             } else {
1597 #ifdef AFS_LINUX20_ENV
1598                 /* So Linux inode cache is up to date. */
1599                 code = afs_VerifyVCache(tvc, &treq);
1600 #else
1601                 afs_PutFakeStat(&fakestate);
1602                 return 0;       /* can't have been any errors if hit and !code */
1603 #endif
1604             }
1605     }
1606     if (bulkcode)
1607         code = bulkcode;
1608     else
1609         code = afs_CheckCode(code, &treq, 19);
1610     if (code) {
1611         /* If there is an error, make sure *avcp is null.
1612          * Alphas panic otherwise - defect 10719.
1613          */
1614         *avcp = NULL;
1615     }
1616
1617     afs_PutFakeStat(&fakestate);
1618     return code;
1619 }