2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
17 #include <afsconfig.h>
18 #include "afs/param.h"
20 #include "afs/sysincludes.h" /* Standard vendor system headers */
21 #include "afsincludes.h" /* Afs-based standard headers */
22 #include "afs/afs_stats.h" /* statistics */
23 #include "afs/afs_cbqueue.h"
24 #include "afs/nfsclient.h"
25 #include "afs/exporter.h"
26 #include "afs/afs_osidnlc.h"
27 #include "afs/afs_dynroot.h"
29 extern struct vcache *afs_globalVp;
31 afs_int32 afs_bkvolpref = 0;
32 afs_int32 afs_bulkStatsDone;
33 static int bulkStatCounter = 0; /* counter for bulk stat seq. numbers */
34 int afs_fakestat_enable = 0; /* 1: fakestat-all, 2: fakestat-crosscell */
37 /* this would be faster if it did comparison as int32word, but would be
38 * dependent on byte-order and alignment, and I haven't figured out
39 * what "@sys" is in binary... */
40 #define AFS_EQ_ATSYS(name) (((name)[0]=='@')&&((name)[1]=='s')&&((name)[2]=='y')&&((name)[3]=='s')&&(!(name)[4]))
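/* Matches exactly "@sys" (four characters plus the terminating NUL), so a
 * longer name such as "@sysname" does not match; used by the @sys handling
 * below. */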
42 /* call under write lock, evaluate mvid.target_root field from a mt pt.
43 * avc is the vnode of the mount point object; must be write-locked.
44 * advc is the vnode of the containing directory (optional; if NULL and
45 * EvalMountPoint succeeds, caller must initialize *avolpp->dotdot)
46 * avolpp is where we return a pointer to the volume named by the mount pt, if success
47 * areq is the identity of the caller.
49 * NOTE: this function returns a held volume structure in *avolpp if it returns 0!
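 *
 * The mount point data parsed here is typically of the form "volume" or
 * "cell:volume"; an all-numeric "volid" (optionally "volid:vnode:uniq") is
 * also accepted.  type is the mount point's leading character, '#' for a
 * regular mount point or '%' to force use of the RW volume.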
52 EvalMountData(char type, char *data, afs_uint32 states, afs_uint32 cellnum,
53 struct volume **avolpp, struct vrequest *areq,
54 afs_uint32 *acellidxp, afs_uint32 *avolnump,
55 afs_uint32 *avnoidp, afs_uint32 *auniqp)
57 struct volume *tvp = 0;
60 char *cpos, *volnamep = NULL;
62 afs_int32 prefetch; /* 1=>None 2=>RO 3=>BK */
63 afs_int32 mtptCell, assocCell = 0, hac = 0;
64 afs_int32 samecell, roname, len;
65 afs_uint32 volid = 0, cellidx, vnoid = 0, uniq = 0;
67 /* Start by figuring out and finding the cell */
68 cpos = afs_strchr(data, ':'); /* if cell name present */
70 afs_uint32 mtptCellnum;
73 if ((afs_strtoi_r(data, &endptr, &mtptCellnum) == 0) &&
75 tcell = afs_GetCell(mtptCellnum, READ_LOCK);
77 tcell = afs_GetCellByName(data, READ_LOCK);
82 tcell = afs_GetCell(cellnum, READ_LOCK);
84 /* No cellname or cellnum; return ENODEV */
88 /* no cell found; return ENODEV */
92 cellidx = tcell->cellIndex;
93 mtptCell = tcell->cellNum; /* The cell for the mountpoint */
95 hac = 1; /* has associated cell */
96 assocCell = tcell->lcellp->cellNum; /* The associated cell */
98 afs_PutCell(tcell, READ_LOCK);
100 /* If there's nothing to look up, we can't proceed */
104 /* cell found. figure out volume */
105 cpos = afs_strchr(volnamep, ':');
109 /* Look for an all-numeric volume ID */
110 if ((afs_strtoi_r(volnamep, &endptr, &volid) == 0) &&
111 ((endptr == cpos) || (!*endptr)))
113 /* Ok. Is there a vnode and uniq? */
115 char *vnodep = (char *)(cpos + 1);
117 if ((!*vnodep) /* no vnode after colon */
118 || !(uniqp = afs_strchr(vnodep, ':')) /* no colon for uniq */
119 || (!*(++uniqp)) /* no uniq after colon */
120 || (afs_strtoi_r(vnodep, &endptr, &vnoid) != 0) /* bad vno */
121 || (*endptr != ':') /* bad vnode field */
122 || (afs_strtoi_r(uniqp, &endptr, &uniq) != 0) /* bad uniq */
123 || (*endptr)) /* anything after uniq */
126 /* sorry. vnode and uniq, or nothing */
134 * If the volume ID was all-numeric, and they didn't ask for a
135 * pointer to the volume structure, then just return the number
136 * as-is. This is currently only used for handling name lookups
137 * in the dynamic mount directory.
139 if (volid && !avolpp) {
146 * If the volume ID was all-numeric, and the type was '%', then
147 * assume whoever made the mount point knew what they were doing,
148 * and don't second-guess them by forcing use of a RW volume when
149 * they gave the ID of something else.
151 if (volid && type == '%') {
152 tfid.Fid.Volume = volid; /* use the exact volume ID given */
153 tfid.Cell = mtptCell;
154 tvp = afs_GetVolume(&tfid, areq, WRITE_LOCK); /* get the new one */
155 if (cpos) /* one way or another we're done */
158 return ENODEV; /* afs_GetVolume failed; return ENODEV */
162 /* Is volume name a "<n>.backup" or "<n>.readonly" name */
163 len = strlen(volnamep);
164 roname = ((len > 9) && (strcmp(&volnamep[len - 9], ".readonly") == 0))
165 || ((len > 7) && (strcmp(&volnamep[len - 7], ".backup") == 0));
167 /* When we cross a mountpoint, do we stay in the same cell */
168 samecell = (cellnum == mtptCell) || (hac && (cellnum == assocCell));
170 /* Decide whether to prefetch the BK, or RO. Also means we want the BK or
172 * If this is a regular mountpoint with a RW volume name
173 * - If BK preference is enabled AND we remain within the same cell AND
174 * start from a BK volume, then we will want to prefetch the BK volume.
175 * - If we cross a cell boundary OR start from a RO volume, then we will
176 * want to prefetch the RO volume.
178 if ((type == '#') && !roname) {
179 if (afs_bkvolpref && samecell && (states & CBackup))
180 prefetch = 3; /* Prefetch the BK */
181 else if (!samecell || (states & CRO))
182 prefetch = 2; /* Prefetch the RO */
184 prefetch = 1; /* Do not prefetch */
186 prefetch = 1; /* Do not prefetch */
189 /* Get the volume struct. Unless this volume name has ".readonly" or
190 * ".backup" in it, this will get the volume struct for the RW volume.
191 * The RO volume will be prefetched if requested (but not returned).
192 * Set up to use volname first.
194 tvp = afs_GetVolumeByName(volnamep, mtptCell, prefetch, areq, WRITE_LOCK);
196 /* If no volume was found in this cell, try the associated linked cell */
197 if (!tvp && hac && areq->volumeError) {
199 afs_GetVolumeByName(volnamep, assocCell, prefetch, areq,
203 /* done with volname */
207 return ENODEV; /* Couldn't find the volume */
211 /* Don't cross mountpoint from a BK to a BK volume */
212 if ((states & CBackup) && (tvp->states & VBackup)) {
213 afs_PutVolume(tvp, WRITE_LOCK);
217 /* If we want (prefetched) the BK and it exists, then drop the RW volume
219 * Otherwise, if we want (prefetched) the RO and it exists, then drop the
220 * RW volume and get the RO.
221 * Otherwise, go with the RW.
223 if ((prefetch == 3) && tvp->backVol) {
224 tfid.Fid.Volume = tvp->backVol; /* remember BK volume */
225 tfid.Cell = tvp->cell;
226 afs_PutVolume(tvp, WRITE_LOCK); /* release old volume */
227 tvp = afs_GetVolume(&tfid, areq, WRITE_LOCK); /* get the new one */
229 return ENODEV; /* oops, can't do it */
230 } else if ((prefetch >= 2) && tvp->roVol) {
231 tfid.Fid.Volume = tvp->roVol; /* remember RO volume */
232 tfid.Cell = tvp->cell;
233 afs_PutVolume(tvp, WRITE_LOCK); /* release old volume */
234 tvp = afs_GetVolume(&tfid, areq, WRITE_LOCK); /* get the new one */
236 return ENODEV; /* oops, can't do it */
241 *acellidxp = cellidx;
251 afs_PutVolume(tvp, WRITE_LOCK);
256 EvalMountPoint(struct vcache *avc, struct vcache *advc,
257 struct volume **avolpp, struct vrequest *areq)
260 afs_uint32 avnoid, auniq;
262 AFS_STATCNT(EvalMountPoint);
264 code = afs_HandleLink(avc, areq);
268 /* Determine which cell and volume the mountpoint goes to */
269 code = EvalMountData(avc->linkData[0], avc->linkData + 1,
270 avc->f.states, avc->f.fid.Cell, avolpp, areq, 0, 0,
272 if (code) return code;
280 if (avc->mvid.target_root == NULL)
281 avc->mvid.target_root = osi_AllocSmallSpace(sizeof(struct VenusFid));
282 avc->mvid.target_root->Cell = (*avolpp)->cell;
283 avc->mvid.target_root->Fid.Volume = (*avolpp)->volume;
284 avc->mvid.target_root->Fid.Vnode = avnoid;
285 avc->mvid.target_root->Fid.Unique = auniq;
286 avc->f.states |= CMValid;
288 /* Used to: if the mount point is stored within a backup volume,
289 * then we should only update the parent pointer information if
290 * there's none already set, so as to avoid updating a volume's ..
291 * info with something in an OldFiles directory.
293 * Next two lines used to be under this if:
295 * if (!(avc->f.states & CBackup) || tvp->dotdot.Fid.Volume == 0)
297 * Now: update mount point back pointer on every call, so that we handle
298 * multiple mount points better. This way, when du tries to go back
299 * via chdir(".."), it will end up exactly where it started, yet
300 * cd'ing via a new path to a volume will reset the ".." pointer
303 (*avolpp)->mtpoint = avc->f.fid; /* setup back pointer to mtpoint */
306 (*avolpp)->dotdot = advc->f.fid;
314 * Must be called on an afs_fakestat_state object before calling
315 * afs_EvalFakeStat or afs_PutFakeStat. Calling afs_PutFakeStat
316 * without calling afs_EvalFakeStat is legal, as long as this
317 * function is called.
320 afs_InitFakeStat(struct afs_fakestat_state *state)
322 if (!afs_fakestat_enable)
327 state->need_release = 0;
331 * afs_EvalFakeStat_int
333 * The actual implementation of afs_EvalFakeStat and afs_TryEvalFakeStat,
334 * which is called by those wrapper functions.
336 * Only issues RPCs if canblock is non-zero.
339 afs_EvalFakeStat_int(struct vcache **avcp, struct afs_fakestat_state *state,
340 struct vrequest *areq, int canblock)
342 struct vcache *tvc, *root_vp;
343 struct volume *tvolp = NULL;
346 if (!afs_fakestat_enable)
349 osi_Assert(state->valid == 1);
350 osi_Assert(state->did_eval == 0);
354 if (tvc->mvstat != AFS_MVSTAT_MTPT)
358 /* Is the call to VerifyVCache really necessary? */
359 code = afs_VerifyVCache(tvc, areq);
363 ObtainWriteLock(&tvc->lock, 599);
364 code = EvalMountPoint(tvc, NULL, &tvolp, areq);
365 ReleaseWriteLock(&tvc->lock);
369 tvolp->dotdot = tvc->f.fid;
370 tvolp->dotdot.Fid.Vnode = tvc->f.parent.vnode;
371 tvolp->dotdot.Fid.Unique = tvc->f.parent.unique;
374 if (tvc->mvid.target_root && (tvc->f.states & CMValid)) {
380 ObtainReadLock(&afs_xvcache);
381 root_vp = afs_FindVCache(tvc->mvid.target_root, &retry, 0);
382 if (root_vp && retry) {
383 ReleaseReadLock(&afs_xvcache);
384 afs_PutVCache(root_vp);
386 } while (root_vp && retry);
387 ReleaseReadLock(&afs_xvcache);
389 root_vp = afs_GetVCache(tvc->mvid.target_root, areq);
392 code = canblock ? EIO : 0;
395 #ifdef AFS_DARWIN80_ENV
396 root_vp->f.m.Type = VDIR;
398 code = afs_darwin_finalizevnode(root_vp, NULL, NULL, 0, 0);
401 vnode_ref(AFSTOV(root_vp));
403 if (tvolp && !afs_InReadDir(root_vp)) {
404 /* Is this always kosher? Perhaps we should instead use
405 * NBObtainWriteLock to avoid potential deadlock.
407 ObtainWriteLock(&root_vp->lock, 598);
408 if (!root_vp->mvid.parent)
409 root_vp->mvid.parent = osi_AllocSmallSpace(sizeof(struct VenusFid));
410 *root_vp->mvid.parent = tvolp->dotdot;
411 ReleaseWriteLock(&root_vp->lock);
413 state->need_release = 1;
414 state->root_vp = root_vp;
418 code = canblock ? EIO : 0;
423 afs_PutVolume(tvolp, WRITE_LOCK);
430 * Automatically does the equivalent of EvalMountPoint for vcache entries
431 * which are mount points. Remembers enough state to properly release
432 * the volume root vcache when afs_PutFakeStat() is called.
434 * The state variable must be initialized by afs_InitFakeStat() beforehand.
436 * Returns 0 when everything succeeds and *avcp points to the vcache entry
437 * that should be used for the real vnode operation. Returns non-zero if
438 * something goes wrong and the error code should be returned to the user.
441 afs_EvalFakeStat(struct vcache **avcp, struct afs_fakestat_state *state,
442 struct vrequest *areq)
444 return afs_EvalFakeStat_int(avcp, state, areq, 1);
448 * afs_TryEvalFakeStat
450 * Same as afs_EvalFakeStat, but tries not to talk to remote servers
451 * and only evaluate the mount point if all the data is already in
454 * Returns 0 if everything succeeds and *avcp points to a valid
455 * vcache entry (possibly evaluated).
458 afs_TryEvalFakeStat(struct vcache **avcp, struct afs_fakestat_state *state,
459 struct vrequest *areq)
461 return afs_EvalFakeStat_int(avcp, state, areq, 0);
467 * Perform any necessary cleanup at the end of a vnode op, given that
468 * afs_InitFakeStat was previously called with this state.
471 afs_PutFakeStat(struct afs_fakestat_state *state)
473 if (!afs_fakestat_enable)
476 osi_Assert(state->valid == 1);
477 if (state->need_release)
478 afs_PutVCache(state->root_vp);
483 afs_ENameOK(char *aname)
487 AFS_STATCNT(ENameOK);
488 tlen = strlen(aname);
489 if (tlen >= 4 && strcmp(aname + tlen - 4, "@sys") == 0)
495 afs_getsysname(struct vrequest *areq, struct vcache *adp,
496 char *bufp, int *num, char **sysnamelist[])
501 AFS_STATCNT(getsysname);
503 *sysnamelist = afs_sysnamelist;
505 if (!afs_nfsexporter)
506 strcpy(bufp, (*sysnamelist)[0]);
508 au = afs_GetUser(areq->uid, adp->f.fid.Cell, READ_LOCK);
510 error = EXP_SYSNAME(au->exporter, (char *)0, sysnamelist, num, 0);
512 strcpy(bufp, "@sys");
513 afs_PutUser(au, READ_LOCK);
516 strcpy(bufp, (*sysnamelist)[0]);
519 strcpy(bufp, afs_sysname);
520 afs_PutUser(au, READ_LOCK);
526 Check_AtSys(struct vcache *avc, const char *aname,
527 struct sysname_info *state, struct vrequest *areq)
530 char **sysnamelist[MAXNUMSYSNAMES];
532 if (AFS_EQ_ATSYS(aname)) {
534 state->name = osi_AllocLargeSpace(MAXSYSNAME);
537 afs_getsysname(areq, avc, state->name, &num, sysnamelist);
542 state->name = (char *)aname;
547 Next_AtSys(struct vcache *avc, struct vrequest *areq,
548 struct sysname_info *state)
550 int num = afs_sysnamecount;
551 char **sysnamelist[MAXNUMSYSNAMES];
553 if (state->index == -1)
554 return 0; /* No list */
556 /* Check for the initial state of aname != "@sys" in Check_AtSys */
557 if (state->offset == -1 && state->allocked == 0) {
560 /* Check for .*@sys */
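/* e.g. a lookup of "bin.@sys" that misses is retried as "bin.<sysname>" for
 * each name in the sysname list */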
561 for (tname = state->name; *tname; tname++)
562 /*Move to the end of the string */ ;
564 if ((tname > state->name + 4) && (AFS_EQ_ATSYS(tname - 4))) {
565 state->offset = (tname - 4) - state->name;
566 tname = osi_AllocLargeSpace(AFS_LRALLOCSIZ);
567 strncpy(tname, state->name, state->offset);
572 afs_getsysname(areq, avc, state->name + state->offset, &num,
576 return 0; /* .*@sys doesn't match either */
581 *sysnamelist = afs_sysnamelist;
583 if (afs_nfsexporter) {
584 au = afs_GetUser(areq->uid, avc->f.fid.Cell, READ_LOCK);
587 EXP_SYSNAME(au->exporter, (char *)0, sysnamelist, &num, 0);
589 afs_PutUser(au, READ_LOCK);
593 afs_PutUser(au, READ_LOCK);
595 if (++(state->index) >= num || !(*sysnamelist)[(unsigned int)state->index])
596 return 0; /* end of list */
598 strcpy(state->name + state->offset, (*sysnamelist)[(unsigned int)state->index]);
603 afs_CheckBulkStatus(struct afs_conn *tc, int nFids, AFSBulkStats *statParm,
609 if (statParm->AFSBulkStats_len != nFids || cbParm->AFSCBs_len != nFids) {
610 afs_warn("afs: BulkFetchStatus length %u/%u, expected %u\n",
611 (unsigned)statParm->AFSBulkStats_len,
612 (unsigned)cbParm->AFSCBs_len, nFids);
613 afs_BadFetchStatus(tc);
616 for (i = 0; i < nFids; i++) {
617 if (statParm->AFSBulkStats_val[i].errorCode) {
620 code = afs_CheckFetchStatus(tc, &statParm->AFSBulkStats_val[i]);
629 extern int BlobScan(struct dcache * afile, afs_int32 ablob, afs_int32 *ablobOut);
631 /* called with an unlocked directory and directory cookie. Areqp
632 * describes who is making the call.
633 * Scans the next N (about 30, typically) directory entries, and does
634 * a bulk stat call to stat them all.
636 * Must be very careful when merging in RPC responses, since we don't
637 * want to overwrite newer info that was added by a file system mutating
638 * call that ran concurrently with our bulk stat call.
640 * We do that, as described below, by not merging in our info (always
641 * safe to skip the merge) if the status info is valid in the vcache entry.
643 * If adapt ever implements the bulk stat RPC, then this code will need to
644 * ensure that vcaches created for failed RPC's to older servers have the
647 static struct vcache *BStvc = NULL;
650 afs_DoBulkStat(struct vcache *adp, long dirCookie, struct vrequest *areqp)
652 int nentries; /* # of entries to prefetch */
653 int nskip; /* # of slots in the LRU queue to skip */
654 #ifdef AFS_DARWIN80_ENV
658 struct vcache *lruvcp; /* vcache ptr of our goal pos in LRU queue */
659 struct dcache *dcp; /* chunk containing the dir block */
660 afs_size_t temp; /* temp for holding chunk length, &c. */
661 struct AFSFid *fidsp; /* file IDs we're collecting */
662 struct AFSCallBack *cbsp; /* call back pointers */
663 struct AFSCallBack *tcbp; /* temp callback ptr */
664 struct AFSFetchStatus *statsp; /* file status info */
665 struct AFSVolSync volSync; /* vol sync return info */
666 struct vcache *tvcp; /* temp vcp */
667 struct afs_q *tq; /* temp queue variable */
668 AFSCBFids fidParm; /* file ID parm for bulk stat */
669 AFSBulkStats statParm; /* stat info parm for bulk stat */
670 int fidIndex = 0; /* which file we're stat'ing */
671 struct afs_conn *tcp = 0; /* conn for call */
672 AFSCBs cbParm; /* callback parm for bulk stat */
673 struct server *hostp = 0; /* host we got callback from */
674 long startTime; /* time we started the call,
675 * for callback expiration base
677 #if defined(AFS_DARWIN_ENV)
678 int ftype[4] = {VNON, VREG, VDIR, VLNK}; /* verify type is as expected */
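/* AFSFetchStatus FileType values are File=1, Directory=2, SymbolicLink=3,
 * so this maps the wire file type to the vnode type expected locally */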
680 afs_size_t statSeqNo = 0; /* Value of file size used to detect races */
681 int code; /* error code */
682 afs_int32 newIndex; /* new index in the dir */
683 struct DirBuffer entry; /* Buffer for dir manipulation */
684 struct DirEntry *dirEntryp; /* dir entry we are examining */
686 struct VenusFid afid; /* file ID we are using now */
687 struct VenusFid tfid; /* another temp. file ID */
688 afs_int32 retry; /* handle low-level SGI MP race conditions */
689 long volStates; /* flags from vol structure */
690 struct volume *volp = 0; /* volume ptr */
691 struct VenusFid dotdot = {0, {0, 0, 0}};
692 int flagIndex = 0; /* First file with bulk fetch flag set */
693 struct rx_connection *rxconn;
697 dotdot.Fid.Unique = 0;
698 dotdot.Fid.Vnode = 0;
700 /* first compute some basic parameters. We don't want to prefetch more
701 * than a fraction of the cache in any given call, and we want to preserve
702 * a portion of the LRU queue in any event, so as to avoid thrashing
703 * the entire stat cache (we will at least leave some of it alone).
704 * presently don't stat more than 1/8 the cache in any one call. */
705 nentries = afs_cacheStats / 8;
707 /* don't bother prefetching more than one call's worth of info */
708 if (nentries > AFSCBMAX)
711 /* heuristic to make sure that things fit in 4K. This means that
712 * we shouldn't make it any bigger than 47 entries. I am typically
713 * going to keep it a little lower, since we don't want to load
714 * too much of the stat cache.
719 /* now, to reduce the stack size, we'll allocate two 4K blocks,
720 * one for fids and callbacks, and one for stat info. We'll set
721 * up our pointers to the memory from there, too.
723 statsp = osi_Alloc(AFSCBMAX * sizeof(AFSFetchStatus));
724 fidsp = osi_AllocLargeSpace(nentries * sizeof(AFSFid));
725 cbsp = osi_Alloc(AFSCBMAX * sizeof(AFSCallBack));
727 /* next, we must iterate over the directory, starting from the specified
728 * cookie offset (dirCookie), and counting out nentries file entries.
729 * We skip files that already have stat cache entries, since we
730 * don't want to bulk stat files that are already in the cache.
733 code = afs_VerifyVCache(adp, areqp);
737 dcp = afs_GetDCache(adp, (afs_size_t) 0, areqp, &temp, &temp, 1);
743 /* lock the directory cache entry */
744 ObtainReadLock(&adp->lock);
745 ObtainReadLock(&dcp->lock);
748 * Make sure that the data in the cache is current. There are two
749 * cases we need to worry about:
750 * 1. The cache data is being fetched by another process.
751 * 2. The cache data is no longer valid
753 while ((adp->f.states & CStatd)
754 && (dcp->dflags & DFFetching)
755 && afs_IsDCacheFresh(dcp, adp)) {
756 afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING,
757 __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER, dcp,
758 ICL_TYPE_INT32, dcp->dflags);
759 ReleaseReadLock(&dcp->lock);
760 ReleaseReadLock(&adp->lock);
761 afs_osi_Sleep(&dcp->validPos);
762 ObtainReadLock(&adp->lock);
763 ObtainReadLock(&dcp->lock);
765 if (!(adp->f.states & CStatd)
766 || !afs_IsDCacheFresh(dcp, adp)) {
767 ReleaseReadLock(&dcp->lock);
768 ReleaseReadLock(&adp->lock);
773 /* Generate a sequence number so we can tell whether we should
774 * store the attributes when processing the response. This number is
775 * stored in the file size when we set the CBulkFetching bit. If the
776 * CBulkFetching is still set and this value hasn't changed, then
777 * we know we were the last to set CBulkFetching bit for this file,
778 * and it is safe to set the status information for this file.
780 statSeqNo = bulkStatCounter++;
781 /* ensure against wrapping */
783 statSeqNo = bulkStatCounter++;
785 /* now we have dir data in the cache, so scan the dir page */
790 * Only examine at most the next 'nentries*4' entries to find dir entries
791 * to stat. This is an arbitrary limit that we set so we don't waste time
792 * scanning an entire dir that contains stat'd entries. For example, if a
793 * dir contains 10k entries, and all or almost all of them are stat'd, then
794 * we'll examine 10k entries for no benefit. For each entry, we run
795 * afs_FindVCache, and grab and release afs_xvcache; doing this e.g. 10k
796 * times can have significant impact if the client is under a lot of load.
798 for (attempt_i = 0; attempt_i < nentries * 4; attempt_i++) {
800 /* look for first safe entry to examine in the directory. BlobScan
801 * looks for the 1st allocated dir entry after the dirCookie slot.
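 * Directory entries are addressed in 32-byte blobs, hence the >>5 when
 * converting the byte cookie to a blob index and the <<5 when converting back.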
803 code = BlobScan(dcp, (dirCookie >> 5), &newIndex);
804 if (code || newIndex == 0)
807 /* remember the updated directory cookie */
808 dirCookie = newIndex << 5;
810 /* get a ptr to the dir entry */
811 code = afs_dir_GetBlob(dcp, newIndex, &entry);
814 dirEntryp = (struct DirEntry *)entry.data;
816 /* don't copy more than we have room for */
817 if (fidIndex >= nentries) {
822 /* now, if the dir entry looks good, copy it out to our list. Vnode
823 * 0 means deleted, although it should also be free were it deleted.
825 if (dirEntryp->fid.vnode != 0) {
826 /* don't copy entries we have in our cache. This check will
827 * also make us skip "." and probably "..", unless it has
828 * disappeared from the cache since we did our namei call.
830 tfid.Cell = adp->f.fid.Cell;
831 tfid.Fid.Volume = adp->f.fid.Fid.Volume;
832 tfid.Fid.Vnode = ntohl(dirEntryp->fid.vnode);
833 tfid.Fid.Unique = ntohl(dirEntryp->fid.vunique);
836 ObtainSharedLock(&afs_xvcache, 130);
837 tvcp = afs_FindVCache(&tfid, &retry, IS_SLOCK /* no stats | LRU */ );
839 ReleaseSharedLock(&afs_xvcache);
842 } while (tvcp && retry);
843 if (!tvcp) { /* otherwise, create manually */
844 UpgradeSToWLock(&afs_xvcache, 129);
845 tvcp = afs_NewBulkVCache(&tfid, hostp, statSeqNo);
848 ObtainWriteLock(&tvcp->lock, 505);
849 #ifdef AFS_DARWIN80_ENV
850 /* use even/odd hack to guess file versus dir.
851 let links be reaped. oh well. */
852 if (dirEntryp->fid.vnode & 1)
853 tvcp->f.m.Type = VDIR;
855 tvcp->f.m.Type = VREG;
856 /* finalize to a best guess */
857 afs_darwin_finalizevnode(tvcp, AFSTOV(adp), NULL, 0, 1);
858 /* re-acquire usecount that finalizevnode disposed of */
859 vnode_ref(AFSTOV(tvcp));
861 ReleaseWriteLock(&afs_xvcache);
862 afs_RemoveVCB(&tfid);
863 ReleaseWriteLock(&tvcp->lock);
865 ReleaseWriteLock(&afs_xvcache);
868 ReleaseSharedLock(&afs_xvcache);
873 ReleaseReadLock(&dcp->lock);
874 ReleaseReadLock(&adp->lock);
876 goto done; /* can happen if afs_NewVCache fails */
879 /* WARNING: afs_DoBulkStat uses the Length field to store a
880 * sequence number for each bulk status request. Under no
881 * circumstances should afs_DoBulkStat store a sequence number
882 * if the new length will be ignored when afs_ProcessFS is
883 * called with new stats. */
885 if (!(tvcp->f.states & CStatd)
886 && (!((tvcp->f.states & CBulkFetching) &&
887 (tvcp->f.m.Length != statSeqNo)))
888 && (tvcp->execsOrWriters <= 0)
889 && !afs_DirtyPages(tvcp)
890 && !AFS_VN_MAPPED((vnode_t *) tvcp))
892 if (!(tvcp->f.states & CStatd)
893 && (!((tvcp->f.states & CBulkFetching) &&
894 (tvcp->f.m.Length != statSeqNo)))
895 && (tvcp->execsOrWriters <= 0)
896 && !afs_DirtyPages(tvcp))
900 /* this entry doesn't exist in the cache, and is not
901 * already being fetched by someone else, so add it to the
902 * list of file IDs to obtain.
904 * We detect a callback breaking race condition by checking the
905 * CBulkFetching state bit and the value in the file size.
906 * It is safe to set the status only if the CBulkFetching
907 * flag is still set and the value in the file size does
908 * not change. NewBulkVCache sets us up for the new ones.
909 * Set up the rest here.
911 * Don't fetch status for dirty files. We need to
912 * preserve the value of the file size. We could
913 * flush the pages, but it wouldn't be worthwhile.
915 if (!(tvcp->f.states & CBulkFetching)) {
916 tvcp->f.states |= CBulkFetching;
917 tvcp->f.m.Length = statSeqNo;
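/* f.m.Length now temporarily carries the bulk-stat sequence number; it is
 * overwritten with the server's real length when afs_ProcessFS merges the
 * fetched status below */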
919 memcpy((char *)(fidsp + fidIndex), (char *)&tfid.Fid,
926 /* if dir vnode has non-zero entry */
927 /* move to the next dir entry by adding in the # of entries
928 * used by this dir entry.
930 temp = afs_dir_NameBlobs(dirEntryp->name) << 5;
935 } /* for loop over dir entries */
937 /* now release the dir lock and prepare to make the bulk RPC */
938 ReleaseReadLock(&dcp->lock);
939 ReleaseReadLock(&adp->lock);
941 /* release the chunk */
944 /* don't make a null call */
949 /* setup the RPC parm structures */
950 fidParm.AFSCBFids_len = fidIndex;
951 fidParm.AFSCBFids_val = fidsp;
952 statParm.AFSBulkStats_len = fidIndex;
953 statParm.AFSBulkStats_val = statsp;
954 cbParm.AFSCBs_len = fidIndex;
955 cbParm.AFSCBs_val = cbsp;
957 /* start the timer; callback expirations are relative to this */
958 startTime = osi_Time();
960 tcp = afs_Conn(&adp->f.fid, areqp, SHARED_LOCK, &rxconn);
962 hostp = tcp->parent->srvr->server;
964 for (i = 0; i < fidIndex; i++) {
965 /* we must set tvcp->callback before the BulkStatus call, so
966 * we can detect concurrent InitCallBackState's */
968 afid.Cell = adp->f.fid.Cell;
969 afid.Fid.Volume = adp->f.fid.Fid.Volume;
970 afid.Fid.Vnode = fidsp[i].Vnode;
971 afid.Fid.Unique = fidsp[i].Unique;
975 ObtainReadLock(&afs_xvcache);
976 tvcp = afs_FindVCache(&afid, &retry, 0 /* !stats&!lru */);
977 ReleaseReadLock(&afs_xvcache);
978 } while (tvcp && retry);
984 if ((tvcp->f.states & CBulkFetching) &&
985 (tvcp->f.m.Length == statSeqNo)) {
986 tvcp->callback = hostp;
993 XSTATS_START_TIME(AFS_STATS_FS_RPCIDX_BULKSTATUS);
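/* Try InlineBulkStatus first; if the fileserver is too old to implement it
 * (RXGEN_OPCODE), remember that via SNO_INLINEBULK and fall back to the
 * plain BulkStatus RPC. */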
995 if (!(tcp->parent->srvr->server->flags & SNO_INLINEBULK)) {
998 RXAFS_InlineBulkStatus(rxconn, &fidParm, &statParm,
1001 if (code == RXGEN_OPCODE) {
1002 tcp->parent->srvr->server->flags |= SNO_INLINEBULK;
1005 RXAFS_BulkStatus(rxconn, &fidParm, &statParm,
1012 RXAFS_BulkStatus(rxconn, &fidParm, &statParm, &cbParm,
1019 code = afs_CheckBulkStatus(tcp, fidIndex, &statParm, &cbParm);
1023 /* make sure we give afs_Analyze a chance to retry,
1024 * but if the RPC succeeded we may have entries to merge.
1025 * if we wipe code with one entry's status we get bogus failures.
1027 } while (afs_Analyze
1028 (tcp, rxconn, code ? code : (&statsp[0])->errorCode,
1029 &adp->f.fid, areqp, AFS_STATS_FS_RPCIDX_BULKSTATUS,
1030 SHARED_LOCK, NULL));
1032 /* now, if we didn't get the info, bail out. */
1036 /* we need vol flags to create the entries properly */
1037 dotdot.Fid.Volume = 0;
1038 volp = afs_GetVolume(&adp->f.fid, areqp, READ_LOCK);
1040 volStates = volp->states;
1041 if (volp->dotdot.Fid.Volume != 0)
1042 dotdot = volp->dotdot;
1046 /* find the place to merge the info into. We do this by skipping
1047 * nskip entries in the LRU queue. The more we skip, the more
1048 * we preserve, since the head of the VLRU queue is the most recently
1052 nskip = afs_cacheStats / 2; /* preserved fraction of the cache */
1053 ObtainReadLock(&afs_xvcache);
1054 #ifdef AFS_DARWIN80_ENV
1057 if (QEmpty(&VLRU)) {
1058 /* actually a serious error, probably should panic. Probably will
1059 * panic soon, oh well. */
1060 ReleaseReadLock(&afs_xvcache);
1061 afs_warnuser("afs_DoBulkStat: VLRU empty!");
1064 if ((VLRU.next->prev != &VLRU) || (VLRU.prev->next != &VLRU)) {
1065 refpanic("Bulkstat VLRU inconsistent");
1067 for (tq = VLRU.next; tq != &VLRU; tq = QNext(tq)) {
1069 #ifdef AFS_DARWIN80_ENV
1070 if ((!(QTOV(tq)->f.states & CDeadVnode)&&!(QTOV(tq)->f.states & CVInit)))
1074 if (QNext(QPrev(tq)) != tq) {
1076 refpanic("BulkStat VLRU inconsistent");
1082 lruvcp = QTOV(VLRU.next);
1084 /* now we have to hold this entry, so that it does not get moved
1085 * into the free list while we're running. It could still get
1086 * moved within the lru queue, but hopefully that will be rare; it
1087 * doesn't hurt nearly as much.
1090 #ifdef AFS_DARWIN80_ENV
1091 if (((lruvcp->f.states & CDeadVnode)||(lruvcp->f.states & CVInit))) {
1097 panic("Can't find non-dead vnode in VLRU\n");
1099 lruvp = AFSTOV(lruvcp);
1100 if (vnode_get(lruvp)) /* this bumps ref count */
1102 else if (vnode_ref(lruvp)) {
1104 /* AFSTOV(lruvcp) may be NULL */
1110 if (osi_vnhold(lruvcp) != 0) {
1114 ReleaseReadLock(&afs_xvcache); /* could be read lock */
1118 /* otherwise, merge in the info. We have to be quite careful here,
1119 * since we need to ensure that we don't merge old info over newer
1120 * stuff in a stat cache entry. We're very conservative here: we don't
1121 * do the merge at all unless we ourselves create the stat cache
1122 * entry. That's pretty safe, and should work pretty well, since we
1123 * typically expect to do the stat cache creation ourselves.
1125 * We also have to take into account racing token revocations.
1127 for (i = 0; i < fidIndex; i++) {
1128 if ((&statsp[i])->errorCode)
1130 afid.Cell = adp->f.fid.Cell;
1131 afid.Fid.Volume = adp->f.fid.Fid.Volume;
1132 afid.Fid.Vnode = fidsp[i].Vnode;
1133 afid.Fid.Unique = fidsp[i].Unique;
1136 ObtainReadLock(&afs_xvcache);
1137 tvcp = afs_FindVCache(&afid, &retry, 0/* !stats&!lru */);
1138 ReleaseReadLock(&afs_xvcache);
1139 } while (tvcp && retry);
1141 /* The entry may no longer exist */
1146 /* now we have the entry held, but we need to fill it in */
1147 ObtainWriteLock(&tvcp->lock, 131);
1149 /* if CBulkFetching is not set, or if the file size no longer
1150 * matches the value we placed there when we set the CBulkFetching
1151 * flag, then someone else has done something with this node,
1152 * and we may not have the latest status information for this
1153 * file. Leave the entry alone. There's also a check for a file
1154 * type change here, for OSX bulkstat support.
1156 if (!(tvcp->f.states & CBulkFetching)
1157 || (tvcp->f.m.Length != statSeqNo)
1158 #if defined(AFS_DARWIN_ENV)
1159 || (ftype[(&statsp[i])->FileType] != vType(tvcp))
1163 ReleaseWriteLock(&tvcp->lock);
1164 afs_PutVCache(tvcp);
1168 /* now copy ".." entry back out of volume structure, if necessary */
1169 if (tvcp->mvstat == AFS_MVSTAT_ROOT && (dotdot.Fid.Volume != 0)) {
1170 if (!tvcp->mvid.parent)
1171 tvcp->mvid.parent = osi_AllocSmallSpace(sizeof(struct VenusFid));
1172 *tvcp->mvid.parent = dotdot;
1175 #ifdef AFS_DARWIN80_ENV
1176 if (((lruvcp->f.states & CDeadVnode)||(lruvcp->f.states & CVInit)))
1177 panic("vlru control point went dead\n");
1180 ObtainWriteLock(&afs_xvcache, 132);
1181 if ((VLRU.next->prev != &VLRU) || (VLRU.prev->next != &VLRU)) {
1182 refpanic("Bulkstat VLRU inconsistent2");
1184 if ((QNext(QPrev(&tvcp->vlruq)) != &tvcp->vlruq)
1185 || (QPrev(QNext(&tvcp->vlruq)) != &tvcp->vlruq)) {
1186 refpanic("Bulkstat VLRU inconsistent4");
1188 if ((QNext(QPrev(&lruvcp->vlruq)) != &lruvcp->vlruq)
1189 || (QPrev(QNext(&lruvcp->vlruq)) != &lruvcp->vlruq)) {
1190 refpanic("Bulkstat VLRU inconsistent5");
1193 if (tvcp != lruvcp) { /* if they are == don't move it, don't corrupt vlru */
1194 QRemove(&tvcp->vlruq);
1195 QAdd(&lruvcp->vlruq, &tvcp->vlruq);
1198 if ((VLRU.next->prev != &VLRU) || (VLRU.prev->next != &VLRU)) {
1199 refpanic("Bulkstat VLRU inconsistent3");
1201 if ((QNext(QPrev(&tvcp->vlruq)) != &tvcp->vlruq)
1202 || (QPrev(QNext(&tvcp->vlruq)) != &tvcp->vlruq)) {
1203 refpanic("Bulkstat VLRU inconsistent5");
1205 if ((QNext(QPrev(&lruvcp->vlruq)) != &lruvcp->vlruq)
1206 || (QPrev(QNext(&lruvcp->vlruq)) != &lruvcp->vlruq)) {
1207 refpanic("Bulkstat VLRU inconsistent6");
1209 ReleaseWriteLock(&afs_xvcache);
1211 ObtainWriteLock(&afs_xcbhash, 494);
1213 /* We need to check the flags again. We may have missed
1214 * something while we were waiting for a lock.
1216 if (!(tvcp->f.states & CBulkFetching) || (tvcp->f.m.Length != statSeqNo)) {
1218 ReleaseWriteLock(&tvcp->lock);
1219 ReleaseWriteLock(&afs_xcbhash);
1220 afs_PutVCache(tvcp);
1224 /* now merge in the resulting status back into the vnode.
1225 * We only do this if the entry looks clear.
1227 afs_ProcessFS(tvcp, &statsp[i], areqp);
1228 #if defined(AFS_LINUX_ENV)
1229 afs_fill_inode(AFSTOV(tvcp), NULL); /* reset inode operations */
1232 /* do some accounting for bulk stats: mark this entry as
1233 * loaded, so we can tell if we use it before it gets
1236 tvcp->f.states |= CBulkStat;
1237 tvcp->f.states &= ~CBulkFetching;
1239 afs_bulkStatsDone++;
1241 /* merge in vol info */
1242 if (volStates & VRO)
1243 tvcp->f.states |= CRO;
1244 if (volStates & VBackup)
1245 tvcp->f.states |= CBackup;
1246 if (volStates & VForeign)
1247 tvcp->f.states |= CForeign;
1249 /* merge in the callback info */
1250 tvcp->f.states |= CTruth;
1252 /* get ptr to the callback we are interested in */
1255 if (tcbp->ExpirationTime != 0) {
1256 tvcp->cbExpires = tcbp->ExpirationTime + startTime;
1257 tvcp->callback = hostp;
1258 tvcp->f.states |= CStatd;
1259 afs_QueueCallback(tvcp, CBHash(tcbp->ExpirationTime), volp);
1260 } else if (tvcp->f.states & CRO) {
1261 /* ordinary callback on a read-only volume -- AFS 3.2 style */
1262 tvcp->cbExpires = 3600 + startTime;
1263 tvcp->callback = hostp;
1264 tvcp->f.states |= CStatd;
1265 afs_QueueCallback(tvcp, CBHash(3600), volp);
1267 afs_StaleVCacheFlags(tvcp,
1268 AFS_STALEVC_CBLOCKED | AFS_STALEVC_CLEARCB,
1271 #ifdef AFS_DARWIN80_ENV
1272 /* reclaim->FlushVCache will need xcbhash */
1273 if (((tvcp->f.states & CDeadVnode)||(tvcp->f.states & CVInit))) {
1274 ReleaseWriteLock(&afs_xcbhash);
1275 /* passing in a parent hangs getting the vnode lock */
1276 code = afs_darwin_finalizevnode(tvcp, NULL, NULL, 0, 1);
1278 /* It's gonna get recycled - shouldn't happen */
1279 afs_StaleVCacheFlags(tvcp,
1280 AFS_STALEVC_CBLOCKED | AFS_STALEVC_CLEARCB,
1283 /* re-acquire the usecount that finalizevnode disposed of */
1284 vnode_ref(AFSTOV(tvcp));
1287 ReleaseWriteLock(&afs_xcbhash);
1289 ReleaseWriteLock(&tvcp->lock);
1290 /* finally, we're done with the entry */
1291 afs_PutVCache(tvcp);
1292 } /* for all files we got back */
1294 /* finally return the pointer into the LRU queue */
1295 #ifdef AFS_DARWIN80_ENV
1296 if (((lruvcp->f.states & CDeadVnode)||(lruvcp->f.states & CVInit)))
1297 panic("vlru control point went dead before put\n");
1303 afs_PutVCache(lruvcp);
1307 /* Be sure to turn off the CBulkFetching flags */
1308 for (i = flagIndex; i < fidIndex; i++) {
1309 afid.Cell = adp->f.fid.Cell;
1310 afid.Fid.Volume = adp->f.fid.Fid.Volume;
1311 afid.Fid.Vnode = fidsp[i].Vnode;
1312 afid.Fid.Unique = fidsp[i].Unique;
1315 ObtainReadLock(&afs_xvcache);
1316 tvcp = afs_FindVCache(&afid, &retry, 0 /* !stats&!lru */);
1317 ReleaseReadLock(&afs_xvcache);
1318 } while (tvcp && retry);
1320 if ((tvcp->f.states & CBulkFetching)
1321 && (tvcp->f.m.Length == statSeqNo)) {
1322 tvcp->f.states &= ~CBulkFetching;
1324 afs_PutVCache(tvcp);
1328 afs_PutVolume(volp, READ_LOCK);
1331 osi_FreeLargeSpace((char *)fidsp);
1332 osi_Free((char *)statsp, AFSCBMAX * sizeof(AFSFetchStatus));
1333 osi_Free((char *)cbsp, AFSCBMAX * sizeof(AFSCallBack));
1337 #ifdef AFS_DARWIN80_ENV
1342 afs_ShouldTryBulkStat(struct vcache *adp)
1344 #ifdef AFS_DARWIN80_ENV
1349 if (AFS_IS_DISCONNECTED) {
1350 /* We can't prefetch entries if we're offline. */
1353 if (adp->opens < 1) {
1354 /* Don't bother prefetching entries if nobody is holding the dir open
1355 * while we're doing a lookup. */
1358 if (afs_VCacheStressed()) {
1359 /* If we already have too many vcaches, don't create more vcaches we
1360 * may not even use. */
1363 if ((adp->f.states & CForeign)) {
1364 /* Don't bulkstat for dfs xlator dirs. */
1367 if (afs_IsDynroot(adp)) {
1368 /* Don't prefetch dynroot entries; that's pointless, since we generate
1372 if (afs_InReadDir(adp)) {
1373 /* Don't bulkstat if we're in the middle of servicing a readdir() in
1374 * the same process. */
1381 osi_lookup_isdot(const char *aname)
1385 /* in Solaris, we can get passed "" as a path component if we are the
1386 * root directory, e.g. after a call to chroot. It is equivalent to
1390 #endif /* AFS_SUN5_ENV */
1391 if (aname[0] == '.' && !aname[1]) {
1398 #if defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
1399 afs_lookup(OSI_VC_DECL(adp), char *aname, struct vcache **avcp, struct pathname *pnp, int flags, struct vnode *rdir, afs_ucred_t *acred)
1400 #elif defined(UKERNEL)
1401 afs_lookup(OSI_VC_DECL(adp), char *aname, struct vcache **avcp, afs_ucred_t *acred, int flags)
1403 afs_lookup(OSI_VC_DECL(adp), char *aname, struct vcache **avcp, afs_ucred_t *acred)
1406 struct vrequest *treq = NULL;
1408 struct vcache *tvc = 0;
1410 afs_int32 bulkcode = 0;
1411 int pass = 0, hit = 0;
1412 int force_eval = afs_fakestat_enable ? 0 : 1;
1414 afs_hyper_t versionNo;
1415 int no_read_access = 0;
1416 struct sysname_info sysState; /* used only for @sys checking */
1417 int dynrootRetry = 1;
1418 struct afs_fakestat_state fakestate;
1419 int tryEvalOnly = 0;
1421 /* Don't allow ENOENT errors, except for a specific code path where
1422 * 'enoent_prohibited' is cleared below. */
1423 int enoent_prohibited = 1;
1425 OSI_VC_CONVERT(adp);
1427 AFS_STATCNT(afs_lookup);
1428 afs_InitFakeStat(&fakestate);
1432 if ((code = afs_CreateReq(&treq, acred)))
1435 if (afs_fakestat_enable && adp->mvstat == AFS_MVSTAT_MTPT) {
1436 if (strcmp(aname, ".directory") == 0)
1440 #if defined(AFS_DARWIN_ENV)
1441 /* Workaround for MacOSX Finder, which tries to look for
1442 * .DS_Store and Contents under every directory.
1444 if (afs_fakestat_enable && adp->mvstat == AFS_MVSTAT_MTPT) {
1445 if (strcmp(aname, ".DS_Store") == 0)
1447 if (strcmp(aname, "Contents") == 0)
1450 if (afs_fakestat_enable && adp->mvstat == AFS_MVSTAT_ROOT) {
1451 if (strncmp(aname, "._", 2) == 0)
1457 code = afs_TryEvalFakeStat(&adp, &fakestate, treq);
1459 code = afs_EvalFakeStat(&adp, &fakestate, treq);
1461 /*printf("Code is %d\n", code);*/
1463 if (tryEvalOnly && adp->mvstat == AFS_MVSTAT_MTPT)
1468 /* come back to here if we encounter a non-existent object in a read-only
1469 * volume's directory */
1471 *avcp = NULL; /* Since some callers don't initialize it */
1474 if (!(adp->f.states & CStatd) && !afs_InReadDir(adp)) {
1475 if ((code = afs_VerifyVCache2(adp, treq))) {
1481 /* watch for ".." in a volume root */
1482 if (adp->mvstat == AFS_MVSTAT_ROOT && aname[0] == '.' && aname[1] == '.' && !aname[2]) {
1483 /* looking up ".." in root via special hacks */
1484 if (adp->mvid.parent == (struct VenusFid *)0 || adp->mvid.parent->Fid.Volume == 0) {
1488 /* otherwise we have the fid here, so we use it */
1489 /*printf("Getting vcache\n");*/
1490 tvc = afs_GetVCache(adp->mvid.parent, treq);
1491 afs_Trace3(afs_iclSetp, CM_TRACE_GETVCDOTDOT, ICL_TYPE_FID, adp->mvid.parent,
1492 ICL_TYPE_POINTER, tvc, ICL_TYPE_INT32, code);
1494 code = (tvc ? 0 : EIO);
1496 if (tvc && !VREFCOUNT_GT(tvc, 0)) {
1500 /*printf("LOOKUP GETVCDOTDOT -> %d\n", code); */
1505 /* now check the access */
1506 if (treq->uid != adp->last_looker) {
1507 if (!afs_AccessOK(adp, PRSFS_LOOKUP, treq, CHECK_MODE_BITS)) {
1512 adp->last_looker = treq->uid;
1515 /* Check for read access as well. We need read access in order to
1516 * stat files, but not to stat subdirectories. */
1517 if (!afs_AccessOK(adp, PRSFS_READ, treq, CHECK_MODE_BITS))
1520 /* special case lookup of ".". Can we check for it sooner in this code,
1521 * for instance, way up before "redo:" ??
1522 * I'm not fiddling with the LRUQ here, either, perhaps I should, or else
1523 * invent a lightweight version of GetVCache.
1525 if (osi_lookup_isdot(aname)) { /* special case */
1526 ObtainReadLock(&afs_xvcache);
1527 if (osi_vnhold(adp) != 0) {
1528 ReleaseReadLock(&afs_xvcache);
1532 ReleaseReadLock(&afs_xvcache);
1533 #ifdef AFS_DARWIN80_ENV
1534 vnode_get(AFSTOV(adp));
1539 if (adp && !VREFCOUNT_GT(adp, 0)) {
1546 * Special case lookup of ".." in the dynamic mount directory.
1547 * The parent of this directory is _always_ the AFS root volume.
1549 if (afs_IsDynrootMount(adp) &&
1550 aname[0] == '.' && aname[1] == '.' && !aname[2]) {
1552 ObtainReadLock(&afs_xvcache);
1553 if (osi_vnhold(afs_globalVp) != 0) {
1554 ReleaseReadLock(&afs_xvcache);
1558 ReleaseReadLock(&afs_xvcache);
1559 #ifdef AFS_DARWIN80_ENV
1560 vnode_get(AFSTOV(afs_globalVp));
1563 *avcp = tvc = afs_globalVp;
1569 * Special case lookups in the dynamic mount directory.
1570 * The names here take the form cell:volume, similar to a mount point.
1571 * EvalMountData parses that and returns a cell and volume ID, which
1572 * we use to construct the appropriate dynroot Fid.
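 * For example, a lookup of "example.org:root.cell" in this directory is
 * resolved by EvalMountData into the matching cell index and volume ID.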
1574 if (afs_IsDynrootMount(adp)) {
1575 struct VenusFid tfid;
1576 afs_uint32 cellidx, volid, vnoid, uniq;
1578 code = EvalMountData('%', aname, 0, 0, NULL, treq, &cellidx, &volid, &vnoid, &uniq);
1581 /* If a vnode was returned, it's not a real mount point */
1583 struct cell *tcell = afs_GetCellByIndex(cellidx, READ_LOCK);
1584 tfid.Cell = tcell->cellNum;
1585 afs_PutCell(tcell, READ_LOCK);
1586 tfid.Fid.Vnode = vnoid;
1587 tfid.Fid.Volume = volid;
1588 tfid.Fid.Unique = uniq;
1590 afs_GetDynrootMountFid(&tfid);
1591 tfid.Fid.Vnode = VNUM_FROM_TYPEID(VN_TYPE_MOUNT, cellidx << 2);
1592 tfid.Fid.Unique = volid;
1594 *avcp = tvc = afs_GetVCache(&tfid, treq);
1595 code = (tvc ? 0 : EIO);
1600 #ifdef AFS_LINUX_ENV
1602 * Special case of the dynamic mount volume in a static root.
1603 * This is really unfortunate, but we need this for the translator.
1605 if (adp == afs_globalVp && !afs_GetDynrootEnable() &&
1606 !strcmp(aname, AFS_DYNROOT_MOUNTNAME)) {
1607 struct VenusFid tfid;
1609 afs_GetDynrootMountFid(&tfid);
1610 *avcp = tvc = afs_GetVCache(&tfid, treq);
1617 Check_AtSys(adp, aname, &sysState, treq);
1618 tname = sysState.name;
1620 /* 1st Check_AtSys and lookup by tname is required here, for now,
1621 * because the dnlc is *not* told to remove entries for the parent
1622 * dir of file/dir op that afs_LocalHero likes, but dnlc is informed
1623 * if the cached entry for the parent dir is invalidated for a
1625 * Otherwise, we'd be able to do a dnlc lookup on an entry ending
1626 * w/@sys and know the dnlc was consistent with reality. */
1627 tvc = osi_dnlc_lookup(adp, tname, WRITE_LOCK);
1628 *avcp = tvc; /* maybe wasn't initialized, but it is now */
1630 if (no_read_access && vType(tvc) != VDIR && vType(tvc) != VLNK) {
1631 /* need read access on dir to stat non-directory / non-link */
1637 #ifdef AFS_LINUX_ENV
1638 if (tvc->mvstat == AFS_MVSTAT_ROOT) { /* we don't trust the dnlc for root vcaches */
1639 AFS_RELE(AFSTOV(tvc));
1650 #endif /* AFS_LINUX_ENV */
1653 { /* sub-block just to reduce stack usage */
1655 afs_size_t dirOffset, dirLen;
1656 struct VenusFid tfid;
1658 /* now we have to lookup the next fid */
1659 if (afs_InReadDir(adp))
1660 tdc = adp->dcreaddir;
1662 tdc = afs_GetDCache(adp, (afs_size_t) 0, treq,
1663 &dirOffset, &dirLen, 1);
1665 *avcp = NULL; /* redundant, but harmless */
1670 /* now we will just call dir package with appropriate inode.
1671 * Dirs are always fetched in their entirety for now */
1672 ObtainReadLock(&adp->lock);
1673 ObtainReadLock(&tdc->lock);
1676 * Make sure that the data in the cache is current. There are two
1677 * cases we need to worry about:
1678 * 1. The cache data is being fetched by another process.
1679 * 2. The cache data is no longer valid
1681 * If a readdir is in progress _in this thread_, it has a shared
1682 * lock on the vcache and has obtained current data, so we just
1683 * use that. This eliminates several possible deadlocks.
1685 if (!afs_InReadDir(adp)) {
1686 while ((adp->f.states & CStatd)
1687 && (tdc->dflags & DFFetching)
1688 && afs_IsDCacheFresh(tdc, adp)) {
1689 ReleaseReadLock(&tdc->lock);
1690 ReleaseReadLock(&adp->lock);
1691 afs_osi_Sleep(&tdc->validPos);
1692 ObtainReadLock(&adp->lock);
1693 ObtainReadLock(&tdc->lock);
1695 if (!(adp->f.states & CStatd)
1696 || !afs_IsDCacheFresh(tdc, adp)) {
1697 ReleaseReadLock(&tdc->lock);
1698 ReleaseReadLock(&adp->lock);
1700 if (tname && tname != aname)
1701 osi_FreeLargeSpace(tname);
1706 /* Save the version number for when we call osi_dnlc_enter */
1707 hset(versionNo, tdc->f.versionNo);
1710 * check for, and handle "@sys" if it's there. We should be able
1711 * to avoid the alloc and the strcpy with a little work, but it's
1712 * not pressing. If there aren't any remote users (ie, via the
1713 * NFS translator), we have a slightly easier job.
1714 * the faster way to do this is to check for *aname == '@' and if
1715 * it's there, check for @sys, otherwise, assume there's no @sys
1716 * then, if the lookup fails, check for .*@sys...
1718 /* above now implemented by Check_AtSys and Next_AtSys */
1720 /* lookup the name in the appropriate dir, and return a cache entry
1721 * on the resulting fid */
1723 afs_dir_LookupOffset(tdc, sysState.name, &tfid.Fid,
1726 /* If the first lookup doesn't succeed, maybe it's got @sys in the name */
1727 while (code == ENOENT && Next_AtSys(adp, treq, &sysState))
1729 afs_dir_LookupOffset(tdc, sysState.name, &tfid.Fid,
1731 tname = sysState.name;
1733 ReleaseReadLock(&tdc->lock);
1734 if (!afs_InReadDir(adp))
1736 if (code == ENOENT && afs_IsDynroot(adp) && dynrootRetry && !tryEvalOnly) {
1738 char *cn = (tname[0] == '.') ? tname + 1 : tname;
1739 ReleaseReadLock(&adp->lock);
1740 /* confirm it's not just hushed */
1741 tc = afs_GetCellByName(cn, WRITE_LOCK);
1743 if (tc->states & CHush) {
1744 tc->states &= ~CHush;
1745 ReleaseWriteLock(&tc->lock);
1746 afs_DynrootInvalidate();
1749 ReleaseWriteLock(&tc->lock);
1751 /* Allow a second dynroot retry if the cell was hushed before */
1753 if (tname[0] == '.')
1754 afs_LookupAFSDB(tname + 1);
1756 afs_LookupAFSDB(tname);
1757 if (tname && tname != aname)
1758 osi_FreeLargeSpace(tname);
1761 ReleaseReadLock(&adp->lock);
1764 /* new fid has same cell and volume */
1765 tfid.Cell = adp->f.fid.Cell;
1766 tfid.Fid.Volume = adp->f.fid.Fid.Volume;
1767 afs_Trace4(afs_iclSetp, CM_TRACE_LOOKUP, ICL_TYPE_POINTER, adp,
1768 ICL_TYPE_STRING, tname, ICL_TYPE_FID, &tfid,
1769 ICL_TYPE_INT32, code);
1772 if (code == ENOENT) {
1773 /* The target name really doesn't exist (according to
1774 * afs_dir_LookupOffset, anyway). */
1775 enoent_prohibited = 0;
1780 /* prefetch some entries, if the dir is currently open. The variable
1781 * dirCookie tells us where to start prefetching from.
1783 if (afs_ShouldTryBulkStat(adp)) {
1785 /* if the entry is not in the cache, or is in the cache,
1786 * but hasn't been statd, then do a bulk stat operation.
1790 ObtainReadLock(&afs_xvcache);
1791 tvc = afs_FindVCache(&tfid, &retry, 0 /* !stats,!lru */ );
1792 ReleaseReadLock(&afs_xvcache);
1793 } while (tvc && retry);
1795 if (!tvc || !(tvc->f.states & CStatd))
1796 bulkcode = afs_DoBulkStat(adp, dirCookie, treq);
1800 /* if the vcache isn't usable, release it */
1801 if (tvc && !(tvc->f.states & CStatd)) {
1810 /* now get the status info, if we don't already have it */
1811 /* This is kind of weird, but we might wind up accidentally calling
1812 * RXAFS_Lookup because we happened upon a file which legitimately
1813 * has a 0 uniquifier. That is the result of allowing unique to wrap
1814 * to 0. This was fixed in AFS 3.4. For CForeign, Unique == 0 means that
1815 * the file has not yet been looked up.
1818 if (!tfid.Fid.Unique && (adp->f.states & CForeign)) {
1819 tvc = afs_LookupVCache(&tfid, treq, adp, tname);
1821 if (!tvc && !bulkcode) { /* lookup failed or wasn't called */
1822 tvc = afs_GetVCache(&tfid, treq);
1825 } /* sub-block just to reduce stack usage */
1828 if (adp->f.states & CForeign)
1829 tvc->f.states |= CForeign;
1830 tvc->f.parent.vnode = adp->f.fid.Fid.Vnode;
1831 tvc->f.parent.unique = adp->f.fid.Fid.Unique;
1832 tvc->f.states &= ~CBulkStat;
1834 if (afs_fakestat_enable == 2 && tvc->mvstat == AFS_MVSTAT_MTPT) {
1835 ObtainSharedLock(&tvc->lock, 680);
1836 if (!tvc->linkData) {
1837 UpgradeSToWLock(&tvc->lock, 681);
1838 code = afs_HandleLink(tvc, treq);
1839 ConvertWToRLock(&tvc->lock);
1841 ConvertSToRLock(&tvc->lock);
1844 if (!code && !afs_strchr(tvc->linkData, ':'))
1846 ReleaseReadLock(&tvc->lock);
1848 if (tvc->mvstat == AFS_MVSTAT_MTPT && (tvc->f.states & CMValid) && tvc->mvid.target_root != NULL)
1849 force_eval = 1; /* This is now almost for free, get it correct */
1851 #if defined(UKERNEL)
1852 if (!(flags & AFS_LOOKUP_NOEVAL))
1853 /* don't eval mount points */
1854 #endif /* UKERNEL */
1855 if (tvc->mvstat == AFS_MVSTAT_MTPT && force_eval) {
1856 /* a mt point, possibly unevaluated */
1857 struct volume *tvolp;
1859 ObtainWriteLock(&tvc->lock, 133);
1860 code = EvalMountPoint(tvc, adp, &tvolp, treq);
1861 ReleaseWriteLock(&tvc->lock);
1866 afs_PutVolume(tvolp, WRITE_LOCK);
1870 /* next, we want to continue using the target of the mt point */
1871 if (tvc->mvid.target_root && (tvc->f.states & CMValid)) {
1873 /* now lookup target, to set .. pointer */
1874 afs_Trace2(afs_iclSetp, CM_TRACE_LOOKUP1,
1875 ICL_TYPE_POINTER, tvc, ICL_TYPE_FID,
1877 uvc = tvc; /* remember for later */
1879 if (tvolp && (tvolp->states & VForeign)) {
1880 /* XXXX tvolp has ref cnt on but not locked! XXX */
1882 afs_GetRootVCache(tvc->mvid.target_root, treq, tvolp);
1884 tvc = afs_GetVCache(tvc->mvid.target_root, treq);
1886 afs_PutVCache(uvc); /* we're done with it */
1891 afs_PutVolume(tvolp, WRITE_LOCK);
1896 /* now, if we came via a new mt pt (say because of a new
1897 * release of a R/O volume), we must reevaluate the ..
1898 * ptr to point back to the appropriate place */
1900 ObtainWriteLock(&tvc->lock, 134);
1901 if (tvc->mvid.parent == NULL) {
1903 osi_AllocSmallSpace(sizeof(struct VenusFid));
1905 /* setup backpointer */
1906 *tvc->mvid.parent = tvolp->dotdot;
1907 ReleaseWriteLock(&tvc->lock);
1908 afs_PutVolume(tvolp, WRITE_LOCK);
1914 afs_PutVolume(tvolp, WRITE_LOCK);
1919 if (tvc && !VREFCOUNT_GT(tvc, 0)) {
1924 /* if we get here, we found something in a directory that couldn't
1925 * be located (a Multics "connection failure"). If the volume is
1926 * read-only, we try flushing this entry from the cache and trying
1928 if (!AFS_IS_DISCONNECTED) {
1931 tv = afs_GetVolume(&adp->f.fid, treq, READ_LOCK);
1933 if (tv->states & VRO) {
1934 pass = 1; /* try this *once* */
1935 /* re-stat to get later version */
1936 afs_StaleVCache(adp);
1937 afs_PutVolume(tv, READ_LOCK);
1940 afs_PutVolume(tv, READ_LOCK);
1950 /* put the network buffer back, if need be */
1951 if (tname != aname && tname)
1952 osi_FreeLargeSpace(tname);
1956 afs_AddMarinerName(aname, tvc);
1958 #if defined(UKERNEL)
1959 if (!(flags & AFS_LOOKUP_NOEVAL)) {
1960 /* Here we don't enter the name into the DNLC because we want the
1961 * evaluated mount dir to be there (the vcache for the mounted
1962 * volume) rather than the vc of the mount point itself. We can
1963 * still find the mount point's vc in the vcache by its fid. */
1964 #endif /* UKERNEL */
1965 if (!hit && (force_eval || tvc->mvstat != AFS_MVSTAT_MTPT)) {
1966 osi_dnlc_enter(adp, aname, tvc, &versionNo);
1968 #ifdef AFS_LINUX_ENV
1969 /* So Linux inode cache is up to date. */
1970 code = afs_VerifyVCache(tvc, treq);
1972 afs_PutFakeStat(&fakestate);
1973 afs_DestroyReq(treq);
1974 AFS_DISCON_UNLOCK();
1975 return 0; /* can't have been any errors if hit and !code */
1978 #if defined(UKERNEL)
1985 code = afs_CheckCode(code, treq, 19);
1987 /* If there is an error, make sure *avcp is null.
1988 * Alphas panic otherwise - defect 10719.
1992 if (code == ENOENT && enoent_prohibited) {
1994 * We got an ENOENT error, but we didn't get it while looking up the
1995 * dir entry in the relevant dir blob. That means we likely hit some
1996 * other internal error; don't allow us to return ENOENT in this case,
1997 * since some platforms cache ENOENT errors, and the target path name
1998 * may actually exist.
2003 afs_PutFakeStat(&fakestate);
2004 afs_DestroyReq(treq);
2005 AFS_DISCON_UNLOCK();