2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
18 #include <afsconfig.h>
19 #include "afs/param.h"
22 #include "afs/sysincludes.h" /* Standard vendor system headers */
23 #include "afsincludes.h" /* Afs-based standard headers */
24 #include "afs/afs_stats.h" /* statistics */
25 #include "afs/afs_cbqueue.h"
26 #include "afs/nfsclient.h"
27 #include "afs/afs_osidnlc.h"
28 #include "afs/afs_osi.h"
/* Buffer of zeros that afs_read copies out (via AFS_UIOMOVE) when it has to
 * supply zero bytes instead of cached data. */
31 extern char afs_zeros[AFS_ZEROS];
33 /* Imported variables */
34 extern afs_rwlock_t afs_xdcache;
35 extern unsigned char *afs_indexFlags;
36 extern afs_hyper_t *afs_indexTimes; /* Dcache entry Access times */
37 extern afs_hyper_t afs_indexCounter; /* Fake time for marking index */
40 /* Forward declarations */
/* afs_PrefetchChunk is defined further down in this file; afs_read calls it
 * to try to queue a prefetch. */
41 void afs_PrefetchChunk(struct vcache *avc, struct dcache *adc,
42 afs_ucred_t *acred, struct vrequest *areq);
/*
 * afs_read: transfer data of the file represented by avc into the caller's
 * uio.  Data is taken from dcache entries obtained with afs_FindDCache /
 * afs_GetDCache and read through the cache type's vreadUIO hook; where zeros
 * have to be supplied they are copied from afs_zeros.
 *
 * avc   - vcache of the file being read; its lock is read-locked for the
 *         duration of the transfer loop.
 * auio  - uio describing the request; the amount to transfer and the
 *         starting file position are taken from AFS_UIO_RESID(auio) and
 *         AFS_UIO_OFFSET(auio).
 * acred - credentials, handed to afs_InitReq and to afs_BQueue when a
 *         background fetch is queued.
 *
 * NOTE(review): only part of the parameter list and of the body is visible
 * in this view (the return statements in particular are not shown), so the
 * exact return contract should be confirmed against the complete file.
 */
45 afs_read(struct vcache *avc, struct uio *auio, afs_ucred_t *acred,
48 afs_size_t totalLength;
49 afs_size_t transferLength;
51 afs_size_t offset, len, tlen;
53 struct dcache *tdc = 0;
54 afs_int32 error, trybusy = 1;
55 #ifdef AFS_DARWIN80_ENV
59 struct uio *tuiop = &tuio;
60 struct iovec *tvec = NULL;
65 AFS_STATCNT(afs_read);
72 /* check that we have the latest status info in the vnode cache */
73 if ((code = afs_InitReq(&treq, acred)))
78 osi_Panic("null avc in afs_GenericRead");
80 code = afs_VerifyVCache(avc, &treq);
82 code = afs_CheckCode(code, &treq, 8); /* failed to get it */
86 #ifndef AFS_VM_RDWR_ENV
87 if (AFS_NFSXLATORREQ(acred)) {
89 (avc, PRSFS_READ, &treq,
90 CHECK_MODE_BITS | CMB_ALLOW_EXEC_AS_READ)) {
91 code = afs_CheckCode(EACCES, &treq, 9);
97 #ifndef AFS_DARWIN80_ENV
/* Scratch iovec used by the afsio_copy calls below; it is handed back with
 * osi_FreeSmallSpace further down. */
98 tvec = osi_AllocSmallSpace(sizeof(struct iovec));
100 totalLength = AFS_UIO_RESID(auio);
101 filePos = AFS_UIO_OFFSET(auio);
102 afs_Trace4(afs_iclSetp, CM_TRACE_READ, ICL_TYPE_POINTER, avc,
103 ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_INT32,
104 totalLength, ICL_TYPE_OFFSET,
105 ICL_HANDLE_OFFSET(avc->f.m.Length));
109 ObtainReadLock(&avc->lock);
110 #if defined(AFS_TEXT_ENV) && !defined(AFS_VM_RDWR_ENV)
111 if (avc->flushDV.high == AFS_MAXDV && avc->flushDV.low == AFS_MAXDV) {
112 hset(avc->flushDV, avc->f.m.DataVersion);
/* The request starts at or beyond the file length recorded in the vcache. */
120 if (filePos >= avc->f.m.Length) {
122 len = sizeof(afs_zeros); /* and in 0 buffer */
124 #ifdef AFS_DARWIN80_ENV
126 tuiop = afsio_darwin_partialcopy(auio, trimlen);
128 afsio_copy(auio, &tuio, tvec);
130 afsio_trim(&tuio, trimlen);
132 AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code);
/* Main transfer loop: runs while the file has a non-zero length and bytes
 * remain to be transferred. */
135 while (avc->f.m.Length > 0 && totalLength > 0) {
136 /* read all of the cached info */
137 if (filePos >= avc->f.m.Length)
138 break; /* all done */
141 ReleaseReadLock(&tdc->lock);
/* Look up the dcache entry for filePos via afs_FindDCache. */
144 tdc = afs_FindDCache(avc, filePos);
146 ObtainReadLock(&tdc->lock);
147 offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
148 len = tdc->validPos - filePos;
151 /* a tricky question: does the presence of the DFFetching flag
152 * mean that we're fetching the latest version of the file? No.
153 * The server could update the file as soon as the fetch responsible
154 * for the setting of the DFFetching flag completes.
156 * However, the presence of the DFFetching flag (visible under
157 * a dcache read lock since it is set and cleared only under a
158 * dcache write lock) means that we're fetching as good a version
159 * as was known to this client at the time of the last call to
160 * afs_VerifyVCache, since the latter updates the stat cache's
161 * m.DataVersion field under a vcache write lock, and from the
162 * time that the DFFetching flag goes on in afs_GetDCache (before
163 * the fetch starts), to the time it goes off (after the fetch
164 * completes), afs_GetDCache keeps at least a read lock on the
167 * This means that if the DFFetching flag is set, we can use that
168 * data for any reads that must come from the current version of
169 * the file (current == m.DataVersion).
171 * Another way of looking at this same point is this: if we're
172 * fetching some data and then try to do an afs_VerifyVCache, the
173 * VerifyVCache operation will not complete until after the
174 * DFFetching flag is turned off and the dcache entry's f.versionNo
177 * Note, by the way, that if DFFetching is set,
178 * m.DataVersion > f.versionNo (the latter is not updated until
179 * after the fetch completes).
182 ReleaseReadLock(&tdc->lock);
183 afs_PutDCache(tdc); /* before reusing tdc */
185 tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 2);
191 ObtainReadLock(&tdc->lock);
192 /* now, first try to start transfer, if we'll need the data. If
193 * data already coming, we don't need to do this, obviously. Type
194 * 2 requests never return a null dcache entry, btw.
196 if (!(tdc->dflags & DFFetching)
197 && !hsame(avc->f.m.DataVersion, tdc->f.versionNo)) {
198 /* have cache entry, it is not coming in now,
199 * and we'll need new data */
/* trybusy starts at 1 and is zeroed below when the background table is
 * reported full, so this path is not taken again on later passes. */
201 if (trybusy && !afs_BBusy()) {
203 /* daemon is not busy */
204 ObtainSharedLock(&tdc->mflock, 665);
205 if (!(tdc->mflags & DFFetchReq)) {
206 /* start the daemon (may already be running, however) */
207 UpgradeSToWLock(&tdc->mflock, 666);
208 tdc->mflags |= DFFetchReq;
209 bp = afs_BQueue(BOP_FETCH, avc, B_DONTWAIT, 0, acred,
210 (afs_size_t) filePos, (afs_size_t) 0,
213 /* Bkg table full; retry deadlocks */
214 tdc->mflags &= ~DFFetchReq;
215 trybusy = 0; /* Avoid bkg daemon since they're too busy */
216 ReleaseWriteLock(&tdc->mflock);
219 ConvertWToSLock(&tdc->mflock);
220 /* don't use bp pointer! */
/* Sleep on &tdc->validPos for as long as DFFetchReq stays set and the sleep
 * reports no error.  The mflock, dcache and vcache read locks are released
 * around each sleep and re-obtained in the reverse order afterwards. */
223 ConvertSToRLock(&tdc->mflock);
224 while (!code && tdc->mflags & DFFetchReq) {
225 afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT,
226 ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32,
227 __LINE__, ICL_TYPE_POINTER, tdc,
228 ICL_TYPE_INT32, tdc->dflags);
229 /* don't need waiting flag on this one */
230 ReleaseReadLock(&tdc->mflock);
231 ReleaseReadLock(&tdc->lock);
232 ReleaseReadLock(&avc->lock);
233 code = afs_osi_SleepSig(&tdc->validPos);
234 ObtainReadLock(&avc->lock);
235 ObtainReadLock(&tdc->lock);
236 ObtainReadLock(&tdc->mflock);
238 ReleaseReadLock(&tdc->mflock);
245 /* now data may have started flowing in (if DFFetching is on). If
246 * data is now streaming in, then wait for some interesting stuff.
249 while (!code && (tdc->dflags & DFFetching)
250 && tdc->validPos <= filePos) {
251 /* too early: wait for DFFetching flag to vanish,
252 * or data to appear */
253 afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING,
254 __FILE__, ICL_TYPE_INT32, __LINE__,
255 ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32,
257 ReleaseReadLock(&tdc->lock);
258 ReleaseReadLock(&avc->lock);
259 code = afs_osi_SleepSig(&tdc->validPos);
260 ObtainReadLock(&avc->lock);
261 ObtainReadLock(&tdc->lock);
267 /* fetching flag gone, data is here, or we never tried
268 * (BBusy for instance) */
269 if (tdc->dflags & DFFetching) {
270 /* still fetching, some new data is here:
271 * compute length and offset */
272 offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
273 len = tdc->validPos - filePos;
275 /* no longer fetching, verify data version
276 * (avoid new GetDCache call) */
277 if (hsame(avc->f.m.DataVersion, tdc->f.versionNo)
278 && ((len = tdc->validPos - filePos) > 0)) {
279 offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
281 /* don't have current data, so get it below */
282 afs_Trace3(afs_iclSetp, CM_TRACE_VERSIONNO,
283 ICL_TYPE_INT64, ICL_HANDLE_OFFSET(filePos),
284 ICL_TYPE_HYPER, &avc->f.m.DataVersion,
285 ICL_TYPE_HYPER, &tdc->f.versionNo);
286 ReleaseReadLock(&tdc->lock);
293 /* If we get here, it was not possible to start the
294 * background daemon. With flag == 1 afs_GetDCache
295 * does the FetchData rpc synchronously.
297 ReleaseReadLock(&avc->lock);
298 tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 1);
299 ObtainReadLock(&avc->lock);
301 ObtainReadLock(&tdc->lock);
305 afs_Trace3(afs_iclSetp, CM_TRACE_VNODEREAD, ICL_TYPE_POINTER, tdc,
306 ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(offset),
307 ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(len));
319 if (len > totalLength)
320 len = totalLength; /* will read len bytes */
321 if (len <= 0) { /* shouldn't get here if DFFetching is on */
322 /* read past the end of a chunk, may not be at next chunk yet, and yet
323 * also not at eof, so may have to supply fake zeros */
324 len = AFS_CHUNKTOSIZE(tdc->f.chunk) - offset; /* bytes left in chunk addr space */
325 if (len > totalLength)
326 len = totalLength; /* and still within xfr request */
327 tlen = avc->f.m.Length - offset; /* and still within file */
331 len = sizeof(afs_zeros); /* and in 0 buffer */
332 #ifdef AFS_DARWIN80_ENV
334 tuiop = afsio_darwin_partialcopy(auio, trimlen);
336 afsio_copy(auio, &tuio, tvec);
338 afsio_trim(&tuio, trimlen);
340 AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code);
346 /* get the data from the cache */
348 /* mung uio structure to be right for this transfer */
349 #ifdef AFS_DARWIN80_ENV
351 tuiop = afsio_darwin_partialcopy(auio, trimlen);
352 uio_setoffset(tuiop, offset);
354 afsio_copy(auio, &tuio, tvec);
356 afsio_trim(&tuio, trimlen);
357 tuio.afsio_offset = offset;
/* Perform the copy through the vreadUIO hook of the active cache type. */
360 code = (*(afs_cacheType->vreadUIO))(&tdc->f.inode, tuiop);
367 /* otherwise we've read some, fixup length, etc and continue with next seg */
368 len = len - AFS_UIO_RESID(tuiop); /* compute amount really transferred */
370 afsio_skip(auio, trimlen); /* update input uio structure */
372 transferLength += len;
376 break; /* surprise eof */
377 #ifdef AFS_DARWIN80_ENV
383 } /* the whole while loop */
388 * tdc->lock(R) if tdc
391 /* if we make it here with tdc non-zero, then it is the last chunk we
392 * dealt with, and we have to release it when we're done. We hold on
393 * to it in case we need to do a prefetch.
396 ReleaseReadLock(&tdc->lock);
397 #if !defined(AFS_VM_RDWR_ENV)
398 /* try to queue prefetch, if needed */
400 if (!(tdc->mflags &DFNextStarted))
401 afs_PrefetchChunk(avc, tdc, acred, &treq);
407 ReleaseReadLock(&avc->lock);
/* Final status is error as translated by afs_CheckCode. */
409 code = afs_CheckCode(error, &treq, 10);
411 #ifdef AFS_DARWIN80_ENV
415 osi_FreeSmallSpace(tvec);
423 /* called with the dcache entry triggering the fetch, the vcache entry involved,
424 * and a vrequest for the read call. Marks the dcache entry as having already
425 * triggered a prefetch, starts the prefetch going and sets the DFFetchReq
426 * flag in the prefetched block, so that the next call to read knows to wait
427 * for the daemon to start doing things.
429 * This function must be called with the vnode at least read-locked, and
430 * no locks on the dcache, because it plays around with dcache entries.
 *
 * avc   - vcache of the file; the next chunk is considered only if its
 *         base offset is below avc->f.m.Length.
 * adc   - dcache entry that triggers the prefetch; the target is chunk
 *         adc->f.chunk + 1.
 * acred - credentials passed to afs_BQueue for the background fetch.
 * areq  - vrequest passed to afs_GetDCache when obtaining the next chunk.
433 afs_PrefetchChunk(struct vcache *avc, struct dcache *adc,
434 afs_ucred_t *acred, struct vrequest *areq)
438 afs_size_t j1, j2; /* junk vbls for GetDCache to trash */
440 offset = adc->f.chunk + 1; /* next chunk we'll need */
441 offset = AFS_CHUNKTOBASE(offset); /* base of next chunk */
/* adc's read lock and mflock are held while DFNextStarted is tested and set. */
442 ObtainReadLock(&adc->lock);
443 ObtainSharedLock(&adc->mflock, 662);
444 if (offset < avc->f.m.Length && !(adc->mflags & DFNextStarted)
448 UpgradeSToWLock(&adc->mflock, 663);
449 adc->mflags |= DFNextStarted; /* we've tried to prefetch for this guy */
450 ReleaseWriteLock(&adc->mflock);
451 ReleaseReadLock(&adc->lock);
453 tdc = afs_GetDCache(avc, offset, areq, &j1, &j2, 2); /* type 2 never returns 0 */
455 * In disconnected mode, type 2 can return 0 because it doesn't
456 * make any sense to allocate a dcache we can never fill
/* Set DFFetchReq on the next chunk and queue a BOP_FETCH for it, unless the
 * flag is already set. */
461 ObtainSharedLock(&tdc->mflock, 651);
462 if (!(tdc->mflags & DFFetchReq)) {
463 /* ask the daemon to do the work */
464 UpgradeSToWLock(&tdc->mflock, 652);
465 tdc->mflags |= DFFetchReq; /* guaranteed to be cleared by BKG or GetDCache */
466 /* last parm (1) tells bkg daemon to do an afs_PutDCache when it is done,
467 * since we don't want to wait for it to finish before doing so ourselves.
469 bp = afs_BQueue(BOP_FETCH, avc, B_DONTWAIT, 0, acred,
470 (afs_size_t) offset, (afs_size_t) 1, tdc,
471 (void *)0, (void *)0);
473 /* Bkg table full; just abort non-important prefetching to avoid deadlocks */
474 tdc->mflags &= ~DFFetchReq;
475 ReleaseWriteLock(&tdc->mflock);
479 * DCLOCKXXX: This is a little sketchy, since someone else
480 * could have already started a prefetch.. In practice,
481 * this probably doesn't matter; at most it would cause an
482 * extra slot in the BKG table to be used up when someone
483 * prefetches this for the second time.
485 ObtainReadLock(&adc->lock);
486 ObtainWriteLock(&adc->mflock, 664);
487 adc->mflags &= ~DFNextStarted;
488 ReleaseWriteLock(&adc->mflock);
489 ReleaseReadLock(&adc->lock);
491 ReleaseWriteLock(&tdc->mflock);
494 ReleaseSharedLock(&tdc->mflock);
498 ReleaseSharedLock(&adc->mflock);
499 ReleaseReadLock(&adc->lock);
/*
 * afs_UFSReadUIO: read from a cache file into the supplied uio.
 *
 * cacheId - identifier of the cache file; passed to osi_UFSOpen to obtain
 *           the osi_file whose vnode is read.
 * tuiop   - uio handed unchanged to the platform read primitive.
 *
 * The read call itself is chosen by the AFS_*_ENV preprocessor branches
 * below (VNOP_RDWR, VOP_READ, AFS_VOP_READ, VOP_RDWR, osi_rdwr, VNOP_READ).
 * Several branches take and drop a vnode lock around the call.
 *
 * NOTE(review): the end of this function (and hence its return value) is
 * not visible in this view; confirm against the complete file.
 */
504 afs_UFSReadUIO(afs_dcache_id_t *cacheId, struct uio *tuiop)
507 struct osi_file *tfile;
/* Obtain the osi_file for the cache file named by cacheId. */
509 tfile = (struct osi_file *) osi_UFSOpen(cacheId);
511 #if defined(AFS_AIX41_ENV)
514 VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, tuiop, NULL, NULL,
515 NULL, afs_osi_credp);
517 #elif defined(AFS_AIX32_ENV)
519 VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, tuiop, NULL, NULL);
520 /* Flush all JFS pages now for big performance gain in big file cases
521 * If we do something like this, must check to be sure that AFS file
522 * isn't mmapped... see afs_gn_map() for why.
525 if (tfile->vnode->v_gnode && tfile->vnode->v_gnode->gn_seg) {
526 any different ways to do similar things:
527 so far, the best performing one is #2, but #1 might match it if we
528 straighten out the confusion regarding which pages to flush. It
530 1. vm_flushp(tfile->vnode->v_gnode->gn_seg, 0, len/PAGESIZE - 1);
531 2. vm_releasep(tfile->vnode->v_gnode->gn_seg, offset/PAGESIZE,
532 (len + PAGESIZE-1)/PAGESIZE);
533 3. vms_inactive(tfile->vnode->v_gnode->gn_seg) Doesn't work correctly
534 4. vms_delete(tfile->vnode->v_gnode->gn_seg) probably also fails
535 tfile->vnode->v_gnode->gn_seg = NULL;
539 Unfortunately, this seems to cause frequent "cache corruption" episodes.
540 vm_releasep(tfile->vnode->v_gnode->gn_seg, offset/PAGESIZE,
541 (len + PAGESIZE-1)/PAGESIZE);
544 #elif defined(AFS_AIX_ENV)
546 VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, (off_t) & offset,
547 tuiop, NULL, NULL, -1);
548 #elif defined(AFS_SUN5_ENV)
550 #ifdef AFS_SUN510_ENV
553 VOP_RWLOCK(tfile->vnode, 0, &ct);
554 code = VOP_READ(tfile->vnode, tuiop, 0, afs_osi_credp, &ct);
555 VOP_RWUNLOCK(tfile->vnode, 0, &ct);
558 VOP_RWLOCK(tfile->vnode, 0);
559 code = VOP_READ(tfile->vnode, tuiop, 0, afs_osi_credp);
560 VOP_RWUNLOCK(tfile->vnode, 0);
563 #elif defined(AFS_SGI_ENV)
565 AFS_VOP_RWLOCK(tfile->vnode, VRWLOCK_READ);
566 AFS_VOP_READ(tfile->vnode, tuiop, IO_ISLOCKED, afs_osi_credp,
568 AFS_VOP_RWUNLOCK(tfile->vnode, VRWLOCK_READ);
570 #elif defined(AFS_HPUX100_ENV)
572 code = VOP_RDWR(tfile->vnode, tuiop, UIO_READ, 0, afs_osi_credp);
574 #elif defined(AFS_LINUX20_ENV)
576 code = osi_rdwr(tfile, tuiop, UIO_READ);
578 #elif defined(AFS_DARWIN80_ENV)
580 code = VNOP_READ(tfile->vnode, tuiop, 0, afs_osi_ctxtp);
582 #elif defined(AFS_DARWIN_ENV)
584 VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, current_proc());
585 code = VOP_READ(tfile->vnode, tuiop, 0, afs_osi_credp);
586 VOP_UNLOCK(tfile->vnode, 0, current_proc());
588 #elif defined(AFS_FBSD80_ENV)
590 VOP_LOCK(tfile->vnode, LK_EXCLUSIVE);
591 code = VOP_READ(tfile->vnode, tuiop, 0, afs_osi_credp);
592 VOP_UNLOCK(tfile->vnode, 0);
594 #elif defined(AFS_FBSD_ENV)
596 VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curthread);
597 code = VOP_READ(tfile->vnode, tuiop, 0, afs_osi_credp);
598 VOP_UNLOCK(tfile->vnode, 0, curthread);
600 #elif defined(AFS_NBSD_ENV)
601 tuiop->uio_rw = UIO_READ;
603 VOP_LOCK(tfile->vnode, LK_EXCLUSIVE);
604 code = VOP_READ(tfile->vnode, tuiop, 0, afs_osi_credp);
605 # if defined(AFS_NBSD60_ENV)
606 VOP_UNLOCK(tfile->vnode);
608 VOP_UNLOCK(tfile->vnode, 0);
611 #elif defined(AFS_XBSD_ENV)
613 VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curproc);
614 code = VOP_READ(tfile->vnode, tuiop, 0, afs_osi_credp);
615 VOP_UNLOCK(tfile->vnode, 0, curproc);
618 code = VOP_RDWR(tfile->vnode, tuiop, UIO_READ, 0, afs_osi_credp);