3 * THE REGENTS OF THE UNIVERSITY OF MICHIGAN
6 * Permission is granted to use, copy, create derivative works
7 * and redistribute this software and such derivative works
8 * for any purpose, so long as the name of The University of
9 * Michigan is not used in any advertising or publicity
10 * pertaining to the use of distribution of this software
11 * without specific, written prior authorization. If the
12 * above copyright notice or any other identification of the
13 * University of Michigan is included in any copy of any
14 * portion of this software, then the disclaimer below must
17 * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION
18 * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY
19 * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY O
20 * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
21 * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
22 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
23 * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE
24 * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR
25 * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING
26 * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
27 * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF
32 * Portions Copyright (c) 2008
33 * The Linux Box Corporation
36 * Permission is granted to use, copy, create derivative works
37 * and redistribute this software and such derivative works
38 * for any purpose, so long as the name of the Linux Box
39 * Corporation is not used in any advertising or publicity
40 * pertaining to the use or distribution of this software
41 * without specific, written prior authorization. If the
42 * above copyright notice or any other identification of the
43 * Linux Box Corporation is included in any copy of any
44 * portion of this software, then the disclaimer below must
47 * This software is provided as is, without representation
48 * from the Linux Box Corporation as to its fitness for any
49 * purpose, and without warranty by the Linux Box Corporation
50 * of any kind, either express or implied, including
51 * without limitation the implied warranties of
52 * merchantability and fitness for a particular purpose. The
53 * Linux Box Corporation shall not be liable for any damages,
54 * including special, indirect, incidental, or consequential
55 * damages, with respect to any claim arising out of or in
56 * connection with the use of the software, even if it has been
57 * or is hereafter advised of the possibility of such damages.
61 #include <afsconfig.h>
62 #include "afs/param.h"
63 #if defined(AFS_CACHE_BYPASS) || defined(UKERNEL)
64 #include "afs/afs_bypasscache.h"
70 #include "afs/sysincludes.h" /* Standard vendor system headers */
71 #include "afs/afsincludes.h" /* Afs-based standard headers */
72 #include "afs/afs_stats.h" /* statistics */
73 #include "afs/nfsclient.h"
74 #include "rx/rx_globals.h"
/*
 * afs_min(A, B): evaluates to the smaller of A and B.
 *
 * The whole expansion is wrapped in parentheses so that the conditional
 * operator cannot be captured by a neighbouring operator at the call site
 * (e.g. 2 * afs_min(a, b) or afs_min(a, b) + 1).  Each argument may be
 * evaluated more than once, so do not pass expressions with side effects.
 */
#define afs_min(A,B) (((A)<(B)) ? (A) : (B))
80 /* conditional GLOCK macros */
81 #define COND_GLOCK(var) \
83 var = ISAFS_GLOCK(); \
88 #define COND_RE_GUNLOCK(var) \
95 /* conditional GUNLOCK macros */
97 #define COND_GUNLOCK(var) \
99 var = ISAFS_GLOCK(); \
104 #define COND_RE_GLOCK(var) \
/*
 * Cache-bypass settings defined at file scope and initialised at definition.
 * NOTE(review): the code that reads or updates these variables is outside
 * this view -- confirm their consumers before changing the initial values.
 */
111 int cache_bypass_strategy = NEVER_BYPASS_CACHE;
112 afs_size_t cache_bypass_threshold = AFS_CACHE_BYPASS_DISABLED; /* file size > threshold triggers bypass */
113 int cache_bypass_prefetch = 1; /* Should we do prefetching? */
116 * This is almost exactly like the PFlush() routine in afs_pioctl.c,
117 * but that routine is static. We are about to change a file from
118 * normal caching to bypass its caching. Therefore, we want to
119 * free up any cache space in use by the file, and throw out any
120 * existing VM pages for the file. We keep track of the number of
121 * times we go back and forth from caching to bypass.
124 afs_TransitionToBypass(struct vcache *avc,
125 afs_ucred_t *acred, int aflags)
135 if (aflags & TRANSChangeDesiredBit)
137 if (aflags & TRANSSetManualBit)
142 ObtainWriteLock(&avc->lock, 925);
144 * Someone may have beaten us to the transition - we had no lock
145 * when we checked the flag earlier. No cause to panic, just return.
147 if (avc->cachingStates & FCSBypass)
150 /* If we never cached this, just change state */
151 if (setDesire && (!(avc->cachingStates & FCSBypass))) {
152 avc->cachingStates |= FCSBypass;
156 /* cg2v, try to store any chunks not written 20071204 */
157 if (avc->execsOrWriters > 0) {
158 struct vrequest *treq = NULL;
160 code = afs_CreateReq(&treq, acred);
162 code = afs_StoreAllSegments(avc, treq, AFS_SYNC | AFS_LASTSTORE);
163 afs_DestroyReq(treq);
167 /* also cg2v, don't dequeue the callback */
168 /* next reference will re-stat */
169 afs_StaleVCacheFlags(avc, AFS_STALEVC_NOCB, CDirty);
170 /* now find the disk cache entries */
171 afs_TryToSmush(avc, acred, 1);
172 if (avc->linkData && !(avc->f.states & CCore)) {
173 afs_osi_Free(avc->linkData, strlen(avc->linkData) + 1);
174 avc->linkData = NULL;
177 avc->cachingStates |= FCSBypass; /* Set the bypass flag */
179 avc->cachingStates |= FCSDesireBypass;
181 avc->cachingStates |= FCSManuallySet;
182 avc->cachingTransitions++;
185 ReleaseWriteLock(&avc->lock);
190 * This is almost exactly like the PFlush() routine in afs_pioctl.c,
191 * but that routine is static. We are about to change a file from
192 * bypassing caching to normal caching. Therefore, we want to
193 * throw out any existing VM pages for the file. We keep track of
194 * the number of times we go back and forth from caching to bypass.
197 afs_TransitionToCaching(struct vcache *avc,
207 if (aflags & TRANSChangeDesiredBit)
209 if (aflags & TRANSSetManualBit)
213 ObtainWriteLock(&avc->lock, 926);
215 * Someone may have beaten us to the transition - we had no lock
216 * when we checked the flag earlier. No cause to panic, just return.
218 if (!(avc->cachingStates & FCSBypass))
221 /* Ok, we actually do need to flush */
222 /* next reference will re-stat cache entry */
223 afs_StaleVCacheFlags(avc, 0, CDirty);
225 /* now find the disk cache entries */
226 afs_TryToSmush(avc, acred, 1);
227 if (avc->linkData && !(avc->f.states & CCore)) {
228 afs_osi_Free(avc->linkData, strlen(avc->linkData) + 1);
229 avc->linkData = NULL;
232 avc->cachingStates &= ~(FCSBypass); /* Reset the bypass flag */
234 avc->cachingStates &= ~(FCSDesireBypass);
236 avc->cachingStates |= FCSManuallySet;
237 avc->cachingTransitions++;
240 ReleaseWriteLock(&avc->lock);
244 /* In the case where there's an error in afs_NoCacheFetchProc or
245 * afs_PrefetchNoCache, all of the pages they've been passed need
249 typedef void * bypass_page_t;
251 #define unlock_and_release_pages(auio)
252 #define release_full_page(pp, pageoff)
255 typedef struct page * bypass_page_t;
257 #define unlock_and_release_pages(auio) \
259 struct iovec *ciov; \
262 afs_int32 iovno = 0; \
263 ciov = auio->uio_iov; \
264 iovmax = auio->uio_iovcnt - 1; \
265 pp = (bypass_page_t) ciov->iov_base; \
268 if (PageLocked(pp)) \
270 put_page(pp); /* decrement refcount */ \
275 ciov = (auio->uio_iov + iovno); \
276 pp = (bypass_page_t) ciov->iov_base; \
280 #define release_full_page(pp, pageoff) \
282 /* this is appropriate when no caller intends to unlock \
283 * and release the page */ \
284 SetPageUptodate(pp); \
288 afs_warn("afs_NoCacheFetchProc: page not locked!\n"); \
289 put_page(pp); /* decrement refcount */ \
294 afs_bypass_copy_page(bypass_page_t pp, int pageoff, struct iovec *rxiov,
295 int iovno, int iovoff, struct uio *auio, int curiov, int partial)
301 dolen = auio->uio_iov[curiov].iov_len - pageoff;
303 dolen = rxiov[iovno].iov_len - iovoff;
305 #if !defined(UKERNEL)
306 # if defined(KMAP_ATOMIC_TAKES_NO_KM_TYPE)
307 address = kmap_atomic(pp);
309 address = kmap_atomic(pp, KM_USER0);
314 memcpy(address + pageoff, (char *)(rxiov[iovno].iov_base) + iovoff, dolen);
315 #if !defined(UKERNEL)
316 # if defined(KMAP_ATOMIC_TAKES_NO_KM_TYPE)
317 kunmap_atomic(address);
319 kunmap_atomic(address, KM_USER0);
324 /* no-cache prefetch routine */
326 afs_NoCacheFetchProc(struct rx_call *acall,
329 afs_int32 release_pages,
334 int moredata, iovno, iovoff, iovmax, result, locked;
343 rxiov = osi_AllocSmallSpace(sizeof(struct iovec) * RX_MAXIOVECS);
344 ciov = auio->uio_iov;
345 pp = (bypass_page_t) ciov->iov_base;
346 iovmax = auio->uio_iovcnt - 1;
347 iovno = iovoff = result = 0;
350 COND_GUNLOCK(locked);
351 code = rx_Read(acall, (char *)&length, sizeof(afs_int32));
352 COND_RE_GLOCK(locked);
353 if (code != sizeof(afs_int32)) {
355 afs_warn("Preread error. code: %d instead of %d\n",
356 code, (int)sizeof(afs_int32));
357 unlock_and_release_pages(auio);
360 length = ntohl(length);
364 afs_warn("Preread error. Got length %d, which is greater than size %d\n",
366 unlock_and_release_pages(auio);
370 /* If we get a 0 length reply, time to clean up and return */
372 unlock_and_release_pages(auio);
378 * The fetch protocol is extended for the AFS/DFS translator
379 * to allow multiple blocks of data, each with its own length,
380 * to be returned. As long as the top bit is set, there are more
383 * We do not do this for AFS file servers because they sometimes
384 * return large negative numbers as the transfer size.
386 if (avc->f.states & CForeign) {
387 moredata = length & 0x80000000;
388 length &= ~0x80000000;
393 for (curpage = 0; curpage <= iovmax; curpage++) {
395 /* properly, this should track uio_resid, not a fixed page size! */
396 while (pageoff < auio->uio_iov[curpage].iov_len) {
397 /* If no more iovs, issue new read. */
399 COND_GUNLOCK(locked);
400 bytes = rx_Readv(acall, rxiov, &nio, RX_MAXIOVECS, length);
401 COND_RE_GLOCK(locked);
403 afs_warn("afs_NoCacheFetchProc: rx_Read error. Return code was %d\n", bytes);
405 unlock_and_release_pages(auio);
407 } else if (bytes == 0) {
408 /* we failed to read the full length */
410 afs_warn("afs_NoCacheFetchProc: rx_Read returned zero. Aborting.\n");
411 unlock_and_release_pages(auio);
415 auio->uio_resid -= bytes;
418 pp = (bypass_page_t)auio->uio_iov[curpage].iov_base;
419 if (pageoff + (rxiov[iovno].iov_len - iovoff) <= auio->uio_iov[curpage].iov_len) {
420 /* Copy entire (or rest of) current iovec into current page */
422 afs_bypass_copy_page(pp, pageoff, rxiov, iovno, iovoff, auio, curpage, 0);
423 length -= (rxiov[iovno].iov_len - iovoff);
424 pageoff += rxiov[iovno].iov_len - iovoff;
428 /* Copy only what's needed to fill current page */
430 afs_bypass_copy_page(pp, pageoff, rxiov, iovno, iovoff, auio, curpage, 1);
431 length -= (auio->uio_iov[curpage].iov_len - pageoff);
432 iovoff += auio->uio_iov[curpage].iov_len - pageoff;
433 pageoff = auio->uio_iov[curpage].iov_len;
436 /* we filled a page, or this is the last page. conditionally release it */
437 if (pp && ((pageoff == auio->uio_iov[curpage].iov_len &&
438 release_pages) || (length == 0 && iovno >= nio)))
439 release_full_page(pp, pageoff);
441 if (length == 0 && iovno >= nio)
448 osi_FreeSmallSpace(rxiov);
453 /* dispatch a no-cache read request */
455 afs_ReadNoCache(struct vcache *avc,
456 struct nocache_read_request *bparms,
461 struct brequest *breq;
462 struct vrequest *areq = NULL;
466 afs_warn("afs_ReadNoCache VCache Error!\n");
471 /* the receiver will free areq */
472 code = afs_CreateReq(&areq, acred);
474 afs_warn("afs_ReadNoCache afs_CreateReq error!\n");
476 code = afs_VerifyVCache(avc, areq);
478 afs_warn("afs_ReadNoCache Failed to verify VCache!\n");
484 code = afs_CheckCode(code, areq, 11); /* failed to get it */
490 /* and queue this one */
494 breq = afs_BQueue(BOP_FETCH_NOCACHE, avc, B_DONTWAIT, 0, acred, 1, 1,
495 bparms, (void *)0, (void *)0);
500 afs_osi_Wait(10 * bcnt, 0, 0);
512 /* If there's a problem before we queue the request, we need to
513 * do everything that would normally happen when the request was
514 * processed, like unlocking the pages and freeing memory.
516 unlock_and_release_pages(bparms->auio);
518 afs_DestroyReq(areq);
520 osi_Free(bparms->auio->uio_iov,
521 bparms->auio->uio_iovcnt * sizeof(struct iovec));
522 osi_Free(bparms->auio, sizeof(struct uio));
523 osi_Free(bparms, sizeof(struct nocache_read_request));
528 /* Cannot have static linkage--called from BPrefetch (afs_daemons) */
530 afs_PrefetchNoCache(struct vcache *avc,
532 struct nocache_read_request *bparms)
536 struct iovec *iovecp;
538 struct vrequest *areq;
540 struct rx_connection *rxconn;
541 #ifdef AFS_64BIT_CLIENT
542 afs_int32 length_hi, bytes, locked;
546 struct rx_call *tcall;
548 struct AFSVolSync tsync;
549 struct AFSFetchStatus OutStatus;
550 struct AFSCallBack CallBack;
552 struct tlocal1 *tcallspec;
557 iovecp = auio->uio_iov;
560 tcallspec = osi_Alloc(sizeof(struct tlocal1));
562 tc = afs_Conn(&avc->f.fid, areq, SHARED_LOCK /* ignored */, &rxconn);
564 avc->callback = tc->parent->srvr->server;
565 tcall = rx_NewCall(rxconn);
566 #ifdef AFS_64BIT_CLIENT
567 if (!afs_serverHasNo64Bit(tc)) {
568 code = StartRXAFS_FetchData64(tcall,
569 (struct AFSFid *) &avc->f.fid.Fid,
573 COND_GUNLOCK(locked);
574 bytes = rx_Read(tcall, (char *)&length_hi,
576 COND_RE_GLOCK(locked);
578 if (bytes != sizeof(afs_int32)) {
580 COND_GUNLOCK(locked);
581 code = rx_EndCall(tcall, RX_PROTOCOL_ERROR);
582 COND_RE_GLOCK(locked);
586 } /* afs_serverHasNo64Bit */
587 if (code == RXGEN_OPCODE || afs_serverHasNo64Bit(tc)) {
588 if (auio->uio_offset > 0x7FFFFFFF) {
592 pos = auio->uio_offset;
593 COND_GUNLOCK(locked);
595 tcall = rx_NewCall(rxconn);
596 code = StartRXAFS_FetchData(tcall,
597 (struct AFSFid *) &avc->f.fid.Fid,
598 pos, bparms->length);
599 COND_RE_GLOCK(locked);
601 afs_serverSetNo64Bit(tc);
604 code = StartRXAFS_FetchData(tcall,
605 (struct AFSFid *) &avc->f.fid.Fid,
606 auio->uio_offset, bparms->length);
609 code = afs_NoCacheFetchProc(tcall, avc, auio,
610 1 /* release_pages */,
613 afs_warn("BYPASS: StartRXAFS_FetchData failed: %d\n", code);
614 unlock_and_release_pages(auio);
615 (void)afs_Analyze(tc, rxconn, code, &avc->f.fid, areq,
616 AFS_STATS_FS_RPCIDX_FETCHDATA,
621 code = EndRXAFS_FetchData(tcall, &tcallspec->OutStatus,
622 &tcallspec->CallBack,
625 afs_warn("BYPASS: NoCacheFetchProc failed: %d\n", code);
627 code = rx_EndCall(tcall, code);
629 afs_warn("BYPASS: No connection.\n");
631 unlock_and_release_pages(auio);
632 (void)afs_Analyze(tc, rxconn, code, &avc->f.fid, areq,
633 AFS_STATS_FS_RPCIDX_FETCHDATA,
637 } while (afs_Analyze(tc, rxconn, code, &avc->f.fid, areq,
638 AFS_STATS_FS_RPCIDX_FETCHDATA,
642 * Copy appropriate fields into vcache
646 afs_ProcessFS(avc, &tcallspec->OutStatus, areq);
648 afs_DestroyReq(areq);
649 osi_Free(tcallspec, sizeof(struct tlocal1));
650 osi_Free(bparms, sizeof(struct nocache_read_request));
652 /* in UKERNEL, the "pages" are passed in */
653 osi_Free(iovecp, auio->uio_iovcnt * sizeof(struct iovec));
654 osi_Free(auio, sizeof(struct uio));