fileserver])],
,
[enable_namei_fileserver="default"])
+AC_ARG_ENABLE([cache-bypass],
+ [AS_HELP_STRING([--enable-cache-bypass],
+ [enable client support for cache bypass])],
+ ,
+ [enable_cache_bypass="no"])
AC_ARG_ENABLE([supergroups],
[AS_HELP_STRING([--enable-supergroups],
[enable support for nested pts groups])],
AC_DEFINE(AFS_LARGEFILE_ENV, 1, [define if you want large file fileserver])
fi
+if test "$enable_cache_bypass" = "yes"; then
+ AC_DEFINE(AFS_CACHE_BYPASS, 1, [define to activate cache bypassing Unix client])
+fi
+
if test "$enable_namei_fileserver" = "yes"; then
AC_DEFINE(AFS_NAMEI_ENV, 1, [define if you want to want namei fileserver])
else
afs_atomlist_put(al_mem_pool, lmp); /* return osi_linux_mem struct to pool */
afs_linux_cur_allocs--;
} else {
+ BUG();
printf("osi_linux_free: failed to remove chunk from hashtable\n");
}
osi_proc_init();
osi_ioctl_init();
#endif
-
+#if defined(AFS_CACHE_BYPASS)
+ afs_warn("Cache bypass patched libafs module init.\n");
+#endif
return 0;
}
cleanup_module(void)
#endif
{
+#if defined(AFS_CACHE_BYPASS)
+ afs_warn("Cache bypass patched libafs module cleaning up.\n");
+#endif
#ifdef LINUX_KEYRING_SUPPORT
osi_keyring_shutdown();
#endif
#endif
#if defined(AFS_LINUX26_ENV)
#include "h/writeback.h"
+#include "h/pagevec.h"
+#endif
+#if defined(AFS_CACHE_BYPASS)
+#include "afs/lock.h"
+#include "afs/afs_bypasscache.h"
#endif
#ifdef pgoff2loff
#endif
#if defined(AFS_LINUX26_ENV)
+#define LockPage(pp) lock_page(pp)
#define UnlockPage(pp) unlock_page(pp)
#endif
extern struct vcache *afs_globalVp;
+#if defined(AFS_LINUX24_ENV)
+/* Some uses of BKL are perhaps not needed for bypass or memcache--
+ * why don't we try it out?
+ *
+ * maybe_lock_kernel()/maybe_unlock_kernel() take and drop the big kernel
+ * lock only when the active cache type is the UFS (disk) cache.  They
+ * expand to a single statement WITHOUT a trailing semicolon, so that
+ * "maybe_lock_kernel();" is safe as the body of an unbraced if/else.
+ * The guard is AFS_LINUX24_ENV because the macros are used inside
+ * AFS_LINUX24_ENV-only sections of this file as well.
+ *
+ * NOTE(review): lock and unlock each re-test afs_cacheType; this assumes
+ * the cache type cannot change between the paired calls -- confirm. */
+extern struct afs_cacheOps afs_UfsCacheOps;
+#define maybe_lock_kernel() \
+    do { \
+        if (afs_cacheType == &afs_UfsCacheOps) \
+            lock_kernel(); \
+    } while (0)
+
+
+#define maybe_unlock_kernel() \
+    do { \
+        if (afs_cacheType == &afs_UfsCacheOps) \
+            unlock_kernel(); \
+    } while (0)
+#endif /* AFS_LINUX24_ENV */
+
static ssize_t
afs_linux_read(struct file *fp, char *buf, size_t count, loff_t * offp)
{
struct vcache *vcp = VTOAFS(fp->f_dentry->d_inode);
cred_t *credp = crref();
struct vrequest treq;
-
+ afs_size_t isize, offindex;
AFS_GLOCK();
afs_Trace4(afs_iclSetp, CM_TRACE_READOP, ICL_TYPE_POINTER, vcp,
ICL_TYPE_OFFSET, offp, ICL_TYPE_INT32, count, ICL_TYPE_INT32,
99999);
-
/* get a validated vcache entry */
code = afs_InitReq(&treq, credp);
if (!code)
if (code)
code = -code;
else {
+ isize = (i_size_read(fp->f_mapping->host) - 1) >> PAGE_CACHE_SHIFT;
+ offindex = *offp >> PAGE_CACHE_SHIFT;
+ if(offindex > isize) {
+ code=0;
+ goto done;
+ }
+
osi_FlushPages(vcp, credp); /* ensure stale pages are gone */
AFS_GUNLOCK();
#ifdef DO_SYNC_READ
afs_Trace4(afs_iclSetp, CM_TRACE_READOP, ICL_TYPE_POINTER, vcp,
ICL_TYPE_OFFSET, offp, ICL_TYPE_INT32, count, ICL_TYPE_INT32,
code);
-
+done:
AFS_GUNLOCK();
crfree(credp);
return code;
struct afs_fakestat_state fakestat;
#if defined(AFS_LINUX26_ENV)
- lock_kernel();
+ maybe_lock_kernel();
#endif
AFS_GLOCK();
AFS_STATCNT(afs_readdir);
out1:
AFS_GUNLOCK();
#if defined(AFS_LINUX26_ENV)
- unlock_kernel();
+ maybe_unlock_kernel();
#endif
return code;
}
int code;
#ifdef AFS_LINUX24_ENV
- lock_kernel();
+ maybe_lock_kernel();
#endif
AFS_GLOCK();
code = afs_open(&vcp, fp->f_flags, credp);
AFS_GUNLOCK();
#ifdef AFS_LINUX24_ENV
- unlock_kernel();
+ maybe_unlock_kernel();
#endif
crfree(credp);
int code = 0;
#ifdef AFS_LINUX24_ENV
- lock_kernel();
+ maybe_lock_kernel();
#endif
AFS_GLOCK();
code = afs_close(vcp, fp->f_flags, credp);
AFS_GUNLOCK();
#ifdef AFS_LINUX24_ENV
- unlock_kernel();
+ maybe_unlock_kernel();
#endif
crfree(credp);
cred_t *credp = crref();
#ifdef AFS_LINUX24_ENV
- lock_kernel();
+ maybe_lock_kernel();
#endif
AFS_GLOCK();
code = afs_fsync(VTOAFS(ip), credp);
AFS_GUNLOCK();
#ifdef AFS_LINUX24_ENV
- unlock_kernel();
+ maybe_unlock_kernel();
#endif
crfree(credp);
return -code;
struct vcache *vcp;
cred_t *credp;
int code;
+#if defined(AFS_CACHE_BYPASS)
+ int bypasscache;
+#endif
AFS_GLOCK();
code = afs_InitReq(&treq, credp);
if (code)
goto out;
+#if defined(AFS_CACHE_BYPASS)
+ /* If caching is bypassed for this file, or globally, just return 0 */
+ if(cache_bypass_strategy == ALWAYS_BYPASS_CACHE)
+ bypasscache = 1;
+ else {
+ ObtainReadLock(&vcp->lock);
+ if(vcp->cachingStates & FCSBypass)
+ bypasscache = 1;
+ ReleaseReadLock(&vcp->lock);
+ }
+ if(bypasscache) {
+ /* future proof: don't rely on 0 return from afs_InitReq */
+ code = 0; goto out;
+ }
+#endif
ObtainSharedLock(&vcp->lock, 535);
if ((vcp->execsOrWriters > 0) && (file_count(fp) == 1)) {
int code;
#ifdef AFS_LINUX24_ENV
- lock_kernel();
+ maybe_lock_kernel();
#endif
AFS_GLOCK();
AFS_GUNLOCK();
#ifdef AFS_LINUX24_ENV
- unlock_kernel();
+ maybe_unlock_kernel();
#endif
crfree(credp);
int valid;
#ifdef AFS_LINUX24_ENV
- lock_kernel();
+ maybe_lock_kernel();
#endif
AFS_GLOCK();
d_drop(dp);
}
#ifdef AFS_LINUX24_ENV
- unlock_kernel();
+ maybe_unlock_kernel();
#endif
return valid;
vattr.va_type = mode & S_IFMT;
#if defined(AFS_LINUX26_ENV)
- lock_kernel();
+ maybe_lock_kernel();
#endif
AFS_GLOCK();
code = afs_create(VTOAFS(dip), (char *)name, &vattr, NONEXCL, mode,
AFS_GUNLOCK();
#if defined(AFS_LINUX26_ENV)
- unlock_kernel();
+ maybe_unlock_kernel();
#endif
crfree(credp);
return -code;
int code;
#if defined(AFS_LINUX26_ENV)
- lock_kernel();
+ maybe_lock_kernel();
#endif
AFS_GLOCK();
code = afs_lookup(VTOAFS(dip), comp, &vcp, credp);
#endif
#if defined(AFS_LINUX26_ENV)
- unlock_kernel();
+ maybe_unlock_kernel();
#endif
crfree(credp);
struct vcache *tvc = VTOAFS(dp->d_inode);
#if defined(AFS_LINUX26_ENV)
- lock_kernel();
+ maybe_lock_kernel();
#endif
if (VREFCOUNT(tvc) > 1 && tvc->opens > 0
&& !(tvc->states & CUnlinked)) {
d_drop(dp);
out:
#if defined(AFS_LINUX26_ENV)
- unlock_kernel();
+ maybe_unlock_kernel();
#endif
crfree(credp);
return -code;
const char *name = dp->d_name.name;
#if defined(AFS_LINUX26_ENV)
- lock_kernel();
+ maybe_lock_kernel();
#endif
VATTR_NULL(&vattr);
vattr.va_mask = ATTR_MODE;
AFS_GUNLOCK();
#if defined(AFS_LINUX26_ENV)
- unlock_kernel();
+ maybe_unlock_kernel();
#endif
crfree(credp);
return -code;
#if defined(AFS_LINUX26_ENV)
/* Prevent any new references during rename operation. */
- lock_kernel();
+ maybe_lock_kernel();
if (!d_unhashed(newdp)) {
d_drop(newdp);
d_rehash(rehash);
#if defined(AFS_LINUX26_ENV)
- unlock_kernel();
+ maybe_unlock_kernel();
#endif
crfree(credp);
#endif /* AFS_LINUX24_ENV */
#endif /* USABLE_KERNEL_PAGE_SYMLINK_CACHE */
+#if defined(AFS_CACHE_BYPASS)
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+
+/* afs_linux_readpages
+ *
+ * The kernel calls readpages before trying readpage, with a list of
+ * pages.  The readahead algorithm expands num_pages when it thinks
+ * the application will benefit.  Unlike readpage, the pages are not
+ * necessarily allocated.  If we do not a) allocate required pages and
+ * b) remove them from page_list, linux will re-enter at afs_linux_readpage
+ * for each required page (and the page will be pre-allocated).
+ *
+ * When the file is not bypassing the cache, every page is taken off
+ * page_list and released and nothing is read here.  Otherwise the pages
+ * are inserted into the page cache, recorded in an iovec array (iov_base
+ * holds the struct page pointer, or NULL for a slot with no usable page)
+ * and the whole request is handed to afs_ReadNoCache().
+ *
+ * fp         - file being read
+ * mapping    - address space the pages are added to
+ * page_list  - pages supplied by the caller; consumed from the tail
+ * num_pages  - number of iovec slots to allocate / pages to consider
+ *
+ * Returns 0 when nothing was queued, otherwise the negated status of
+ * afs_ReadNoCache().
+ */
+static int
+afs_linux_readpages(struct file *fp, struct address_space *mapping,
+                    struct list_head *page_list, unsigned num_pages)
+{
+    afs_int32 page_ix;
+    uio_t *auio;
+    afs_offs_t offset = 0;
+    struct iovec *iovecp;
+    struct nocache_read_request *ancr;
+    struct page *pp;
+    struct pagevec lrupv;
+    afs_int32 code = 0;
+
+    cred_t *credp;
+    struct inode *ip = FILE_INODE(fp);
+    struct vcache *avc = VTOAFS(ip);
+    afs_int32 bypasscache = 0;  /* bypass for this read */
+    afs_int32 base_index = 0;
+    afs_int32 page_count = 0;
+    afs_int32 isize;
+
+    credp = crref();
+
+    switch (cache_bypass_strategy) {
+    case NEVER_BYPASS_CACHE:
+        break;
+    case ALWAYS_BYPASS_CACHE:
+        bypasscache = 1;
+        break;
+    case LARGE_FILES_BYPASS_CACHE:
+        if (i_size_read(ip) > cache_bypass_threshold) {
+            bypasscache = 1;
+        }
+        break;
+    default:
+        break;
+    }
+
+    /* In the new incarnation of selective caching, a file's caching policy
+     * can change, eg because file size exceeds threshold, etc. */
+    trydo_cache_transition(avc, credp, bypasscache);
+
+    if (!bypasscache) {
+        /* Not our job: empty the list so the caller falls back to
+         * afs_linux_readpage.  Each page is released as it is unlinked;
+         * once off the list nobody else holds a way to free it. */
+        while (!list_empty(page_list)) {
+            pp = list_entry(page_list->prev, struct page, lru);
+            list_del(&pp->lru);
+            page_cache_release(pp);
+        }
+        goto out;
+    }
+    /* background thread must free: iovecp, auio, ancr */
+    iovecp = osi_Alloc(num_pages * sizeof(struct iovec));
+    /* Slots that are never filled in below (past-EOF pages, short page
+     * lists) must read as "no page" to anything that walks the array. */
+    memset(iovecp, 0, num_pages * sizeof(struct iovec));
+
+    auio = osi_Alloc(sizeof(uio_t));
+    auio->uio_iov = iovecp;
+    auio->uio_iovcnt = num_pages;
+    auio->uio_flag = UIO_READ;
+    auio->uio_seg = AFS_UIOSYS;
+    auio->uio_resid = num_pages * PAGE_SIZE;
+    auio->uio_offset = offset;  /* real value is set with the first page */
+
+    ancr = osi_Alloc(sizeof(struct nocache_read_request));
+    ancr->auio = auio;
+    ancr->offset = offset;      /* real value is set with the first page */
+    ancr->length = auio->uio_resid;
+
+    pagevec_init(&lrupv, 0);
+
+    for (page_ix = 0; page_ix < num_pages; ++page_ix) {
+
+        if (list_empty(page_list))
+            break;
+
+        pp = list_entry(page_list->prev, struct page, lru);
+        /* If we allocate a page and don't remove it from page_list,
+         * the page cache gets upset. */
+        list_del(&pp->lru);
+        isize = (i_size_read(fp->f_mapping->host) - 1) >> PAGE_CACHE_SHIFT;
+        if (pp->index > isize) {
+            /* Entirely past end of file: drop the page instead of
+             * leaking it now that it is off the list. */
+            if (PageLocked(pp))
+                UnlockPage(pp);
+            page_cache_release(pp);
+            continue;
+        }
+
+        if (page_ix == 0) {
+            /* The first page fixes the file offset of the whole request. */
+            offset = ((loff_t) pp->index) << PAGE_CACHE_SHIFT;
+            auio->uio_offset = offset;
+            ancr->offset = offset;
+            base_index = pp->index;
+        }
+        iovecp[page_ix].iov_len = PAGE_SIZE;
+        code = add_to_page_cache(pp, mapping, pp->index, GFP_KERNEL);
+        if (base_index != pp->index) {
+            /* Not contiguous with the run being built: leave a hole. */
+            if (PageLocked(pp))
+                UnlockPage(pp);
+            page_cache_release(pp);
+            iovecp[page_ix].iov_base = (void *) 0;
+            base_index++;
+            continue;
+        }
+        base_index++;
+        if (code) {
+            if (PageLocked(pp))
+                UnlockPage(pp);
+            page_cache_release(pp);
+            iovecp[page_ix].iov_base = (void *) 0;
+        } else {
+            page_count++;
+            if (!PageLocked(pp)) {
+                LockPage(pp);
+            }
+
+            /* save the page for background map */
+            iovecp[page_ix].iov_base = (void *) pp;
+
+            /* and put it on the LRU cache */
+            if (!pagevec_add(&lrupv, pp))
+                __pagevec_lru_add(&lrupv);
+        }
+    }
+
+    /* If there were useful pages in the page list, make sure all pages
+     * are in the LRU cache, then schedule the read */
+    if (page_count) {
+        pagevec_lru_add(&lrupv);
+        code = afs_ReadNoCache(avc, ancr, credp);
+    } else {
+        /* If there is nothing for the background thread to handle,
+         * it won't be freeing the things that we never gave it */
+        osi_Free(iovecp, num_pages * sizeof(struct iovec));
+        osi_Free(auio, sizeof(uio_t));
+        osi_Free(ancr, sizeof(struct nocache_read_request));
+        /* Nothing was queued; do not report a stale add_to_page_cache
+         * status as the result of the read. */
+        code = 0;
+    }
+    /* we do not flush, release, or unmap pages--that will be
+     * done for us by the background thread as each page comes in
+     * from the fileserver */
+
+out:
+    /* Every path, including the non-bypass one, drops the cred reference
+     * taken by crref() above. */
+    crfree(credp);
+    return -code;
+}
+
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) */
+#endif /* defined(AFS_CACHE_BYPASS) */
+
+
/* afs_linux_readpage
* all reads come through here. A strategy-like read call.
*/
static int
afs_linux_readpage(struct file *fp, struct page *pp)
{
- int code;
- cred_t *credp = crref();
+ /* Returns the negated AFS status code; 0 when the page lies past end
+ * of file and is skipped.  With AFS_CACHE_BYPASS and a bypassed file
+ * the read is queued through afs_ReadNoCache() and the uio, iovec and
+ * request are freed by the receiver, not here. */
+ afs_int32 code = 0; /* must be defined on the early "goto done" path */
+ cred_t *credp = crref();
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0)
- char *address;
- afs_offs_t offset = ((loff_t) pp->index) << PAGE_CACHE_SHIFT;
+ char *address;
+ afs_offs_t offset = ((loff_t) pp->index) << PAGE_CACHE_SHIFT;
#else
- ulong address = afs_linux_page_address(pp);
- afs_offs_t offset = pageoff(pp);
+ ulong address = afs_linux_page_address(pp);
+ afs_offs_t offset = pageoff(pp);
#endif
- uio_t tuio;
- struct iovec iovec;
- struct inode *ip = FILE_INODE(fp);
- int cnt = page_count(pp);
- struct vcache *avc = VTOAFS(ip);
-
+#if defined(AFS_CACHE_BYPASS)
+ afs_int32 bypasscache = 0; /* bypass for this read */
+ struct nocache_read_request *ancr;
+#endif
+ afs_int32 isize;
+ uio_t *auio;
+ struct iovec *iovecp;
+ struct inode *ip = FILE_INODE(fp);
+ afs_int32 cnt = page_count(pp);
+ struct vcache *avc = VTOAFS(ip);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0)
- address = kmap(pp);
- ClearPageError(pp);
+ address = kmap(pp);
+ ClearPageError(pp);
#else
- atomic_add(1, &pp->count);
- set_bit(PG_locked, &pp->flags); /* other bits? See mm.h */
- clear_bit(PG_error, &pp->flags);
-#endif
-
- setup_uio(&tuio, &iovec, (char *)address, offset, PAGE_SIZE, UIO_READ,
- AFS_UIOSYS);
+ atomic_add(1, &pp->count);
+ set_bit(PG_locked, &pp->flags); /* other bits? See mm.h */
+ clear_bit(PG_error, &pp->flags);
+#endif
+ /* If the page is past the end of the file, skip it.
+ * NOTE(review): the page is neither zeroed nor marked up to date on
+ * this path -- confirm that callers cope with that. */
+ isize = (i_size_read(fp->f_mapping->host) - 1) >> PAGE_CACHE_SHIFT;
+ if(pp->index > isize) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0)
+ kunmap(pp); /* balance the kmap() above */
+#endif
+ if(PageLocked(pp))
+ UnlockPage(pp);
+ goto done;
+ }
+ /* if bypasscache, receiver frees, else we do */
+ auio = osi_Alloc(sizeof(uio_t));
+ iovecp = osi_Alloc(sizeof(struct iovec));
+
+ setup_uio(auio, iovecp, (char *)address, offset, PAGE_SIZE, UIO_READ,
+ AFS_UIOSYS);
+
+#if defined(AFS_CACHE_BYPASS)
+
+ switch(cache_bypass_strategy) {
+ case NEVER_BYPASS_CACHE:
+ break;
+ case ALWAYS_BYPASS_CACHE:
+ bypasscache = 1;
+ break;
+ case LARGE_FILES_BYPASS_CACHE:
+ if(i_size_read(ip) > cache_bypass_threshold) {
+ bypasscache = 1;
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* In the new incarnation of selective caching, a file's caching policy
+ * can change, eg because file size exceeds threshold, etc. */
+ trydo_cache_transition(avc, credp, bypasscache);
+
+ if(bypasscache) {
+ if(address)
+ kunmap(pp);
+ /* save the page for background map */
+ auio->uio_iov->iov_base = (void*) pp;
+ /* the background thread will free this */
+ ancr = osi_Alloc(sizeof(struct nocache_read_request));
+ ancr->auio = auio;
+ ancr->offset = offset;
+ ancr->length = PAGE_SIZE;
+
+ maybe_lock_kernel();
+ code = afs_ReadNoCache(avc, ancr, credp);
+ maybe_unlock_kernel();
+
+ goto done; /* skips release page, doing it in bg thread */
+ }
+#endif
+
#ifdef AFS_LINUX24_ENV
- lock_kernel();
+ maybe_lock_kernel();
#endif
- AFS_GLOCK();
- afs_Trace4(afs_iclSetp, CM_TRACE_READPAGE, ICL_TYPE_POINTER, ip, ICL_TYPE_POINTER, pp, ICL_TYPE_INT32, cnt, ICL_TYPE_INT32, 99999); /* not a possible code value */
- code = afs_rdwr(avc, &tuio, UIO_READ, 0, credp);
- afs_Trace4(afs_iclSetp, CM_TRACE_READPAGE, ICL_TYPE_POINTER, ip,
- ICL_TYPE_POINTER, pp, ICL_TYPE_INT32, cnt, ICL_TYPE_INT32,
- code);
- AFS_GUNLOCK();
+ AFS_GLOCK();
+ afs_Trace4(afs_iclSetp, CM_TRACE_READPAGE, ICL_TYPE_POINTER, ip, ICL_TYPE_POINTER, pp, ICL_TYPE_INT32, cnt, ICL_TYPE_INT32, 99999); /* not a possible code value */
+
+ code = afs_rdwr(avc, auio, UIO_READ, 0, credp);
+
+ afs_Trace4(afs_iclSetp, CM_TRACE_READPAGE, ICL_TYPE_POINTER, ip,
+ ICL_TYPE_POINTER, pp, ICL_TYPE_INT32, cnt, ICL_TYPE_INT32,
+ code);
+ AFS_GUNLOCK();
#ifdef AFS_LINUX24_ENV
- unlock_kernel();
+ maybe_unlock_kernel();
#endif
+ if (!code) {
+ /* XXX valid for no-cache also? Check last bits of files... :)
+ * Cognate code goes in afs_NoCacheFetchProc. */
+ if (auio->uio_resid) /* zero remainder of page */
+ memset((void *)(address + (PAGE_SIZE - auio->uio_resid)), 0,
+ auio->uio_resid);
- if (!code) {
- if (tuio.uio_resid) /* zero remainder of page */
- memset((void *)(address + (PAGE_SIZE - tuio.uio_resid)), 0,
- tuio.uio_resid);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0)
- flush_dcache_page(pp);
- SetPageUptodate(pp);
+ flush_dcache_page(pp);
+ SetPageUptodate(pp);
#else
- set_bit(PG_uptodate, &pp->flags);
+ set_bit(PG_uptodate, &pp->flags);
#endif
- }
+ } /* !code */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0)
- kunmap(pp);
- UnlockPage(pp);
+ kunmap(pp);
+ UnlockPage(pp);
#else
- clear_bit(PG_locked, &pp->flags);
- wake_up(&pp->wait);
- free_page(address);
+ clear_bit(PG_locked, &pp->flags);
+ wake_up(&pp->wait);
+ free_page(address);
#endif
- if (!code && AFS_CHUNKOFFSET(offset) == 0) {
- struct dcache *tdc;
- struct vrequest treq;
+#if defined(AFS_CACHE_BYPASS)
- AFS_GLOCK();
- code = afs_InitReq(&treq, credp);
- if (!code && !NBObtainWriteLock(&avc->lock, 534)) {
- tdc = afs_FindDCache(avc, offset);
- if (tdc) {
- if (!(tdc->mflags & DFNextStarted))
- afs_PrefetchChunk(avc, tdc, credp, &treq);
- afs_PutDCache(tdc);
- }
- ReleaseWriteLock(&avc->lock);
- }
- AFS_GUNLOCK();
- }
+/* do not call afs_GetDCache if cache is bypassed */
+ if(bypasscache)
+ goto done;
+
+#endif
- crfree(credp);
- return -code;
+ /* free if not bypassing cache */
+ osi_Free(auio, sizeof(uio_t));
+ osi_Free(iovecp, sizeof(struct iovec));
+
+ if (!code && AFS_CHUNKOFFSET(offset) == 0) {
+ struct dcache *tdc;
+ struct vrequest treq;
+
+ AFS_GLOCK();
+ code = afs_InitReq(&treq, credp);
+ if (!code && !NBObtainWriteLock(&avc->lock, 534)) {
+ tdc = afs_FindDCache(avc, offset);
+ if (tdc) {
+ if (!(tdc->mflags & DFNextStarted))
+ afs_PrefetchChunk(avc, tdc, credp, &treq);
+ afs_PutDCache(tdc);
+ }
+ ReleaseWriteLock(&avc->lock);
+ }
+ AFS_GUNLOCK();
+ }
+
+done:
+ crfree(credp);
+ return -code;
}
base = (((loff_t) pp->index) << PAGE_CACHE_SHIFT) + offset;
credp = crref();
- lock_kernel();
+ maybe_lock_kernel();
AFS_GLOCK();
afs_Trace4(afs_iclSetp, CM_TRACE_UPDATEPAGE, ICL_TYPE_POINTER, vcp,
ICL_TYPE_POINTER, pp, ICL_TYPE_INT32, page_count(pp),
ICL_TYPE_INT32, code);
AFS_GUNLOCK();
- unlock_kernel();
+ maybe_unlock_kernel();
crfree(credp);
kunmap(pp);
.setattr = afs_notify_change,
#else
.default_file_ops = &afs_file_fops,
- .readpage = afs_linux_readpage,
+ .readpage = afs_linux_readpage,
.revalidate = afs_linux_revalidate,
.updatepage = afs_linux_updatepage,
#endif
#if defined(AFS_LINUX24_ENV)
static struct address_space_operations afs_file_aops = {
.readpage = afs_linux_readpage,
+#if defined(AFS_CACHE_BYPASS) && LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ .readpages = afs_linux_readpages,
+#endif
.writepage = afs_linux_writepage,
.commit_write = afs_linux_commit_write,
.prepare_write = afs_linux_prepare_write,
char *p = (char *)kmap(page);
int code;
- lock_kernel();
+ maybe_lock_kernel();
AFS_GLOCK();
code = afs_linux_ireadlink(ip, p, PAGE_SIZE, AFS_UIOSYS);
AFS_GUNLOCK();
if (code < 0)
goto fail;
p[code] = '\0'; /* null terminate? */
- unlock_kernel();
+ maybe_unlock_kernel();
SetPageUptodate(page);
kunmap(page);
return 0;
fail:
- unlock_kernel();
+ maybe_unlock_kernel();
SetPageError(page);
kunmap(page);
#define BOP_STORE 2 /* parm1 is chunk to store */
#define BOP_PATH 3 /* parm1 is path, parm2 is chunk to fetch */
+#if defined(AFS_CACHE_BYPASS)
+#define BOP_FETCH_NOCACHE 4 /* parms are: vnode ptr, offset, segment ptr, addr, cred ptr */
+#endif
+
#define B_DONTWAIT 1 /* On failure return; don't wait */
/* protocol is: refCount is incremented by user to take block out of free pool.
/*... to be continued ... */
#endif
+#if defined(AFS_CACHE_BYPASS)
+/* vcache (file) cachingStates bits */
+#define FCSDesireBypass 0x1 /* This file should bypass the cache */
+#define FCSBypass 0x2 /* This file is currently NOT being cached */
+#define FCSManuallySet 0x4 /* The bypass flags were set, or reset, manually (via pioctl)
+ and should not be overridden by the file's name */
+
+/* Flag values used by the Transition routines */
+#define TRANSChangeDesiredBit 0x1 /* The Transition routine should set or
+ * reset the FCSDesireBypass bit */
+#define TRANSVcacheIsLocked 0x2 /* The Transition routine does not need to
+ * lock vcache (it's already locked) */
+#define TRANSSetManualBit 0x4 /* The Transition routine should set FCSManuallySet so that
+ * filename checking does not override pioctl requests */
+#endif /* AFS_CACHE_BYPASS */
+
#define CPSIZE 2
#if defined(AFS_XBSD_ENV) || defined(AFS_DARWIN_ENV)
#define vrefCount v->v_usecount
* this file. */
short flockCount; /* count of flock readers, or -1 if writer */
char mvstat; /* 0->normal, 1->mt pt, 2->root. */
+
+#if defined(AFS_CACHE_BYPASS)
+ char cachingStates; /* Caching policies for this file */
+ afs_uint32 cachingTransitions; /* # of times file has flopped between caching and not */
+#if defined(AFS_LINUX24_ENV)
+ off_t next_seq_offset; /* Next sequential offset (used by prefetch/readahead) */
+#else
+ off_t next_seq_blk_offset; /* accounted in blocks for Solaris & IRIX */
+#endif
+#endif
+
afs_uint32 states; /* state bits */
#if defined(AFS_SUN5_ENV)
afs_uint32 vstates; /* vstate bits */
--- /dev/null
+/*
+ * COPYRIGHT © 2000
+ * THE REGENTS OF THE UNIVERSITY OF MICHIGAN
+ * ALL RIGHTS RESERVED
+ *
+ * Permission is granted to use, copy, create derivative works
+ * and redistribute this software and such derivative works
+ * for any purpose, so long as the name of The University of
+ * Michigan is not used in any advertising or publicity
+ * pertaining to the use or distribution of this software
+ * without specific, written prior authorization. If the
+ * above copyright notice or any other identification of the
+ * University of Michigan is included in any copy of any
+ * portion of this software, then the disclaimer below must
+ * also be included.
+ *
+ * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION
+ * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY
+ * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF
+ * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
+ * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE
+ * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR
+ * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
+ * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGES.
+ */
+
+ /*
+ * Portions Copyright (c) 2008
+ * The Linux Box Corporation
+ * ALL RIGHTS RESERVED
+ *
+ * Permission is granted to use, copy, create derivative works
+ * and redistribute this software and such derivative works
+ * for any purpose, so long as the name of the Linux Box
+ * Corporation is not used in any advertising or publicity
+ * pertaining to the use or distribution of this software
+ * without specific, written prior authorization. If the
+ * above copyright notice or any other identification of the
+ * Linux Box Corporation is included in any copy of any
+ * portion of this software, then the disclaimer below must
+ * also be included.
+ *
+ * This software is provided as is, without representation
+ * from the Linux Box Corporation as to its fitness for any
+ * purpose, and without warranty by the Linux Box Corporation
+ * of any kind, either express or implied, including
+ * without limitation the implied warranties of
+ * merchantability and fitness for a particular purpose. The
+ * Linux Box Corporation shall not be liable for any damages,
+ * including special, indirect, incidental, or consequential
+ * damages, with respect to any claim arising out of or in
+ * connection with the use of the software, even if it has been
+ * or is hereafter advised of the possibility of such damages.
+ */
+
+
+#include <afsconfig.h>
+#include "afs/param.h"
+
+#if defined(AFS_CACHE_BYPASS)
+
+#include "afs/afs_bypasscache.h"
+
+/*
+ * afs_bypasscache.c
+ *
+ */
+#include "afs/sysincludes.h" /* Standard vendor system headers */
+#include "afs/afsincludes.h" /* Afs-based standard headers */
+#include "afs/afs_stats.h" /* statistics */
+#include "afs/nfsclient.h"
+#include "rx/rx_globals.h"
+
+#if defined(AFS_LINUX26_ENV)
+#define LockPage(pp) lock_page(pp)
+#define UnlockPage(pp) unlock_page(pp)
+#endif
+#define AFS_KMAP_ATOMIC
+
+#ifndef afs_min
+/* Smaller of A and B.  The whole expansion is parenthesized so the macro
+ * can be used inside a larger expression (e.g. "afs_min(a, b) + 1");
+ * note that each argument is evaluated more than once. */
+#define afs_min(A,B) (((A) < (B)) ? (A) : (B))
+#endif
+
+/* conditional GLOCK macros
+ *
+ * COND_GLOCK(var) records in var whether the global lock is already held
+ * and takes it only if it is not.  COND_RE_GUNLOCK(var) releases the
+ * global lock when var is non-zero.
+ *
+ * All four COND_* macros expand to one statement without a trailing
+ * semicolon; write "COND_GLOCK(x);" at the call site.
+ */
+#define COND_GLOCK(var) \
+    do { \
+        var = ISAFS_GLOCK(); \
+        if (!var) \
+            RX_AFS_GLOCK(); \
+    } while (0)
+
+#define COND_RE_GUNLOCK(var) \
+    do { \
+        if (var) \
+            RX_AFS_GUNLOCK(); \
+    } while (0)
+
+
+/* conditional GUNLOCK macros
+ *
+ * COND_GUNLOCK(var) records in var whether the global lock is held and
+ * drops it if so; COND_RE_GLOCK(var) re-takes it when var is non-zero.
+ */
+
+#define COND_GUNLOCK(var) \
+    do { \
+        var = ISAFS_GLOCK(); \
+        if (var) \
+            RX_AFS_GUNLOCK(); \
+    } while (0)
+
+#define COND_RE_GLOCK(var) \
+    do { \
+        if (var) \
+            RX_AFS_GLOCK(); \
+    } while (0)
+
+
+int cache_bypass_strategy = NEVER_BYPASS_CACHE;
+int cache_bypass_threshold = AFS_CACHE_BYPASS_DISABLED; /* file size > threshold triggers bypass */
+int cache_bypass_prefetch = 1; /* Should we do prefetching ? */
+
+extern afs_rwlock_t afs_xcbhash;
+
+/*
+ * This is almost exactly like the PFlush() routine in afs_pioctl.c,
+ * but that routine is static.  We are about to change a file from
+ * normal caching to bypassing its caching.  Therefore, we want to
+ * free up any cache space in use by the file, and throw out any
+ * existing VM pages for the file.  We keep track of the number of
+ * times we go back and forth from caching to bypass.
+ *
+ * avc    - vcache of the file to switch; a NULL pointer is ignored
+ * acred  - credentials used when storing dirty segments and flushing
+ * aflags - TRANSChangeDesiredBit and/or TRANSSetManualBit
+ *
+ * The bypass state lives in avc->cachingStates (FCS* bits), not in
+ * avc->states; the FCS* values must never be tested against or stored
+ * into avc->states.
+ */
+void afs_TransitionToBypass(register struct vcache *avc, register struct AFS_UCRED *acred, int aflags)
+{
+
+    afs_int32 code;
+    struct vrequest treq;
+    int setDesire = 0;
+    int setManual = 0;
+
+    if (!avc)
+        return;
+
+    if (aflags & TRANSChangeDesiredBit)
+        setDesire = 1;
+    if (aflags & TRANSSetManualBit)
+        setManual = 1;
+
+#ifdef AFS_BOZONLOCK_ENV
+    afs_BozonLock(&avc->pvnLock, avc);  /* Since afs_TryToSmush will do a pvn_vptrunc */
+#else
+    AFS_GLOCK();
+#endif
+    ObtainWriteLock(&avc->lock, 925);
+
+    /* Already bypassing: nothing to do.  The test is made under the
+     * vcache lock; a caller that looked at the flag without the lock may
+     * have lost a race, which is no reason to panic. */
+    if (avc->cachingStates & FCSBypass)
+        goto done;
+
+    /* If we never cached this, just change state.
+     * NOTE(review): this branch is taken whenever TRANSChangeDesiredBit is
+     * passed; confirm that skipping the flush below is intended then. */
+    if (setDesire && !(avc->cachingStates & FCSBypass)) {
+        avc->cachingStates |= FCSBypass;
+        goto done;
+    }
+    /* cg2v, try to store any chunks not written 20071204 */
+    if (avc->execsOrWriters > 0) {
+        code = afs_InitReq(&treq, acred);
+        if (!code)
+            code = afs_StoreAllSegments(avc, &treq, AFS_SYNC | AFS_LASTSTORE);
+    }
+#if 0
+    /* also cg2v, don't dequeue the callback */
+    ObtainWriteLock(&afs_xcbhash, 956);
+    afs_DequeueCallback(avc);
+    ReleaseWriteLock(&afs_xcbhash);
+#endif
+    avc->states &= ~(CStatd | CDirty);  /* next reference will re-stat cache entry */
+    /* now find the disk cache entries */
+    afs_TryToSmush(avc, acred, 1);
+    osi_dnlc_purgedp(avc);
+    if (avc->linkData && !(avc->states & CCore)) {
+        afs_osi_Free(avc->linkData, strlen(avc->linkData) + 1);
+        avc->linkData = NULL;
+    }
+
+    avc->cachingStates |= FCSBypass;    /* Set the bypass flag */
+    if (setDesire)
+        avc->cachingStates |= FCSDesireBypass;
+    if (setManual)
+        avc->cachingStates |= FCSManuallySet;
+    avc->cachingTransitions++;
+
+done:
+    ReleaseWriteLock(&avc->lock);
+#ifdef AFS_BOZONLOCK_ENV
+    afs_BozonUnlock(&avc->pvnLock, avc);
+#else
+    AFS_GUNLOCK();
+#endif
+}
+
+/*
+ * This is almost exactly like the PFlush() routine in afs_pioctl.c,
+ * but that routine is static.  We are about to change a file from
+ * bypassing caching to normal caching.  Therefore, we want to
+ * throw out any existing VM pages for the file.  We keep track of
+ * the number of times we go back and forth from caching to bypass.
+ *
+ * avc    - vcache of the file to switch; a NULL pointer is ignored
+ * acred  - credentials passed to afs_TryToSmush()
+ * aflags - TRANSChangeDesiredBit and/or TRANSSetManualBit
+ *
+ * The bypass state lives in avc->cachingStates (FCS* bits), not in
+ * avc->states.
+ */
+void afs_TransitionToCaching(register struct vcache *avc, register struct AFS_UCRED *acred, int aflags)
+{
+    int resetDesire = 0;
+    int setManual = 0;
+
+    if (!avc)
+        return;
+
+    if (aflags & TRANSChangeDesiredBit)
+        resetDesire = 1;
+    if (aflags & TRANSSetManualBit)
+        setManual = 1;
+
+#ifdef AFS_BOZONLOCK_ENV
+    afs_BozonLock(&avc->pvnLock, avc);  /* Since afs_TryToSmush will do a pvn_vptrunc */
+#else
+    AFS_GLOCK();
+#endif
+    ObtainWriteLock(&avc->lock, 926);
+
+    /* Already caching: nothing to do.  The test is made under the vcache
+     * lock; a caller that looked at the flag without the lock may have
+     * lost a race, which is no reason to panic.
+     * NOTE(review): the manual/desire bits are left untouched on this
+     * path -- confirm no caller relies on them being updated here. */
+    if (!(avc->cachingStates & FCSBypass))
+        goto done;
+
+    /* Ok, we actually do need to flush */
+    ObtainWriteLock(&afs_xcbhash, 957);
+    afs_DequeueCallback(avc);
+    avc->states &= ~(CStatd | CDirty);  /* next reference will re-stat cache entry */
+    ReleaseWriteLock(&afs_xcbhash);
+    /* now find the disk cache entries */
+    afs_TryToSmush(avc, acred, 1);
+    osi_dnlc_purgedp(avc);
+    if (avc->linkData && !(avc->states & CCore)) {
+        afs_osi_Free(avc->linkData, strlen(avc->linkData) + 1);
+        avc->linkData = NULL;
+    }
+
+    avc->cachingStates &= ~(FCSBypass); /* Reset the bypass flag */
+    if (resetDesire)
+        avc->cachingStates &= ~(FCSDesireBypass);
+    if (setManual)
+        avc->cachingStates |= FCSManuallySet;
+    avc->cachingTransitions++;
+
+done:
+    ReleaseWriteLock(&avc->lock);
+#ifdef AFS_BOZONLOCK_ENV
+    afs_BozonUnlock(&avc->pvnLock, avc);
+#else
+    AFS_GUNLOCK();
+#endif
+}
+
+/* In the case where there's an error in afs_NoCacheFetchProc or
+ * afs_PrefetchNoCache, all of the pages they've been passed need
+ * to be unlocked.
+ *
+ * unlock_pages(auio) walks every iovec of auio, treats iov_base as a
+ * struct page pointer and unlocks each non-NULL page that is locked.
+ * The first iovec is read unconditionally, so auio must carry at least
+ * one.  The macro expands to a single statement without a trailing
+ * semicolon; its locals (ciov, pp, iovmax, iovno) live in their own
+ * scope, so the argument must not be an expression using those names.
+ */
+#if defined(AFS_LINUX24_ENV)
+#define unlock_pages(auio) \
+    do { \
+        struct iovec *ciov; \
+        struct page *pp; \
+        afs_int32 iovmax; \
+        afs_int32 iovno = 0; \
+        ciov = (auio)->uio_iov; \
+        iovmax = (auio)->uio_iovcnt - 1; \
+        pp = (struct page*) ciov->iov_base; \
+        afs_warn("BYPASS: Unlocking pages..."); \
+        while(1) { \
+            if(pp != NULL && PageLocked(pp)) \
+                UnlockPage(pp); \
+            iovno++; \
+            if(iovno > iovmax) \
+                break; \
+            ciov = ((auio)->uio_iov + iovno); \
+            pp = (struct page*) ciov->iov_base; \
+        } \
+        afs_warn("Pages Unlocked.\n"); \
+    } while(0)
+#else
+#ifdef UKERNEL
+#define unlock_pages(auio) \
+    do { } while(0)
+#else
+#error AFS_CACHE_BYPASS not implemented on this platform
+#endif
+#endif
+
+/* no-cache prefetch routine
+ *
+ * Reads the reply data of a fetch from acall and delivers it, one iovec
+ * at a time, to the pages listed in auio.  Each iov_base is a struct page
+ * pointer; a NULL iov_base marks a slot without a page, whose data is
+ * read and dropped.
+ *
+ * acall          - rx call to read from
+ * avc            - vcache; only its CForeign state bit is consulted here
+ * auio           - uio whose iovecs describe the destination pages
+ * release_pages  - when non-zero, every filled page is marked up to date
+ *                  and unlocked here
+ *
+ * On an rx_Read failure all pages in auio are unlocked and the routine
+ * stops.  NOTE(review): result is only ever set to 0, so callers cannot
+ * tell failure from success by the return value -- confirm intended.
+ */
+static afs_int32
+afs_NoCacheFetchProc(register struct rx_call *acall,
+                     register struct vcache *avc,
+                     register uio_t *auio,
+                     afs_int32 release_pages)
+{
+    afs_int32 length;
+    afs_int32 code;
+    int tlen;
+    int moredata, iovno, iovoff, iovmax, clen, result, locked;
+    struct iovec *ciov;
+    struct page *pp;
+    char *address;
+#ifdef AFS_KMAP_ATOMIC
+    /* bounce buffer: rx_Read fills it, then it is copied into the page */
+    char *page_buffer = osi_Alloc(PAGE_SIZE);
+#else
+    char *page_buffer = NULL;
+#endif
+
+    ciov = auio->uio_iov;
+    pp = (struct page*) ciov->iov_base;
+    iovmax = auio->uio_iovcnt - 1;
+    iovno = iovoff = result = 0;
+    do {
+
+        COND_GUNLOCK(locked);
+        code = rx_Read(acall, (char *)&length, sizeof(afs_int32));
+        COND_RE_GLOCK(locked);
+
+        if (code != sizeof(afs_int32)) {
+            result = 0;
+            /* sizeof yields size_t; cast so it matches the %d conversion */
+            afs_warn("Preread error. code: %d instead of %d\n",
+                     code, (int) sizeof(afs_int32));
+            unlock_pages(auio);
+            goto done;
+        } else
+            length = ntohl(length);
+
+        /*
+         * The fetch protocol is extended for the AFS/DFS translator
+         * to allow multiple blocks of data, each with its own length,
+         * to be returned. As long as the top bit is set, there are more
+         * blocks expected.
+         *
+         * We do not do this for AFS file servers because they sometimes
+         * return large negative numbers as the transfer size.
+         */
+        if (avc->states & CForeign) {
+            moredata = length & 0x80000000;
+            length &= ~0x80000000;
+        } else {
+            moredata = 0;
+        }
+
+        while (length > 0) {
+
+            clen = ciov->iov_len - iovoff;
+            tlen = afs_min(length, clen);
+#ifdef AFS_LINUX24_ENV
+#ifndef AFS_KMAP_ATOMIC
+            if(pp)
+                address = kmap(pp);
+            else {
+                /* rx doesn't provide an interface to simply advance
+                   or consume n bytes. for now, allocate a PAGE_SIZE
+                   region of memory to receive bytes in the case that
+                   there were holes in readpages */
+                if(page_buffer == NULL)
+                    page_buffer = osi_Alloc(PAGE_SIZE);
+                address = page_buffer;
+            }
+#else
+            /* continue after whatever an earlier short read already
+             * placed in the buffer for this iovec */
+            address = page_buffer + iovoff;
+#endif
+#else
+#ifndef UKERNEL
+#error AFS_CACHE_BYPASS not implemented on this platform
+#endif
+#endif /* LINUX24 */
+            COND_GUNLOCK(locked);
+            code = rx_Read(acall, address, tlen);
+            COND_RE_GLOCK(locked);
+
+            if (code < 0) {
+                afs_warn("afs_NoCacheFetchProc: rx_Read error. Return code was %d\n", code);
+                result = 0;
+                unlock_pages(auio);
+                goto done;
+            } else if (code == 0) {
+                result = 0;
+                afs_warn("afs_NoCacheFetchProc: rx_Read returned zero. Aborting.\n");
+                unlock_pages(auio);
+                goto done;
+            }
+            length -= code;
+            tlen -= code;
+
+            if(tlen > 0) {
+                /* short read: stay on this iovec and read the rest */
+                iovoff += code;
+                address += code;
+
+            } else {
+#ifdef AFS_LINUX24_ENV
+#ifdef AFS_KMAP_ATOMIC
+                if(pp) {
+                    /* bytes of this page actually received */
+                    int filled = iovoff + code;
+
+                    /* The buffer is reused for every page; clear the
+                     * part not covered by this (short, final) page so
+                     * bytes from an earlier page are not copied in. */
+                    if (filled < PAGE_SIZE)
+                        memset(page_buffer + filled, 0, PAGE_SIZE - filled);
+                    address = kmap_atomic(pp, KM_USER0);
+                    memcpy(address, page_buffer, PAGE_SIZE);
+                    kunmap_atomic(address, KM_USER0);
+                }
+#endif
+#else
+#ifndef UKERNEL
+#error AFS_CACHE_BYPASS not implemented on this platform
+#endif
+#endif /* LINUX 24 */
+                /* we filled a page, conditionally release it */
+                if(release_pages && ciov->iov_base) {
+                    /* this is appropriate when no caller intends to unlock
+                     * and release the page */
+#ifdef AFS_LINUX24_ENV
+                    SetPageUptodate(pp);
+                    if(PageLocked(pp))
+                        UnlockPage(pp);
+                    else
+                        afs_warn("afs_NoCacheFetchProc: page not locked at iovno %d!\n", iovno);
+
+#ifndef AFS_KMAP_ATOMIC
+                    kunmap(pp);
+#endif
+#else
+#ifndef UKERNEL
+#error AFS_CACHE_BYPASS not implemented on this platform
+#endif
+#endif /* LINUX24 */
+                }
+                /* and carry uio_iov */
+                iovno++;
+                if(iovno > iovmax) goto done;
+
+                ciov = (auio->uio_iov + iovno);
+                pp = (struct page*) ciov->iov_base;
+                iovoff = 0;
+            }
+        }
+    } while (moredata);
+
+done:
+    if(page_buffer)
+        osi_Free(page_buffer, PAGE_SIZE);
+    return result;
+}
+
+
+/*
+ * Dispatch a no-cache read request.
+ *
+ * Verifies the vcache and then hands the request described by bparms to a
+ * background daemon as a BOP_FETCH_NOCACHE operation.  Queueing is retried
+ * a bounded number of times with a growing delay between attempts.
+ *
+ * avc    - vcache of the file being read
+ * bparms - request description; its areq field is filled in here
+ * acred  - credentials used for the request
+ *
+ * Returns 0 once the request has been queued.  On any failure the pages in
+ * bparms->auio are unlocked (Linux), areq, the iovec array, the uio and
+ * bparms itself are freed here, and an error code is returned (EIO, the
+ * afs_InitReq/afs_CheckCode result, or EBUSY if the queue stayed full).
+ */
+afs_int32
+afs_ReadNoCache(register struct vcache *avc,
+                register struct nocache_read_request *bparms,
+                struct AFS_UCRED *acred)
+{
+    afs_int32 code;
+    afs_int32 bcnt;
+    struct brequest *breq = NULL;
+    struct vrequest *areq;
+
+    /* the receiver (afs_PrefetchNoCache) will free this */
+    areq = osi_Alloc(sizeof(struct vrequest));
+
+    if (avc && avc->vc_error) {
+        code = EIO;
+        afs_warn("afs_ReadNoCache VCache Error!\n");
+        goto cleanup;
+    }
+    if ((code = afs_InitReq(areq, acred))) {
+        afs_warn("afs_ReadNoCache afs_InitReq error!\n");
+        goto cleanup;
+    }
+
+    AFS_GLOCK();
+    code = afs_VerifyVCache(avc, areq);
+    AFS_GUNLOCK();
+
+    if (code) {
+        code = afs_CheckCode(code, areq, 11);	/* failed to get it */
+        afs_warn("afs_ReadNoCache Failed to verify VCache!\n");
+        goto cleanup;
+    }
+
+    bparms->areq = areq;
+
+    /* and queue this one; the attempt counter must advance on every pass,
+     * otherwise a persistently full queue would make this loop spin forever
+     * and the EBUSY path below could never be reached */
+    AFS_GLOCK();
+    for (bcnt = 1; bcnt < 20; bcnt++) {
+        breq = afs_BQueue(BOP_FETCH_NOCACHE, avc, B_DONTWAIT, 0, acred, 1, 1, bparms);
+        if (breq != 0) {
+            code = 0;
+            break;
+        }
+        /* back off a little longer after each failed attempt */
+        afs_osi_Wait(10 * bcnt, 0, 0);
+    }
+    AFS_GUNLOCK();
+
+    if (!breq) {
+        code = EBUSY;
+        goto cleanup;
+    }
+
+    return code;
+
+cleanup:
+    /* If there's a problem before we queue the request, we need to
+     * do everything that would normally happen when the request was
+     * processed, like unlocking the pages and freeing memory.
+     */
+#ifdef AFS_LINUX24_ENV
+    unlock_pages(bparms->auio);
+#else
+#ifndef UKERNEL
+#error AFS_CACHE_BYPASS not implemented on this platform
+#endif
+#endif
+    osi_Free(areq, sizeof(struct vrequest));
+    osi_Free(bparms->auio->uio_iov, bparms->auio->uio_iovcnt * sizeof(struct iovec));
+    osi_Free(bparms->auio, sizeof(uio_t));
+    osi_Free(bparms, sizeof(struct nocache_read_request));
+    return code;
+
+}
+
+
+/*
+ * No-cache prefetch routine: fetch file data straight from the file server
+ * into the pages described by bparms->auio, without going through the disk
+ * or memory cache.
+ *
+ * avc    - vcache of the file being read
+ * acred  - caller credentials (not referenced in this function body)
+ * bparms - request built by the reader; supplies auio, areq and length
+ *
+ * This function takes ownership of the request: areq, the iovec array, the
+ * uio and bparms itself are all freed before returning, on success and on
+ * failure alike.
+ *
+ * Returns 0 on success, otherwise the error from the RPC, -1 when no
+ * connection could be obtained, or EFBIG when a non-64-bit server is asked
+ * for an offset above 2^31-1.
+ *
+ * Cannot have static linkage--called from BPrefetch (afs_daemons)
+ */
+afs_int32
+afs_PrefetchNoCache(register struct vcache *avc,
+                    register struct AFS_UCRED *acred,
+                    register struct nocache_read_request *bparms)
+{
+    uio_t *auio;
+    struct iovec *iovecp;
+    struct vrequest *areq;
+    /* code starts at 0 so the first RXGEN_OPCODE comparison in the 64-bit
+     * path never reads an indeterminate value */
+    afs_int32 code = 0, length_hi, bytes, locked;
+
+    register struct conn *tc;
+    afs_int32 i;
+    struct rx_call *tcall;
+    struct tlocal1 {
+        struct AFSVolSync tsync;
+        struct AFSFetchStatus OutStatus;
+        struct AFSCallBack CallBack;
+    };
+    struct tlocal1 *tcallspec;
+
+    auio = bparms->auio;
+    areq = bparms->areq;
+    iovecp = auio->uio_iov;
+
+    tcallspec = (struct tlocal1 *) osi_Alloc(sizeof(struct tlocal1));
+    do {
+        tc = afs_Conn(&avc->fid, areq, SHARED_LOCK /* ignored */);
+        if (tc) {
+            avc->callback = tc->srvr->server;
+            i = osi_Time();
+            tcall = rx_NewCall(tc->id);
+#ifdef AFS_64BIT_CLIENT
+            if (!afs_serverHasNo64Bit(tc)) {
+                code = StartRXAFS_FetchData64(tcall,
+                                              (struct AFSFid *) &avc->fid.Fid,
+                                              auio->uio_offset,
+                                              bparms->length);
+                if (code == 0) {
+
+                    COND_GUNLOCK(locked);
+                    bytes = rx_Read(tcall, (char *)&length_hi, sizeof(afs_int32));
+                    COND_RE_GLOCK(locked);
+
+                    if (bytes != sizeof(afs_int32)) {
+                        length_hi = 0;
+                        code = rx_Error(tcall);
+                        COND_GUNLOCK(locked);
+                        code = rx_EndCall(tcall, code);
+                        COND_RE_GLOCK(locked);
+                        tcall = (struct rx_call *)0;
+                    }
+                }
+                /* fall back to the 32-bit RPC if the server rejected the
+                 * 64-bit opcode or is already known not to support it */
+                if (code == RXGEN_OPCODE || afs_serverHasNo64Bit(tc)) {
+                    if (auio->uio_offset > 0x7FFFFFFF) {
+                        code = EFBIG;
+                    } else {
+                        afs_int32 pos;
+                        pos = auio->uio_offset;
+                        COND_GUNLOCK(locked);
+                        if (!tcall)
+                            tcall = rx_NewCall(tc->id);
+                        code = StartRXAFS_FetchData(tcall,
+                                                    (struct AFSFid *) &avc->fid.Fid,
+                                                    pos, bparms->length);
+                        COND_RE_GLOCK(locked);
+                    }
+                    afs_serverSetNo64Bit(tc);
+                }
+            } /* afs_serverHasNo64Bit */
+#else
+            code = StartRXAFS_FetchData(tcall,
+                                        (struct AFSFid *) &avc->fid.Fid,
+                                        auio->uio_offset, bparms->length);
+#endif
+
+            if (code == 0) {
+                code = afs_NoCacheFetchProc(tcall, avc, auio,
+                                            1 /* release_pages */);
+            } else {
+                /* NOTE(review): tcall, when non-NULL, is not ended on this
+                 * path -- looks like an rx call leak; confirm and fix
+                 * together with the GLOCK handling around rx_EndCall */
+                afs_warn("BYPASS: StartRXAFS_FetchData failed: %d\n", code);
+                unlock_pages(auio);
+                goto done;
+            }
+            if (code == 0) {
+                code = EndRXAFS_FetchData(tcall,
+                                          &tcallspec->OutStatus,
+                                          &tcallspec->CallBack,
+                                          &tcallspec->tsync);
+            } else {
+                afs_warn("BYPASS: NoCacheFetchProc failed: %d\n", code);
+            }
+            code = rx_EndCall(tcall, code);
+        }
+        else {
+            afs_warn("BYPASS: No connection.\n");
+            code = -1;
+#ifdef AFS_LINUX24_ENV
+            unlock_pages(auio);
+#else
+#ifndef UKERNEL
+#error AFS_CACHE_BYPASS not implemented on this platform
+#endif
+#endif
+            goto done;
+        }
+    } while (afs_Analyze(tc, code, &avc->fid, areq,
+                         AFS_STATS_FS_RPCIDX_FETCHDATA,
+                         SHARED_LOCK,0));
+done:
+    /*
+     * Copy appropriate fields into vcache -- but only when the fetch
+     * succeeded.  OutStatus is filled in solely by EndRXAFS_FetchData; on
+     * every failure path it still holds whatever osi_Alloc returned, and
+     * merging that into the vcache would corrupt its status.
+     */
+    if (code == 0)
+        afs_ProcessFS(avc, &tcallspec->OutStatus, areq);
+
+    osi_Free(areq, sizeof(struct vrequest));
+    osi_Free(tcallspec, sizeof(struct tlocal1));
+    osi_Free(iovecp, auio->uio_iovcnt * sizeof(struct iovec));
+    osi_Free(bparms, sizeof(struct nocache_read_request));
+    osi_Free(auio, sizeof(uio_t));
+    return code;
+}
+
+#endif /* AFS_CACHE_BYPASS */
--- /dev/null
+/*
+ * COPYRIGHT © 2000
+ * THE REGENTS OF THE UNIVERSITY OF MICHIGAN
+ * ALL RIGHTS RESERVED
+ *
+ * Permission is granted to use, copy, create derivative works
+ * and redistribute this software and such derivative works
+ * for any purpose, so long as the name of The University of
+ * Michigan is not used in any advertising or publicity
+ * pertaining to the use or distribution of this software
+ * without specific, written prior authorization. If the
+ * above copyright notice or any other identification of the
+ * University of Michigan is included in any copy of any
+ * portion of this software, then the disclaimer below must
+ * also be included.
+ *
+ * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION
+ * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY
+ * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF
+ * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
+ * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE
+ * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR
+ * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
+ * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGES.
+ */
+
+ /*
+ * Portions Copyright (c) 2008
+ * The Linux Box Corporation
+ * ALL RIGHTS RESERVED
+ *
+ * Permission is granted to use, copy, create derivative works
+ * and redistribute this software and such derivative works
+ * for any purpose, so long as the name of the Linux Box
+ * Corporation is not used in any advertising or publicity
+ * pertaining to the use or distribution of this software
+ * without specific, written prior authorization. If the
+ * above copyright notice or any other identification of the
+ * Linux Box Corporation is included in any copy of any
+ * portion of this software, then the disclaimer below must
+ * also be included.
+ *
+ * This software is provided as is, without representation
+ * from the Linux Box Corporation as to its fitness for any
+ * purpose, and without warranty by the Linux Box Corporation
+ * of any kind, either express or implied, including
+ * without limitation the implied warranties of
+ * merchantability and fitness for a particular purpose. The
+ * Linux Box Corporation shall not be liable for any damages,
+ * including special, indirect, incidental, or consequential
+ * damages, with respect to any claim arising out of or in
+ * connection with the use of the software, even if it has been
+ * or is hereafter advised of the possibility of such damages.
+ */
+
+
+#ifndef _AFS_BYPASSCACHE_H
+#define _AFS_BYPASSCACHE_H
+
+#if defined(AFS_CACHE_BYPASS)
+
+#include <afsconfig.h>
+#include "afs/param.h"
+#include "afs/sysincludes.h"
+#include "afsincludes.h"
+
+/* Threshold value meaning "cache bypass is disabled".  Parenthesized so the
+ * negative literal is safe inside any surrounding expression. */
+#define AFS_CACHE_BYPASS_DISABLED (-1)
+
+#ifdef UKERNEL
+typedef struct uio uio_t;
+#ifndef PAGE_SIZE
+/* Fallback page size for user-space builds that do not define one.  The
+ * whole expression is parenthesized so that uses such as `x % PAGE_SIZE`
+ * or `n / PAGE_SIZE` evaluate as intended. */
+#define PAGE_SIZE (1024 * sizeof(long) / 8)
+#endif
+#endif
+
+/* Describes one cache-bypass read.  A pointer to an object of this type is
+ * handed to afs_BQueue as the pointer parameter of a BOP_FETCH_NOCACHE
+ * request and is retrieved by the background daemon as ab->ptr_parm[0].
+ * The field set is platform-specific; only the Linux layout is used by the
+ * bypass code in afs_bypasscache.c. */
+struct nocache_read_request {
+ /* Each platform carries different per-request state. */
+#if defined(AFS_SUN5_ENV)
+ /* SOLARIS */
+ u_offset_t offset;
+ struct seg *segment;
+ caddr_t address;
+#elif defined(AFS_SGI_ENV)
+ /* SGI (of some vintage) */
+ int32 offset;
+ int32 rem;
+ int32 pmp; /* mmm */
+ int32 length;
+#elif defined(AFS_LINUX24_ENV) || defined(AFS_USR_LINUX24_ENV)
+ /* The tested platform, as CITI impl. just packs ab->parms */
+ /* uio describing the destination; its uio_offset is the file offset of
+  * the fetch and its iovec array is freed by whoever consumes the request */
+ uio_t * auio;
+ /* request context; set by afs_ReadNoCache, freed by afs_PrefetchNoCache */
+ struct vrequest *areq;
+ /* NOTE(review): presumably the file offset of the read -- the fetch code
+  * uses auio->uio_offset instead; confirm against the caller */
+ afs_size_t offset;
+ /* number of bytes requested from the file server */
+ afs_size_t length;
+#endif
+};
+
+/* Possible values of cache_bypass_strategy.  The first two are fixed
+ * policies; LARGE_FILES_BYPASS_CACHE is the only one for which
+ * variable_cache_strategy is true, i.e. the only one under which a vnode
+ * may switch between caching and bypass at run time. */
+enum cache_bypass_strategies
+{
+ ALWAYS_BYPASS_CACHE,
+ NEVER_BYPASS_CACHE,
+ /* presumably: bypass for files above cache_bypass_threshold -- the
+  * comparison itself lives outside this header; verify there */
+ LARGE_FILES_BYPASS_CACHE
+};
+
+/* Global cache-bypass tunables, defined outside this header. */
+/* NOTE(review): meaning of cache_bypass_prefetch is not visible here --
+ * presumably enables prefetching on the bypass path; confirm */
+extern int cache_bypass_prefetch;
+/* one of enum cache_bypass_strategies */
+extern int cache_bypass_strategy;
+/* size threshold read and written through the PSetCachingThreshold pioctl */
+extern int cache_bypass_threshold;
+
+/* Switch a vnode into / out of bypass mode.  The int argument is a TRANS*
+ * flag; trydo_cache_transition passes TRANSChangeDesiredBit. */
+void afs_TransitionToBypass(register struct vcache *, register struct AFS_UCRED *, int);
+void afs_TransitionToCaching(register struct vcache *, register struct AFS_UCRED *, int);
+
+/* True when the configured strategy lets a vnode move between caching and
+ * no-cache, i.e. the strategy is neither of the two fixed policies.  At
+ * present that leaves only LARGE_FILES_BYPASS_CACHE.  No pioctl currently
+ * allows FCSBypass to be set by hand for a single vnode. */
+#define variable_cache_strategy \
+    ((cache_bypass_strategy != ALWAYS_BYPASS_CACHE) && \
+     (cache_bypass_strategy != NEVER_BYPASS_CACHE))
+
+/* Cache-coherently toggle cache/no-cache for a vnode.
+ *
+ * Does nothing unless variable_cache_strategy is true.  Otherwise, when
+ * bypasscache is non-zero and FCSBypass is not yet set in
+ * avc->cachingStates, calls afs_TransitionToBypass; when bypasscache is zero
+ * and FCSBypass is set, calls afs_TransitionToCaching.  Both are invoked
+ * with TRANSChangeDesiredBit.
+ *
+ * The avc argument is parenthesized wherever it is dereferenced so that any
+ * pointer-valued expression can be passed safely.
+ *
+ * NOTE(review): the trailing semicolon after while(0) is kept for
+ * compatibility with existing call sites; because of it the macro cannot be
+ * used as the sole statement of an if that has an else.
+ */
+#define trydo_cache_transition(avc, credp, bypasscache) \
+    do { \
+        if(variable_cache_strategy) { \
+            if(bypasscache) { \
+                if(!((avc)->cachingStates & FCSBypass)) \
+                    afs_TransitionToBypass((avc), (credp), TRANSChangeDesiredBit); \
+            } else { \
+                if((avc)->cachingStates & FCSBypass) \
+                    afs_TransitionToCaching((avc), (credp), TRANSChangeDesiredBit); \
+            } \
+        } \
+    } \
+    while(0);
+
+/* Dispatch a no-cache read request: queues bparms for a background daemon
+ * as a BOP_FETCH_NOCACHE operation.  Returns 0 when queued, otherwise an
+ * error code. */
+afs_int32
+afs_ReadNoCache(register struct vcache *avc, register struct nocache_read_request *bparms,
+ struct AFS_UCRED *acred);
+
+/* No-cache prefetch routine: performs the fetch described by bparms; called
+ * from the background daemon code in afs_daemons. */
+afs_int32
+afs_PrefetchNoCache(register struct vcache *avc, register struct AFS_UCRED *acred,
+ struct nocache_read_request *bparms);
+
+
+#endif /* AFS_CACHE_BYPASS */
+#endif /* _AFS_BYPASSCACHE_H */
+
#include <sys/adspace.h> /* for vm_att(), vm_det() */
#endif
-
+#if defined(AFS_CACHE_BYPASS)
+#include "afs/afs_bypasscache.h"
+#endif// defined(AFS_CACHE_BYPASS)
/* background request queue size */
afs_lock_t afs_xbrs; /* lock for brs */
static int brsInit = 0;
}
}
+#if defined(AFS_CACHE_BYPASS)
+/*
+ * Background-daemon handler for BOP_FETCH_NOCACHE requests.
+ *
+ * ab - the queued request; ab->ptr_parm[0] carries the
+ *      struct nocache_read_request built by the reader.
+ *
+ * Bails out without doing anything if a request context cannot be
+ * initialised for ab->cred; otherwise hands the request to
+ * afs_PrefetchNoCache (kernel builds only).
+ *
+ * NOTE(review): on the early return the nocache_read_request is neither
+ * processed nor released here -- confirm whether its pages and memory need
+ * to be cleaned up on that path.
+ */
+static void
+BPrefetchNoCache(register struct brequest *ab)
+{
+    struct vrequest treq;
+    afs_int32 code;	/* an error code, not a length */
+
+    if ((code = afs_InitReq(&treq, ab->cred)))
+        return;
+
+#ifndef UKERNEL
+    /* OS-specific prefetch routine */
+    afs_PrefetchNoCache(ab->vc, ab->cred, (struct nocache_read_request *) ab->ptr_parm[0]);
+#else
+#warning Cache-bypass code path not implemented in UKERNEL
+#endif
+}
+#endif
static void
BStore(register struct brequest *ab)
tb->opcode);
if (tb->opcode == BOP_FETCH)
BPrefetch(tb);
+#if defined(AFS_CACHE_BYPASS)
+ else if (tb->opcode == BOP_FETCH_NOCACHE)
+ BPrefetchNoCache(tb);
+#endif
else if (tb->opcode == BOP_STORE)
BStore(tb);
else if (tb->opcode == BOP_PATH)
#endif /* AFS_NOSTATS */
AFS_STATCNT(afs_GetDCache);
-
if (dcacheDisabled)
return NULL;
|| afs_freeDCList != NULLIDX)
break;
/* If we can't get space for 5 mins we give up and panic */
- if (++downDCount > 300)
+ if (++downDCount > 300) {
+#if defined(AFS_CACHE_BYPASS)
+ afs_warn("GetDCache calling osi_Panic: No space in five minutes.\n downDCount: %d\n aoffset: %d alen: %d\n", downDCount, aoffset, alen);
+#endif
osi_Panic("getdcache");
+ }
MReleaseWriteLock(&afs_xdcache);
/*
* Locks held:
{
int vfslocked;
afs_hyper_t origDV;
+#if defined(AFS_CACHE_BYPASS)
+ /* The optimization to check DV under read lock below is identical to a
+ * change in the CITI cache bypass work. The problem CITI found in 1999
+ * was that this code and background daemon doing prefetching competed
+ * for the vcache entry shared lock. It's not clear to me from the
+ * tech report, but it looks like CITI fixed the general prefetch code
+ * path as a bonus when experimenting on prefetch for cache bypass, see
+ * citi-tr-01-3.
+ */
+#endif
ObtainReadLock(&avc->lock);
/* If we've already purged this version, or if we're the ones
* writing this version, don't flush it (could lose the
#include "afsincludes.h" /* Afs-based standard headers */
#include "afs/afs_stats.h" /* afs statistics */
#include "afs/vice.h"
+#include "afs/afs_bypasscache.h"
#include "rx/rx_globals.h"
struct VenusFid afs_rootFid;
DECL_PIOCTL(PNFSNukeCreds);
DECL_PIOCTL(PNewUuid);
DECL_PIOCTL(PPrecache);
+#if defined(AFS_CACHE_BYPASS)
+DECL_PIOCTL(PSetCachingThreshold);
+DECL_PIOCTL(PSetCachingBlkSize);
+#endif
/*
* A macro that says whether we're going to need HandleClientContext().
static int (*(OpioctlSw[])) () = {
PBogus, /* 0 */
- PNFSNukeCreds, /* 1 -- nuke all creds for NFS client */
+ PNFSNukeCreds, /* 1 -- nuke all creds for NFS client */
+#if defined(AFS_CACHE_BYPASS)
+ PSetCachingThreshold /* 2 -- get/set cache-bypass size threshold */
+#else
+ PNoop /* 2 -- slot reserved for the cache-bypass threshold pioctl; no-op when bypass is not compiled in */
+#endif
};
#define PSetClientContext 99 /* Special pioctl to setup caller's creds */
return 0;
}
+#if defined(AFS_CACHE_BYPASS)
+
+/*
+ * pioctl: get and/or set the cache-bypass size threshold.
+ *
+ * A set is performed when the input buffer holds at least one afs_int32;
+ * a get is performed when an output buffer is supplied.  When both apply,
+ * the set happens first so the value returned is the one now in effect.
+ * Setting requires superuser credentials and also switches the strategy to
+ * LARGE_FILES_BYPASS_CACHE.
+ *
+ * Returns 0 on success, EINVAL when neither a get nor a set was requested,
+ * EPERM when a non-superuser attempts a set.
+ */
+DECL_PIOCTL(PSetCachingThreshold)
+{
+    afs_int32 wantSet = 1;
+    afs_int32 wantGet = 1;
+
+    if (aout == NULL)
+        wantGet = 0;
+
+    if (ain == NULL || ainSize < sizeof(afs_int32))
+        wantSet = 0;
+
+    if (!wantSet && !wantGet)
+        return EINVAL;
+
+    if (wantSet) {
+        afs_int32 newThreshold;
+
+        if (!afs_osi_suser(*acred))
+            return EPERM;
+
+        memcpy((char *)&newThreshold, ain, sizeof(afs_int32));
+        cache_bypass_threshold = newThreshold;
+        afs_warn("Cache Bypass Threshold set to: %d\n", newThreshold);
+        /* TODO: move to separate pioctl, or enhance pioctl */
+        cache_bypass_strategy = LARGE_FILES_BYPASS_CACHE;
+    }
+
+    if (wantGet) {
+        /* Report the size threshold currently in effect */
+        afs_int32 current = cache_bypass_threshold;
+
+        memcpy(aout, (char *)&current, sizeof(afs_int32));
+        *aoutSize = sizeof(afs_int32);
+    }
+
+    return 0;
+}
+
+#endif /* defined(AFS_CACHE_BYPASS) */
+
DECL_PIOCTL(PCallBackAddr)
{
#ifndef UKERNEL
afs_int32 C_SRXAFSCB_GetCacheConfig; /* afs_callback.c */
afs_int32 C_SRXAFSCB_GetCE64; /* afs_callback.c */
afs_int32 C_SRXAFSCB_GetCellByNum; /* afs_callback.c */
+#if defined(AFS_CACHE_BYPASS)
+ afs_int32 C_BPrefetchNoCache; /* afs_daemons.c */
+ afs_int32 C_afs_ReadNoCache; /* osi_vnodeops.c */
+#endif
};
struct afs_CMMeanStats {
tvc->vmh = tvc->segid = NULL;
tvc->credp = NULL;
#endif
+
+#if defined(AFS_CACHE_BYPASS)
+ tvc->cachingStates = 0;
+ tvc->cachingTransitions = 0;
+#endif
+
#ifdef AFS_BOZONLOCK_ENV
#if defined(AFS_SUN5_ENV)
rw_init(&tvc->rwlock, "vcache rwlock", RW_DEFAULT, NULL);
int ownerRWmode = 0600; /*Read/write OK by owner */
static int filesSet = 0; /*True if number of files explicitly set */
static int nFilesPerDir = 2048; /* # files per cache dir */
-static int nDaemons = 2; /* Number of background daemons */
+#if defined(AFS_CACHE_BYPASS)
+#define AFSD_NDAEMONS 4
+#else
+#define AFSD_NDAEMONS 2
+#endif
+static int nDaemons = AFSD_NDAEMONS; /* Number of background daemons */
static int chunkSize = 0; /* 2^chunkSize bytes per chunk */
static int dCacheSize; /* # of dcache entries */
static int vCacheSize = 200; /* # of volume cache entries */
#endif /* !defined(UKERNEL) */
+#if defined(AFS_CACHE_BYPASS)
+/* Uncoordinated 'O' pioctls */
+#define VIOC_SETBYPASS_THRESH _OVICEIOCTL(2) /* cache-bypass size thresh */
+#endif
+
#endif /* AFS_VENUS_H */
afs_analyze.o \
afs_axscache.o \
afs_buffer.o \
+ afs_bypasscache.o \
afs_callback.o \
afs_cbqueue.o \
afs_cell.o \
$(CRULE_OPT)
afs_buffer.o: $(TOP_SRC_AFS)/afs_buffer.c
$(CRULE_OPT)
+afs_bypasscache.o: $(TOP_SRC_AFS)/afs_bypasscache.c
+ $(CRULE_OPT)
afs_cell.o: $(TOP_SRC_AFS)/afs_cell.c
$(CRULE_OPT)
afs_conn.o: $(TOP_SRC_AFS)/afs_conn.c
$(UOBJ)/afs_error.o \
$(UOBJ)/afs_axscache.o \
$(UOBJ)/afs_buffer.o \
+ $(UOBJ)/afs_bypasscache.o \
$(UOBJ)/afs_callback.o \
$(UOBJ)/afs_cbqueue.o \
$(UOBJ)/afs_cell.o \
$(WEBOBJ)/afs_error.o \
$(WEBOBJ)/afs_axscache.o \
$(WEBOBJ)/afs_buffer.o \
+ $(WEBOBJ)/afs_bypasscache.o \
$(WEBOBJ)/afs_callback.o \
$(WEBOBJ)/afs_cbqueue.o \
$(WEBOBJ)/afs_cell.o \
$(WEBOBJ)/afs_error.o \
$(WEBOBJ)/afs_axscache.o \
$(WEBOBJ)/afs_buffer.o \
+ $(WEBOBJ)/afs_bypasscache.o \
$(WEBOBJ)/afs_callback.o \
$(WEBOBJ)/afs_cbqueue.o \
$(WEBOBJ)/afs_cell.o \
$(JUAFS)/afs_error.o \
$(JUAFS)/afs_axscache.o \
$(JUAFS)/afs_buffer.o \
+ $(JUAFS)/afs_bypasscache.o \
$(JUAFS)/afs_callback.o \
$(JUAFS)/afs_cbqueue.o \
$(JUAFS)/afs_cell.o \
$(CRULE1)
$(UOBJ)/afs_buffer.o: $(TOP_SRC_AFS)/afs_buffer.c
$(CRULE1)
+$(UOBJ)/afs_bypasscache.o: $(TOP_SRC_AFS)/afs_bypasscache.c
+ $(CRULE1)
$(UOBJ)/afs_cell.o: $(TOP_SRC_AFS)/afs_cell.c
$(CRULE1)
$(UOBJ)/afs_conn.o: $(TOP_SRC_AFS)/afs_conn.c
$(CRULE2)
$(WEBOBJ)/afs_buffer.o: $(TOP_SRC_AFS)/afs_buffer.c
$(CRULE2)
+$(WEBOBJ)/afs_bypasscache.o: $(TOP_SRC_AFS)/afs_bypasscache.c
+ $(CRULE2)
$(WEBOBJ)/afs_cell.o: $(TOP_SRC_AFS)/afs_cell.c
$(CRULE2)
$(WEBOBJ)/afs_conn.o: $(TOP_SRC_AFS)/afs_conn.c
$(CRULE1)
$(JUAFS)/afs_buffer.o: $(TOP_SRC_AFS)/afs_buffer.c
$(CRULE1)
+$(JUAFS)/afs_bypasscache.o: $(TOP_SRC_AFS)/afs_bypasscache.c
+ $(CRULE1)
$(JUAFS)/afs_cell.o: $(TOP_SRC_AFS)/afs_cell.c
$(CRULE1)
$(JUAFS)/afs_conn.o: $(TOP_SRC_AFS)/afs_conn.c
return 0;
}
+#if defined(AFS_CACHE_BYPASS)
+/*
+ * fs bypassthreshold [-size <n>]
+ *
+ * Get or set the cache-bypass threshold.  Files larger than this size will
+ * not be cached.  With a threshold of 0, the cache is always bypassed.  With
+ * a threshold of -1, cache bypass is disabled.
+ *
+ * When -size is given it must consist solely of decimal digits; values too
+ * large for a 32-bit signed integer are clamped to 2147483647.  The
+ * threshold in effect after the call is printed.
+ *
+ * NOTE(review): because only digits are accepted, -1 (disabled) cannot be
+ * requested through this command -- confirm whether that is intended.
+ *
+ * Returns 0 on success, EINVAL for a malformed -size argument, 1 if the
+ * pioctl fails.
+ */
+
+static int
+BypassThresholdCmd(struct cmd_syndesc *as, void *arock)
+{
+    afs_int32 code;
+    struct ViceIoctl blob;
+    afs_int32 threshold_i, threshold_o;
+    char *tp;
+
+    /* if new threshold supplied, then set and confirm, else,
+     * get current threshold and print
+     */
+
+    if (as->parms[0].items) {
+        size_t ix, len;
+        long value;
+
+        tp = as->parms[0].items->data;
+        len = strlen(tp);
+        /* every character must be a digit, not just the first one */
+        for (ix = 0; ix < len; ++ix) {
+            if (!isdigit((unsigned char) tp[ix]))
+                break;
+        }
+        if (len == 0 || ix < len) {
+            fprintf(stderr, "fs bypassthreshold -size: %s must be an undecorated digit string.\n", tp);
+            return EINVAL;
+        }
+        /* clamp on real overflow only, rather than on digit count, so that
+         * valid ten-digit values are passed through unchanged */
+        errno = 0;
+        value = strtol(tp, NULL, 10);
+        if (errno == ERANGE || value > 2147483647L)
+            value = 2147483647L;
+        threshold_i = (afs_int32) value;
+        blob.in = (char *) &threshold_i;
+        blob.in_size = sizeof(threshold_i);
+    } else {
+        blob.in = NULL;
+        blob.in_size = 0;
+    }
+
+    blob.out = (char *) &threshold_o;
+    blob.out_size = sizeof(threshold_o);
+    code = pioctl(0, VIOC_SETBYPASS_THRESH, &blob, 1);
+    if (code) {
+        Die(errno, NULL);
+        return 1;
+    } else {
+        printf("Cache bypass threshold %d", threshold_o);
+        if (threshold_o == -1)
+            printf(" (disabled)");
+        printf("\n");
+    }
+
+    return 0;
+}
+
+#endif
+
static int
FlushCmd(struct cmd_syndesc *as, void *arock)
{
cmd_AddParm(ts, "-fast", CMD_FLAG, CMD_OPTIONAL,
"don't check name with VLDB");
+#if defined(AFS_CACHE_BYPASS)
+ ts = cmd_CreateSyntax("bypassthreshold", BypassThresholdCmd, 0,
+ "get/set cache bypass file size threshold");
+ cmd_AddParm(ts, "-size", CMD_SINGLE, CMD_OPTIONAL, "file size");
+#endif
+
/*
defect 3069